drill-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From j..@apache.org
Subject [1/2] incubator-drill git commit: DRILL-1470 : cast into varchar should recognize the length parameter in varchar. Fix casting function implementation: the length parameter should mean # of chars, not # of bytes.
Date Wed, 12 Nov 2014 00:05:04 GMT
Repository: incubator-drill
Updated Branches:
  refs/heads/master 66cf6e274 -> c305c794a


DRILL-1470 : cast into varchar should recognize the length parameter in varchar. Fix casting
function implementation: the length parameter should mean # of chars, not # of bytes.

New unit test case to verify the result from cast function.

Fix bug in cast into varchar. When target length = 0, it means we want to keep the input .

code clean up.

Include change for varbinary cast as well.


Project: http://git-wip-us.apache.org/repos/asf/incubator-drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-drill/commit/73f9827b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-drill/tree/73f9827b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-drill/diff/73f9827b

Branch: refs/heads/master
Commit: 73f9827bff061912d4cc81169488eda4dfa17234
Parents: 66cf6e2
Author: Jinfeng Ni <jni@maprtech.com>
Authored: Tue Nov 4 07:22:43 2014 -0800
Committer: Jinfeng Ni <jni@maprtech.com>
Committed: Tue Nov 11 13:14:59 2014 -0800

----------------------------------------------------------------------
 exec/java-exec/src/main/codegen/data/Casts.tdd  |  2 ++
 .../CastFunctionsSrcVarLenTargetVarLen.java     | 27 ++++++++++++++++--
 .../exec/expr/ExpressionTreeMaterializer.java   | 14 ++++++----
 .../org/apache/drill/TestExampleQueries.java    | 29 ++++++++++++++++++++
 4 files changed, 64 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/73f9827b/exec/java-exec/src/main/codegen/data/Casts.tdd
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/codegen/data/Casts.tdd b/exec/java-exec/src/main/codegen/data/Casts.tdd
index a4f5fb1..cdd6218 100644
--- a/exec/java-exec/src/main/codegen/data/Casts.tdd
+++ b/exec/java-exec/src/main/codegen/data/Casts.tdd
@@ -50,7 +50,9 @@
     {from: "Float8", to: "VarBinary", major: "TargetVarlen", javaType: "Double", bufferLength:"100"},
    
     
     {from: "VarBinary", to: "VarChar", major: "SrcVarlenTargetVarlen"},
+    {from: "VarChar", to: "VarChar", major: "SrcVarlenTargetVarlen"},
     {from: "VarChar", to: "VarBinary", major: "SrcVarlenTargetVarlen"},
+    {from: "VarBinary", to: "VarBinary", major: "SrcVarlenTargetVarlen"},
 
     {from: "Date", to: "TimeStamp", major: "Date"},
     {from: "Date", to: "TimeStampTZ", major: "Date"},

http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/73f9827b/exec/java-exec/src/main/codegen/templates/CastFunctionsSrcVarLenTargetVarLen.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/codegen/templates/CastFunctionsSrcVarLenTargetVarLen.java
b/exec/java-exec/src/main/codegen/templates/CastFunctionsSrcVarLenTargetVarLen.java
index 0096be8..cd8f7bd 100644
--- a/exec/java-exec/src/main/codegen/templates/CastFunctionsSrcVarLenTargetVarLen.java
+++ b/exec/java-exec/src/main/codegen/templates/CastFunctionsSrcVarLenTargetVarLen.java
@@ -47,22 +47,43 @@ public class Cast${type.from}${type.to} implements DrillSimpleFunc{
 
   @Param ${type.from}Holder in;
   @Param BigIntHolder length;
-  @Workspace ByteBuf buffer;     
   @Output ${type.to}Holder out;
 
   public void setup(RecordBatch incoming) {
   }
 
   public void eval() {
+
+  <#if type.to == 'VarChar'>
+
+    //Do 1st scan to counter # of character in string.
+    int charCount = org.apache.drill.exec.expr.fn.impl.StringFunctionUtil.getUTF8CharLength(in.buffer,
in.start, in.end);
+
+    //if input length <= target_type length, do nothing
+    //else if target length = 0, it means cast wants all the characters in the input. Do
nothing.
+    //else truncate based on target_type length.
+    out.buffer = in.buffer;
+    out.start =  in.start;
+    if (charCount <= length.value || length.value == 0 ) {
+      out.end = in.end;
+    } else {
+      out.end = org.apache.drill.exec.expr.fn.impl.StringFunctionUtil.getUTF8CharPosition(in.buffer,
in.start, in.end, (int)length.value);
+    }
+
+  <#elseif type.to == 'VarBinary'>
+
     //if input length <= target_type length, do nothing
-    //else truncate based on target_type length.     
+    //else if target length = 0, it means cast wants all the bytes in the input. Do nothing.
+    //else truncate based on target_type length.
     out.buffer = in.buffer;
     out.start =  in.start;
-    if (in.end - in.start <= length.value) {
+    if (in.end - in.start <= length.value || length.value == 0 ) {
       out.end = in.end;
     } else {
       out.end = out.start + (int) length.value;
     }
+  </#if>
+
   }      
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/73f9827b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/ExpressionTreeMaterializer.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/ExpressionTreeMaterializer.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/ExpressionTreeMaterializer.java
index 9cec3a4..a2d5e10 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/ExpressionTreeMaterializer.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/ExpressionTreeMaterializer.java
@@ -523,7 +523,7 @@ public class ExpressionTreeMaterializer {
       MajorType newMajor = e.getMajorType();
       MinorType newMinor = input.getMajorType().getMinorType();
 
-      if (castEqual(e.getPosition(), newMajor, input.getMajorType())) {
+      if (castEqual(e.getPosition(), input.getMajorType(), newMajor)) {
         return input; // don't do pointless cast.
       }
 
@@ -601,11 +601,15 @@ public class ExpressionTreeMaterializer {
       case VAR16CHAR:
       case VARBINARY:
       case VARCHAR:
-        if (to.getWidth() < from.getWidth() && to.getWidth() > 0) {
-          this.errorCollector.addGeneralError(pos, "Casting from a longer variable length
type to a shorter variable length type is not currently supported.");
-          return false;
-        } else {
+        // We could avoid redundant cast:
+        // 1) when "to" length is no smaller than "from" length and "from" length is known
(>0),
+        // 2) or "to" length is unknown (0 means unknown length?).
+        // Case 1 and case 2 mean that cast will do nothing.
+        // In other cases, cast is required to trim the "from" according to "to" length.
+        if ( (to.getWidth() >= from.getWidth() && from.getWidth() > 0) || to.getWidth()
== 0) {
           return true;
+        } else {
+          return false;
         }
 
       default:

http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/73f9827b/exec/java-exec/src/test/java/org/apache/drill/TestExampleQueries.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/TestExampleQueries.java b/exec/java-exec/src/test/java/org/apache/drill/TestExampleQueries.java
index cd70b3c..e134a73 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/TestExampleQueries.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/TestExampleQueries.java
@@ -542,4 +542,33 @@ public class TestExampleQueries extends BaseTestQuery{
     assertEquals(String.format("Received unexepcted number of rows in output: expected=%d,
received=%s",
         expectedRecordCount, actualRecordCount), expectedRecordCount, actualRecordCount);
   }
+
+  @Test // DRILL-1470
+  public void testCastToVarcharWithLength() throws Exception {
+    // cast from varchar with unknown length to a fixed length.
+    int actualRecordCount = testSql("select first_name from cp.`employee.json` where cast(first_name
as varchar(2)) = 'Sh'");
+    int expectedRecordCount = 27;
+    assertEquals(String.format("Received unexepcted number of rows in output: expected=%d,
received=%s",
+        expectedRecordCount, actualRecordCount), expectedRecordCount, actualRecordCount);
+
+    // cast from varchar with unknown length to varchar(5), then to varchar(10), then to
varchar(2). Should produce the same result as the first query.
+    actualRecordCount = testSql("select first_name from cp.`employee.json` where cast(cast(cast(first_name
as varchar(5)) as varchar(10)) as varchar(2)) = 'Sh'");
+    expectedRecordCount = 27;
+    assertEquals(String.format("Received unexepcted number of rows in output: expected=%d,
received=%s",
+        expectedRecordCount, actualRecordCount), expectedRecordCount, actualRecordCount);
+
+
+    // this long nested cast expression should be essentially equal to substr(), meaning
the query should return every row in the table.
+    actualRecordCount = testSql("select first_name from cp.`employee.json` where cast(cast(cast(first_name
as varchar(5)) as varchar(10)) as varchar(2)) = substr(first_name, 1, 2)");
+    expectedRecordCount = 1155;
+    assertEquals(String.format("Received unexepcted number of rows in output: expected=%d,
received=%s",
+        expectedRecordCount, actualRecordCount), expectedRecordCount, actualRecordCount);
+
+    // cast is applied to a column from parquet file.
+    actualRecordCount = testSql("select n_name from cp.`tpch/nation.parquet` where cast(n_name
as varchar(2)) = 'UN'");
+    expectedRecordCount = 2;
+    assertEquals(String.format("Received unexepcted number of rows in output: expected=%d,
received=%s",
+        expectedRecordCount, actualRecordCount), expectedRecordCount, actualRecordCount);
+  }
+
 }


Mime
View raw message