hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From hashut...@apache.org
Subject svn commit: r1525674 - in /hive/branches/vectorization/ql/src: java/org/apache/hadoop/hive/ql/exec/vector/ java/org/apache/hadoop/hive/ql/exec/vector/expressions/ java/org/apache/hadoop/hive/ql/optimizer/physical/ test/org/apache/hadoop/hive/ql/exec/vector/expressions/
Date Mon, 23 Sep 2013 19:00:19 GMT
Author: hashutosh
Date: Mon Sep 23 19:00:18 2013
New Revision: 1525674

URL: http://svn.apache.org/r1525674
Log:
HIVE-4823 : implement vectorized TRIM(), LTRIM(), RTRIM() (Eric Hanson via Ashutosh Chauhan)

Added:
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLTrim.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringRTrim.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringTrim.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDFDirect.java
Modified:
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
    hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java?rev=1525674&r1=1525673&r2=1525674&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
(original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
Mon Sep 23 19:00:18 2013
@@ -83,6 +83,7 @@ import org.apache.hadoop.hive.ql.plan.ap
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFConcat;
 import org.apache.hadoop.hive.ql.udf.UDFDayOfMonth;
 import org.apache.hadoop.hive.ql.udf.UDFHour;
+import org.apache.hadoop.hive.ql.udf.UDFLTrim;
 import org.apache.hadoop.hive.ql.udf.UDFLength;
 import org.apache.hadoop.hive.ql.udf.UDFLike;
 import org.apache.hadoop.hive.ql.udf.UDFMinute;
@@ -94,8 +95,10 @@ import org.apache.hadoop.hive.ql.udf.UDF
 import org.apache.hadoop.hive.ql.udf.UDFOPNegative;
 import org.apache.hadoop.hive.ql.udf.UDFOPPlus;
 import org.apache.hadoop.hive.ql.udf.UDFOPPositive;
+import org.apache.hadoop.hive.ql.udf.UDFRTrim;
 import org.apache.hadoop.hive.ql.udf.UDFSecond;
 import org.apache.hadoop.hive.ql.udf.UDFSubstr;
+import org.apache.hadoop.hive.ql.udf.UDFTrim;
 import org.apache.hadoop.hive.ql.udf.UDFWeekOfYear;
 import org.apache.hadoop.hive.ql.udf.UDFYear;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
@@ -458,6 +461,7 @@ public class VectorizationContext {
     } else if (udf instanceof GenericUDFConcat) {
       return getConcatExpression(childExpr);
     }
+
     throw new HiveException("Udf: "+udf.getClass().getSimpleName()+", is not supported");
   }
 
@@ -514,6 +518,12 @@ public class VectorizationContext {
       return getUnaryStringExpression("StringLength", "Long", childExpr);
     } else if (cl.equals(UDFSubstr.class)) {
       return getSubstrExpression(childExpr);
+    } else if (cl.equals(UDFLTrim.class)) {
+      return getUnaryStringExpression("StringLTrim", "String", childExpr);
+    } else if (cl.equals(UDFRTrim.class)) {
+      return getUnaryStringExpression("StringRTrim", "String", childExpr);
+    } else if (cl.equals(UDFTrim.class)) {
+      return getUnaryStringExpression("StringTrim", "String", childExpr);
     }
 
     throw new HiveException("Udf: "+udf.getClass().getSimpleName()+", is not supported");

Added: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLTrim.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLTrim.java?rev=1525674&view=auto
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLTrim.java
(added)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLTrim.java
Mon Sep 23 19:00:18 2013
@@ -0,0 +1,62 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+
+/**
+ * Vectorized LTRIM(): removes leading blank (0x20) bytes from each string in a column.
+ */
+public class StringLTrim extends StringUnaryUDFDirect {
+  private static final long serialVersionUID = 1L;
+
+  public StringLTrim(int inputColumn, int outputColumn) {
+    super(inputColumn, outputColumn);
+  }
+
+  public StringLTrim() {
+    super();
+  }
+
+  /**
+   * LTRIM element i of the vector, and place the result in outV.
+   * Null handling is ignored here; the caller is expected to skip null entries.
+   *
+   * @param outV output column that receives the trimmed value at index i
+   * @param vector byte arrays holding the input strings
+   * @param start offset of each string within its byte array
+   * @param length length in bytes of each string
+   * @param i index of the element to trim
+   */
+  @Override
+  protected void func(BytesColumnVector outV, byte[][] vector, int[] start, int[] length, int i) {
+    final byte[] bytes = vector[i];
+    final int end = start[i] + length[i];
+    int j = start[i];
+
+    // Skip leading blanks, stopping at the end of this string (start + length). Bounding the
+    // scan by the byte array's own length would read past the string whenever the array is
+    // longer than the value or the value does not begin at offset 0.
+    while (j < end && bytes[j] == 0x20) {
+      j++;
+    }
+
+    outV.setVal(i, bytes, j, end - j);
+  }
+}

Added: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringRTrim.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringRTrim.java?rev=1525674&view=auto
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringRTrim.java
(added)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringRTrim.java
Mon Sep 23 19:00:18 2013
@@ -0,0 +1,54 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+
+/**
+ * Vectorized RTRIM(): removes trailing blank (0x20) bytes from each string in a column.
+ */
+public class StringRTrim extends StringUnaryUDFDirect {
+  private static final long serialVersionUID = 1L;
+
+  public StringRTrim() {
+    super();
+  }
+
+  public StringRTrim(int inputColumn, int outputColumn) {
+    super(inputColumn, outputColumn);
+  }
+
+  /**
+   * Strips trailing blanks from element i of the input and stores the result in outV.
+   * Nulls are not considered here; that is left to the caller.
+   */
+  protected void func(BytesColumnVector outV, byte[][] vector, int[] start, int[] length, int i) {
+    final byte[] bytes = vector[i];
+    final int first = start[i];
+
+    // Move an exclusive end index backwards while the byte before it is a blank.
+    int end = first + length[i];
+    while (end > first && bytes[end - 1] == 0x20) {
+      end--;
+    }
+
+    // Emit bytes [first, end) as element i of the output.
+    outV.setVal(i, bytes, first, end - first);
+  }
+}

Added: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringTrim.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringTrim.java?rev=1525674&view=auto
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringTrim.java
(added)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringTrim.java
Mon Sep 23 19:00:18 2013
@@ -0,0 +1,58 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+
+/**
+ * Vectorized TRIM(): removes leading and trailing blank (0x20) bytes from each string.
+ */
+public class StringTrim extends StringUnaryUDFDirect {
+  private static final long serialVersionUID = 1L;
+
+  public StringTrim() {
+    super();
+  }
+
+  public StringTrim(int inputColumn, int outputColumn) {
+    super(inputColumn, outputColumn);
+  }
+
+  /**
+   * Strips blanks from both ends of element i of the input and stores the result in outV.
+   * Nulls are not considered here; that is left to the caller.
+   */
+  protected void func(BytesColumnVector outV, byte[][] vector, int[] start, int[] length, int i) {
+    final byte[] bytes = vector[i];
+    int begin = start[i];
+    int end = begin + length[i];  // exclusive
+
+    // Advance past leading blanks.
+    while (begin < end && bytes[begin] == 0x20) {
+      begin++;
+    }
+
+    // Back up over trailing blanks; never crosses begin, so an all-blank value yields length 0.
+    while (begin < end && bytes[end - 1] == 0x20) {
+      end--;
+    }
+
+    outV.setVal(i, bytes, begin, end - begin);
+  }
+}

Added: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDFDirect.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDFDirect.java?rev=1525674&view=auto
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDFDirect.java
(added)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDFDirect.java
Mon Sep 23 19:00:18 2013
@@ -0,0 +1,140 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+/**
+ * Superclass for unary string functions that read one string column and write one
+ * string column, computing each output value directly from the input bytes. It suits
+ * fast implementations of TRIM(), LTRIM(), and RTRIM().
+ */
+public abstract class StringUnaryUDFDirect extends VectorExpression {
+  private static final long serialVersionUID = 1L;
+  int inputColumn;
+  int outputColumn;
+
+  public StringUnaryUDFDirect() {
+    super();
+  }
+
+  public StringUnaryUDFDirect(int inputColumn, int outputColumn) {
+    this.inputColumn = inputColumn;
+    this.outputColumn = outputColumn;
+  }
+
+  /**
+   * Computes the result for element i of the input and stores it in outV.
+   * Called only for entries that evaluate() does not treat as null.
+   */
+  protected abstract void func(
+      BytesColumnVector outV, byte[][] vector, int[] start, int[] length, int i);
+
+  /**
+   * Applies func() to the input column of the batch, honoring the batch's selection
+   * vector and the input's repeating and null flags, and propagates those flags to
+   * the output column.
+   */
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    final BytesColumnVector inCol = (BytesColumnVector) batch.cols[inputColumn];
+    final int[] selected = batch.selected;
+    final int size = batch.size;
+    final byte[][] bytes = inCol.vector;
+    final int[] starts = inCol.start;
+    final int[] lengths = inCol.length;
+    final BytesColumnVector outCol = (BytesColumnVector) batch.cols[outputColumn];
+    outCol.initBuffer();
+
+    if (size == 0) {
+      // Empty batch: nothing to compute.
+      return;
+    }
+
+    // With nulls present, func() is skipped for null entries because their data
+    // may be undefined.
+    final boolean hasNulls = !inCol.noNulls;
+    outCol.noNulls = !hasNulls;
+
+    if (inCol.isRepeating) {
+      // Only entry 0 is processed when the input is repeating.
+      outCol.isRepeating = true;
+      if (!hasNulls) {
+        func(outCol, bytes, starts, lengths, 0);
+      } else {
+        outCol.isNull[0] = inCol.isNull[0];
+        if (!inCol.isNull[0]) {
+          func(outCol, bytes, starts, lengths, 0);
+        }
+      }
+      return;
+    }
+
+    if (batch.selectedInUse) {
+      for (int k = 0; k != size; k++) {
+        final int row = selected[k];
+        if (hasNulls) {
+          outCol.isNull[row] = inCol.isNull[row];
+          if (inCol.isNull[row]) {
+            continue;
+          }
+        }
+        func(outCol, bytes, starts, lengths, row);
+      }
+    } else {
+      if (hasNulls) {
+        System.arraycopy(inCol.isNull, 0, outCol.isNull, 0, size);
+      }
+      for (int row = 0; row != size; row++) {
+        if (hasNulls && inCol.isNull[row]) {
+          continue;
+        }
+        func(outCol, bytes, starts, lengths, row);
+      }
+    }
+    outCol.isRepeating = false;
+  }
+
+  @Override
+  public String getOutputType() {
+    return "String";
+  }
+
+  @Override
+  public int getOutputColumn() {
+    return outputColumn;
+  }
+
+  public void setOutputColumn(int outputColumn) {
+    this.outputColumn = outputColumn;
+  }
+
+  public int getInputColumn() {
+    return inputColumn;
+  }
+
+  public void setInputColumn(int inputColumn) {
+    this.inputColumn = inputColumn;
+  }
+}
\ No newline at end of file

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java?rev=1525674&r1=1525673&r2=1525674&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java
(original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java
Mon Sep 23 19:00:18 2013
@@ -37,7 +37,7 @@ public abstract class VectorExpression i
    * This is the primary method to implement expression logic.
    * @param vrg
    */
-  public abstract void evaluate(VectorizedRowBatch vrg);
+  public abstract void evaluate(VectorizedRowBatch batch);
 
   /**
    * Returns the index of the output column in the array

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java?rev=1525674&r1=1525673&r2=1525674&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
(original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
Mon Sep 23 19:00:18 2013
@@ -70,6 +70,7 @@ import org.apache.hadoop.hive.ql.plan.Pa
 import org.apache.hadoop.hive.ql.plan.api.OperatorType;
 import org.apache.hadoop.hive.ql.udf.UDFDayOfMonth;
 import org.apache.hadoop.hive.ql.udf.UDFHour;
+import org.apache.hadoop.hive.ql.udf.UDFLTrim;
 import org.apache.hadoop.hive.ql.udf.UDFLength;
 import org.apache.hadoop.hive.ql.udf.UDFLike;
 import org.apache.hadoop.hive.ql.udf.UDFMinute;
@@ -80,8 +81,10 @@ import org.apache.hadoop.hive.ql.udf.UDF
 import org.apache.hadoop.hive.ql.udf.UDFOPNegative;
 import org.apache.hadoop.hive.ql.udf.UDFOPPlus;
 import org.apache.hadoop.hive.ql.udf.UDFOPPositive;
+import org.apache.hadoop.hive.ql.udf.UDFRTrim;
 import org.apache.hadoop.hive.ql.udf.UDFSecond;
 import org.apache.hadoop.hive.ql.udf.UDFSubstr;
+import org.apache.hadoop.hive.ql.udf.UDFTrim;
 import org.apache.hadoop.hive.ql.udf.UDFWeekOfYear;
 import org.apache.hadoop.hive.ql.udf.UDFYear;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
@@ -161,6 +164,10 @@ public class Vectorizer implements Physi
 
     supportedGenericUDFs.add(UDFLike.class);
     supportedGenericUDFs.add(UDFSubstr.class);
+    supportedGenericUDFs.add(UDFLTrim.class);
+    supportedGenericUDFs.add(UDFRTrim.class);
+    supportedGenericUDFs.add(UDFTrim.class);
+
     supportedGenericUDFs.add(GenericUDFLower.class);
     supportedGenericUDFs.add(GenericUDFUpper.class);
     supportedGenericUDFs.add(GenericUDFConcat.class);

Modified: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java?rev=1525674&r1=1525673&r2=1525674&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java
(original)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java
Mon Sep 23 19:00:18 2013
@@ -55,6 +55,10 @@ public class TestVectorStringExpressions
   private static byte[] mixedUpUpper;
   private static byte[] multiByte;
   private static byte[] mixPercentPattern;
+  private static byte[] blanksLeft;
+  private static byte[] blanksRight;
+  private static byte[] blanksBoth;
+  private static byte[] blankString;
 
   static {
     try {
@@ -72,6 +76,10 @@ public class TestVectorStringExpressions
       mixPercentPattern = "mix%".getBytes("UTF-8"); // for use as wildcard pattern to test
LIKE
       multiByte = new byte[100];
       addMultiByteChars(multiByte);
+      blanksLeft = "  foo".getBytes("UTF-8");
+      blanksRight = "foo  ".getBytes("UTF-8");
+      blanksBoth = "  foo  ".getBytes("UTF-8");
+      blankString = "  ".getBytes("UTF-8");
     } catch (UnsupportedEncodingException e) {
       e.printStackTrace();
     }
@@ -1405,4 +1413,112 @@ public class TestVectorStringExpressions
         )
     );
   }
+
+  /** LTRIM must remove leading blanks only; trailing blanks are preserved. */
+  @Test
+  public void testVectorLTrim() {
+    VectorizedRowBatch b = makeTrimBatch();
+    VectorExpression expr = new StringLTrim(0, 1);
+    expr.evaluate(b);
+    BytesColumnVector outV = (BytesColumnVector) b.cols[1];
+
+    // empty input -> empty output (use the real output range so the check is not vacuous)
+    Assert.assertEquals(0,
+        StringExpr.compare(emptyString, 0, 0, outV.vector[0], outV.start[0], outV.length[0]));
+    // "  foo" -> "foo"
+    Assert.assertEquals(0,
+        StringExpr.compare(blanksLeft, 2, 3, outV.vector[1], outV.start[1], outV.length[1]));
+    // "foo  " -> "foo  "
+    Assert.assertEquals(0,
+        StringExpr.compare(blanksRight, 0, 5, outV.vector[2], outV.start[2], outV.length[2]));
+    // "  foo  " -> "foo  "
+    Assert.assertEquals(0,
+        StringExpr.compare(blanksBoth, 2, 5, outV.vector[3], outV.start[3], outV.length[3]));
+    // red (first 3 bytes) is expected to come through unchanged
+    Assert.assertEquals(0,
+        StringExpr.compare(red, 0, 3, outV.vector[4], outV.start[4], outV.length[4]));
+    // "  " -> empty output
+    Assert.assertEquals(0,
+        StringExpr.compare(blankString, 0, 0, outV.vector[5], outV.start[5], outV.length[5]));
+  }
+
+  /** RTRIM must remove trailing blanks only; leading blanks are preserved. */
+  @Test
+  public void testVectorRTrim() {
+    VectorizedRowBatch b = makeTrimBatch();
+    VectorExpression expr = new StringRTrim(0, 1);
+    expr.evaluate(b);
+    BytesColumnVector outV = (BytesColumnVector) b.cols[1];
+
+    // empty input -> empty output (use the real output range so the check is not vacuous)
+    Assert.assertEquals(0,
+        StringExpr.compare(emptyString, 0, 0, outV.vector[0], outV.start[0], outV.length[0]));
+    // "  foo" -> "  foo"
+    Assert.assertEquals(0,
+        StringExpr.compare(blanksLeft, 0, 5, outV.vector[1], outV.start[1], outV.length[1]));
+    // "foo  " -> "foo"
+    Assert.assertEquals(0,
+        StringExpr.compare(blanksRight, 0, 3, outV.vector[2], outV.start[2], outV.length[2]));
+    // "  foo  " -> "  foo"
+    Assert.assertEquals(0,
+        StringExpr.compare(blanksBoth, 0, 5, outV.vector[3], outV.start[3], outV.length[3]));
+    // red (first 3 bytes) is expected to come through unchanged
+    Assert.assertEquals(0,
+        StringExpr.compare(red, 0, 3, outV.vector[4], outV.start[4], outV.length[4]));
+    // "  " -> empty output
+    Assert.assertEquals(0,
+        StringExpr.compare(blankString, 0, 0, outV.vector[5], outV.start[5], outV.length[5]));
+  }
+
+  /** TRIM must remove blanks from both the left and the right side. */
+  @Test
+  public void testVectorTrim() {
+    VectorizedRowBatch b = makeTrimBatch();
+    VectorExpression expr = new StringTrim(0, 1);
+    expr.evaluate(b);
+    BytesColumnVector outV = (BytesColumnVector) b.cols[1];
+
+    // empty input -> empty output (use the real output range so the check is not vacuous)
+    Assert.assertEquals(0,
+        StringExpr.compare(emptyString, 0, 0, outV.vector[0], outV.start[0], outV.length[0]));
+    // "  foo" -> "foo"
+    Assert.assertEquals(0,
+        StringExpr.compare(blanksLeft, 2, 3, outV.vector[1], outV.start[1], outV.length[1]));
+    // "foo  " -> "foo"
+    Assert.assertEquals(0,
+        StringExpr.compare(blanksRight, 0, 3, outV.vector[2], outV.start[2], outV.length[2]));
+    // "  foo  " -> "foo"
+    Assert.assertEquals(0,
+        StringExpr.compare(blanksBoth, 2, 3, outV.vector[3], outV.start[3], outV.length[3]));
+    // red (first 3 bytes) is expected to come through unchanged
+    Assert.assertEquals(0,
+        StringExpr.compare(red, 0, 3, outV.vector[4], outV.start[4], outV.length[4]));
+    // "  " -> empty output
+    Assert.assertEquals(0,
+        StringExpr.compare(blankString, 0, 0, outV.vector[5], outV.start[5], outV.length[5]));
+  }
+
+  /**
+   * Make a batch to test the trim functions: column 0 is the input holding six strings
+   * (empty, blanks on the left, on the right, on both sides, the value of red, and
+   * all blanks) and column 1 is the output.
+   */
+  private VectorizedRowBatch makeTrimBatch() {
+    VectorizedRowBatch b = new VectorizedRowBatch(2);
+    BytesColumnVector inV = new BytesColumnVector();
+    BytesColumnVector outV = new BytesColumnVector();
+    b.cols[0] = inV;
+    b.cols[1] = outV;
+    inV.setRef(0, emptyString, 0, 0);
+    inV.setRef(1, blanksLeft, 0, blanksLeft.length);
+    inV.setRef(2, blanksRight, 0, blanksRight.length);
+    inV.setRef(3, blanksBoth, 0, blanksBoth.length);
+    inV.setRef(4, red, 0, red.length);
+    inV.setRef(5, blankString, 0, blankString.length);
+
+    // Rows 0..5 are populated, so the batch holds 6 rows; a smaller size would leave the
+    // all-blank row unevaluated.
+    b.size = 6;
+    return b;
+  }
  }



Mime
View raw message