Return-Path: X-Original-To: apmail-hive-commits-archive@www.apache.org Delivered-To: apmail-hive-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 9D90C100C3 for ; Wed, 17 Jul 2013 20:25:25 +0000 (UTC) Received: (qmail 36218 invoked by uid 500); 17 Jul 2013 20:25:24 -0000 Delivered-To: apmail-hive-commits-archive@hive.apache.org Received: (qmail 36177 invoked by uid 500); 17 Jul 2013 20:25:24 -0000 Mailing-List: contact commits-help@hive.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: hive-dev@hive.apache.org Delivered-To: mailing list commits@hive.apache.org Received: (qmail 36164 invoked by uid 99); 17 Jul 2013 20:25:23 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 17 Jul 2013 20:25:23 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 17 Jul 2013 20:25:21 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id 74D762388906; Wed, 17 Jul 2013 20:25:01 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1504265 - in /hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector: ./ expressions/ Date: Wed, 17 Jul 2013 20:25:01 -0000 To: commits@hive.apache.org From: hashutosh@apache.org X-Mailer: svnmailer-1.0.9 Message-Id: <20130717202501.74D762388906@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: hashutosh Date: Wed Jul 17 20:25:00 2013 New Revision: 1504265 URL: http://svn.apache.org/r1504265 Log: HIVE-4787 : make vectorized LOWER(), UPPER(), LENGTH() work end-to-end; support expression input for vectorized LIKE (Eric Hanson via Ashutosh Chauhan) Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStringColLikeStringScalar.java hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLower.java hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUpper.java Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java?rev=1504265&r1=1504264&r2=1504265&view=diff ============================================================================== --- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java (original) +++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java Wed Jul 17 20:25:00 2013 @@ -72,7 +72,9 @@ import org.apache.hadoop.hive.ql.plan.Ex import org.apache.hadoop.hive.ql.plan.api.OperatorType; import org.apache.hadoop.hive.ql.udf.UDFDayOfMonth; import org.apache.hadoop.hive.ql.udf.UDFHour; +import org.apache.hadoop.hive.ql.udf.UDFLength; import org.apache.hadoop.hive.ql.udf.UDFLike; +import org.apache.hadoop.hive.ql.udf.UDFLower; import org.apache.hadoop.hive.ql.udf.UDFMinute; import org.apache.hadoop.hive.ql.udf.UDFMonth; import org.apache.hadoop.hive.ql.udf.UDFOPDivide; @@ -83,6 +85,7 @@ import org.apache.hadoop.hive.ql.udf.UDF import org.apache.hadoop.hive.ql.udf.UDFOPPlus; import org.apache.hadoop.hive.ql.udf.UDFOPPositive; import org.apache.hadoop.hive.ql.udf.UDFSecond; +import org.apache.hadoop.hive.ql.udf.UDFUpper; import org.apache.hadoop.hive.ql.udf.UDFWeekOfYear; import org.apache.hadoop.hive.ql.udf.UDFYear; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; @@ -453,21 +456,80 @@ public class VectorizationContext { return getTimestampFieldExpression(cl.getSimpleName(), childExpr); } else if (cl.equals(UDFLike.class)) { return getLikeExpression(childExpr); + } else if (cl.equals(UDFLower.class)) { + return getUnaryStringExpression("StringLower", "String", childExpr); + } else if (cl.equals(UDFUpper.class)) { + return getUnaryStringExpression("StringUpper", "String", childExpr); + } else if (cl.equals(UDFLength.class)) { + return getUnaryStringExpression("StringLength", "Long", childExpr); } throw new HiveException("Udf: "+udf.getClass().getSimpleName()+", is not supported"); } + /* Return a unary string vector expression. This is used for functions like + * UPPER() and LOWER(). + */ + private VectorExpression getUnaryStringExpression(String vectorExprClassName, + String resultType, // result type name + List childExprList) throws HiveException { + + /* Create an instance of the class vectorExprClassName for the input column or expression result + * and return it. + */ + + ExprNodeDesc childExpr = childExprList.get(0); + int inputCol; + VectorExpression v1 = null; + if (childExpr instanceof ExprNodeGenericFuncDesc) { + v1 = getVectorExpression(childExpr); + inputCol = v1.getOutputColumn(); + } else if (childExpr instanceof ExprNodeColumnDesc) { + ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) childExpr; + inputCol = getInputColumnIndex(colDesc.getColumn()); + } else { + // TODO? add code to handle constant argument case + throw new HiveException("Expression not supported: "+childExpr); + } + String outputColumnType = getNormalizedTypeName(resultType); + int outputCol = ocm.allocateOutputColumn(outputColumnType); + String className = "org.apache.hadoop.hive.ql.exec.vector.expressions." + + vectorExprClassName; + VectorExpression expr; + try { + expr = (VectorExpression) Class.forName(className). + getDeclaredConstructors()[0].newInstance(inputCol, outputCol); + } catch (Exception ex) { + throw new HiveException(ex); + } + if (v1 != null) { + expr.setChildExpressions(new VectorExpression [] {v1}); + ocm.freeOutputColumn(v1.getOutputColumn()); + } + return expr; + } + private VectorExpression getLikeExpression(List childExpr) throws HiveException { ExprNodeDesc leftExpr = childExpr.get(0); ExprNodeDesc rightExpr = childExpr.get(1); + VectorExpression v1 = null; VectorExpression expr = null; + int inputCol; + ExprNodeConstantDesc constDesc; + if ((leftExpr instanceof ExprNodeColumnDesc) && (rightExpr instanceof ExprNodeConstantDesc) ) { ExprNodeColumnDesc leftColDesc = (ExprNodeColumnDesc) leftExpr; - ExprNodeConstantDesc constDesc = (ExprNodeConstantDesc) rightExpr; - int inputCol = getInputColumnIndex(leftColDesc.getColumn()); + constDesc = (ExprNodeConstantDesc) rightExpr; + inputCol = getInputColumnIndex(leftColDesc.getColumn()); + expr = (VectorExpression) new FilterStringColLikeStringScalar(inputCol, + new Text((byte[]) getScalarValue(constDesc))); + } else if ((leftExpr instanceof ExprNodeGenericFuncDesc) && + (rightExpr instanceof ExprNodeConstantDesc)) { + v1 = getVectorExpression(leftExpr); + inputCol = v1.getOutputColumn(); + constDesc = (ExprNodeConstantDesc) rightExpr; expr = (VectorExpression) new FilterStringColLikeStringScalar(inputCol, new Text((byte[]) getScalarValue(constDesc))); } @@ -475,6 +537,10 @@ public class VectorizationContext { if (expr == null) { throw new HiveException("Vector LIKE filter expression could not be initialized"); } + if (v1 != null) { + expr.setChildExpressions(new VectorExpression [] {v1}); + ocm.freeOutputColumn(v1.getOutputColumn()); + } return expr; } Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStringColLikeStringScalar.java URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStringColLikeStringScalar.java?rev=1504265&r1=1504264&r2=1504265&view=diff ============================================================================== --- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStringColLikeStringScalar.java (original) +++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStringColLikeStringScalar.java Wed Jul 17 20:25:00 2013 @@ -264,6 +264,11 @@ public class FilterStringColLikeStringSc @Override public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum]; int[] sel = batch.selected; boolean[] nullPos = inputColVector.isNull; Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java?rev=1504265&r1=1504264&r2=1504265&view=diff ============================================================================== --- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java (original) +++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java Wed Jul 17 20:25:00 2013 @@ -26,7 +26,7 @@ public class StringLength extends Vector private int colNum; private int outputColumn; - StringLength (int colNum, int outputColumn) { + public StringLength (int colNum, int outputColumn) { this.colNum = colNum; this.outputColumn = outputColumn; } @@ -34,6 +34,11 @@ public class StringLength extends Vector // Calculate the length of the UTF-8 strings in input vector and place results in output vector. @Override public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum]; LongColumnVector outV = (LongColumnVector) batch.cols[outputColumn]; int[] sel = batch.selected; @@ -127,7 +132,7 @@ public class StringLength extends Vector @Override public String getOutputType() { - return "String"; + return "Long"; } Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLower.java URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLower.java?rev=1504265&r1=1504264&r2=1504265&view=diff ============================================================================== --- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLower.java (original) +++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLower.java Wed Jul 17 20:25:00 2013 @@ -21,7 +21,7 @@ import org.apache.hadoop.hive.ql.udf.UDF import org.apache.hadoop.hive.ql.udf.IUDFUnaryString; public class StringLower extends StringUnaryUDF { - StringLower(int colNum, int outputColumn) { + public StringLower(int colNum, int outputColumn) { super(colNum, outputColumn, (IUDFUnaryString) new UDFLower()); } } Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java?rev=1504265&r1=1504264&r2=1504265&view=diff ============================================================================== --- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java (original) +++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java Wed Jul 17 20:25:00 2013 @@ -38,6 +38,11 @@ public class StringUnaryUDF extends Vect @Override public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum]; int[] sel = batch.selected; int n = batch.size; Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUpper.java URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUpper.java?rev=1504265&r1=1504264&r2=1504265&view=diff ============================================================================== --- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUpper.java (original) +++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUpper.java Wed Jul 17 20:25:00 2013 @@ -22,7 +22,7 @@ import org.apache.hadoop.hive.ql.udf.UDF import org.apache.hadoop.hive.ql.udf.IUDFUnaryString; public class StringUpper extends StringUnaryUDF { - StringUpper(int colNum, int outputColumn) { + public StringUpper(int colNum, int outputColumn) { super(colNum, outputColumn, (IUDFUnaryString) new UDFUpper()); } }