From: mmccline@apache.org
To: commits@hive.apache.org
Date: Thu, 22 Dec 2016 08:32:38 -0000
Subject: [09/10] hive git commit: HIVE-15335: Fast Decimal (Matt McCline, reviewed by Sergey Shelukhin, Prasanth Jayachandran, Owen O'Malley)

http://git-wip-us.apache.org/repos/asf/hive/blob/4ba713cc/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index d2f5408..f6b6447 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -383,20 +383,20 @@ public class VectorizationContext {
     //Vectorized row batch for processing. The index in the row batch is
     //equal to the index in this array plus initialOutputCol.
     //Start with size 100 and double when needed.
-    private String [] outputColumnsTypes = new String[100];
+    private String [] scratchVectorTypeNames = new String[100];
     private final Set usedOutputColumns = new HashSet();
-    int allocateOutputColumn(String hiveTypeName) throws HiveException {
+    int allocateOutputColumn(TypeInfo typeInfo) throws HiveException {
       if (initialOutputCol < 0) {
-        // This is a test
+        // This is a test calling.
        return 0;
      }
-      // We need to differentiate DECIMAL columns by their precision and scale...
- String normalizedTypeName = getNormalizedName(hiveTypeName); - int relativeCol = allocateOutputColumnInternal(normalizedTypeName); - // LOG.info("allocateOutputColumn for hiveTypeName " + hiveTypeName + " column " + (initialOutputCol + relativeCol)); + // CONCERN: We currently differentiate DECIMAL columns by their precision and scale..., + // which could lead to a lot of extra unnecessary scratch columns. + String vectorTypeName = getScratchName(typeInfo); + int relativeCol = allocateOutputColumnInternal(vectorTypeName); return initialOutputCol + relativeCol; } @@ -405,7 +405,7 @@ public class VectorizationContext { // Re-use an existing, available column of the same required type. if (usedOutputColumns.contains(i) || - !(outputColumnsTypes)[i].equalsIgnoreCase(columnType)) { + !(scratchVectorTypeNames)[i].equalsIgnoreCase(columnType)) { continue; } //Use i @@ -413,16 +413,16 @@ public class VectorizationContext { return i; } //Out of allocated columns - if (outputColCount < outputColumnsTypes.length) { + if (outputColCount < scratchVectorTypeNames.length) { int newIndex = outputColCount; - outputColumnsTypes[outputColCount++] = columnType; + scratchVectorTypeNames[outputColCount++] = columnType; usedOutputColumns.add(newIndex); return newIndex; } else { //Expand the array - outputColumnsTypes = Arrays.copyOf(outputColumnsTypes, 2*outputColCount); + scratchVectorTypeNames = Arrays.copyOf(scratchVectorTypeNames, 2*outputColCount); int newIndex = outputColCount; - outputColumnsTypes[outputColCount++] = columnType; + scratchVectorTypeNames[outputColCount++] = columnType; usedOutputColumns.add(newIndex); return newIndex; } @@ -448,8 +448,8 @@ public class VectorizationContext { } } - public int allocateScratchColumn(String hiveTypeName) throws HiveException { - return ocm.allocateOutputColumn(hiveTypeName); + public int allocateScratchColumn(TypeInfo typeInfo) throws HiveException { + return ocm.allocateOutputColumn(typeInfo); } public int[] currentScratchColumns() { @@ -1044,7 +1044,7 @@ public class VectorizationContext { } int outCol = -1; if (mode == VectorExpressionDescriptor.Mode.PROJECTION) { - outCol = ocm.allocateOutputColumn(typeName); + outCol = ocm.allocateOutputColumn(typeInfo); } if (constantValue == null) { return new ConstantVectorExpression(outCol, typeName, true); @@ -1286,24 +1286,26 @@ public class VectorizationContext { // Additional argument is needed, which is the outputcolumn. Object [] newArgs = null; try { - String outType; - - // Special handling for decimal because decimal types need scale and precision parameter. - // This special handling should be avoided by using returnType uniformly for all cases. - if (returnType != null) { - outType = getNormalizedName(returnType.getTypeName()).toLowerCase(); - if (outType == null) { - throw new HiveException("No vector type for type name " + returnType); + String returnTypeName; + if (returnType == null) { + returnTypeName = ((VectorExpression) vclass.newInstance()).getOutputType().toLowerCase(); + if (returnTypeName.equals("long")) { + returnTypeName = "bigint"; } + returnType = TypeInfoUtils.getTypeInfoFromTypeString(returnTypeName); } else { - outType = ((VectorExpression) vclass.newInstance()).getOutputType(); + returnTypeName = returnType.getTypeName(); } - int outputCol = ocm.allocateOutputColumn(outType); + + // Special handling for decimal because decimal types need scale and precision parameter. + // This special handling should be avoided by using returnType uniformly for all cases. 
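
// The scratch-column manager above reuses a free output column whose scratch
// type matches before allocating a new one, and doubles its type-name array
// when all slots are taken. A minimal self-contained sketch of that
// reuse-or-grow pattern (illustrative names, not the actual Hive class):

import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

class ScratchColumns {
  private String[] typeNames = new String[4];          // scratch type per column
  private int count = 0;                               // columns allocated so far
  private final Set<Integer> inUse = new HashSet<>();  // columns currently handed out

  int allocate(String typeName) {
    // First try to reuse a free column of the same scratch type.
    for (int i = 0; i < count; i++) {
      if (!inUse.contains(i) && typeNames[i].equalsIgnoreCase(typeName)) {
        inUse.add(i);
        return i;
      }
    }
    // Otherwise append a new column, doubling the backing array when full.
    if (count == typeNames.length) {
      typeNames = Arrays.copyOf(typeNames, 2 * count);
    }
    typeNames[count] = typeName;
    inUse.add(count);
    return count++;
  }

  void free(int i) {
    inUse.remove(i);  // column becomes reusable for the same scratch type
  }
}
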
+ int outputCol = ocm.allocateOutputColumn(returnType); + newArgs = Arrays.copyOf(args, numParams); newArgs[numParams-1] = outputCol; ve = (VectorExpression) ctor.newInstance(newArgs); - ve.setOutputType(outType); + ve.setOutputType(returnTypeName); } catch (Exception ex) { throw new HiveException("Could not instantiate " + vclass.getSimpleName() + " with arguments " + getNewInstanceArgumentString(newArgs) + ", exception: " + StringUtils.stringifyException(ex)); @@ -1398,7 +1400,7 @@ public class VectorizationContext { inputColumns[i++] = ve.getOutputColumn(); } - int outColumn = ocm.allocateOutputColumn(returnType.getTypeName()); + int outColumn = ocm.allocateOutputColumn(returnType); VectorCoalesce vectorCoalesce = new VectorCoalesce(inputColumns, outColumn); vectorCoalesce.setOutputType(returnType.getTypeName()); vectorCoalesce.setChildExpressions(vectorChildren); @@ -1425,7 +1427,7 @@ public class VectorizationContext { inputColumns[i++] = ve.getOutputColumn(); } - int outColumn = ocm.allocateOutputColumn(returnType.getTypeName()); + int outColumn = ocm.allocateOutputColumn(returnType); VectorElt vectorElt = new VectorElt(inputColumns, outColumn); vectorElt.setOutputType(returnType.getTypeName()); vectorElt.setChildExpressions(vectorChildren); @@ -1607,7 +1609,7 @@ public class VectorizationContext { // Create a single child representing the scratch column where we will // generate the serialized keys of the batch. - int scratchBytesCol = ocm.allocateOutputColumn("string"); + int scratchBytesCol = ocm.allocateOutputColumn(TypeInfoFactory.stringTypeInfo); Class cl = (mode == VectorExpressionDescriptor.Mode.FILTER ? FilterStructColumnInList.class : StructColumnInList.class); @@ -1729,6 +1731,20 @@ public class VectorizationContext { return (byte[]) o; } + private PrimitiveCategory getAnyIntegerPrimitiveCategoryFromUdfClass(Class udfClass) { + if (udfClass.equals(UDFToByte.class)) { + return PrimitiveCategory.BYTE; + } else if (udfClass.equals(UDFToShort.class)) { + return PrimitiveCategory.SHORT; + } else if (udfClass.equals(UDFToInteger.class)) { + return PrimitiveCategory.INT; + } else if (udfClass.equals(UDFToLong.class)) { + return PrimitiveCategory.LONG; + } else { + throw new RuntimeException("Unexpected any integery UDF class " + udfClass.getName()); + } + } + /** * Invoke special handling for expressions that can't be vectorized by regular * descriptor based lookup. @@ -1738,7 +1754,9 @@ public class VectorizationContext { Class cl = udf.getUdfClass(); VectorExpression ve = null; if (isCastToIntFamily(cl)) { - ve = getCastToLongExpression(childExpr); + PrimitiveCategory integerPrimitiveCategory = + getAnyIntegerPrimitiveCategoryFromUdfClass(cl); + ve = getCastToLongExpression(childExpr, integerPrimitiveCategory); } else if (cl.equals(UDFToBoolean.class)) { ve = getCastToBoolean(childExpr); } else if (isCastToFloatFamily(cl)) { @@ -1838,7 +1856,8 @@ public class VectorizationContext { } } - private Long castConstantToLong(Object scalar, TypeInfo type) throws HiveException { + private Long castConstantToLong(Object scalar, TypeInfo type, + PrimitiveCategory integerPrimitiveCategory) throws HiveException { if (null == scalar) { return null; } @@ -1854,7 +1873,36 @@ public class VectorizationContext { return ((Number) scalar).longValue(); case DECIMAL: HiveDecimal decimalVal = (HiveDecimal) scalar; - return decimalVal.longValueExact(); + switch (integerPrimitiveCategory) { + case BYTE: + if (!decimalVal.isByte()) { + // Accurate byte value cannot be obtained. 
+ return null; + } + break; + case SHORT: + if (!decimalVal.isShort()) { + // Accurate short value cannot be obtained. + return null; + } + break; + case INT: + if (!decimalVal.isInt()) { + // Accurate int value cannot be obtained. + return null; + } + break; + case LONG: + if (!decimalVal.isLong()) { + // Accurate long value cannot be obtained. + return null; + } + break; + default: + throw new RuntimeException("Unexpected integer primitive type " + integerPrimitiveCategory); + } + // We only store longs in our LongColumnVector. + return decimalVal.longValue(); default: throw new HiveException("Unsupported type "+typename+" for cast to Long"); } @@ -2004,7 +2052,7 @@ public class VectorizationContext { VectorExpression lenExpr = createVectorExpression(StringLength.class, childExpr, VectorExpressionDescriptor.Mode.PROJECTION, null); - int outputCol = ocm.allocateOutputColumn("Long"); + int outputCol = ocm.allocateOutputColumn(TypeInfoFactory.longTypeInfo); VectorExpression lenToBoolExpr = new CastLongToBooleanViaLongToLong(lenExpr.getOutputColumn(), outputCol); lenToBoolExpr.setChildExpressions(new VectorExpression[] {lenExpr}); @@ -2014,14 +2062,14 @@ public class VectorizationContext { return null; } - private VectorExpression getCastToLongExpression(List childExpr) + private VectorExpression getCastToLongExpression(List childExpr, PrimitiveCategory integerPrimitiveCategory) throws HiveException { ExprNodeDesc child = childExpr.get(0); String inputType = childExpr.get(0).getTypeString(); if (child instanceof ExprNodeConstantDesc) { // Return a constant vector expression Object constantValue = ((ExprNodeConstantDesc) child).getValue(); - Long longValue = castConstantToLong(constantValue, child.getTypeInfo()); + Long longValue = castConstantToLong(constantValue, child.getTypeInfo(), integerPrimitiveCategory); return getConstantVectorExpression(longValue, TypeInfoFactory.longTypeInfo, VectorExpressionDescriptor.Mode.PROJECTION); } // Float family, timestamp are handled via descriptor based lookup, int family needs @@ -2196,12 +2244,10 @@ public class VectorizationContext { int outputCol = -1; String resultTypeName = expr.getTypeInfo().getTypeName(); - outputCol = ocm.allocateOutputColumn(resultTypeName); + outputCol = ocm.allocateOutputColumn(expr.getTypeInfo()); // Make vectorized operator - String normalizedName = getNormalizedName(resultTypeName); - - VectorExpression ve = new VectorUDFAdaptor(expr, outputCol, normalizedName, argDescs); + VectorExpression ve = new VectorUDFAdaptor(expr, outputCol, resultTypeName, argDescs); // Set child expressions VectorExpression[] childVEs = null; @@ -2395,36 +2441,15 @@ public class VectorizationContext { } } - static String getNormalizedName(String hiveTypeName) throws HiveException { - VectorExpressionDescriptor.ArgumentType argType = VectorExpressionDescriptor.ArgumentType.fromHiveTypeName(hiveTypeName); - switch (argType) { - case INT_FAMILY: - return "Long"; - case FLOAT_FAMILY: - return "Double"; - case DECIMAL: - //Return the decimal type as is, it includes scale and precision. - return hiveTypeName; - case STRING: - return "String"; - case CHAR: - //Return the CHAR type as is, it includes maximum length - return hiveTypeName; - case VARCHAR: - //Return the VARCHAR type as is, it includes maximum length. 
- return hiveTypeName; - case BINARY: - return "Binary"; - case DATE: - return "Date"; - case TIMESTAMP: - return "Timestamp"; - case INTERVAL_YEAR_MONTH: - case INTERVAL_DAY_TIME: - return hiveTypeName; - default: - throw new HiveException("Unexpected hive type name " + hiveTypeName); + static String getScratchName(TypeInfo typeInfo) throws HiveException { + // For now, leave DECIMAL precision/scale in the name so DecimalColumnVector scratch columns + // don't need their precision/scale adjusted... + if (typeInfo.getCategory() == Category.PRIMITIVE && + ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory() == PrimitiveCategory.DECIMAL) { + return typeInfo.getTypeName(); } + Type columnVectorType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo); + return columnVectorType.name().toLowerCase(); } static String getUndecoratedName(String hiveTypeName) throws HiveException { @@ -2702,9 +2727,16 @@ public class VectorizationContext { public String[] getScratchColumnTypeNames() { String[] result = new String[ocm.outputColCount]; for (int i = 0; i < ocm.outputColCount; i++) { - String typeName = ocm.outputColumnsTypes[i]; - if (typeName.equalsIgnoreCase("long")) { - typeName = "bigint"; // Convert our synonym to a real Hive type name. + String vectorTypeName = ocm.scratchVectorTypeNames[i]; + String typeName; + if (vectorTypeName.equalsIgnoreCase("bytes")) { + // Use hive type name. + typeName = "string"; + } else if (vectorTypeName.equalsIgnoreCase("long")) { + // Use hive type name. + typeName = "bigint"; + } else { + typeName = vectorTypeName; } result[i] = typeName; } http://git-wip-us.apache.org/repos/asf/hive/blob/4ba713cc/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToBoolean.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToBoolean.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToBoolean.java index 9621cd3..ac52373 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToBoolean.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToBoolean.java @@ -41,6 +41,11 @@ public class CastDecimalToBoolean extends FuncDecimalToLong { * Otherwise, return 1 for true. */ protected void func(LongColumnVector outV, DecimalColumnVector inV, int i) { - outV.vector[i] = inV.vector[i].getHiveDecimal().signum() == 0 ? 0 : 1; + outV.vector[i] = inV.vector[i].signum() == 0 ? 
0 : 1; + } + + @Override + public String getOutputType() { + return "Boolean"; } } http://git-wip-us.apache.org/repos/asf/hive/blob/4ba713cc/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToChar.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToChar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToChar.java index aab3e70..e753a6e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToChar.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToChar.java @@ -37,8 +37,8 @@ public class CastDecimalToChar extends CastDecimalToString implements TruncStrin } @Override - protected void assign(BytesColumnVector outV, int i, byte[] bytes, int length) { - StringExpr.rightTrimAndTruncate(outV, i, bytes, 0, length, maxLength); + protected void assign(BytesColumnVector outV, int i, byte[] bytes, int offset, int length) { + StringExpr.rightTrimAndTruncate(outV, i, bytes, offset, length, maxLength); } @Override http://git-wip-us.apache.org/repos/asf/hive/blob/4ba713cc/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToDouble.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToDouble.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToDouble.java index 63d878d..9cf97f4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToDouble.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToDouble.java @@ -34,6 +34,6 @@ public class CastDecimalToDouble extends FuncDecimalToDouble { } protected void func(DoubleColumnVector outV, DecimalColumnVector inV, int i) { - outV.vector[i] = inV.vector[i].getHiveDecimal().doubleValue(); + outV.vector[i] = inV.vector[i].doubleValue(); } } http://git-wip-us.apache.org/repos/asf/hive/blob/4ba713cc/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToLong.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToLong.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToLong.java index 2ff6b79..28a2d74 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToLong.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToLong.java @@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; /** * Type cast decimal to long @@ -37,6 +38,47 @@ public class CastDecimalToLong extends FuncDecimalToLong { @Override protected void func(LongColumnVector outV, DecimalColumnVector inV, int i) { - outV.vector[i] = inV.vector[i].getHiveDecimal().longValue(); // TODO: lossy conversion! + HiveDecimalWritable decWritable = inV.vector[i]; + + // Check based on the Hive integer type we need to test with isByte, isShort, isInt, isLong + // so we do not use corrupted (truncated) values for the Hive integer type. 
+ boolean isInRange; + switch (integerPrimitiveCategory) { + case BYTE: + isInRange = decWritable.isByte(); + break; + case SHORT: + isInRange = decWritable.isShort(); + break; + case INT: + isInRange = decWritable.isInt(); + break; + case LONG: + isInRange = decWritable.isLong(); + break; + default: + throw new RuntimeException("Unexpected integer primitive category " + integerPrimitiveCategory); + } + if (!isInRange) { + outV.isNull[i] = true; + outV.noNulls = false; + return; + } + switch (integerPrimitiveCategory) { + case BYTE: + outV.vector[i] = decWritable.byteValue(); + break; + case SHORT: + outV.vector[i] = decWritable.shortValue(); + break; + case INT: + outV.vector[i] = decWritable.intValue(); + break; + case LONG: + outV.vector[i] = decWritable.longValue(); + break; + default: + throw new RuntimeException("Unexpected integer primitive category " + integerPrimitiveCategory); + } } } http://git-wip-us.apache.org/repos/asf/hive/blob/4ba713cc/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToString.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToString.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToString.java index 243a807..ca58890 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToString.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToString.java @@ -18,8 +18,10 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; /** * To support vectorized cast of decimal to string. @@ -28,29 +30,28 @@ public class CastDecimalToString extends DecimalToStringUnaryUDF { private static final long serialVersionUID = 1L; + // We use a scratch buffer with the HiveDecimalWritable toBytes method so + // we don't incur poor performance creating a String result. + private byte[] scratchBuffer; + public CastDecimalToString() { super(); } public CastDecimalToString(int inputColumn, int outputColumn) { super(inputColumn, outputColumn); + scratchBuffer = new byte[HiveDecimal.SCRATCH_BUFFER_LEN_TO_BYTES]; } // The assign method will be overridden for CHAR and VARCHAR. - protected void assign(BytesColumnVector outV, int i, byte[] bytes, int length) { - outV.setVal(i, bytes, 0, length); + protected void assign(BytesColumnVector outV, int i, byte[] bytes, int offset, int length) { + outV.setVal(i, bytes, offset, length); } @Override protected void func(BytesColumnVector outV, DecimalColumnVector inV, int i) { - String s = inV.vector[i].getHiveDecimal().toString(); - byte[] b = null; - try { - b = s.getBytes("UTF-8"); - } catch (Exception e) { - // This should never happen. If it does, there is a bug. 
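
// The isByte/isShort/isInt/isLong guards above make an out-of-range decimal
// produce NULL instead of a silently truncated integer. A JDK-only sketch of
// the same check-then-narrow idea (BigDecimal stands in for Hive's decimal
// writable; names are illustrative):

import java.math.BigDecimal;
import java.math.RoundingMode;

class NarrowingCast {
  enum Target { BYTE, SHORT, INT, LONG }

  // Returns the truncated decimal as a long, or null when it does not fit
  // the target width -- mirroring the NULL-on-overflow behavior above.
  static Long castToIntegerFamily(BigDecimal dec, Target target) {
    BigDecimal t = dec.setScale(0, RoundingMode.DOWN);  // drop the fraction
    final long min, max;
    switch (target) {
      case BYTE:  min = Byte.MIN_VALUE;    max = Byte.MAX_VALUE;    break;
      case SHORT: min = Short.MIN_VALUE;   max = Short.MAX_VALUE;   break;
      case INT:   min = Integer.MIN_VALUE; max = Integer.MAX_VALUE; break;
      default:    min = Long.MIN_VALUE;    max = Long.MAX_VALUE;    break;
    }
    if (t.compareTo(BigDecimal.valueOf(min)) < 0
        || t.compareTo(BigDecimal.valueOf(max)) > 0) {
      return null;  // accurate value cannot be obtained at this width
    }
    return t.longValue();
  }
}
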
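
// The CastDecimalToString replacement below formats the decimal into a
// reusable scratch byte buffer (HiveDecimalWritable.toBytes fills the buffer
// from the end and returns the start index) rather than allocating a String
// per row. A self-contained sketch of that fill-from-the-end idiom, using a
// plain long for brevity (illustrative names, not the Hive API):

class ScratchFormat {
  static final int SCRATCH_LEN = 21;  // longest signed 64-bit value plus sign

  // Writes the digits of value into buf right-aligned and returns the index
  // of the first byte written; the length is buf.length - start.
  static int toBytes(long value, byte[] buf) {
    int i = buf.length;
    boolean negative = value < 0;
    long v = negative ? value : -value;    // negative space survives Long.MIN_VALUE
    do {
      buf[--i] = (byte) ('0' - (v % 10));  // v % 10 is in [-9, 0]
      v /= 10;
    } while (v != 0);
    if (negative) {
      buf[--i] = (byte) '-';
    }
    return i;
  }

  public static void main(String[] args) {
    byte[] scratch = new byte[SCRATCH_LEN];  // allocated once, reused per row
    int start = toBytes(-9045000L, scratch);
    // e.g. outV.setVal(i, scratch, start, scratch.length - start);
    System.out.println(new String(scratch, start, scratch.length - start));
  }
}
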
- throw new RuntimeException("Internal error: unable to convert decimal to string", e); - } - assign(outV, i, b, b.length); + HiveDecimalWritable decWritable = inV.vector[i]; + final int byteIndex = decWritable.toBytes(scratchBuffer); + assign(outV, i, scratchBuffer, byteIndex, HiveDecimal.SCRATCH_BUFFER_LEN_TO_BYTES - byteIndex); } } http://git-wip-us.apache.org/repos/asf/hive/blob/4ba713cc/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToTimestamp.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToTimestamp.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToTimestamp.java index 8963449..dfd9802 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToTimestamp.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToTimestamp.java @@ -23,6 +23,7 @@ import java.sql.Timestamp; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.util.TimestampUtils; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; /** * Type cast decimal to timestamp. The decimal value is interpreted @@ -32,8 +33,13 @@ import org.apache.hadoop.hive.ql.util.TimestampUtils; public class CastDecimalToTimestamp extends FuncDecimalToTimestamp { private static final long serialVersionUID = 1L; + private HiveDecimalWritable scratchHiveDecimalWritable1; + private HiveDecimalWritable scratchHiveDecimalWritable2; + public CastDecimalToTimestamp(int inputColumn, int outputColumn) { super(inputColumn, outputColumn); + scratchHiveDecimalWritable1 = new HiveDecimalWritable(); + scratchHiveDecimalWritable2 = new HiveDecimalWritable(); } public CastDecimalToTimestamp() { @@ -41,7 +47,10 @@ public class CastDecimalToTimestamp extends FuncDecimalToTimestamp { @Override protected void func(TimestampColumnVector outV, DecimalColumnVector inV, int i) { - Timestamp timestamp = TimestampUtils.decimalToTimestamp(inV.vector[i].getHiveDecimal()); + Timestamp timestamp = + TimestampUtils.decimalToTimestamp( + inV.vector[i], + scratchHiveDecimalWritable1, scratchHiveDecimalWritable2); outV.set(i, timestamp); } } http://git-wip-us.apache.org/repos/asf/hive/blob/4ba713cc/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToVarChar.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToVarChar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToVarChar.java index 267b0b1..3a2c2d0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToVarChar.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToVarChar.java @@ -37,8 +37,8 @@ public class CastDecimalToVarChar extends CastDecimalToString implements TruncSt } @Override - protected void assign(BytesColumnVector outV, int i, byte[] bytes, int length) { - StringExpr.truncate(outV, i, bytes, 0, length, maxLength); + protected void assign(BytesColumnVector outV, int i, byte[] bytes, int offset, int length) { + StringExpr.truncate(outV, i, bytes, offset, length, maxLength); } @Override http://git-wip-us.apache.org/repos/asf/hive/blob/4ba713cc/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDoubleToDecimal.java 
---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDoubleToDecimal.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDoubleToDecimal.java index 6d6b588..79478b9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDoubleToDecimal.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDoubleToDecimal.java @@ -18,9 +18,9 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; -import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; /** * Cast input double to a decimal. Get target value scale from output column vector. @@ -39,7 +39,11 @@ public class CastDoubleToDecimal extends FuncDoubleToDecimal { @Override protected void func(DecimalColumnVector outV, DoubleColumnVector inV, int i) { - String s = ((Double) inV.vector[i]).toString(); - outV.vector[i].set(HiveDecimal.create(s)); + HiveDecimalWritable decWritable = outV.vector[i]; + decWritable.setFromDouble(inV.vector[i]); + if (!decWritable.isSet()) { + outV.isNull[i] = true; + outV.noNulls = false; + } } } http://git-wip-us.apache.org/repos/asf/hive/blob/4ba713cc/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalColumnInList.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalColumnInList.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalColumnInList.java index 0601c66..d4d8fea 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalColumnInList.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalColumnInList.java @@ -37,7 +37,10 @@ public class DecimalColumnInList extends VectorExpression implements IDecimalInE private int outputColumn; // The set object containing the IN list. - private transient HashSet inSet; + // We use a HashSet of HiveDecimalWritable objects instead of HiveDecimal objects so + // we can lookup DecimalColumnVector HiveDecimalWritable quickly without creating + // a HiveDecimal lookup object. + private transient HashSet inSet; public DecimalColumnInList() { super(); @@ -61,9 +64,9 @@ public class DecimalColumnInList extends VectorExpression implements IDecimalInE } if (inSet == null) { - inSet = new HashSet(inListValues.length); + inSet = new HashSet(inListValues.length); for (HiveDecimal val : inListValues) { - inSet.add(val); + inSet.add(new HiveDecimalWritable(val)); } } @@ -88,16 +91,16 @@ public class DecimalColumnInList extends VectorExpression implements IDecimalInE // All must be selected otherwise size would be zero // Repeating property will not change. - outputVector[0] = inSet.contains(vector[0].getHiveDecimal()) ? 1 : 0; + outputVector[0] = inSet.contains(vector[0]) ? 1 : 0; outputColVector.isRepeating = true; } else if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = inSet.contains(vector[i].getHiveDecimal()) ? 1 : 0; + outputVector[i] = inSet.contains(vector[i]) ? 1 : 0; } } else { for(int i = 0; i != n; i++) { - outputVector[i] = inSet.contains(vector[i].getHiveDecimal()) ? 1 : 0; + outputVector[i] = inSet.contains(vector[i]) ? 
1 : 0; } } } else { @@ -106,7 +109,7 @@ public class DecimalColumnInList extends VectorExpression implements IDecimalInE //All must be selected otherwise size would be zero //Repeating property will not change. if (!nullPos[0]) { - outputVector[0] = inSet.contains(vector[0].getHiveDecimal()) ? 1 : 0; + outputVector[0] = inSet.contains(vector[0]) ? 1 : 0; outNulls[0] = false; } else { outNulls[0] = true; @@ -117,14 +120,14 @@ public class DecimalColumnInList extends VectorExpression implements IDecimalInE int i = sel[j]; outNulls[i] = nullPos[i]; if (!nullPos[i]) { - outputVector[i] = inSet.contains(vector[i].getHiveDecimal()) ? 1 : 0; + outputVector[i] = inSet.contains(vector[i]) ? 1 : 0; } } } else { System.arraycopy(nullPos, 0, outNulls, 0, n); for(int i = 0; i != n; i++) { if (!nullPos[i]) { - outputVector[i] = inSet.contains(vector[i].getHiveDecimal()) ? 1 : 0; + outputVector[i] = inSet.contains(vector[i]) ? 1 : 0; } } } http://git-wip-us.apache.org/repos/asf/hive/blob/4ba713cc/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalUtil.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalUtil.java index a01f7a2..ba4646f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalUtil.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalUtil.java @@ -34,15 +34,17 @@ public class DecimalUtil { } public static int compare(HiveDecimal left, HiveDecimalWritable writableRight) { - return left.compareTo(writableRight.getHiveDecimal()); + return HiveDecimalWritable.compareTo(left, writableRight); } // Addition with overflow check. Overflow produces NULL output. 
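
// Each checked operation that follows shares one allocation-free shape: set
// the output writable, mutate it in place, enforce the output column's
// precision/scale, and mark the row NULL if the value became unset. A JDK
// sketch of that shape (BigDecimal standing in for HiveDecimalWritable, and
// a precision test approximating mutateEnforcePrecisionScale):

import java.math.BigDecimal;
import java.math.RoundingMode;

class CheckedDecimalMath {
  // Returns null instead of throwing when the result cannot be represented
  // at (precision, scale); the caller then sets isNull[i] and noNulls.
  static BigDecimal addChecked(BigDecimal left, BigDecimal right,
      int precision, int scale) {
    BigDecimal result = left.add(right).setScale(scale, RoundingMode.HALF_UP);
    return result.precision() > precision ? null : result;
  }
}

// Caller pattern, mirroring the vectorized code below:
//   BigDecimal r = CheckedDecimalMath.addChecked(a, b, 38, 10);
//   if (r == null) { outputColVector.isNull[i] = true; outputColVector.noNulls = false; }
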
public static void addChecked(int i, HiveDecimal left, HiveDecimal right, DecimalColumnVector outputColVector) { - try { - outputColVector.set(i, left.add(right)); - } catch (ArithmeticException e) { // catch on overflow + HiveDecimalWritable decWritable = outputColVector.vector[i]; + decWritable.set(left); + decWritable.mutateAdd(right); + decWritable.mutateEnforcePrecisionScale(outputColVector.precision, outputColVector.scale); + if (!decWritable.isSet()) { outputColVector.noNulls = false; outputColVector.isNull[i] = true; } @@ -50,9 +52,11 @@ public class DecimalUtil { public static void addChecked(int i, HiveDecimalWritable left, HiveDecimalWritable right, DecimalColumnVector outputColVector) { - try { - outputColVector.set(i, left.getHiveDecimal().add(right.getHiveDecimal())); - } catch (ArithmeticException e) { // catch on overflow + HiveDecimalWritable decWritable = outputColVector.vector[i]; + decWritable.set(left); + decWritable.mutateAdd(right); + decWritable.mutateEnforcePrecisionScale(outputColVector.precision, outputColVector.scale); + if (!decWritable.isSet()) { outputColVector.noNulls = false; outputColVector.isNull[i] = true; } @@ -60,9 +64,11 @@ public class DecimalUtil { public static void addChecked(int i, HiveDecimalWritable left, HiveDecimal right, DecimalColumnVector outputColVector) { - try { - outputColVector.set(i, left.getHiveDecimal().add(right)); - } catch (ArithmeticException e) { // catch on overflow + HiveDecimalWritable decWritable = outputColVector.vector[i]; + decWritable.set(left); + decWritable.mutateAdd(right); + decWritable.mutateEnforcePrecisionScale(outputColVector.precision, outputColVector.scale); + if (!decWritable.isSet()) { outputColVector.noNulls = false; outputColVector.isNull[i] = true; } @@ -70,9 +76,11 @@ public class DecimalUtil { public static void addChecked(int i, HiveDecimal left, HiveDecimalWritable right, DecimalColumnVector outputColVector) { - try { - outputColVector.set(i, left.add(right.getHiveDecimal())); - } catch (ArithmeticException e) { // catch on overflow + HiveDecimalWritable decWritable = outputColVector.vector[i]; + decWritable.set(left); + decWritable.mutateAdd(right); + decWritable.mutateEnforcePrecisionScale(outputColVector.precision, outputColVector.scale); + if (!decWritable.isSet()) { outputColVector.noNulls = false; outputColVector.isNull[i] = true; } @@ -81,9 +89,11 @@ public class DecimalUtil { // Subtraction with overflow check. Overflow produces NULL output. 
public static void subtractChecked(int i, HiveDecimal left, HiveDecimal right, DecimalColumnVector outputColVector) { - try { - outputColVector.set(i, left.subtract(right)); - } catch (ArithmeticException e) { // catch on overflow + HiveDecimalWritable decWritable = outputColVector.vector[i]; + decWritable.set(left); + decWritable.mutateSubtract(right); + decWritable.mutateEnforcePrecisionScale(outputColVector.precision, outputColVector.scale); + if (!decWritable.isSet()) { outputColVector.noNulls = false; outputColVector.isNull[i] = true; } @@ -91,9 +101,11 @@ public class DecimalUtil { public static void subtractChecked(int i, HiveDecimalWritable left, HiveDecimalWritable right, DecimalColumnVector outputColVector) { - try { - outputColVector.set(i, left.getHiveDecimal().subtract(right.getHiveDecimal())); - } catch (ArithmeticException e) { // catch on overflow + HiveDecimalWritable decWritable = outputColVector.vector[i]; + decWritable.set(left); + decWritable.mutateSubtract(right); + decWritable.mutateEnforcePrecisionScale(outputColVector.precision, outputColVector.scale); + if (!decWritable.isSet()) { outputColVector.noNulls = false; outputColVector.isNull[i] = true; } @@ -101,9 +113,11 @@ public class DecimalUtil { public static void subtractChecked(int i, HiveDecimalWritable left, HiveDecimal right, DecimalColumnVector outputColVector) { - try { - outputColVector.set(i, left.getHiveDecimal().subtract(right)); - } catch (ArithmeticException e) { // catch on overflow + HiveDecimalWritable decWritable = outputColVector.vector[i]; + decWritable.set(left); + decWritable.mutateSubtract(right); + decWritable.mutateEnforcePrecisionScale(outputColVector.precision, outputColVector.scale); + if (!decWritable.isSet()) { outputColVector.noNulls = false; outputColVector.isNull[i] = true; } @@ -111,9 +125,11 @@ public class DecimalUtil { public static void subtractChecked(int i, HiveDecimal left, HiveDecimalWritable right, DecimalColumnVector outputColVector) { - try { - outputColVector.set(i, left.subtract(right.getHiveDecimal())); - } catch (ArithmeticException e) { // catch on overflow + HiveDecimalWritable decWritable = outputColVector.vector[i]; + decWritable.set(left); + decWritable.mutateSubtract(right); + decWritable.mutateEnforcePrecisionScale(outputColVector.precision, outputColVector.scale); + if (!decWritable.isSet()) { outputColVector.noNulls = false; outputColVector.isNull[i] = true; } @@ -122,9 +138,11 @@ public class DecimalUtil { // Multiplication with overflow check. Overflow produces NULL output. 
public static void multiplyChecked(int i, HiveDecimal left, HiveDecimal right, DecimalColumnVector outputColVector) { - try { - outputColVector.set(i, left.multiply(right)); - } catch (ArithmeticException e) { // catch on overflow + HiveDecimalWritable decWritable = outputColVector.vector[i]; + decWritable.set(left); + decWritable.mutateMultiply(right); + decWritable.mutateEnforcePrecisionScale(outputColVector.precision, outputColVector.scale); + if (!decWritable.isSet()) { outputColVector.noNulls = false; outputColVector.isNull[i] = true; } @@ -132,9 +150,11 @@ public class DecimalUtil { public static void multiplyChecked(int i, HiveDecimalWritable left, HiveDecimalWritable right, DecimalColumnVector outputColVector) { - try { - outputColVector.set(i, left.getHiveDecimal().multiply(right.getHiveDecimal())); - } catch (ArithmeticException e) { // catch on overflow + HiveDecimalWritable decWritable = outputColVector.vector[i]; + decWritable.set(left); + decWritable.mutateMultiply(right); + decWritable.mutateEnforcePrecisionScale(outputColVector.precision, outputColVector.scale); + if (!decWritable.isSet()) { outputColVector.noNulls = false; outputColVector.isNull[i] = true; } @@ -142,9 +162,11 @@ public class DecimalUtil { public static void multiplyChecked(int i, HiveDecimalWritable left, HiveDecimal right, DecimalColumnVector outputColVector) { - try { - outputColVector.set(i, left.getHiveDecimal().multiply(right)); - } catch (ArithmeticException e) { // catch on overflow + HiveDecimalWritable decWritable = outputColVector.vector[i]; + decWritable.set(left); + decWritable.mutateMultiply(right); + decWritable.mutateEnforcePrecisionScale(outputColVector.precision, outputColVector.scale); + if (!decWritable.isSet()) { outputColVector.noNulls = false; outputColVector.isNull[i] = true; } @@ -152,9 +174,11 @@ public class DecimalUtil { public static void multiplyChecked(int i, HiveDecimal left, HiveDecimalWritable right, DecimalColumnVector outputColVector) { - try { - outputColVector.set(i, left.multiply(right.getHiveDecimal())); - } catch (ArithmeticException e) { // catch on overflow + HiveDecimalWritable decWritable = outputColVector.vector[i]; + decWritable.set(left); + decWritable.mutateMultiply(right); + decWritable.mutateEnforcePrecisionScale(outputColVector.precision, outputColVector.scale); + if (!decWritable.isSet()) { outputColVector.noNulls = false; outputColVector.isNull[i] = true; } @@ -163,9 +187,11 @@ public class DecimalUtil { // Division with overflow/zero-divide check. Error produces NULL output. 
public static void divideChecked(int i, HiveDecimal left, HiveDecimal right, DecimalColumnVector outputColVector) { - try { - outputColVector.set(i, left.divide(right)); - } catch (ArithmeticException e) { // catch on error + HiveDecimalWritable decWritable = outputColVector.vector[i]; + decWritable.set(left); + decWritable.mutateDivide(right); + decWritable.mutateEnforcePrecisionScale(outputColVector.precision, outputColVector.scale); + if (!decWritable.isSet()) { outputColVector.noNulls = false; outputColVector.isNull[i] = true; } @@ -173,9 +199,11 @@ public class DecimalUtil { public static void divideChecked(int i, HiveDecimalWritable left, HiveDecimalWritable right, DecimalColumnVector outputColVector) { - try { - outputColVector.set(i, left.getHiveDecimal().divide(right.getHiveDecimal())); - } catch (ArithmeticException e) { // catch on error + HiveDecimalWritable decWritable = outputColVector.vector[i]; + decWritable.set(left); + decWritable.mutateDivide(right); + decWritable.mutateEnforcePrecisionScale(outputColVector.precision, outputColVector.scale); + if (!decWritable.isSet()) { outputColVector.noNulls = false; outputColVector.isNull[i] = true; } @@ -183,9 +211,11 @@ public class DecimalUtil { public static void divideChecked(int i, HiveDecimalWritable left, HiveDecimal right, DecimalColumnVector outputColVector) { - try { - outputColVector.set(i, left.getHiveDecimal().divide(right)); - } catch (ArithmeticException e) { // catch on error + HiveDecimalWritable decWritable = outputColVector.vector[i]; + decWritable.set(left); + decWritable.mutateDivide(right); + decWritable.mutateEnforcePrecisionScale(outputColVector.precision, outputColVector.scale); + if (!decWritable.isSet()) { outputColVector.noNulls = false; outputColVector.isNull[i] = true; } @@ -193,9 +223,11 @@ public class DecimalUtil { public static void divideChecked(int i, HiveDecimal left, HiveDecimalWritable right, DecimalColumnVector outputColVector) { - try { - outputColVector.set(i, left.divide(right.getHiveDecimal())); - } catch (ArithmeticException e) { // catch on error + HiveDecimalWritable decWritable = outputColVector.vector[i]; + decWritable.set(left); + decWritable.mutateDivide(right); + decWritable.mutateEnforcePrecisionScale(outputColVector.precision, outputColVector.scale); + if (!decWritable.isSet()) { outputColVector.noNulls = false; outputColVector.isNull[i] = true; } @@ -204,9 +236,11 @@ public class DecimalUtil { // Modulo operator with overflow/zero-divide check. 
public static void moduloChecked(int i, HiveDecimal left, HiveDecimal right, DecimalColumnVector outputColVector) { - try { - outputColVector.set(i, left.remainder(right)); - } catch (ArithmeticException e) { // catch on error + HiveDecimalWritable decWritable = outputColVector.vector[i]; + decWritable.set(left); + decWritable.mutateRemainder(right); + decWritable.mutateEnforcePrecisionScale(outputColVector.precision, outputColVector.scale); + if (!decWritable.isSet()) { outputColVector.noNulls = false; outputColVector.isNull[i] = true; } @@ -214,9 +248,11 @@ public class DecimalUtil { public static void moduloChecked(int i, HiveDecimalWritable left, HiveDecimalWritable right, DecimalColumnVector outputColVector) { - try { - outputColVector.set(i, left.getHiveDecimal().remainder(right.getHiveDecimal())); - } catch (ArithmeticException e) { // catch on error + HiveDecimalWritable decWritable = outputColVector.vector[i]; + decWritable.set(left); + decWritable.mutateRemainder(right); + decWritable.mutateEnforcePrecisionScale(outputColVector.precision, outputColVector.scale); + if (!decWritable.isSet()) { outputColVector.noNulls = false; outputColVector.isNull[i] = true; } @@ -224,9 +260,11 @@ public class DecimalUtil { public static void moduloChecked(int i, HiveDecimalWritable left, HiveDecimal right, DecimalColumnVector outputColVector) { - try { - outputColVector.set(i, left.getHiveDecimal().remainder(right)); - } catch (ArithmeticException e) { // catch on error + HiveDecimalWritable decWritable = outputColVector.vector[i]; + decWritable.set(left); + decWritable.mutateRemainder(right); + decWritable.mutateEnforcePrecisionScale(outputColVector.precision, outputColVector.scale); + if (!decWritable.isSet()) { outputColVector.noNulls = false; outputColVector.isNull[i] = true; } @@ -234,99 +272,122 @@ public class DecimalUtil { public static void moduloChecked(int i, HiveDecimal left, HiveDecimalWritable right, DecimalColumnVector outputColVector) { - try { - outputColVector.set(i, left.remainder(right.getHiveDecimal())); - } catch (ArithmeticException e) { // catch on error + HiveDecimalWritable decWritable = outputColVector.vector[i]; + decWritable.set(left); + decWritable.mutateRemainder(right); + decWritable.mutateEnforcePrecisionScale(outputColVector.precision, outputColVector.scale); + if (!decWritable.isSet()) { outputColVector.noNulls = false; outputColVector.isNull[i] = true; } } + // UNDONE: Why don't these methods take decimalPlaces? 
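
// floor/ceiling/round/bround below differ only in the rounding mode handed to
// mutateSetScale: ROUND_FLOOR, ROUND_CEILING, ROUND_HALF_UP (round), and
// ROUND_HALF_EVEN (bround, "banker's rounding"). A JDK equivalent of that
// mapping (BigDecimal standing in for the decimal writable):

import java.math.BigDecimal;
import java.math.RoundingMode;

class DecimalRounding {
  static BigDecimal floor(BigDecimal d)   { return d.setScale(0, RoundingMode.FLOOR); }
  static BigDecimal ceiling(BigDecimal d) { return d.setScale(0, RoundingMode.CEILING); }
  static BigDecimal round(BigDecimal d, int places) {
    return d.setScale(places, RoundingMode.HALF_UP);
  }
  static BigDecimal bround(BigDecimal d, int places) {
    return d.setScale(places, RoundingMode.HALF_EVEN);
  }

  public static void main(String[] args) {
    System.out.println(round(new BigDecimal("2.5"), 0));   // 3
    System.out.println(bround(new BigDecimal("2.5"), 0));  // 2 (ties go to even)
  }
}
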
public static void floor(int i, HiveDecimal input, DecimalColumnVector outputColVector) { - try { - outputColVector.set(i, input.setScale(0, HiveDecimal.ROUND_FLOOR)); - } catch (ArithmeticException e) { + HiveDecimalWritable decWritable = outputColVector.vector[i]; + decWritable.set(input); + decWritable.mutateSetScale(0, HiveDecimal.ROUND_FLOOR); + decWritable.mutateEnforcePrecisionScale(outputColVector.precision, outputColVector.scale); + if (!decWritable.isSet()) { outputColVector.noNulls = false; outputColVector.isNull[i] = true; } } public static void floor(int i, HiveDecimalWritable input, DecimalColumnVector outputColVector) { - try { - outputColVector.set(i, input.getHiveDecimal().setScale(0, HiveDecimal.ROUND_FLOOR)); - } catch (ArithmeticException e) { + HiveDecimalWritable decWritable = outputColVector.vector[i]; + decWritable.set(input); + decWritable.mutateSetScale(0, HiveDecimal.ROUND_FLOOR); + decWritable.mutateEnforcePrecisionScale(outputColVector.precision, outputColVector.scale); + if (!decWritable.isSet()) { outputColVector.noNulls = false; outputColVector.isNull[i] = true; } } public static void ceiling(int i, HiveDecimal input, DecimalColumnVector outputColVector) { - try { - outputColVector.set(i, input.setScale(0, HiveDecimal.ROUND_CEILING)); - } catch (ArithmeticException e) { + HiveDecimalWritable decWritable = outputColVector.vector[i]; + decWritable.set(input); + decWritable.mutateSetScale(0, HiveDecimal.ROUND_CEILING); + decWritable.mutateEnforcePrecisionScale(outputColVector.precision, outputColVector.scale); + if (!decWritable.isSet()) { outputColVector.noNulls = false; outputColVector.isNull[i] = true; } } public static void ceiling(int i, HiveDecimalWritable input, DecimalColumnVector outputColVector) { - try { - outputColVector.set(i, input.getHiveDecimal().setScale(0, HiveDecimal.ROUND_CEILING)); - } catch (ArithmeticException e) { + HiveDecimalWritable decWritable = outputColVector.vector[i]; + decWritable.set(input); + decWritable.mutateSetScale(0, HiveDecimal.ROUND_CEILING); + decWritable.mutateEnforcePrecisionScale(outputColVector.precision, outputColVector.scale); + if (!decWritable.isSet()) { outputColVector.noNulls = false; outputColVector.isNull[i] = true; } } public static void round(int i, HiveDecimal input, int decimalPlaces, DecimalColumnVector outputColVector) { - try { - outputColVector.set(i, RoundUtils.round(input, decimalPlaces)); - } catch (ArithmeticException e) { + HiveDecimalWritable decWritable = outputColVector.vector[i]; + decWritable.set(input); + decWritable.mutateSetScale(decimalPlaces, HiveDecimal.ROUND_HALF_UP); + decWritable.mutateEnforcePrecisionScale(outputColVector.precision, outputColVector.scale); + if (!decWritable.isSet()) { outputColVector.noNulls = false; outputColVector.isNull[i] = true; } } public static void round(int i, HiveDecimalWritable input, int decimalPlaces, DecimalColumnVector outputColVector) { - try { - outputColVector.set(i, RoundUtils.round(input.getHiveDecimal(), decimalPlaces)); - } catch (ArithmeticException e) { + HiveDecimalWritable decWritable = outputColVector.vector[i]; + decWritable.set(input); + decWritable.mutateSetScale(decimalPlaces, HiveDecimal.ROUND_HALF_UP); + decWritable.mutateEnforcePrecisionScale(outputColVector.precision, outputColVector.scale); + if (!decWritable.isSet()) { outputColVector.noNulls = false; outputColVector.isNull[i] = true; } } public static void round(int i, HiveDecimal input, DecimalColumnVector outputColVector) { - try { - outputColVector.set(i, 
RoundUtils.round(input, outputColVector.scale)); - } catch (ArithmeticException e) { + HiveDecimalWritable decWritable = outputColVector.vector[i]; + decWritable.set(input); + decWritable.mutateSetScale(outputColVector.scale, HiveDecimal.ROUND_HALF_UP); + decWritable.mutateEnforcePrecisionScale(outputColVector.precision, outputColVector.scale); + if (!decWritable.isSet()) { outputColVector.noNulls = false; outputColVector.isNull[i] = true; } } public static void round(int i, HiveDecimalWritable input, DecimalColumnVector outputColVector) { - try { - outputColVector.set(i, RoundUtils.round(input.getHiveDecimal(), outputColVector.scale)); - } catch (ArithmeticException e) { + HiveDecimalWritable decWritable = outputColVector.vector[i]; + decWritable.set(input); + decWritable.mutateSetScale(outputColVector.scale, HiveDecimal.ROUND_HALF_UP); + decWritable.mutateEnforcePrecisionScale(outputColVector.precision, outputColVector.scale); + if (!decWritable.isSet()) { outputColVector.noNulls = false; outputColVector.isNull[i] = true; } } public static void bround(int i, HiveDecimalWritable input, int decimalPlaces, DecimalColumnVector outputColVector) { - try { - outputColVector.set(i, RoundUtils.bround(input.getHiveDecimal(), decimalPlaces)); - } catch (ArithmeticException e) { + HiveDecimalWritable decWritable = outputColVector.vector[i]; + decWritable.set(input); + decWritable.mutateSetScale(decimalPlaces, HiveDecimal.ROUND_HALF_EVEN); + decWritable.mutateEnforcePrecisionScale(outputColVector.precision, outputColVector.scale); + if (!decWritable.isSet()) { outputColVector.noNulls = false; outputColVector.isNull[i] = true; } } public static void bround(int i, HiveDecimalWritable input, DecimalColumnVector outputColVector) { - try { - outputColVector.set(i, RoundUtils.bround(input.getHiveDecimal(), outputColVector.scale)); - } catch (ArithmeticException e) { + HiveDecimalWritable decWritable = outputColVector.vector[i]; + decWritable.set(input); + decWritable.mutateSetScale(outputColVector.scale, HiveDecimal.ROUND_HALF_EVEN); + decWritable.mutateEnforcePrecisionScale(outputColVector.precision, outputColVector.scale); + if (!decWritable.isSet()) { outputColVector.noNulls = false; outputColVector.isNull[i] = true; } @@ -337,42 +398,30 @@ public class DecimalUtil { } public static void sign(int i, HiveDecimalWritable input, LongColumnVector outputColVector) { - outputColVector.vector[i] = input.getHiveDecimal().signum(); + outputColVector.vector[i] = input.signum(); } public static void abs(int i, HiveDecimal input, DecimalColumnVector outputColVector) { - try { - outputColVector.set(i, input.abs()); - } catch (ArithmeticException e) { - outputColVector.noNulls = false; - outputColVector.isNull[i] = true; - } + HiveDecimalWritable decWritable = outputColVector.vector[i]; + decWritable.set(input); + decWritable.mutateAbs(); } public static void abs(int i, HiveDecimalWritable input, DecimalColumnVector outputColVector) { - try { - outputColVector.set(i, input.getHiveDecimal().abs()); - } catch (ArithmeticException e) { - outputColVector.noNulls = false; - outputColVector.isNull[i] = true; - } + HiveDecimalWritable decWritable = outputColVector.vector[i]; + decWritable.set(input); + decWritable.mutateAbs(); } public static void negate(int i, HiveDecimal input, DecimalColumnVector outputColVector) { - try { - outputColVector.set(i, input.negate()); - } catch (ArithmeticException e) { - outputColVector.noNulls = false; - outputColVector.isNull[i] = true; - } + HiveDecimalWritable decWritable = 
outputColVector.vector[i]; + decWritable.set(input); + decWritable.mutateNegate(); } public static void negate(int i, HiveDecimalWritable input, DecimalColumnVector outputColVector) { - try { - outputColVector.set(i, input.getHiveDecimal().negate()); - } catch (ArithmeticException e) { - outputColVector.noNulls = false; - outputColVector.isNull[i] = true; - } + HiveDecimalWritable decWritable = outputColVector.vector[i]; + decWritable.set(input); + decWritable.mutateNegate(); } } http://git-wip-us.apache.org/repos/asf/hive/blob/4ba713cc/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterDecimalColumnInList.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterDecimalColumnInList.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterDecimalColumnInList.java index a865343..79d3fe3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterDecimalColumnInList.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterDecimalColumnInList.java @@ -35,7 +35,7 @@ public class FilterDecimalColumnInList extends VectorExpression implements IDeci private HiveDecimal[] inListValues; // The set object containing the IN list. - private transient HashSet inSet; + private transient HashSet inSet; public FilterDecimalColumnInList() { super(); @@ -58,9 +58,9 @@ public class FilterDecimalColumnInList extends VectorExpression implements IDeci } if (inSet == null) { - inSet = new HashSet(inListValues.length); + inSet = new HashSet(inListValues.length); for (HiveDecimal val : inListValues) { - inSet.add(val); + inSet.add(new HiveDecimalWritable(val)); } } @@ -81,7 +81,7 @@ public class FilterDecimalColumnInList extends VectorExpression implements IDeci // All must be selected otherwise size would be zero // Repeating property will not change. - if (!(inSet.contains(vector[0].getHiveDecimal()))) { + if (!(inSet.contains(vector[0]))) { //Entire batch is filtered out. batch.size = 0; } @@ -89,7 +89,7 @@ public class FilterDecimalColumnInList extends VectorExpression implements IDeci int newSize = 0; for(int j = 0; j != n; j++) { int i = sel[j]; - if (inSet.contains(vector[i].getHiveDecimal())) { + if (inSet.contains(vector[i])) { sel[newSize++] = i; } } @@ -97,7 +97,7 @@ public class FilterDecimalColumnInList extends VectorExpression implements IDeci } else { int newSize = 0; for(int i = 0; i != n; i++) { - if (inSet.contains(vector[i].getHiveDecimal())) { + if (inSet.contains(vector[i])) { sel[newSize++] = i; } } @@ -112,7 +112,7 @@ public class FilterDecimalColumnInList extends VectorExpression implements IDeci //All must be selected otherwise size would be zero //Repeating property will not change. if (!nullPos[0]) { - if (!inSet.contains(vector[0].getHiveDecimal())) { + if (!inSet.contains(vector[0])) { //Entire batch is filtered out. 
batch.size = 0; @@ -125,7 +125,7 @@ public class FilterDecimalColumnInList extends VectorExpression implements IDeci for(int j = 0; j != n; j++) { int i = sel[j]; if (!nullPos[i]) { - if (inSet.contains(vector[i].getHiveDecimal())) { + if (inSet.contains(vector[i])) { sel[newSize++] = i; } } @@ -137,7 +137,7 @@ public class FilterDecimalColumnInList extends VectorExpression implements IDeci int newSize = 0; for(int i = 0; i != n; i++) { if (!nullPos[i]) { - if (inSet.contains(vector[i].getHiveDecimal())) { + if (inSet.contains(vector[i])) { sel[newSize++] = i; } } http://git-wip-us.apache.org/repos/asf/hive/blob/4ba713cc/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStructColumnInList.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStructColumnInList.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStructColumnInList.java index 70b393c..1e21fea 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStructColumnInList.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStructColumnInList.java @@ -112,7 +112,7 @@ public class FilterStructColumnInList extends FilterStringColumnInList implement case DECIMAL: DecimalColumnVector decColVector = ((DecimalColumnVector) colVec); binarySortableSerializeWrite.writeHiveDecimal( - decColVector.vector[adjustedIndex].getHiveDecimal(), decColVector.scale); + decColVector.vector[adjustedIndex], decColVector.scale); break; default: http://git-wip-us.apache.org/repos/asf/hive/blob/4ba713cc/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToLong.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToLong.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToLong.java index 4691fe1..7f005a1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToLong.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToLong.java @@ -22,6 +22,10 @@ import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; /** * This is a superclass for unary decimal functions and expressions returning integers that @@ -32,6 +36,9 @@ public abstract class FuncDecimalToLong extends VectorExpression { int inputColumn; int outputColumn; + private transient boolean integerPrimitiveCategoryKnown = false; + protected transient PrimitiveCategory integerPrimitiveCategory; + public FuncDecimalToLong(int inputColumn, int outputColumn) { this.inputColumn = inputColumn; this.outputColumn = outputColumn; @@ -50,6 +57,13 @@ public abstract class FuncDecimalToLong extends VectorExpression { super.evaluateChildren(batch); } + if (!integerPrimitiveCategoryKnown) { + String typeName = getOutputType().toLowerCase(); + TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName); + 
http://git-wip-us.apache.org/repos/asf/hive/blob/4ba713cc/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStructColumnInList.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStructColumnInList.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStructColumnInList.java
index 70b393c..1e21fea 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStructColumnInList.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStructColumnInList.java
@@ -112,7 +112,7 @@ public class FilterStructColumnInList extends FilterStringColumnInList implement
       case DECIMAL:
         DecimalColumnVector decColVector = ((DecimalColumnVector) colVec);
         binarySortableSerializeWrite.writeHiveDecimal(
-            decColVector.vector[adjustedIndex].getHiveDecimal(), decColVector.scale);
+            decColVector.vector[adjustedIndex], decColVector.scale);
         break;
       default:

http://git-wip-us.apache.org/repos/asf/hive/blob/4ba713cc/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToLong.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToLong.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToLong.java
index 4691fe1..7f005a1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToLong.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToLong.java
@@ -22,6 +22,10 @@ import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 
 /**
  * This is a superclass for unary decimal functions and expressions returning integers that
@@ -32,6 +36,9 @@ public abstract class FuncDecimalToLong extends VectorExpression {
   int inputColumn;
   int outputColumn;
 
+  private transient boolean integerPrimitiveCategoryKnown = false;
+  protected transient PrimitiveCategory integerPrimitiveCategory;
+
   public FuncDecimalToLong(int inputColumn, int outputColumn) {
     this.inputColumn = inputColumn;
     this.outputColumn = outputColumn;
@@ -50,6 +57,13 @@ public abstract class FuncDecimalToLong extends VectorExpression {
       super.evaluateChildren(batch);
     }
 
+    if (!integerPrimitiveCategoryKnown) {
+      String typeName = getOutputType().toLowerCase();
+      TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName);
+      integerPrimitiveCategory = ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory();
+      integerPrimitiveCategoryKnown = true;
+    }
+
     DecimalColumnVector inV = (DecimalColumnVector) batch.cols[inputColumn];
     int[] sel = batch.selected;
     int n = batch.size;
@@ -117,11 +131,6 @@ public abstract class FuncDecimalToLong extends VectorExpression {
   }
 
   @Override
-  public String getOutputType() {
-    return "long";
-  }
-
-  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
     VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder();
     b.setMode(VectorExpressionDescriptor.Mode.PROJECTION)
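FuncDecimalToLong now resolves its output category once, lazily, from the expression's actual output type name instead of hard-coding getOutputType() to "long"; presumably this lets subclasses branch on the concrete integer width. A minimal sketch of that one-time resolution (illustrative only, not the committed class; OutputCategorySketch is a made-up name):

    import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
    import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

    public class OutputCategorySketch {
      private boolean categoryKnown = false;
      private PrimitiveCategory category;

      // Resolve the category from a Hive type name on first use, then cache it,
      // mirroring the lazy block the superclass now runs on its first batch.
      PrimitiveCategory categoryFor(String outputTypeName) {
        if (!categoryKnown) {
          TypeInfo typeInfo =
              TypeInfoUtils.getTypeInfoFromTypeString(outputTypeName.toLowerCase());
          category = ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory();
          categoryKnown = true;
        }
        return category;
      }

      public static void main(String[] args) {
        System.out.println(new OutputCategorySketch().categoryFor("smallint"));  // SHORT
      }
    }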
http://git-wip-us.apache.org/repos/asf/hive/blob/4ba713cc/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StructColumnInList.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StructColumnInList.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StructColumnInList.java
index 769c70a..8134108 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StructColumnInList.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StructColumnInList.java
@@ -113,7 +113,7 @@ public class StructColumnInList extends StringColumnInList implements IStructInE
       case DECIMAL:
         DecimalColumnVector decColVector = ((DecimalColumnVector) colVec);
         binarySortableSerializeWrite.writeHiveDecimal(
-            decColVector.vector[adjustedIndex].getHiveDecimal(), decColVector.scale);
+            decColVector.vector[adjustedIndex], decColVector.scale);
         break;
       default:
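Both struct IN expressions now hand the writable straight to writeHiveDecimal rather than materializing a HiveDecimal first; the writable-accepting overload is assumed here from the call sites in this patch. A rough sketch of the call shape (WriteDecimalSketch and writeDecimalField are hypothetical names):

    import java.io.IOException;
    import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
    import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite;

    public class WriteDecimalSketch {
      // Serialize row 'row' of a decimal column in binary-sortable form,
      // passing the HiveDecimalWritable through without conversion.
      static void writeDecimalField(BinarySortableSerializeWrite out,
          DecimalColumnVector col, int row) throws IOException {
        out.writeHiveDecimal(col.vector[row], col.scale);
      }
    }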
http://git-wip-us.apache.org/repos/asf/hive/blob/4ba713cc/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFAvgDecimal.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFAvgDecimal.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFAvgDecimal.java
index d0ff5fa..4f6d652 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFAvgDecimal.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFAvgDecimal.java
@@ -60,45 +60,25 @@ public class VectorUDAFAvgDecimal extends VectorAggregateExpression {
     transient private long count;
     transient private boolean isNull;
 
-    // We use this to catch overflow.
-    transient private boolean isOutOfRange;
-
-    public void sumValueWithNullCheck(HiveDecimalWritable writable, short scale) {
-      if (isOutOfRange) {
-        return;
-      }
-      HiveDecimal value = writable.getHiveDecimal();
+    public void sumValueWithNullCheck(HiveDecimalWritable writable) {
       if (isNull) {
-        sum.set(value);
+        // Make a copy since we intend to mutate sum.
+        sum.set(writable);
         count = 1;
         isNull = false;
       } else {
-        HiveDecimal result;
-        try {
-          result = sum.getHiveDecimal().add(value);
-        } catch (ArithmeticException e) {  // catch on overflow
-          isOutOfRange = true;
-          return;
-        }
-        sum.set(result);
+        // Note that if sum is out of range, mutateAdd will ignore the call.
+        // At the end, sum.isSet() can be checked for null.
+        sum.mutateAdd(writable);
         count++;
       }
     }
 
-    public void sumValueNoNullCheck(HiveDecimalWritable writable, short scale) {
-      HiveDecimal value = writable.getHiveDecimal();
-      HiveDecimal result;
-      try {
-        result = sum.getHiveDecimal().add(value);
-      } catch (ArithmeticException e) {  // catch on overflow
-        isOutOfRange = true;
-        return;
-      }
-      sum.set(result);
+    public void sumValueNoNullCheck(HiveDecimalWritable writable) {
+      sum.mutateAdd(writable);
       count++;
     }
 
-
     @Override
     public int getVariableSize() {
       throw new UnsupportedOperationException();
@@ -107,9 +87,8 @@ public class VectorUDAFAvgDecimal extends VectorAggregateExpression {
     @Override
     public void reset() {
       isNull = true;
-      isOutOfRange = false;
-      sum.set(HiveDecimal.ZERO);
-      count = 0L;
+      sum.setFromLong(0L);
+      count = 0;
     }
   }
@@ -251,7 +230,7 @@
           aggregationBufferSets,
           bufferIndex,
           i);
-        myagg.sumValueWithNullCheck(value, this.sumScale);
+        myagg.sumValueWithNullCheck(value);
       }
     }
@@ -267,7 +246,7 @@
           aggregationBufferSets,
           bufferIndex,
           i);
-        myagg.sumValueWithNullCheck(values[selection[i]], this.sumScale);
+        myagg.sumValueWithNullCheck(values[selection[i]]);
       }
     }
@@ -281,7 +260,7 @@
           aggregationBufferSets,
           bufferIndex,
           i);
-        myagg.sumValueWithNullCheck(values[i], this.sumScale);
+        myagg.sumValueWithNullCheck(values[i]);
       }
     }
@@ -302,7 +281,7 @@
             aggregationBufferSets,
             bufferIndex,
             i);
-          myagg.sumValueWithNullCheck(value, this.sumScale);
+          myagg.sumValueWithNullCheck(value);
        }
      }
@@ -323,7 +302,7 @@
             aggregationBufferSets,
             bufferIndex,
             i);
-          myagg.sumValueWithNullCheck(value, this.sumScale);
+          myagg.sumValueWithNullCheck(value);
        }
      }
@@ -342,7 +321,7 @@
             aggregationBufferSets,
             bufferIndex,
             j);
-          myagg.sumValueWithNullCheck(values[i], this.sumScale);
+          myagg.sumValueWithNullCheck(values[i]);
        }
      }
    }
@@ -360,7 +339,7 @@
             aggregationBufferSets,
             bufferIndex,
             i);
-          myagg.sumValueWithNullCheck(values[i], this.sumScale);
+          myagg.sumValueWithNullCheck(values[i]);
        }
      }
    }
@@ -389,25 +368,12 @@
       if (inputVector.noNulls) {
         if (myagg.isNull) {
           myagg.isNull = false;
-          myagg.sum.set(HiveDecimal.ZERO);
+          myagg.sum.setFromLong(0L);
           myagg.count = 0;
         }
         HiveDecimal value = vector[0].getHiveDecimal();
-        HiveDecimal multiple;
-        try {
-          multiple = value.multiply(HiveDecimal.create(batchSize));
-        } catch (ArithmeticException e) {  // catch on overflow
-          myagg.isOutOfRange = true;
-          return;
-        }
-        HiveDecimal result;
-        try {
-          result = myagg.sum.getHiveDecimal().add(multiple);
-        } catch (ArithmeticException e) {  // catch on overflow
-          myagg.isOutOfRange = true;
-          return;
-        }
-        myagg.sum.set(result);
+        HiveDecimal multiple = value.multiply(HiveDecimal.create(batchSize));
+        myagg.sum.mutateAdd(multiple);
         myagg.count += batchSize;
       }
       return;
@@ -437,8 +403,7 @@
       for (int j=0; j< batchSize; ++j) {
         int i = selected[j];
         if (!isNull[i]) {
-          HiveDecimalWritable value = vector[i];
-          myagg.sumValueWithNullCheck(value, this.sumScale);
+          myagg.sumValueWithNullCheck(vector[i]);
         }
       }
     }
@@ -451,13 +416,12 @@
 
       if (myagg.isNull) {
         myagg.isNull = false;
-        myagg.sum.set(HiveDecimal.ZERO);
+        myagg.sum.setFromLong(0L);
         myagg.count = 0;
       }
 
       for (int i=0; i< batchSize; ++i) {
-        HiveDecimalWritable value = vector[selected[i]];
-        myagg.sumValueNoNullCheck(value, this.sumScale);
+        myagg.sumValueNoNullCheck(vector[selected[i]]);
       }
     }
@@ -469,8 +433,7 @@
     for(int i=0;i
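Stepping back from the patch text: the average aggregate now accumulates with mutateAdd, which, per the comment in the buffer class above, becomes a no-op once the sum overflows. The old per-call try/catch and isOutOfRange flag disappear, and overflow surfaces once at the end as an unset writable. A self-contained sketch of that end-checked accumulation, using only the APIs visible in this patch plus HiveDecimal.create/divide (illustrative only; AvgSketch is a made-up name):

    import org.apache.hadoop.hive.common.type.HiveDecimal;
    import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;

    public class AvgSketch {
      public static void main(String[] args) {
        HiveDecimalWritable sum = new HiveDecimalWritable();
        sum.setFromLong(0L);
        long count = 0;

        for (String v : new String[] { "10.5", "20.25", "3.25" }) {
          // No try/catch per row: an overflowed sum ignores further mutateAdds.
          sum.mutateAdd(new HiveDecimalWritable(v));
          count++;
        }

        // The overflow check happens once, at the end: unset means NULL.
        if (sum.isSet()) {
          System.out.println(sum.getHiveDecimal().divide(HiveDecimal.create(count)));
        } else {
          System.out.println("NULL (sum overflowed)");
        }
      }
    }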