Return-Path: X-Original-To: apmail-hive-commits-archive@www.apache.org Delivered-To: apmail-hive-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 75CA41062E for ; Thu, 5 Sep 2013 18:56:43 +0000 (UTC) Received: (qmail 83990 invoked by uid 500); 5 Sep 2013 18:56:40 -0000 Delivered-To: apmail-hive-commits-archive@hive.apache.org Received: (qmail 83924 invoked by uid 500); 5 Sep 2013 18:56:40 -0000 Mailing-List: contact commits-help@hive.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: hive-dev@hive.apache.org Delivered-To: mailing list commits@hive.apache.org Received: (qmail 83243 invoked by uid 99); 5 Sep 2013 18:56:31 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 05 Sep 2013 18:56:31 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 05 Sep 2013 18:56:27 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id 6D84E2388AA7; Thu, 5 Sep 2013 18:56:07 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1520385 [2/5] - in /hive/branches/vectorization/ql/src: gen/vectorization/ gen/vectorization/ExpressionTemplates/ gen/vectorization/TestTemplates/ gen/vectorization/UDAFTemplates/ gen/vectorization/org/ gen/vectorization/org/apache/ gen/ve... Date: Thu, 05 Sep 2013 18:56:06 -0000 To: commits@hive.apache.org From: hashutosh@apache.org X-Mailer: svnmailer-1.0.9 Message-Id: <20130905185607.6D84E2388AA7@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Added: hive/branches/vectorization/ql/src/gen/vectorization/TestTemplates/TestColumnScalarFilterVectorExpressionEvaluation.txt URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/gen/vectorization/TestTemplates/TestColumnScalarFilterVectorExpressionEvaluation.txt?rev=1520385&view=auto ============================================================================== --- hive/branches/vectorization/ql/src/gen/vectorization/TestTemplates/TestColumnScalarFilterVectorExpressionEvaluation.txt (added) +++ hive/branches/vectorization/ql/src/gen/vectorization/TestTemplates/TestColumnScalarFilterVectorExpressionEvaluation.txt Thu Sep 5 18:56:04 2013 @@ -0,0 +1,78 @@ + + @Test + public void () { + + Random rand = new Random(SEED); + + inputColumnVector = + VectorizedRowGroupGenUtil.generate(, + , BATCH_SIZE, rand); + + VectorizedRowBatch rowBatch = new VectorizedRowBatch(1, BATCH_SIZE); + rowBatch.cols[0] = inputColumnVector; + + scalarValue = 0; + do { + scalarValue = rand.next(); + } while(scalarValue == 0); + + vectorExpression = + new (0, scalarValue); + + vectorExpression.evaluate(rowBatch); + + + int selectedIndex = 0; + int i=0; + //check for isRepeating optimization + if(inputColumnVector.isRepeating) { + //null vector is safe to check, as it is always initialized to match the data vector + selectedIndex = + !inputColumnVector.isNull[i] && + ? 
BATCH_SIZE : 0; + } else { + for(i = 0; i < BATCH_SIZE; i++) { + if(!inputColumnVector.isNull[i]) { + if( ) { + assertEquals( + "Vector index that passes filter " + + + "" + + + " is not in rowBatch selected index", + i, + rowBatch.selected[selectedIndex]); + selectedIndex++; + } + } + } + } + + assertEquals("Row batch size not set to number of selected rows: " + selectedIndex, + selectedIndex, rowBatch.size); + + if(selectedIndex > 0 && selectedIndex < BATCH_SIZE) { + assertEquals( + "selectedInUse should be set when > 0 and < entire batch(" + BATCH_SIZE + ") is selected: " + + selectedIndex, + true, rowBatch.selectedInUse); + } else if(selectedIndex == BATCH_SIZE) { + assertEquals( + "selectedInUse should not be set when entire batch(" + BATCH_SIZE + ") is selected: " + + selectedIndex, + false, rowBatch.selectedInUse); + } + } \ No newline at end of file Added: hive/branches/vectorization/ql/src/gen/vectorization/TestTemplates/TestColumnScalarOperationVectorExpressionEvaluation.txt URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/gen/vectorization/TestTemplates/TestColumnScalarOperationVectorExpressionEvaluation.txt?rev=1520385&view=auto ============================================================================== --- hive/branches/vectorization/ql/src/gen/vectorization/TestTemplates/TestColumnScalarOperationVectorExpressionEvaluation.txt (added) +++ hive/branches/vectorization/ql/src/gen/vectorization/TestTemplates/TestColumnScalarOperationVectorExpressionEvaluation.txt Thu Sep 5 18:56:04 2013 @@ -0,0 +1,59 @@ + + @Test + public void () { + + Random rand = new Random(SEED); + + outputColumnVector = + VectorizedRowGroupGenUtil.generate(, + , BATCH_SIZE, rand); + + inputColumnVector = + VectorizedRowGroupGenUtil.generate(, + , BATCH_SIZE, rand); + + VectorizedRowBatch rowBatch = new VectorizedRowBatch(2, BATCH_SIZE); + rowBatch.cols[0] = inputColumnVector; + rowBatch.cols[1] = outputColumnVector; + + scalarValue = 0; + do { + scalarValue = rand.next(); + } while(scalarValue == 0); + + vectorExpression = + new (, 1); + + vectorExpression.evaluate(rowBatch); + + assertEquals( + "Output column vector is repeating state does not match operand column", + inputColumnVector.isRepeating, outputColumnVector.isRepeating); + + assertEquals( + "Output column vector no nulls state does not match operand column", + inputColumnVector.noNulls, outputColumnVector.noNulls); + + if(!outputColumnVector.noNulls && !outputColumnVector.isRepeating) { + for(int i = 0; i < BATCH_SIZE; i++) { + //null vectors are safe to check, as they are always initialized to match the data vector + assertEquals("Output vector doesn't match input vector's is null state for index", + inputColumnVector.isNull[i], outputColumnVector.isNull[i]); + } + } + } \ No newline at end of file Added: hive/branches/vectorization/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvg.txt URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvg.txt?rev=1520385&view=auto ============================================================================== --- hive/branches/vectorization/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvg.txt (added) +++ hive/branches/vectorization/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvg.txt Thu Sep 5 18:56:04 2013 @@ -0,0 +1,474 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression; +import org.apache.hadoop.hive.ql.exec.vector.VectorAggregationBufferRow; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.AggregationDesc; +import org.apache.hadoop.hive.ql.util.JavaDataModel; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; + +/** + * Generated from template VectorUDAFAvg.txt. + */ +@Description(name = "avg", + value = "_FUNC_(expr) - Returns the average value of expr (vectorized, type: )") +public class extends VectorAggregateExpression { + + private static final long serialVersionUID = 1L; + + /** class for storing the current aggregate value. 
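+   * Holds a running double sum, a long count and an isNull flag; sumValue() seeds
+   * them with the first non-null input, and the (count, sum) pair is later emitted
+   * as the AVG partial result.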
*/ + static class Aggregation implements AggregationBuffer { + + private static final long serialVersionUID = 1L; + + transient private double sum; + transient private long count; + transient private boolean isNull; + + public void sumValue( value) { + if (isNull) { + sum = value; + count = 1; + isNull = false; + } else { + sum += value; + count++; + } + } + + @Override + public int getVariableSize() { + throw new UnsupportedOperationException(); + } + } + + private VectorExpression inputExpression; + transient private Object[] partialResult; + transient private LongWritable resultCount; + transient private DoubleWritable resultSum; + transient private StructObjectInspector soi; + + public (VectorExpression inputExpression) { + this(); + this.inputExpression = inputExpression; + } + + public () { + super(); + partialResult = new Object[2]; + resultCount = new LongWritable(); + resultSum = new DoubleWritable(); + partialResult[0] = resultCount; + partialResult[1] = resultSum; + initPartialResultInspector(); + } + + private void initPartialResultInspector() { + List foi = new ArrayList(); + foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector); + foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); + List fname = new ArrayList(); + fname.add("count"); + fname.add("sum"); + soi = ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi); + } + + private Aggregation getCurrentAggregationBuffer( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + int row) { + VectorAggregationBufferRow mySet = aggregationBufferSets[row]; + Aggregation myagg = (Aggregation) mySet.getAggregationBuffer(bufferIndex); + return myagg; + } + + @Override + public void aggregateInputSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + VectorizedRowBatch batch) throws HiveException { + + int batchSize = batch.size; + + if (batchSize == 0) { + return; + } + + inputExpression.evaluate(batch); + + inputVector = ( )batch. 
+ cols[this.inputExpression.getOutputColumn()]; + [] vector = inputVector.vector; + + if (inputVector.noNulls) { + if (inputVector.isRepeating) { + iterateNoNullsRepeatingWithAggregationSelection( + aggregationBufferSets, bufferIndex, + vector[0], batchSize); + } else { + if (batch.selectedInUse) { + iterateNoNullsSelectionWithAggregationSelection( + aggregationBufferSets, bufferIndex, + vector, batch.selected, batchSize); + } else { + iterateNoNullsWithAggregationSelection( + aggregationBufferSets, bufferIndex, + vector, batchSize); + } + } + } else { + if (inputVector.isRepeating) { + if (batch.selectedInUse) { + iterateHasNullsRepeatingSelectionWithAggregationSelection( + aggregationBufferSets, bufferIndex, + vector[0], batchSize, batch.selected, inputVector.isNull); + } else { + iterateHasNullsRepeatingWithAggregationSelection( + aggregationBufferSets, bufferIndex, + vector[0], batchSize, inputVector.isNull); + } + } else { + if (batch.selectedInUse) { + iterateHasNullsSelectionWithAggregationSelection( + aggregationBufferSets, bufferIndex, + vector, batchSize, batch.selected, inputVector.isNull); + } else { + iterateHasNullsWithAggregationSelection( + aggregationBufferSets, bufferIndex, + vector, batchSize, inputVector.isNull); + } + } + } + } + + private void iterateNoNullsRepeatingWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + value, + int batchSize) { + + for (int i=0; i < batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + bufferIndex, + i); + myagg.sumValue(value); + } + } + + private void iterateNoNullsSelectionWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + [] values, + int[] selection, + int batchSize) { + + for (int i=0; i < batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + bufferIndex, + i); + myagg.sumValue(values[selection[i]]); + } + } + + private void iterateNoNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + [] values, + int batchSize) { + for (int i=0; i < batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + bufferIndex, + i); + myagg.sumValue(values[i]); + } + } + + private void iterateHasNullsRepeatingSelectionWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + value, + int batchSize, + int[] selection, + boolean[] isNull) { + + for (int i=0; i < batchSize; ++i) { + if (!isNull[selection[i]]) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + bufferIndex, + i); + myagg.sumValue(value); + } + } + + } + + private void iterateHasNullsRepeatingWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + value, + int batchSize, + boolean[] isNull) { + + for (int i=0; i < batchSize; ++i) { + if (!isNull[i]) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + bufferIndex, + i); + myagg.sumValue(value); + } + } + } + + private void iterateHasNullsSelectionWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + [] values, + int batchSize, + int[] selection, + boolean[] isNull) { + + for (int j=0; j < batchSize; ++j) { + int i = selection[j]; + if (!isNull[i]) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + bufferIndex, + j); + myagg.sumValue(values[i]); + } + } + } + 
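+  // Nulls present, no selection vector in use: scan the batch in order and fold each
+  // non-null value into that row's aggregation buffer (slot bufferIndex of
+  // aggregationBufferSets[i]).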
+ private void iterateHasNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + [] values, + int batchSize, + boolean[] isNull) { + + for (int i=0; i < batchSize; ++i) { + if (!isNull[i]) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + bufferIndex, + i); + myagg.sumValue(values[i]); + } + } + } + + + @Override + public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch) + throws HiveException { + + inputExpression.evaluate(batch); + + inputVector = + ()batch.cols[this.inputExpression.getOutputColumn()]; + + int batchSize = batch.size; + + if (batchSize == 0) { + return; + } + + Aggregation myagg = (Aggregation)agg; + + [] vector = inputVector.vector; + + if (inputVector.isRepeating) { + if (inputVector.noNulls) { + if (myagg.isNull) { + myagg.isNull = false; + myagg.sum = 0; + myagg.count = 0; + } + myagg.sum += vector[0]*batchSize; + myagg.count += batchSize; + } + return; + } + + if (!batch.selectedInUse && inputVector.noNulls) { + iterateNoSelectionNoNulls(myagg, vector, batchSize); + } + else if (!batch.selectedInUse) { + iterateNoSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull); + } + else if (inputVector.noNulls){ + iterateSelectionNoNulls(myagg, vector, batchSize, batch.selected); + } + else { + iterateSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull, batch.selected); + } + } + + private void iterateSelectionHasNulls( + Aggregation myagg, + [] vector, + int batchSize, + boolean[] isNull, + int[] selected) { + + for (int j=0; j< batchSize; ++j) { + int i = selected[j]; + if (!isNull[i]) { + value = vector[i]; + if (myagg.isNull) { + myagg.isNull = false; + myagg.sum = 0; + myagg.count = 0; + } + myagg.sum += value; + myagg.count += 1; + } + } + } + + private void iterateSelectionNoNulls( + Aggregation myagg, + [] vector, + int batchSize, + int[] selected) { + + if (myagg.isNull) { + myagg.isNull = false; + myagg.sum = 0; + myagg.count = 0; + } + + for (int i=0; i< batchSize; ++i) { + value = vector[selected[i]]; + myagg.sum += value; + myagg.count += 1; + } + } + + private void iterateNoSelectionHasNulls( + Aggregation myagg, + [] vector, + int batchSize, + boolean[] isNull) { + + for(int i=0;i value = vector[i]; + if (myagg.isNull) { + myagg.isNull = false; + myagg.sum = 0; + myagg.count = 0; + } + myagg.sum += value; + myagg.count += 1; + } + } + } + + private void iterateNoSelectionNoNulls( + Aggregation myagg, + [] vector, + int batchSize) { + if (myagg.isNull) { + myagg.isNull = false; + myagg.sum = 0; + myagg.count = 0; + } + + for (int i=0;i value = vector[i]; + myagg.sum += value; + myagg.count += 1; + } + } + + @Override + public AggregationBuffer getNewAggregationBuffer() throws HiveException { + return new Aggregation(); + } + + @Override + public void reset(AggregationBuffer agg) throws HiveException { + Aggregation myAgg = (Aggregation) agg; + myAgg.isNull = true; + } + + @Override + public Object evaluateOutput( + AggregationBuffer agg) throws HiveException { + Aggregation myagg = (Aggregation) agg; + if (myagg.isNull) { + return null; + } + else { + assert(0 < myagg.count); + resultCount.set (myagg.count); + resultSum.set (myagg.sum); + return partialResult; + } + } + + @Override + public ObjectInspector getOutputObjectInspector() { + return soi; + } + + @Override + public int getAggregationBufferFixedSize() { + JavaDataModel model = JavaDataModel.get(); + return JavaDataModel.alignUp( + model.object() + + model.primitive2() * 2, + 
model.memoryAlign()); + } + + @Override + public void init(AggregationDesc desc) throws HiveException { + // No-op + } + + public VectorExpression getInputExpression() { + return inputExpression; + } + + public void setInputExpression(VectorExpression inputExpression) { + this.inputExpression = inputExpression; + } +} + Added: hive/branches/vectorization/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMax.txt URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMax.txt?rev=1520385&view=auto ============================================================================== --- hive/branches/vectorization/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMax.txt (added) +++ hive/branches/vectorization/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMax.txt Thu Sep 5 18:56:04 2013 @@ -0,0 +1,441 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen; + +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory; +import org.apache.hadoop.hive.ql.exec.vector.VectorAggregationBufferRow; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.AggregationDesc; +import org.apache.hadoop.hive.ql.util.JavaDataModel; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; + +/** +* . Vectorized implementation for MIN/MAX aggregates. +*/ +@Description(name = "", + value = "") +public class extends VectorAggregateExpression { + + private static final long serialVersionUID = 1L; + + /** + * class for storing the current aggregate value. 
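+   * Keeps a single running min/max value plus an isNull flag; checkValue() takes the
+   * first non-null input as-is and afterwards replaces the stored value whenever the
+   * new input wins under the generated comparison operator.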
+ */ + static private final class Aggregation implements AggregationBuffer { + + private static final long serialVersionUID = 1L; + + transient private value; + transient private boolean isNull; + + public void checkValue( value) { + if (isNull) { + isNull = false; + this.value = value; + } else if (value this.value) { + this.value = value; + } + } + + @Override + public int getVariableSize() { + throw new UnsupportedOperationException(); + } + } + + private VectorExpression inputExpression; + private transient VectorExpressionWriter resultWriter; + + public (VectorExpression inputExpression) { + this(); + this.inputExpression = inputExpression; + } + + public () { + super(); + } + + @Override + public void init(AggregationDesc desc) throws HiveException { + resultWriter = VectorExpressionWriterFactory.genVectorExpressionWritable( + desc.getParameters().get(0)); + } + + private Aggregation getCurrentAggregationBuffer( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregrateIndex, + int row) { + VectorAggregationBufferRow mySet = aggregationBufferSets[row]; + Aggregation myagg = (Aggregation) mySet.getAggregationBuffer(aggregrateIndex); + return myagg; + } + + @Override + public void aggregateInputSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregrateIndex, + VectorizedRowBatch batch) throws HiveException { + + int batchSize = batch.size; + + if (batchSize == 0) { + return; + } + + inputExpression.evaluate(batch); + + inputVector = ()batch. + cols[this.inputExpression.getOutputColumn()]; + [] vector = inputVector.vector; + + if (inputVector.noNulls) { + if (inputVector.isRepeating) { + iterateNoNullsRepeatingWithAggregationSelection( + aggregationBufferSets, aggregrateIndex, + vector[0], batchSize); + } else { + if (batch.selectedInUse) { + iterateNoNullsSelectionWithAggregationSelection( + aggregationBufferSets, aggregrateIndex, + vector, batch.selected, batchSize); + } else { + iterateNoNullsWithAggregationSelection( + aggregationBufferSets, aggregrateIndex, + vector, batchSize); + } + } + } else { + if (inputVector.isRepeating) { + if (batch.selectedInUse) { + iterateHasNullsRepeatingSelectionWithAggregationSelection( + aggregationBufferSets, aggregrateIndex, + vector[0], batchSize, batch.selected, inputVector.isNull); + } else { + iterateHasNullsRepeatingWithAggregationSelection( + aggregationBufferSets, aggregrateIndex, + vector[0], batchSize, inputVector.isNull); + } + } else { + if (batch.selectedInUse) { + iterateHasNullsSelectionWithAggregationSelection( + aggregationBufferSets, aggregrateIndex, + vector, batchSize, batch.selected, inputVector.isNull); + } else { + iterateHasNullsWithAggregationSelection( + aggregationBufferSets, aggregrateIndex, + vector, batchSize, inputVector.isNull); + } + } + } + } + + private void iterateNoNullsRepeatingWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregrateIndex, + value, + int batchSize) { + + for (int i=0; i < batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + aggregrateIndex, + i); + myagg.checkValue(value); + } + } + + private void iterateNoNullsSelectionWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregrateIndex, + [] values, + int[] selection, + int batchSize) { + + for (int i=0; i < batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + aggregrateIndex, + i); + myagg.checkValue(values[selection[i]]); + } + } + + private void 
iterateNoNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregrateIndex, + [] values, + int batchSize) { + for (int i=0; i < batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + aggregrateIndex, + i); + myagg.checkValue(values[i]); + } + } + + private void iterateHasNullsRepeatingSelectionWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregrateIndex, + value, + int batchSize, + int[] selection, + boolean[] isNull) { + + for (int i=0; i < batchSize; ++i) { + if (!isNull[selection[i]]) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + aggregrateIndex, + i); + myagg.checkValue(value); + } + } + + } + + private void iterateHasNullsRepeatingWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregrateIndex, + value, + int batchSize, + boolean[] isNull) { + + for (int i=0; i < batchSize; ++i) { + if (!isNull[i]) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + aggregrateIndex, + i); + myagg.checkValue(value); + } + } + } + + private void iterateHasNullsSelectionWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregrateIndex, + [] values, + int batchSize, + int[] selection, + boolean[] isNull) { + + for (int j=0; j < batchSize; ++j) { + int i = selection[j]; + if (!isNull[i]) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + aggregrateIndex, + j); + myagg.checkValue(values[i]); + } + } + } + + private void iterateHasNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregrateIndex, + [] values, + int batchSize, + boolean[] isNull) { + + for (int i=0; i < batchSize; ++i) { + if (!isNull[i]) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + aggregrateIndex, + i); + myagg.checkValue(values[i]); + } + } + } + + @Override + public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch) + throws HiveException { + + inputExpression.evaluate(batch); + + inputVector = ()batch. 
+ cols[this.inputExpression.getOutputColumn()]; + + int batchSize = batch.size; + + if (batchSize == 0) { + return; + } + + Aggregation myagg = (Aggregation)agg; + + [] vector = inputVector.vector; + + if (inputVector.isRepeating) { + if (inputVector.noNulls && + (myagg.isNull || (vector[0] myagg.value))) { + myagg.isNull = false; + myagg.value = vector[0]; + } + return; + } + + if (!batch.selectedInUse && inputVector.noNulls) { + iterateNoSelectionNoNulls(myagg, vector, batchSize); + } + else if (!batch.selectedInUse) { + iterateNoSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull); + } + else if (inputVector.noNulls){ + iterateSelectionNoNulls(myagg, vector, batchSize, batch.selected); + } + else { + iterateSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull, batch.selected); + } + } + + private void iterateSelectionHasNulls( + Aggregation myagg, + [] vector, + int batchSize, + boolean[] isNull, + int[] selected) { + + for (int j=0; j< batchSize; ++j) { + int i = selected[j]; + if (!isNull[i]) { + value = vector[i]; + if (myagg.isNull) { + myagg.isNull = false; + myagg.value = value; + } + else if (value myagg.value) { + myagg.value = value; + } + } + } + } + + private void iterateSelectionNoNulls( + Aggregation myagg, + [] vector, + int batchSize, + int[] selected) { + + if (myagg.isNull) { + myagg.value = vector[selected[0]]; + myagg.isNull = false; + } + + for (int i=0; i< batchSize; ++i) { + value = vector[selected[i]]; + if (value myagg.value) { + myagg.value = value; + } + } + } + + private void iterateNoSelectionHasNulls( + Aggregation myagg, + [] vector, + int batchSize, + boolean[] isNull) { + + for(int i=0;i value = vector[i]; + if (myagg.isNull) { + myagg.value = value; + myagg.isNull = false; + } + else if (value myagg.value) { + myagg.value = value; + } + } + } + } + + private void iterateNoSelectionNoNulls( + Aggregation myagg, + [] vector, + int batchSize) { + if (myagg.isNull) { + myagg.value = vector[0]; + myagg.isNull = false; + } + + for (int i=0;i value = vector[i]; + if (value myagg.value) { + myagg.value = value; + } + } + } + + @Override + public AggregationBuffer getNewAggregationBuffer() throws HiveException { + return new Aggregation(); + } + + @Override + public void reset(AggregationBuffer agg) throws HiveException { + Aggregation myAgg = (Aggregation) agg; + myAgg.isNull = true; + } + + @Override + public Object evaluateOutput( + AggregationBuffer agg) throws HiveException { + Aggregation myagg = (Aggregation) agg; + if (myagg.isNull) { + return null; + } + else { + return resultWriter.writeValue(myagg.value); + } + } + + @Override + public ObjectInspector getOutputObjectInspector() { + return resultWriter.getObjectInspector(); + } + + @Override + public int getAggregationBufferFixedSize() { + JavaDataModel model = JavaDataModel.get(); + return JavaDataModel.alignUp( + model.object() + + model.primitive2(), + model.memoryAlign()); + } + + public VectorExpression getInputExpression() { + return inputExpression; + } + + public void setInputExpression(VectorExpression inputExpression) { + this.inputExpression = inputExpression; + } +} + Added: hive/branches/vectorization/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxString.txt URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxString.txt?rev=1520385&view=auto ============================================================================== --- 
hive/branches/vectorization/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxString.txt (added) +++ hive/branches/vectorization/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxString.txt Thu Sep 5 18:56:04 2013 @@ -0,0 +1,400 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen; + +import java.util.ArrayList; +import java.util.Arrays; +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression; +import org.apache.hadoop.hive.ql.exec.vector.VectorAggregationBufferRow; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.plan.AggregationDesc; +import org.apache.hadoop.hive.ql.util.JavaDataModel; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.io.Text; + +/** +* . Vectorized implementation for MIN/MAX aggregates. +*/ +@Description(name = "", + value = "") +public class extends VectorAggregateExpression { + + private static final long serialVersionUID = 1L; + + /** + * class for storing the current aggregate value. 
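+   * Keeps the current extreme string as raw bytes in a reusable buffer (grown only
+   * when an incoming value is longer than the buffer) together with its length and
+   * an isNull flag.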
+ */ + static private final class Aggregation implements AggregationBuffer { + + private static final long serialVersionUID = 1L; + + transient private final static int MIN_BUFFER_SIZE = 16; + transient private byte[] bytes = new byte[MIN_BUFFER_SIZE]; + transient private int length; + transient private boolean isNull; + + public void checkValue(byte[] bytes, int start, int length) { + if (isNull) { + isNull = false; + assign(bytes, start, length); + } else if (StringExpr.compare( + bytes, start, length, + this.bytes, 0, this.length) 0) { + assign(bytes, start, length); + } + } + + public void assign(byte[] bytes, int start, int length) { + // Avoid new allocation if possible + if (this.bytes.length < length) { + this.bytes = new byte[length]; + } + System.arraycopy(bytes, start, this.bytes, 0, length); + this.length = length; + } + @Override + public int getVariableSize() { + JavaDataModel model = JavaDataModel.get(); + return model.lengthForByteArrayOfSize(bytes.length); + } + } + + private VectorExpression inputExpression; + transient private Text result; + + public (VectorExpression inputExpression) { + this(); + this.inputExpression = inputExpression; + } + + public () { + super(); + result = new Text(); + } + + private Aggregation getCurrentAggregationBuffer( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregrateIndex, + int row) { + VectorAggregationBufferRow mySet = aggregationBufferSets[row]; + Aggregation myagg = (Aggregation) mySet.getAggregationBuffer(aggregrateIndex); + return myagg; + } + +@Override + public void aggregateInputSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregrateIndex, + VectorizedRowBatch batch) throws HiveException { + + int batchSize = batch.size; + + if (batchSize == 0) { + return; + } + + inputExpression.evaluate(batch); + + BytesColumnVector inputColumn = (BytesColumnVector)batch. 
+ cols[this.inputExpression.getOutputColumn()]; + + if (inputColumn.noNulls) { + if (inputColumn.isRepeating) { + iterateNoNullsRepeatingWithAggregationSelection( + aggregationBufferSets, aggregrateIndex, + inputColumn, batchSize); + } else { + if (batch.selectedInUse) { + iterateNoNullsSelectionWithAggregationSelection( + aggregationBufferSets, aggregrateIndex, + inputColumn, batch.selected, batchSize); + } else { + iterateNoNullsWithAggregationSelection( + aggregationBufferSets, aggregrateIndex, + inputColumn, batchSize); + } + } + } else { + if (inputColumn.isRepeating) { + // All nulls, no-op for min/max + } else { + if (batch.selectedInUse) { + iterateHasNullsSelectionWithAggregationSelection( + aggregationBufferSets, aggregrateIndex, + inputColumn, batchSize, batch.selected); + } else { + iterateHasNullsWithAggregationSelection( + aggregationBufferSets, aggregrateIndex, + inputColumn, batchSize); + } + } + } + } + + private void iterateNoNullsRepeatingWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregrateIndex, + BytesColumnVector inputColumn, + int batchSize) { + + byte[] bytes = inputColumn.vector[0]; + int start = inputColumn.start[0]; + int length = inputColumn.length[0]; + for (int i=0; i < batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + aggregrateIndex, + i); + myagg.checkValue(bytes, start, length); + } + } + + private void iterateNoNullsSelectionWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregrateIndex, + BytesColumnVector inputColumn, + int[] selection, + int batchSize) { + + for (int i=0; i < batchSize; ++i) { + int row = selection[i]; + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + aggregrateIndex, + i); + myagg.checkValue(inputColumn.vector[row], + inputColumn.start[row], + inputColumn.length[row]); + } + } + + private void iterateNoNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregrateIndex, + BytesColumnVector inputColumn, + int batchSize) { + for (int i=0; i < batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + aggregrateIndex, + i); + myagg.checkValue(inputColumn.vector[i], + inputColumn.start[i], + inputColumn.length[i]); + } + } + + private void iterateHasNullsSelectionWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregrateIndex, + BytesColumnVector inputColumn, + int batchSize, + int[] selection) { + + for (int i=0; i < batchSize; ++i) { + int row = selection[i]; + if (!inputColumn.isNull[row]) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + aggregrateIndex, + i); + myagg.checkValue(inputColumn.vector[row], + inputColumn.start[row], + inputColumn.length[row]); + } + } + } + + private void iterateHasNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregrateIndex, + BytesColumnVector inputColumn, + int batchSize) { + + for (int i=0; i < batchSize; ++i) { + if (!inputColumn.isNull[i]) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + aggregrateIndex, + i); + myagg.checkValue(inputColumn.vector[i], + inputColumn.start[i], + inputColumn.length[i]); + } + } + } + + @Override + public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch) + throws HiveException { + + inputExpression.evaluate(batch); + + BytesColumnVector inputColumn = (BytesColumnVector)batch. 
+ cols[this.inputExpression.getOutputColumn()]; + + int batchSize = batch.size; + + if (batchSize == 0) { + return; + } + + Aggregation myagg = (Aggregation)agg; + + if (inputColumn.isRepeating) { + if (inputColumn.noNulls) { + myagg.checkValue(inputColumn.vector[0], + inputColumn.start[0], + inputColumn.length[0]); + } + return; + } + + if (!batch.selectedInUse && inputColumn.noNulls) { + iterateNoSelectionNoNulls(myagg, inputColumn, batchSize); + } + else if (!batch.selectedInUse) { + iterateNoSelectionHasNulls(myagg, inputColumn, batchSize); + } + else if (inputColumn.noNulls){ + iterateSelectionNoNulls(myagg, inputColumn, batchSize, batch.selected); + } + else { + iterateSelectionHasNulls(myagg, inputColumn, batchSize, batch.selected); + } + } + + private void iterateSelectionHasNulls( + Aggregation myagg, + BytesColumnVector inputColumn, + int batchSize, + int[] selected) { + + for (int j=0; j< batchSize; ++j) { + int i = selected[j]; + if (!inputColumn.isNull[i]) { + myagg.checkValue(inputColumn.vector[i], + inputColumn.start[i], + inputColumn.length[i]); + } + } + } + + private void iterateSelectionNoNulls( + Aggregation myagg, + BytesColumnVector inputColumn, + int batchSize, + int[] selected) { + + for (int i=0; i< batchSize; ++i) { + myagg.checkValue(inputColumn.vector[i], + inputColumn.start[i], + inputColumn.length[i]); + } + } + + private void iterateNoSelectionHasNulls( + Aggregation myagg, + BytesColumnVector inputColumn, + int batchSize) { + + for (int i=0; i< batchSize; ++i) { + if (!inputColumn.isNull[i]) { + myagg.checkValue(inputColumn.vector[i], + inputColumn.start[i], + inputColumn.length[i]); + } + } + } + + private void iterateNoSelectionNoNulls( + Aggregation myagg, + BytesColumnVector inputColumn, + int batchSize) { + for (int i=0; i< batchSize; ++i) { + myagg.checkValue(inputColumn.vector[i], + inputColumn.start[i], + inputColumn.length[i]); + } + } + + @Override + public AggregationBuffer getNewAggregationBuffer() throws HiveException { + return new Aggregation(); + } + + @Override + public void reset(AggregationBuffer agg) throws HiveException { + Aggregation myAgg = (Aggregation) agg; + myAgg.isNull = true; + } + + @Override + public Object evaluateOutput( + AggregationBuffer agg) throws HiveException { + Aggregation myagg = (Aggregation) agg; + if (myagg.isNull) { + return null; + } + else { + result.set(myagg.bytes, 0, myagg.length); + return result; + } + } + + @Override + public ObjectInspector getOutputObjectInspector() { + return PrimitiveObjectInspectorFactory.writableStringObjectInspector; + } + + @Override + public int getAggregationBufferFixedSize() { + JavaDataModel model = JavaDataModel.get(); + return JavaDataModel.alignUp( + model.object() + + model.ref()+ + model.primitive1()*2, + model.memoryAlign()); + } + + @Override + public boolean hasVariableSize() { + return true; + } + + @Override + public void init(AggregationDesc desc) throws HiveException { + // No-op + } + + public VectorExpression getInputExpression() { + return inputExpression; + } + + public void setInputExpression(VectorExpression inputExpression) { + this.inputExpression = inputExpression; + } +} + Added: hive/branches/vectorization/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFSum.txt URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFSum.txt?rev=1520385&view=auto ============================================================================== --- 
hive/branches/vectorization/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFSum.txt (added) +++ hive/branches/vectorization/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFSum.txt Thu Sep 5 18:56:04 2013 @@ -0,0 +1,436 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen; + +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression; +import org.apache.hadoop.hive.ql.exec.vector.VectorAggregationBufferRow; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.AggregationDesc; +import org.apache.hadoop.hive.ql.util.JavaDataModel; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; + +/** +* . Vectorized implementation for SUM aggregates. +*/ +@Description(name = "sum", + value = "_FUNC_(expr) - Returns the sum value of expr (vectorized, type: )") +public class extends VectorAggregateExpression { + + private static final long serialVersionUID = 1L; + + /** + * class for storing the current aggregate value. 
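+   * Keeps a single running sum plus an isNull flag; sumValue() seeds the sum with
+   * the first non-null input and accumulates from then on.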
+ */ + private static final class Aggregation implements AggregationBuffer { + + private static final long serialVersionUID = 1L; + + transient private sum; + transient private boolean isNull; + + public void sumValue( value) { + if (isNull) { + sum = value; + isNull = false; + } else { + sum += value; + } + } + + @Override + public int getVariableSize() { + throw new UnsupportedOperationException(); + } + } + + private VectorExpression inputExpression; + transient private final result; + + public (VectorExpression inputExpression) { + this(); + this.inputExpression = inputExpression; + } + + public () { + super(); + result = new (); + } + + private Aggregation getCurrentAggregationBuffer( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + int row) { + VectorAggregationBufferRow mySet = aggregationBufferSets[row]; + Aggregation myagg = (Aggregation) mySet.getAggregationBuffer(aggregateIndex); + return myagg; + } + + @Override + public void aggregateInputSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + VectorizedRowBatch batch) throws HiveException { + + int batchSize = batch.size; + + if (batchSize == 0) { + return; + } + + inputExpression.evaluate(batch); + + inputVector = ()batch. + cols[this.inputExpression.getOutputColumn()]; + [] vector = inputVector.vector; + + if (inputVector.noNulls) { + if (inputVector.isRepeating) { + iterateNoNullsRepeatingWithAggregationSelection( + aggregationBufferSets, aggregateIndex, + vector[0], batchSize); + } else { + if (batch.selectedInUse) { + iterateNoNullsSelectionWithAggregationSelection( + aggregationBufferSets, aggregateIndex, + vector, batch.selected, batchSize); + } else { + iterateNoNullsWithAggregationSelection( + aggregationBufferSets, aggregateIndex, + vector, batchSize); + } + } + } else { + if (inputVector.isRepeating) { + if (batch.selectedInUse) { + iterateHasNullsRepeatingSelectionWithAggregationSelection( + aggregationBufferSets, aggregateIndex, + vector[0], batchSize, batch.selected, inputVector.isNull); + } else { + iterateHasNullsRepeatingWithAggregationSelection( + aggregationBufferSets, aggregateIndex, + vector[0], batchSize, inputVector.isNull); + } + } else { + if (batch.selectedInUse) { + iterateHasNullsSelectionWithAggregationSelection( + aggregationBufferSets, aggregateIndex, + vector, batchSize, batch.selected, inputVector.isNull); + } else { + iterateHasNullsWithAggregationSelection( + aggregationBufferSets, aggregateIndex, + vector, batchSize, inputVector.isNull); + } + } + } + } + + private void iterateNoNullsRepeatingWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + value, + int batchSize) { + + for (int i=0; i < batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + aggregateIndex, + i); + myagg.sumValue(value); + } + } + + private void iterateNoNullsSelectionWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + [] values, + int[] selection, + int batchSize) { + + for (int i=0; i < batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + aggregateIndex, + i); + myagg.sumValue(values[selection[i]]); + } + } + + private void iterateNoNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + [] values, + int batchSize) { + for (int i=0; i < batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( + 
aggregationBufferSets, + aggregateIndex, + i); + myagg.sumValue(values[i]); + } + } + + private void iterateHasNullsRepeatingSelectionWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + value, + int batchSize, + int[] selection, + boolean[] isNull) { + + for (int i=0; i < batchSize; ++i) { + if (!isNull[selection[i]]) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + aggregateIndex, + i); + myagg.sumValue(value); + } + } + + } + + private void iterateHasNullsRepeatingWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + value, + int batchSize, + boolean[] isNull) { + + for (int i=0; i < batchSize; ++i) { + if (!isNull[i]) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + aggregateIndex, + i); + myagg.sumValue(value); + } + } + } + + private void iterateHasNullsSelectionWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + [] values, + int batchSize, + int[] selection, + boolean[] isNull) { + + for (int j=0; j < batchSize; ++j) { + int i = selection[j]; + if (!isNull[i]) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + aggregateIndex, + j); + myagg.sumValue(values[i]); + } + } + } + + private void iterateHasNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + [] values, + int batchSize, + boolean[] isNull) { + + for (int i=0; i < batchSize; ++i) { + if (!isNull[i]) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + aggregateIndex, + i); + myagg.sumValue(values[i]); + } + } + } + + + @Override + public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch) + throws HiveException { + + inputExpression.evaluate(batch); + + inputVector = ()batch. 
+ cols[this.inputExpression.getOutputColumn()]; + + int batchSize = batch.size; + + if (batchSize == 0) { + return; + } + + Aggregation myagg = (Aggregation)agg; + + [] vector = inputVector.vector; + + if (inputVector.isRepeating) { + if (inputVector.noNulls) { + if (myagg.isNull) { + myagg.isNull = false; + myagg.sum = 0; + } + myagg.sum += vector[0]*batchSize; + } + return; + } + + if (!batch.selectedInUse && inputVector.noNulls) { + iterateNoSelectionNoNulls(myagg, vector, batchSize); + } + else if (!batch.selectedInUse) { + iterateNoSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull); + } + else if (inputVector.noNulls){ + iterateSelectionNoNulls(myagg, vector, batchSize, batch.selected); + } + else { + iterateSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull, batch.selected); + } + } + + private void iterateSelectionHasNulls( + Aggregation myagg, + [] vector, + int batchSize, + boolean[] isNull, + int[] selected) { + + for (int j=0; j< batchSize; ++j) { + int i = selected[j]; + if (!isNull[i]) { + value = vector[i]; + if (myagg.isNull) { + myagg.isNull = false; + myagg.sum = 0; + } + myagg.sum += value; + } + } + } + + private void iterateSelectionNoNulls( + Aggregation myagg, + [] vector, + int batchSize, + int[] selected) { + + if (myagg.isNull) { + myagg.sum = 0; + myagg.isNull = false; + } + + for (int i=0; i< batchSize; ++i) { + value = vector[selected[i]]; + myagg.sum += value; + } + } + + private void iterateNoSelectionHasNulls( + Aggregation myagg, + [] vector, + int batchSize, + boolean[] isNull) { + + for(int i=0;i value = vector[i]; + if (myagg.isNull) { + myagg.sum = 0; + myagg.isNull = false; + } + myagg.sum += value; + } + } + } + + private void iterateNoSelectionNoNulls( + Aggregation myagg, + [] vector, + int batchSize) { + if (myagg.isNull) { + myagg.sum = 0; + myagg.isNull = false; + } + + for (int i=0;i value = vector[i]; + myagg.sum += value; + } + } + + @Override + public AggregationBuffer getNewAggregationBuffer() throws HiveException { + return new Aggregation(); + } + + @Override + public void reset(AggregationBuffer agg) throws HiveException { + Aggregation myAgg = (Aggregation) agg; + myAgg.isNull = true; + } + + @Override + public Object evaluateOutput(AggregationBuffer agg) throws HiveException { + Aggregation myagg = (Aggregation) agg; + if (myagg.isNull) { + return null; + } + else { + result.set(myagg.sum); + return result; + } + } + + @Override + public ObjectInspector getOutputObjectInspector() { + return ; + } + + @Override + public int getAggregationBufferFixedSize() { + JavaDataModel model = JavaDataModel.get(); + return JavaDataModel.alignUp( + model.object(), + model.memoryAlign()); + } + + @Override + public void init(AggregationDesc desc) throws HiveException { + // No-op + } + + public VectorExpression getInputExpression() { + return inputExpression; + } + + public void setInputExpression(VectorExpression inputExpression) { + this.inputExpression = inputExpression; + } +} + Added: hive/branches/vectorization/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVar.txt URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVar.txt?rev=1520385&view=auto ============================================================================== --- hive/branches/vectorization/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVar.txt (added) +++ hive/branches/vectorization/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVar.txt Thu Sep 5 18:56:04 2013 @@ -0,0 +1,520 @@ +/** + * 
Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression; +import org.apache.hadoop.hive.ql.exec.vector.VectorAggregationBufferRow; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.AggregationDesc; +import org.apache.hadoop.hive.ql.util.JavaDataModel; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; + +/** +* . Vectorized implementation for VARIANCE aggregates. +*/ +@Description(name = "", + value = "") +public class extends VectorAggregateExpression { + + private static final long serialVersionUID = 1L; + + /** + /* class for storing the current aggregate value. 
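+   * Keeps the running sum, count and accumulated sum of squared deviations (the
+   * variance numerator), plus an isNull flag; init() zeroes all of them when the
+   * first non-null value arrives.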
+ */ + private static final class Aggregation implements AggregationBuffer { + + private static final long serialVersionUID = 1L; + + transient private double sum; + transient private long count; + transient private double variance; + transient private boolean isNull; + + public void init() { + isNull = false; + sum = 0; + count = 0; + variance = 0; + } + + @Override + public int getVariableSize() { + throw new UnsupportedOperationException(); + } + } + + private VectorExpression inputExpression; + transient private LongWritable resultCount; + transient private DoubleWritable resultSum; + transient private DoubleWritable resultVariance; + transient private Object[] partialResult; + + transient private ObjectInspector soi; + + + public (VectorExpression inputExpression) { + this(); + this.inputExpression = inputExpression; + } + + public () { + super(); + partialResult = new Object[3]; + resultCount = new LongWritable(); + resultSum = new DoubleWritable(); + resultVariance = new DoubleWritable(); + partialResult[0] = resultCount; + partialResult[1] = resultSum; + partialResult[2] = resultVariance; + initPartialResultInspector(); + } + + private void initPartialResultInspector() { + List foi = new ArrayList(); + foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector); + foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); + foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); + + List fname = new ArrayList(); + fname.add("count"); + fname.add("sum"); + fname.add("variance"); + + soi = ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi); + } + + private Aggregation getCurrentAggregationBuffer( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + int row) { + VectorAggregationBufferRow mySet = aggregationBufferSets[row]; + Aggregation myagg = (Aggregation) mySet.getAggregationBuffer(aggregateIndex); + return myagg; + } + + + @Override + public void aggregateInputSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + VectorizedRowBatch batch) throws HiveException { + + inputExpression.evaluate(batch); + + inputVector = ()batch. 
+ cols[this.inputExpression.getOutputColumn()]; + + int batchSize = batch.size; + + if (batchSize == 0) { + return; + } + + [] vector = inputVector.vector; + + if (inputVector.isRepeating) { + if (inputVector.noNulls || !inputVector.isNull[0]) { + iterateRepeatingNoNullsWithAggregationSelection( + aggregationBufferSets, aggregateIndex, vector[0], batchSize); + } + } + else if (!batch.selectedInUse && inputVector.noNulls) { + iterateNoSelectionNoNullsWithAggregationSelection( + aggregationBufferSets, aggregateIndex, vector, batchSize); + } + else if (!batch.selectedInUse) { + iterateNoSelectionHasNullsWithAggregationSelection( + aggregationBufferSets, aggregateIndex, vector, batchSize, inputVector.isNull); + } + else if (inputVector.noNulls){ + iterateSelectionNoNullsWithAggregationSelection( + aggregationBufferSets, aggregateIndex, vector, batchSize, batch.selected); + } + else { + iterateSelectionHasNullsWithAggregationSelection( + aggregationBufferSets, aggregateIndex, vector, batchSize, + inputVector.isNull, batch.selected); + } + + } + + private void iterateRepeatingNoNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + double value, + int batchSize) { + + for (int i=0; i 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + } + + private void iterateSelectionHasNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + [] vector, + int batchSize, + boolean[] isNull, + int[] selected) { + + for (int j=0; j< batchSize; ++j) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + aggregateIndex, + j); + int i = selected[j]; + if (!isNull[i]) { + double value = vector[i]; + if (myagg.isNull) { + myagg.init (); + } + myagg.sum += value; + myagg.count += 1; + if(myagg.count > 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + } + } + + private void iterateSelectionNoNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + [] vector, + int batchSize, + int[] selected) { + + for (int i=0; i< batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + aggregateIndex, + i); + double value = vector[selected[i]]; + if (myagg.isNull) { + myagg.init (); + } + myagg.sum += value; + myagg.count += 1; + if(myagg.count > 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + } + + private void iterateNoSelectionHasNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + [] vector, + int batchSize, + boolean[] isNull) { + + for(int i=0;i 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + } + } + + private void iterateNoSelectionNoNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + [] vector, + int batchSize) { + + for (int i=0; i 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + } + + @Override + public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch) + throws HiveException { + + inputExpression.evaluate(batch); + + inputVector = ()batch. 
+ cols[this.inputExpression.getOutputColumn()]; + + int batchSize = batch.size; + + if (batchSize == 0) { + return; + } + + Aggregation myagg = (Aggregation)agg; + + [] vector = inputVector.vector; + + if (inputVector.isRepeating) { + if (inputVector.noNulls) { + iterateRepeatingNoNulls(myagg, vector[0], batchSize); + } + } + else if (!batch.selectedInUse && inputVector.noNulls) { + iterateNoSelectionNoNulls(myagg, vector, batchSize); + } + else if (!batch.selectedInUse) { + iterateNoSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull); + } + else if (inputVector.noNulls){ + iterateSelectionNoNulls(myagg, vector, batchSize, batch.selected); + } + else { + iterateSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull, batch.selected); + } + } + + private void iterateRepeatingNoNulls( + Aggregation myagg, + double value, + int batchSize) { + + if (myagg.isNull) { + myagg.init (); + } + + // TODO: conjure a formula w/o iterating + // + + myagg.sum += value; + myagg.count += 1; + if(myagg.count > 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + + // We pulled out i=0 so we can remove the count > 1 check in the loop + for (int i=1; i[] vector, + int batchSize, + boolean[] isNull, + int[] selected) { + + for (int j=0; j< batchSize; ++j) { + int i = selected[j]; + if (!isNull[i]) { + double value = vector[i]; + if (myagg.isNull) { + myagg.init (); + } + myagg.sum += value; + myagg.count += 1; + if(myagg.count > 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + } + } + + private void iterateSelectionNoNulls( + Aggregation myagg, + [] vector, + int batchSize, + int[] selected) { + + if (myagg.isNull) { + myagg.init (); + } + + double value = vector[selected[0]]; + myagg.sum += value; + myagg.count += 1; + if(myagg.count > 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + + // i=0 was pulled out to remove the count > 1 check in the loop + // + for (int i=1; i< batchSize; ++i) { + value = vector[selected[i]]; + myagg.sum += value; + myagg.count += 1; + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + + private void iterateNoSelectionHasNulls( + Aggregation myagg, + [] vector, + int batchSize, + boolean[] isNull) { + + for(int i=0;i 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + } + } + } + + private void iterateNoSelectionNoNulls( + Aggregation myagg, + [] vector, + int batchSize) { + + if (myagg.isNull) { + myagg.init (); + } + + double value = vector[0]; + myagg.sum += value; + myagg.count += 1; + + if(myagg.count > 1) { + double t = myagg.count*value - myagg.sum; + myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + } + + // i=0 was pulled out to remove count > 1 check + for (int i=1; i
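// A minimal standalone sketch of the per-value update rule the VectorUDAFVar
// template above applies for each non-null input, assuming double values; the
// class and method names below are illustrative, not part of the commit. After
// folding a value into sum and count, the sum of squared deviations grows by
// (count*value - sum)^2 / (count*(count-1)).
class StreamingVarianceSketch {

  private double sum;
  private long count;
  private double variance; // accumulated sum of squared deviations from the mean

  void add(double value) {
    sum += value;
    count += 1;
    if (count > 1) {
      double t = count * value - sum;
      variance += (t * t) / ((double) count * (count - 1));
    }
  }

  double populationVariance() {
    return count > 0 ? variance / count : Double.NaN;
  }

  public static void main(String[] args) {
    StreamingVarianceSketch acc = new StreamingVarianceSketch();
    for (double d : new double[] {1.0, 2.0, 4.0, 8.0}) {
      acc.add(d);
    }
    // Mean of {1, 2, 4, 8} is 3.75; population variance is 7.1875.
    System.out.println(acc.populationVariance());
  }
}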