From: imaxon@apache.org
To: commits@asterixdb.incubator.apache.org
Date: Fri, 24 Apr 2015 18:46:11 -0000
In-Reply-To: <0a743a5c99b842bf8de294f7df7a3d2b@git.apache.org>
References: <0a743a5c99b842bf8de294f7df7a3d2b@git.apache.org>
Subject: [44/85] [abbrv] [partial] incubator-asterixdb-hyracks git commit: Move Pregelix and Hivesterix codebase to new repositories: 1. Move Pregelix codebase to https://github.com/pregelix/pregelix; 2. Move Hivesterix codebase to https://code.google.com/p/hives

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/ffc967fd/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCorrelation.java
----------------------------------------------------------------------
diff --git a/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCorrelation.java b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCorrelation.java
deleted file mode 100644
index 8db5c15..0000000
--- a/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCorrelation.java
+++ /dev/null
@@ -1,406 +0,0 @@
-/*
- * Copyright 2009-2013 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.ql.udf.generic; - -import java.io.DataOutput; -import java.io.IOException; -import java.util.ArrayList; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.ql.exec.Description; -import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.hive.serde2.io.DoubleWritable; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.StructField; -import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; -import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.io.LongWritable; - -import edu.uci.ics.hivesterix.runtime.evaluator.BufferSerDeUtil; -import edu.uci.ics.hivesterix.runtime.evaluator.SerializableBuffer; - -/** - * Compute the Pearson correlation coefficient corr(x, y), using the following - * stable one-pass method, based on: "Formulas for Robust, One-Pass Parallel - * Computation of Covariances and Arbitrary-Order Statistical Moments", Philippe - * Pebay, Sandia Labs and - * "The Art of Computer Programming, volume 2: Seminumerical Algorithms", Donald - * Knuth. 
- * Incremental:
- *   n : mx_n = mx_(n-1) + [x_n - mx_(n-1)]/n
- *     : my_n = my_(n-1) + [y_n - my_(n-1)]/n
- *     : c_n  = c_(n-1)  + (x_n - mx_(n-1))*(y_n - my_n)
- *     : vx_n = vx_(n-1) + (x_n - mx_n)*(x_n - mx_(n-1))
- *     : vy_n = vy_(n-1) + (y_n - my_n)*(y_n - my_(n-1))
- * Merge:
- *   c_(A,B)  = c_A  + c_B  + (mx_A - mx_B)*(my_A - my_B)*n_A*n_B/(n_A+n_B)
- *   vx_(A,B) = vx_A + vx_B + (mx_A - mx_B)*(mx_A - mx_B)*n_A*n_B/(n_A+n_B)
- *   vy_(A,B) = vy_A + vy_B + (my_A - my_B)*(my_A - my_B)*n_A*n_B/(n_A+n_B)
- */
-@Description(name = "corr", value = "_FUNC_(x,y) - Returns the Pearson coefficient of correlation\n"
-        + "between a set of number pairs", extended = "The function takes as arguments any pair of numeric types and returns a double.\n"
-        + "Any pair with a NULL is ignored. If the function is applied to an empty set or\n"
-        + "a singleton set, NULL will be returned. Otherwise, it computes the following:\n"
-        + "  COVAR_POP(x,y)/(STDDEV_POP(x)*STDDEV_POP(y))\n"
-        + "where neither x nor y is null,\n"
-        + "COVAR_POP is the population covariance,\n"
-        + "and STDDEV_POP is the population standard deviation.")
-public class GenericUDAFCorrelation extends AbstractGenericUDAFResolver {
-
-    static final Log LOG = LogFactory.getLog(GenericUDAFCorrelation.class.getName());
-
-    @Override
-    public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException {
-        if (parameters.length != 2) {
-            throw new UDFArgumentTypeException(parameters.length - 1, "Exactly two arguments are expected.");
-        }
-
-        if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
-            throw new UDFArgumentTypeException(0, "Only primitive type arguments are accepted but "
-                    + parameters[0].getTypeName() + " is passed.");
-        }
-
-        if (parameters[1].getCategory() != ObjectInspector.Category.PRIMITIVE) {
-            throw new UDFArgumentTypeException(1, "Only primitive type arguments are accepted but "
-                    + parameters[1].getTypeName() + " is passed.");
-        }
-
-        switch (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory()) {
-            case BYTE:
-            case SHORT:
-            case INT:
-            case LONG:
-            case FLOAT:
-            case DOUBLE:
-                switch (((PrimitiveTypeInfo) parameters[1]).getPrimitiveCategory()) {
-                    case BYTE:
-                    case SHORT:
-                    case INT:
-                    case LONG:
-                    case FLOAT:
-                    case DOUBLE:
-                        return new GenericUDAFCorrelationEvaluator();
-                    case STRING:
-                    case BOOLEAN:
-                    default:
-                        throw new UDFArgumentTypeException(1, "Only numeric type arguments are accepted but "
-                                + parameters[1].getTypeName() + " is passed.");
-                }
-            case STRING:
-            case BOOLEAN:
-            default:
-                throw new UDFArgumentTypeException(0, "Only numeric type arguments are accepted but "
-                        + parameters[0].getTypeName() + " is passed.");
-        }
-    }
-
-    /**
-     * Evaluate the Pearson correlation coefficient using a stable one-pass
-     * algorithm, based on work by Philippe Pébay and Donald Knuth.
-     * Incremental:
-     *   n : mx_n = mx_(n-1) + [x_n - mx_(n-1)]/n
-     *     : my_n = my_(n-1) + [y_n - my_(n-1)]/n
-     *     : c_n  = c_(n-1)  + (x_n - mx_(n-1))*(y_n - my_n)
-     *     : vx_n = vx_(n-1) + (x_n - mx_n)*(x_n - mx_(n-1))
-     *     : vy_n = vy_(n-1) + (y_n - my_n)*(y_n - my_(n-1))
-     * Merge:
-     *   c_X      = c_A  + c_B  + (mx_A - mx_B)*(my_A - my_B)*n_A*n_B/n_X, where n_X = n_A + n_B
-     *   vx_(A,B) = vx_A + vx_B + (mx_A - mx_B)*(mx_A - mx_B)*n_A*n_B/(n_A+n_B)
-     *   vy_(A,B) = vy_A + vy_B + (my_A - my_B)*(my_A - my_B)*n_A*n_B/(n_A+n_B)
-     */
-    public static class GenericUDAFCorrelationEvaluator extends GenericUDAFEvaluator {
-
-        // For PARTIAL1 and COMPLETE
-        private PrimitiveObjectInspector xInputOI;
-        private PrimitiveObjectInspector yInputOI;
-
-        // For PARTIAL2 and FINAL
-        private StructObjectInspector soi;
-        private StructField countField;
-        private StructField xavgField;
-        private StructField yavgField;
-        private StructField xvarField;
-        private StructField yvarField;
-        private StructField covarField;
-        private LongObjectInspector countFieldOI;
-        private DoubleObjectInspector xavgFieldOI;
-        private DoubleObjectInspector yavgFieldOI;
-        private DoubleObjectInspector xvarFieldOI;
-        private DoubleObjectInspector yvarFieldOI;
-        private DoubleObjectInspector covarFieldOI;
-
-        // For PARTIAL1 and PARTIAL2
-        private Object[] partialResult;
-
-        // For FINAL and COMPLETE
-        private DoubleWritable result;
-
-        @Override
-        public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
-            super.init(m, parameters);
-
-            // init input
-            if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) {
-                assert (parameters.length == 2);
-                xInputOI = (PrimitiveObjectInspector) parameters[0];
-                yInputOI = (PrimitiveObjectInspector) parameters[1];
-            } else {
-                assert (parameters.length == 1);
-                soi = (StructObjectInspector) parameters[0];
-
-                countField = soi.getStructFieldRef("count");
-                xavgField = soi.getStructFieldRef("xavg");
-                yavgField = soi.getStructFieldRef("yavg");
-                xvarField = soi.getStructFieldRef("xvar");
-                yvarField = soi.getStructFieldRef("yvar");
-                covarField = soi.getStructFieldRef("covar");
-
-                countFieldOI = (LongObjectInspector) countField.getFieldObjectInspector();
-                xavgFieldOI = (DoubleObjectInspector) xavgField.getFieldObjectInspector();
-                yavgFieldOI = (DoubleObjectInspector) yavgField.getFieldObjectInspector();
-                xvarFieldOI = (DoubleObjectInspector) xvarField.getFieldObjectInspector();
-                yvarFieldOI = (DoubleObjectInspector) yvarField.getFieldObjectInspector();
-                covarFieldOI = (DoubleObjectInspector) covarField.getFieldObjectInspector();
-            }
-
-            // init output
-            if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) {
-                // The output of a partial aggregation is a struct containing
-                // a long count, two double averages, two double variances,
-                // and a double covariance.
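// A minimal standalone sketch of the Merge rule from the javadoc above, kept
// separate from the deleted file. The array layout {n, mx, my, vx, vy, c} and
// the method name are hypothetical; the file itself carries the state in
// Hive writables instead.
static void mergeCorrelationStates(double[] a, double[] b) {
    double nA = a[0], nB = b[0], n = nA + nB;    // merged count (assumes n > 0)
    double dx = a[1] - b[1];                     // mx_A - mx_B, using the old means
    double dy = a[2] - b[2];                     // my_A - my_B
    a[5] += b[5] + dx * dy * nA * nB / n;        // c_(A,B)
    a[3] += b[3] + dx * dx * nA * nB / n;        // vx_(A,B)
    a[4] += b[4] + dy * dy * nA * nB / n;        // vy_(A,B)
    a[1] = (a[1] * nA + b[1] * nB) / n;          // merged mean of x, updated last
    a[2] = (a[2] * nA + b[2] * nB) / n;          // merged mean of y
    a[0] = n;
}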
- - ArrayList foi = new ArrayList(); - - foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector); - foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); - foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); - foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); - foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); - foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); - - ArrayList fname = new ArrayList(); - fname.add("count"); - fname.add("xavg"); - fname.add("yavg"); - fname.add("xvar"); - fname.add("yvar"); - fname.add("covar"); - - partialResult = new Object[6]; - partialResult[0] = new LongWritable(0); - partialResult[1] = new DoubleWritable(0); - partialResult[2] = new DoubleWritable(0); - partialResult[3] = new DoubleWritable(0); - partialResult[4] = new DoubleWritable(0); - partialResult[5] = new DoubleWritable(0); - - return ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi); - - } else { - setResult(new DoubleWritable(0)); - return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector; - } - } - - static class StdAgg implements SerializableBuffer { - long count; // number n of elements - double xavg; // average of x elements - double yavg; // average of y elements - double xvar; // n times the variance of x elements - double yvar; // n times the variance of y elements - double covar; // n times the covariance - - @Override - public void deSerializeAggBuffer(byte[] data, int start, int len) { - count = BufferSerDeUtil.getLong(data, start); - start += 8; - xavg = BufferSerDeUtil.getDouble(data, start); - start += 8; - yavg = BufferSerDeUtil.getDouble(data, start); - start += 8; - xvar = BufferSerDeUtil.getDouble(data, start); - start += 8; - yvar = BufferSerDeUtil.getDouble(data, start); - start += 8; - covar = BufferSerDeUtil.getDouble(data, start); - } - - @Override - public void serializeAggBuffer(byte[] data, int start, int len) { - BufferSerDeUtil.writeLong(count, data, start); - start += 8; - BufferSerDeUtil.writeDouble(xavg, data, start); - start += 8; - BufferSerDeUtil.writeDouble(yavg, data, start); - start += 8; - BufferSerDeUtil.writeDouble(xvar, data, start); - start += 8; - BufferSerDeUtil.writeDouble(yvar, data, start); - start += 8; - BufferSerDeUtil.writeDouble(covar, data, start); - } - - @Override - public void serializeAggBuffer(DataOutput output) throws IOException { - output.writeLong(count); - output.writeDouble(xavg); - output.writeDouble(yavg); - output.writeDouble(xvar); - output.writeDouble(yvar); - output.writeDouble(covar); - } - }; - - @Override - public AggregationBuffer getNewAggregationBuffer() throws HiveException { - StdAgg result = new StdAgg(); - reset(result); - return result; - } - - @Override - public void reset(AggregationBuffer agg) throws HiveException { - StdAgg myagg = (StdAgg) agg; - myagg.count = 0; - myagg.xavg = 0; - myagg.yavg = 0; - myagg.xvar = 0; - myagg.yvar = 0; - myagg.covar = 0; - } - - @Override - public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException { - assert (parameters.length == 2); - Object px = parameters[0]; - Object py = parameters[1]; - if (px != null && py != null) { - StdAgg myagg = (StdAgg) agg; - double vx = PrimitiveObjectInspectorUtils.getDouble(px, xInputOI); - double vy = PrimitiveObjectInspectorUtils.getDouble(py, yInputOI); - double xavgOld = myagg.xavg; - double yavgOld = myagg.yavg; - myagg.count++; - myagg.xavg += (vx - xavgOld) / 
myagg.count; - myagg.yavg += (vy - yavgOld) / myagg.count; - if (myagg.count > 1) { - myagg.covar += (vx - xavgOld) * (vy - myagg.yavg); - myagg.xvar += (vx - xavgOld) * (vx - myagg.xavg); - myagg.yvar += (vy - yavgOld) * (vy - myagg.yavg); - } - } - } - - @Override - public Object terminatePartial(AggregationBuffer agg) throws HiveException { - StdAgg myagg = (StdAgg) agg; - ((LongWritable) partialResult[0]).set(myagg.count); - ((DoubleWritable) partialResult[1]).set(myagg.xavg); - ((DoubleWritable) partialResult[2]).set(myagg.yavg); - ((DoubleWritable) partialResult[3]).set(myagg.xvar); - ((DoubleWritable) partialResult[4]).set(myagg.yvar); - ((DoubleWritable) partialResult[5]).set(myagg.covar); - return partialResult; - } - - @Override - public void merge(AggregationBuffer agg, Object partial) throws HiveException { - if (partial != null) { - StdAgg myagg = (StdAgg) agg; - - Object partialCount = soi.getStructFieldData(partial, countField); - Object partialXAvg = soi.getStructFieldData(partial, xavgField); - Object partialYAvg = soi.getStructFieldData(partial, yavgField); - Object partialXVar = soi.getStructFieldData(partial, xvarField); - Object partialYVar = soi.getStructFieldData(partial, yvarField); - Object partialCovar = soi.getStructFieldData(partial, covarField); - - long nA = myagg.count; - long nB = countFieldOI.get(partialCount); - - if (nA == 0) { - // Just copy the information since there is nothing so far - myagg.count = countFieldOI.get(partialCount); - myagg.xavg = xavgFieldOI.get(partialXAvg); - myagg.yavg = yavgFieldOI.get(partialYAvg); - myagg.xvar = xvarFieldOI.get(partialXVar); - myagg.yvar = yvarFieldOI.get(partialYVar); - myagg.covar = covarFieldOI.get(partialCovar); - } - - if (nA != 0 && nB != 0) { - // Merge the two partials - double xavgA = myagg.xavg; - double yavgA = myagg.yavg; - double xavgB = xavgFieldOI.get(partialXAvg); - double yavgB = yavgFieldOI.get(partialYAvg); - double xvarB = xvarFieldOI.get(partialXVar); - double yvarB = yvarFieldOI.get(partialYVar); - double covarB = covarFieldOI.get(partialCovar); - - myagg.count += nB; - myagg.xavg = (xavgA * nA + xavgB * nB) / myagg.count; - myagg.yavg = (yavgA * nA + yavgB * nB) / myagg.count; - myagg.xvar += xvarB + (xavgA - xavgB) * (xavgA - xavgB) * myagg.count; - myagg.yvar += yvarB + (yavgA - yavgB) * (yavgA - yavgB) * myagg.count; - myagg.covar += covarB + (xavgA - xavgB) * (yavgA - yavgB) * ((double) (nA * nB) / myagg.count); - } - } - } - - @Override - public Object terminate(AggregationBuffer agg) throws HiveException { - StdAgg myagg = (StdAgg) agg; - - if (myagg.count < 2) { // SQL standard - return null for zero or one - // pair - return null; - } else { - getResult().set(myagg.covar / java.lang.Math.sqrt(myagg.xvar) / java.lang.Math.sqrt(myagg.yvar)); - return getResult(); - } - } - - public void setResult(DoubleWritable result) { - this.result = result; - } - - public DoubleWritable getResult() { - return result; - } - } - -} http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/ffc967fd/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java ---------------------------------------------------------------------- diff --git a/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java deleted file mode 100644 index 1b96259..0000000 --- 
a/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java +++ /dev/null @@ -1,185 +0,0 @@ -/* - * Copyright 2009-2013 by The Regents of the University of California - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * you may obtain a copy of the License from - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.ql.udf.generic; - -import java.io.DataOutput; -import java.io.IOException; - -import org.apache.hadoop.hive.ql.exec.Description; -import org.apache.hadoop.hive.ql.exec.UDFArgumentException; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.io.LongWritable; - -import edu.uci.ics.hivesterix.runtime.evaluator.BufferSerDeUtil; -import edu.uci.ics.hivesterix.runtime.evaluator.SerializableBuffer; - -/** - * This class implements the COUNT aggregation function as in SQL. - */ -@Description(name = "count", value = "_FUNC_(*) - Returns the total number of retrieved rows, including " - + "rows containing NULL values.\n" - - + "_FUNC_(expr) - Returns the number of rows for which the supplied " + "expression is non-NULL.\n" - - + "_FUNC_(DISTINCT expr[, expr...]) - Returns the number of rows for " - + "which the supplied expression(s) are unique and non-NULL.") -public class GenericUDAFCount implements GenericUDAFResolver2 { - - @Override - public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException { - // This method implementation is preserved for backward compatibility. 
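// The evaluator returned below counts a row only if none of its arguments is
// NULL, while COUNT(*) counts every row (see iterate() further down). A
// minimal sketch of that decision, using a hypothetical helper outside Hive's
// evaluator machinery:
static boolean countsRow(boolean countAllColumns, Object[] args) {
    if (countAllColumns) {
        return true;          // COUNT(*): every row, even all-NULL rows
    }
    for (Object a : args) {
        if (a == null) {
            return false;     // COUNT(expr, ...): skip rows with any NULL
        }
    }
    return true;
}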
- return new GenericUDAFCountEvaluator(); - } - - @Override - public GenericUDAFEvaluator getEvaluator(GenericUDAFParameterInfo paramInfo) throws SemanticException { - - @SuppressWarnings("deprecation") - TypeInfo[] parameters = paramInfo.getParameters(); - - if (parameters.length == 0) { - if (!paramInfo.isAllColumns()) { - throw new UDFArgumentException("Argument expected"); - } - assert !paramInfo.isDistinct() : "DISTINCT not supported with *"; - } else { - if (parameters.length > 1 && !paramInfo.isDistinct()) { - throw new UDFArgumentException("DISTINCT keyword must be specified"); - } - assert !paramInfo.isAllColumns() : "* not supported in expression list"; - } - - return new GenericUDAFCountEvaluator().setCountAllColumns(paramInfo.isAllColumns()); - } - - /** - * GenericUDAFCountEvaluator. - */ - public static class GenericUDAFCountEvaluator extends GenericUDAFEvaluator { - private boolean countAllColumns = false; - private LongObjectInspector partialCountAggOI; - private LongWritable result; - - @Override - public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException { - super.init(m, parameters); - partialCountAggOI = PrimitiveObjectInspectorFactory.writableLongObjectInspector; - result = new LongWritable(0); - return PrimitiveObjectInspectorFactory.writableLongObjectInspector; - } - - private GenericUDAFCountEvaluator setCountAllColumns(boolean countAllCols) { - countAllColumns = countAllCols; - return this; - } - - /** class for storing count value. */ - static class CountAgg implements SerializableBuffer { - long value; - - @Override - public void deSerializeAggBuffer(byte[] data, int start, int len) { - value = BufferSerDeUtil.getLong(data, start); - } - - @Override - public void serializeAggBuffer(byte[] data, int start, int len) { - BufferSerDeUtil.writeLong(value, data, start); - } - - @Override - public void serializeAggBuffer(DataOutput output) throws IOException { - output.writeLong(value); - } - } - - @Override - public AggregationBuffer getNewAggregationBuffer() throws HiveException { - CountAgg buffer = new CountAgg(); - reset(buffer); - return buffer; - } - - @Override - public void reset(AggregationBuffer agg) throws HiveException { - ((CountAgg) agg).value = 0; - } - - @Override - public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException { - // parameters == null means the input table/split is empty - if (parameters == null) { - return; - } - if (countAllColumns) { - assert parameters.length == 0; - ((CountAgg) agg).value++; - } else { - assert parameters.length > 0; - boolean countThisRow = true; - for (Object nextParam : parameters) { - if (nextParam == null) { - countThisRow = false; - break; - } - } - if (countThisRow) { - ((CountAgg) agg).value++; - } - } - } - - @Override - public void merge(AggregationBuffer agg, Object partial) throws HiveException { - if (partial != null) { - long p = partialCountAggOI.get(partial); - ((CountAgg) agg).value += p; - } - } - - @Override - public Object terminate(AggregationBuffer agg) throws HiveException { - result.set(((CountAgg) agg).value); - return result; - } - - @Override - public Object terminatePartial(AggregationBuffer agg) throws HiveException { - return terminate(agg); - } - } -} http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/ffc967fd/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCovariance.java ---------------------------------------------------------------------- diff --git 
a/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCovariance.java b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCovariance.java deleted file mode 100644 index 0e98e11..0000000 --- a/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCovariance.java +++ /dev/null @@ -1,355 +0,0 @@ -/* - * Copyright 2009-2013 by The Regents of the University of California - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * you may obtain a copy of the License from - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.ql.udf.generic; - -import java.io.DataOutput; -import java.io.IOException; -import java.util.ArrayList; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.ql.exec.Description; -import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.hive.serde2.io.DoubleWritable; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.StructField; -import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; -import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.io.LongWritable; - -import edu.uci.ics.hivesterix.runtime.evaluator.BufferSerDeUtil; -import edu.uci.ics.hivesterix.runtime.evaluator.SerializableBuffer; - -/** - * Compute the covariance covar_pop(x, y), using the following one-pass method - * (ref. 
"Formulas for Robust, One-Pass Parallel Computation of Covariances and - * Arbitrary-Order Statistical Moments", Philippe Pebay, Sandia Labs): - * Incremental: n : mx_n = mx_(n-1) + [x_n - mx_(n-1)]/n : my_n = - * my_(n-1) + [y_n - my_(n-1)]/n : c_n = c_(n-1) + (x_n - mx_(n-1))*(y_n - * - my_n) : - * Merge: c_X = c_A + c_B + (mx_A - mx_B)*(my_A - my_B)*n_A*n_B/n_X - */ -@Description(name = "covariance,covar_pop", value = "_FUNC_(x,y) - Returns the population covariance of a set of number pairs", extended = "The function takes as arguments any pair of numeric types and returns a double.\n" - + "Any pair with a NULL is ignored. If the function is applied to an empty set, NULL\n" - + "will be returned. Otherwise, it computes the following:\n" - + " (SUM(x*y)-SUM(x)*SUM(y)/COUNT(x,y))/COUNT(x,y)\n" + "where neither x nor y is null.") -public class GenericUDAFCovariance extends AbstractGenericUDAFResolver { - - static final Log LOG = LogFactory.getLog(GenericUDAFCovariance.class.getName()); - - @Override - public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException { - if (parameters.length != 2) { - throw new UDFArgumentTypeException(parameters.length - 1, "Exactly two arguments are expected."); - } - - if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) { - throw new UDFArgumentTypeException(0, "Only primitive type arguments are accepted but " - + parameters[0].getTypeName() + " is passed."); - } - - if (parameters[1].getCategory() != ObjectInspector.Category.PRIMITIVE) { - throw new UDFArgumentTypeException(1, "Only primitive type arguments are accepted but " - + parameters[1].getTypeName() + " is passed."); - } - - switch (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory()) { - case BYTE: - case SHORT: - case INT: - case LONG: - case FLOAT: - case DOUBLE: - switch (((PrimitiveTypeInfo) parameters[1]).getPrimitiveCategory()) { - case BYTE: - case SHORT: - case INT: - case LONG: - case FLOAT: - case DOUBLE: - return new GenericUDAFCovarianceEvaluator(); - case STRING: - case BOOLEAN: - default: - throw new UDFArgumentTypeException(1, "Only numeric or string type arguments are accepted but " - + parameters[1].getTypeName() + " is passed."); - } - case STRING: - case BOOLEAN: - default: - throw new UDFArgumentTypeException(0, "Only numeric or string type arguments are accepted but " - + parameters[0].getTypeName() + " is passed."); - } - } - - /** - * Evaluate the variance using the algorithm described in - * http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance, - * presumably by Pébay, Philippe (2008), in "Formulas for Robust, One-Pass - * Parallel Computation of Covariances and Arbitrary-Order Statistical - * Moments", Technical Report SAND2008-6212, Sandia National Laboratories, - * http://infoserve.sandia.gov/sand_doc/2008/086212.pdf - * Incremental: n : mx_n = mx_(n-1) + [x_n - mx_(n-1)]/n : - * my_n = my_(n-1) + [y_n - my_(n-1)]/n : c_n = c_(n-1) + (x_n - - * mx_(n-1))*(y_n - my_n) : - * Merge: c_X = c_A + c_B + (mx_A - mx_B)*(my_A - my_B)*n_A*n_B/n_X - * This one-pass algorithm is stable. 
- */ - public static class GenericUDAFCovarianceEvaluator extends GenericUDAFEvaluator { - - // For PARTIAL1 and COMPLETE - private PrimitiveObjectInspector xInputOI; - private PrimitiveObjectInspector yInputOI; - - // For PARTIAL2 and FINAL - private StructObjectInspector soi; - private StructField countField; - private StructField xavgField; - private StructField yavgField; - private StructField covarField; - private LongObjectInspector countFieldOI; - private DoubleObjectInspector xavgFieldOI; - private DoubleObjectInspector yavgFieldOI; - private DoubleObjectInspector covarFieldOI; - - // For PARTIAL1 and PARTIAL2 - private Object[] partialResult; - - // For FINAL and COMPLETE - private DoubleWritable result; - - @Override - public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException { - super.init(m, parameters); - - // init input - if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) { - assert (parameters.length == 2); - xInputOI = (PrimitiveObjectInspector) parameters[0]; - yInputOI = (PrimitiveObjectInspector) parameters[1]; - } else { - assert (parameters.length == 1); - soi = (StructObjectInspector) parameters[0]; - - countField = soi.getStructFieldRef("count"); - xavgField = soi.getStructFieldRef("xavg"); - yavgField = soi.getStructFieldRef("yavg"); - covarField = soi.getStructFieldRef("covar"); - - countFieldOI = (LongObjectInspector) countField.getFieldObjectInspector(); - xavgFieldOI = (DoubleObjectInspector) xavgField.getFieldObjectInspector(); - yavgFieldOI = (DoubleObjectInspector) yavgField.getFieldObjectInspector(); - covarFieldOI = (DoubleObjectInspector) covarField.getFieldObjectInspector(); - } - - // init output - if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) { - // The output of a partial aggregation is a struct containing - // a long count, two double averages, and a double covariance. 
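// The iterate step further down in this file applies the one-pass update in a
// deliberate order: yavg is advanced first, covar is accumulated from the OLD
// xavg and the NEW yavg, and xavg is advanced last, matching
// c_n = c_(n-1) + (x_n - mx_(n-1))*(y_n - my_n). A minimal sketch, assuming a
// hypothetical plain-double state array {n, mx, my, c}:
static void covarianceStep(double[] s, double x, double y) {
    s[0] += 1;                                // n
    s[2] += (y - s[2]) / s[0];                // my_n (new)
    if (s[0] > 1) {
        s[3] += (x - s[1]) * (y - s[2]);      // c_n uses mx_(n-1) and my_n
    }
    s[1] += (x - s[1]) / s[0];                // mx_n, updated last
}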
- - ArrayList foi = new ArrayList(); - - foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector); - foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); - foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); - foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); - - ArrayList fname = new ArrayList(); - fname.add("count"); - fname.add("xavg"); - fname.add("yavg"); - fname.add("covar"); - - partialResult = new Object[4]; - partialResult[0] = new LongWritable(0); - partialResult[1] = new DoubleWritable(0); - partialResult[2] = new DoubleWritable(0); - partialResult[3] = new DoubleWritable(0); - - return ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi); - - } else { - setResult(new DoubleWritable(0)); - return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector; - } - } - - static class StdAgg implements SerializableBuffer { - long count; // number n of elements - double xavg; // average of x elements - double yavg; // average of y elements - double covar; // n times the covariance - - @Override - public void deSerializeAggBuffer(byte[] data, int start, int len) { - count = BufferSerDeUtil.getLong(data, start); - start += 8; - xavg = BufferSerDeUtil.getDouble(data, start); - start += 8; - yavg = BufferSerDeUtil.getDouble(data, start); - start += 8; - covar = BufferSerDeUtil.getDouble(data, start); - } - - @Override - public void serializeAggBuffer(byte[] data, int start, int len) { - BufferSerDeUtil.writeLong(count, data, start); - start += 8; - BufferSerDeUtil.writeDouble(xavg, data, start); - start += 8; - BufferSerDeUtil.writeDouble(yavg, data, start); - start += 8; - BufferSerDeUtil.writeDouble(covar, data, start); - } - - @Override - public void serializeAggBuffer(DataOutput output) throws IOException { - output.writeLong(count); - output.writeDouble(xavg); - output.writeDouble(yavg); - output.writeDouble(covar); - } - }; - - @Override - public AggregationBuffer getNewAggregationBuffer() throws HiveException { - StdAgg result = new StdAgg(); - reset(result); - return result; - } - - @Override - public void reset(AggregationBuffer agg) throws HiveException { - StdAgg myagg = (StdAgg) agg; - myagg.count = 0; - myagg.xavg = 0; - myagg.yavg = 0; - myagg.covar = 0; - } - - @Override - public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException { - assert (parameters.length == 2); - Object px = parameters[0]; - Object py = parameters[1]; - if (px != null && py != null) { - StdAgg myagg = (StdAgg) agg; - double vx = PrimitiveObjectInspectorUtils.getDouble(px, xInputOI); - double vy = PrimitiveObjectInspectorUtils.getDouble(py, yInputOI); - myagg.count++; - myagg.yavg = myagg.yavg + (vy - myagg.yavg) / myagg.count; - if (myagg.count > 1) { - myagg.covar += (vx - myagg.xavg) * (vy - myagg.yavg); - } - myagg.xavg = myagg.xavg + (vx - myagg.xavg) / myagg.count; - } - } - - @Override - public Object terminatePartial(AggregationBuffer agg) throws HiveException { - StdAgg myagg = (StdAgg) agg; - ((LongWritable) partialResult[0]).set(myagg.count); - ((DoubleWritable) partialResult[1]).set(myagg.xavg); - ((DoubleWritable) partialResult[2]).set(myagg.yavg); - ((DoubleWritable) partialResult[3]).set(myagg.covar); - return partialResult; - } - - @Override - public void merge(AggregationBuffer agg, Object partial) throws HiveException { - if (partial != null) { - StdAgg myagg = (StdAgg) agg; - - Object partialCount = soi.getStructFieldData(partial, countField); - Object partialXAvg = 
soi.getStructFieldData(partial, xavgField); - Object partialYAvg = soi.getStructFieldData(partial, yavgField); - Object partialCovar = soi.getStructFieldData(partial, covarField); - - long nA = myagg.count; - long nB = countFieldOI.get(partialCount); - - if (nA == 0) { - // Just copy the information since there is nothing so far - myagg.count = countFieldOI.get(partialCount); - myagg.xavg = xavgFieldOI.get(partialXAvg); - myagg.yavg = yavgFieldOI.get(partialYAvg); - myagg.covar = covarFieldOI.get(partialCovar); - } - - if (nA != 0 && nB != 0) { - // Merge the two partials - double xavgA = myagg.xavg; - double yavgA = myagg.yavg; - double xavgB = xavgFieldOI.get(partialXAvg); - double yavgB = yavgFieldOI.get(partialYAvg); - double covarB = covarFieldOI.get(partialCovar); - - myagg.count += nB; - myagg.xavg = (xavgA * nA + xavgB * nB) / myagg.count; - myagg.yavg = (yavgA * nA + yavgB * nB) / myagg.count; - myagg.covar += covarB + (xavgA - xavgB) * (yavgA - yavgB) * ((double) (nA * nB) / myagg.count); - } - } - } - - @Override - public Object terminate(AggregationBuffer agg) throws HiveException { - StdAgg myagg = (StdAgg) agg; - - if (myagg.count == 0) { // SQL standard - return null for zero - // elements - return null; - } else { - getResult().set(myagg.covar / (myagg.count)); - return getResult(); - } - } - - public void setResult(DoubleWritable result) { - this.result = result; - } - - public DoubleWritable getResult() { - return result; - } - } - -} http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/ffc967fd/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java ---------------------------------------------------------------------- diff --git a/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java deleted file mode 100644 index 61a32e5..0000000 --- a/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java +++ /dev/null @@ -1,286 +0,0 @@ -/* - * Copyright 2009-2013 by The Regents of the University of California - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * you may obtain a copy of the License from - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.ql.udf.generic; - -import java.io.DataOutput; -import java.io.IOException; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.ql.exec.Description; -import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.hive.serde2.io.DoubleWritable; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; -import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.util.StringUtils; - -import edu.uci.ics.hivesterix.runtime.evaluator.BufferSerDeUtil; -import edu.uci.ics.hivesterix.runtime.evaluator.SerializableBuffer; - -/** - * GenericUDAFSum. - */ -@Description(name = "sum", value = "_FUNC_(x) - Returns the sum of a set of numbers") -public class GenericUDAFSum extends AbstractGenericUDAFResolver { - - static final Log LOG = LogFactory.getLog(GenericUDAFSum.class.getName()); - - @Override - public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException { - if (parameters.length != 1) { - throw new UDFArgumentTypeException(parameters.length - 1, "Exactly one argument is expected."); - } - - if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) { - throw new UDFArgumentTypeException(0, "Only primitive type arguments are accepted but " - + parameters[0].getTypeName() + " is passed."); - } - switch (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory()) { - case BYTE: - case SHORT: - case INT: - case LONG: - return new GenericUDAFSumLong(); - case FLOAT: - case DOUBLE: - case STRING: - return new GenericUDAFSumDouble(); - case BOOLEAN: - default: - throw new UDFArgumentTypeException(0, "Only numeric or string type arguments are accepted but " - + parameters[0].getTypeName() + " is passed."); - } - } - - /** - * GenericUDAFSumDouble. - */ - public static class GenericUDAFSumDouble extends GenericUDAFEvaluator { - private PrimitiveObjectInspector inputOI; - private DoubleWritable result; - - @Override - public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException { - assert (parameters.length == 1); - super.init(m, parameters); - result = new DoubleWritable(0); - inputOI = (PrimitiveObjectInspector) parameters[0]; - return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector; - } - - /** class for storing double sum value. 
*/ - static class SumDoubleAgg implements SerializableBuffer { - boolean empty; - double sum; - - @Override - public void deSerializeAggBuffer(byte[] data, int start, int len) { - empty = BufferSerDeUtil.getBoolean(data, start); - start += 1; - sum = BufferSerDeUtil.getDouble(data, start); - } - - @Override - public void serializeAggBuffer(byte[] data, int start, int len) { - BufferSerDeUtil.writeBoolean(empty, data, start); - start += 1; - BufferSerDeUtil.writeDouble(sum, data, start); - } - - @Override - public void serializeAggBuffer(DataOutput output) throws IOException { - output.writeBoolean(empty); - output.writeDouble(sum); - } - } - - @Override - public AggregationBuffer getNewAggregationBuffer() throws HiveException { - SumDoubleAgg result = new SumDoubleAgg(); - reset(result); - return result; - } - - @Override - public void reset(AggregationBuffer agg) throws HiveException { - SumDoubleAgg myagg = (SumDoubleAgg) agg; - myagg.empty = true; - myagg.sum = 0; - } - - boolean warned = false; - - @Override - public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException { - assert (parameters.length == 1); - try { - merge(agg, parameters[0]); - } catch (NumberFormatException e) { - if (!warned) { - warned = true; - LOG.warn(getClass().getSimpleName() + " " + StringUtils.stringifyException(e)); - LOG.warn(getClass().getSimpleName() + " ignoring similar exceptions."); - } - } - } - - @Override - public Object terminatePartial(AggregationBuffer agg) throws HiveException { - return terminate(agg); - } - - @Override - public void merge(AggregationBuffer agg, Object partial) throws HiveException { - if (partial != null) { - SumDoubleAgg myagg = (SumDoubleAgg) agg; - myagg.empty = false; - myagg.sum += PrimitiveObjectInspectorUtils.getDouble(partial, inputOI); - } - } - - @Override - public Object terminate(AggregationBuffer agg) throws HiveException { - SumDoubleAgg myagg = (SumDoubleAgg) agg; - if (myagg.empty) { - return null; - } - result.set(myagg.sum); - return result; - } - - } - - /** - * GenericUDAFSumLong. - */ - public static class GenericUDAFSumLong extends GenericUDAFEvaluator { - private PrimitiveObjectInspector inputOI; - private LongWritable result; - - @Override - public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException { - assert (parameters.length == 1); - super.init(m, parameters); - result = new LongWritable(0); - inputOI = (PrimitiveObjectInspector) parameters[0]; - return PrimitiveObjectInspectorFactory.writableLongObjectInspector; - } - - /** class for storing double sum value. 
*/ - static class SumLongAgg implements SerializableBuffer { - boolean empty; - long sum; - - @Override - public void deSerializeAggBuffer(byte[] data, int start, int len) { - empty = BufferSerDeUtil.getBoolean(data, start); - start += 1; - sum = BufferSerDeUtil.getLong(data, start); - } - - @Override - public void serializeAggBuffer(byte[] data, int start, int len) { - BufferSerDeUtil.writeBoolean(empty, data, start); - start += 1; - BufferSerDeUtil.writeLong(sum, data, start); - } - - @Override - public void serializeAggBuffer(DataOutput output) throws IOException { - output.writeBoolean(empty); - output.writeLong(sum); - } - } - - @Override - public AggregationBuffer getNewAggregationBuffer() throws HiveException { - SumLongAgg result = new SumLongAgg(); - reset(result); - return result; - } - - @Override - public void reset(AggregationBuffer agg) throws HiveException { - SumLongAgg myagg = (SumLongAgg) agg; - myagg.empty = true; - myagg.sum = 0; - } - - private boolean warned = false; - - @Override - public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException { - assert (parameters.length == 1); - try { - merge(agg, parameters[0]); - } catch (NumberFormatException e) { - if (!warned) { - warned = true; - LOG.warn(getClass().getSimpleName() + " " + StringUtils.stringifyException(e)); - } - } - } - - @Override - public Object terminatePartial(AggregationBuffer agg) throws HiveException { - return terminate(agg); - } - - @Override - public void merge(AggregationBuffer agg, Object partial) throws HiveException { - if (partial != null) { - SumLongAgg myagg = (SumLongAgg) agg; - myagg.sum += PrimitiveObjectInspectorUtils.getLong(partial, inputOI); - myagg.empty = false; - } - } - - @Override - public Object terminate(AggregationBuffer agg) throws HiveException { - SumLongAgg myagg = (SumLongAgg) agg; - if (myagg.empty) { - return null; - } - result.set(myagg.sum); - return result; - } - - } - -} http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/ffc967fd/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java ---------------------------------------------------------------------- diff --git a/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java deleted file mode 100644 index d3da16d..0000000 --- a/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java +++ /dev/null @@ -1,319 +0,0 @@ -/* - * Copyright 2009-2013 by The Regents of the University of California - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * you may obtain a copy of the License from - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.ql.udf.generic; - -import java.io.DataOutput; -import java.io.IOException; -import java.util.ArrayList; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.ql.exec.Description; -import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.hive.serde2.io.DoubleWritable; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.StructField; -import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; -import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.util.StringUtils; - -import edu.uci.ics.hivesterix.runtime.evaluator.BufferSerDeUtil; -import edu.uci.ics.hivesterix.runtime.evaluator.SerializableBuffer; - -/** - * Compute the variance. 
This class is extended by: GenericUDAFVarianceSample,
- * GenericUDAFStd, GenericUDAFStdSample.
- */
-@Description(name = "variance,var_pop", value = "_FUNC_(x) - Returns the variance of a set of numbers")
-public class GenericUDAFVariance extends AbstractGenericUDAFResolver {
-
-    static final Log LOG = LogFactory.getLog(GenericUDAFVariance.class.getName());
-
-    @Override
-    public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException {
-        if (parameters.length != 1) {
-            throw new UDFArgumentTypeException(parameters.length - 1, "Exactly one argument is expected.");
-        }
-
-        if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
-            throw new UDFArgumentTypeException(0, "Only primitive type arguments are accepted but "
-                    + parameters[0].getTypeName() + " is passed.");
-        }
-        switch (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory()) {
-            case BYTE:
-            case SHORT:
-            case INT:
-            case LONG:
-            case FLOAT:
-            case DOUBLE:
-            case STRING:
-                return new GenericUDAFVarianceEvaluator();
-            case BOOLEAN:
-            default:
-                throw new UDFArgumentTypeException(0, "Only numeric or string type arguments are accepted but "
-                        + parameters[0].getTypeName() + " is passed.");
        }
-    }
-
-    /**
-     * Evaluate the variance using the algorithm described by Chan, Golub, and
-     * LeVeque in
-     * "Algorithms for computing the sample variance: analysis and recommendations",
-     * The American Statistician, 37 (1983) pp. 242--247:
-     *   variance = variance1 + variance2 + n/(m*(m+n)) * pow(((m/n)*t1 - t2), 2)
-     * where:
-     *   - variance is sum[(x-avg)^2] (this is actually n times the variance)
-     *     and is updated at every step,
-     *   - n is the count of elements in chunk1,
-     *   - m is the count of elements in chunk2,
-     *   - t1 = sum of elements in chunk1, t2 = sum of elements in chunk2.
-     * This algorithm was proven to be numerically stable by J.L. Barlow in
-     * "Error analysis of a pairwise summation algorithm to compute sample variance",
-     * Numer. Math, 58 (1991) pp. 583--590.
-     */
-    public static class GenericUDAFVarianceEvaluator extends GenericUDAFEvaluator {
-
-        // For PARTIAL1 and COMPLETE
-        private PrimitiveObjectInspector inputOI;
-
-        // For PARTIAL2 and FINAL
-        private StructObjectInspector soi;
-        private StructField countField;
-        private StructField sumField;
-        private StructField varianceField;
-        private LongObjectInspector countFieldOI;
-        private DoubleObjectInspector sumFieldOI;
-
-        // For PARTIAL1 and PARTIAL2
-        private Object[] partialResult;
-
-        // For FINAL and COMPLETE
-        private DoubleWritable result;
-
-        @Override
-        public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
-            assert (parameters.length == 1);
-            super.init(m, parameters);
-
-            // init input
-            if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) {
-                inputOI = (PrimitiveObjectInspector) parameters[0];
-            } else {
-                soi = (StructObjectInspector) parameters[0];
-
-                countField = soi.getStructFieldRef("count");
-                sumField = soi.getStructFieldRef("sum");
-                varianceField = soi.getStructFieldRef("variance");
-
-                countFieldOI = (LongObjectInspector) countField.getFieldObjectInspector();
-                sumFieldOI = (DoubleObjectInspector) sumField.getFieldObjectInspector();
-            }
-
-            // init output
-            if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) {
-                // The output of a partial aggregation is a struct containing
-                // a long count and doubles sum and variance.
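// A minimal sketch of the Chan/Golub/LeVeque merge from the javadoc above,
// mirroring the merge() method further down. The plain-double state array
// {n, sum, var} is hypothetical; var holds n times the variance, as in this
// class.
static void mergeVarianceStates(double[] a, double[] b) {
    double n = a[0], m = b[0];
    if (n == 0) { a[0] = b[0]; a[1] = b[1]; a[2] = b[2]; return; }
    if (m == 0) { return; }
    double t = (m / n) * a[1] - b[1];               // (m/n)*t1 - t2
    a[2] += b[2] + (n / m) / (n + m) * t * t;       // n/(m*(m+n)) * t^2
    a[1] += b[1];                                   // merged sum
    a[0] = n + m;                                   // merged count
}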
-
-            ArrayList<ObjectInspector> foi = new ArrayList<ObjectInspector>();
-
-            foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
-            foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
-            foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
-
-            ArrayList<String> fname = new ArrayList<String>();
-            fname.add("count");
-            fname.add("sum");
-            fname.add("variance");
-
-            partialResult = new Object[3];
-            partialResult[0] = new LongWritable(0);
-            partialResult[1] = new DoubleWritable(0);
-            partialResult[2] = new DoubleWritable(0);
-
-            return ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi);
-
-        } else {
-            setResult(new DoubleWritable(0));
-            return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
-        }
-    }
-
-    static class StdAgg implements SerializableBuffer {
-        long count; // number of elements
-        double sum; // sum of elements
-        double variance; // sum[x-avg^2] (this is actually n times the variance)
-
-        @Override
-        public void deSerializeAggBuffer(byte[] data, int start, int len) {
-            count = BufferSerDeUtil.getLong(data, start);
-            start += 8;
-            sum = BufferSerDeUtil.getDouble(data, start);
-            start += 8;
-            variance = BufferSerDeUtil.getDouble(data, start);
-        }
-
-        @Override
-        public void serializeAggBuffer(byte[] data, int start, int len) {
-            BufferSerDeUtil.writeLong(count, data, start);
-            start += 8;
-            BufferSerDeUtil.writeDouble(sum, data, start);
-            start += 8;
-            BufferSerDeUtil.writeDouble(variance, data, start);
-        }
-
-        @Override
-        public void serializeAggBuffer(DataOutput output) throws IOException {
-            output.writeLong(count);
-            output.writeDouble(sum);
-            output.writeDouble(variance);
-        }
-    }
-
-    @Override
-    public AggregationBuffer getNewAggregationBuffer() throws HiveException {
-        StdAgg result = new StdAgg();
-        reset(result);
-        return result;
-    }
-
-    @Override
-    public void reset(AggregationBuffer agg) throws HiveException {
-        StdAgg myagg = (StdAgg) agg;
-        myagg.count = 0;
-        myagg.sum = 0;
-        myagg.variance = 0;
-    }
-
-    private boolean warned = false;
-
-    @Override
-    public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
-        assert (parameters.length == 1);
-        Object p = parameters[0];
-        if (p != null) {
-            StdAgg myagg = (StdAgg) agg;
-            try {
-                double v = PrimitiveObjectInspectorUtils.getDouble(p, inputOI);
-                myagg.count++;
-                myagg.sum += v;
-                if (myagg.count > 1) {
-                    double t = myagg.count * v - myagg.sum;
-                    myagg.variance += (t * t) / ((double) myagg.count * (myagg.count - 1));
-                }
-            } catch (NumberFormatException e) {
-                if (!warned) {
-                    warned = true;
-                    LOG.warn(getClass().getSimpleName() + " " + StringUtils.stringifyException(e));
-                    LOG.warn(getClass().getSimpleName() + " ignoring similar exceptions.");
-                }
-            }
-        }
-    }
-
-    @Override
-    public Object terminatePartial(AggregationBuffer agg) throws HiveException {
-        StdAgg myagg = (StdAgg) agg;
-        ((LongWritable) partialResult[0]).set(myagg.count);
-        ((DoubleWritable) partialResult[1]).set(myagg.sum);
-        ((DoubleWritable) partialResult[2]).set(myagg.variance);
-        return partialResult;
-    }
-
-    @Override
-    public void merge(AggregationBuffer agg, Object partial) throws HiveException {
-        if (partial != null) {
-            StdAgg myagg = (StdAgg) agg;
-
-            Object partialCount = soi.getStructFieldData(partial, countField);
-            Object partialSum = soi.getStructFieldData(partial, sumField);
-            Object partialVariance = soi.getStructFieldData(partial, varianceField);
-
-            long n = myagg.count;
-            long m = countFieldOI.get(partialCount);
-
-            if (n == 0) {
-                // Just copy the information since there is nothing so far
-                myagg.variance = sumFieldOI.get(partialVariance);
-                myagg.count = countFieldOI.get(partialCount);
-                myagg.sum = sumFieldOI.get(partialSum);
-            }
-
-            if (m != 0 && n != 0) {
-                // Merge the two partials
-
-                double a = myagg.sum;
-                double b = sumFieldOI.get(partialSum);
-
-                myagg.count += m;
-                myagg.sum += b;
-                double t = (m / (double) n) * a - b;
-                myagg.variance += sumFieldOI.get(partialVariance) + ((n / (double) m) / ((double) n + m)) * t * t;
-            }
-        }
-    }
-
-    @Override
-    public Object terminate(AggregationBuffer agg) throws HiveException {
-        StdAgg myagg = (StdAgg) agg;
-
-        if (myagg.count == 0) { // SQL standard - return null for zero elements
-            return null;
-        } else {
-            if (myagg.count > 1) {
-                getResult().set(myagg.variance / (myagg.count));
-            } else { // for one element the variance is always 0
-                getResult().set(0);
-            }
-            return getResult();
-        }
-    }
-
-    public void setResult(DoubleWritable result) {
-        this.result = result;
-    }
-
-    public DoubleWritable getResult() {
-        return result;
-    }
-    }
-
-}
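The two formulas in the deleted evaluator above do all the real work: iterate() folds a value x into (count, sum, variance) via S += (n*x - sum)^2 / (n*(n-1)), and merge() combines two partial aggregates with the pairwise rule from the Pebay and Knuth references in the class comment. Here is a minimal, self-contained sketch of just those updates with the Hive evaluator plumbing removed; the class and method names are illustrative and not part of the deleted file:

    public final class OnePassVariance {
        private long count;      // n: number of elements seen
        private double sum;      // sum of elements
        private double variance; // n times the variance (sum of squared deviations)

        // Mirrors iterate(): after advancing n and sum, apply
        // S += (n*x - sum)^2 / (n*(n-1)).
        public void add(double v) {
            count++;
            sum += v;
            if (count > 1) {
                double t = count * v - sum;
                variance += (t * t) / ((double) count * (count - 1));
            }
        }

        // Mirrors merge(): fold a partial aggregate (m, b, sb) into this one
        // using the exact pairwise combination of sums of squared deviations.
        public void merge(long m, double b, double sb) {
            if (m == 0) {
                return;
            }
            if (count == 0) { // nothing so far: just copy the partial
                count = m;
                sum = b;
                variance = sb;
                return;
            }
            long n = count;
            double a = sum;
            count += m;
            sum += b;
            double t = (m / (double) n) * a - b;
            variance += sb + ((n / (double) m) / ((double) n + m)) * t * t;
        }

        // Population variance, as in terminate(): variance/n, and 0 for a single element.
        public double result() {
            if (count == 0) {
                throw new IllegalStateException("no elements");
            }
            return count > 1 ? variance / count : 0.0;
        }

        public static void main(String[] args) {
            OnePassVariance left = new OnePassVariance();
            OnePassVariance right = new OnePassVariance();
            for (double v : new double[] { 1, 2, 3 }) left.add(v);
            for (double v : new double[] { 4, 5 }) right.add(v);
            left.merge(right.count, right.sum, right.variance);
            System.out.println(left.result()); // variance of {1,2,3,4,5} = 2.0
        }
    }

Note that variance holds the unnormalized sum of squared deviations (n times the variance) throughout; terminate() divides by count only at the end, and keeping the sum unnormalized is what lets the merge step be exact.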
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/ffc967fd/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyObjectInspectorFactory.java
----------------------------------------------------------------------

diff --git a/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyObjectInspectorFactory.java b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyObjectInspectorFactory.java
deleted file mode 100644
index 1219b46..0000000
--- a/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyObjectInspectorFactory.java
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * Copyright 2009-2013 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.serde2.lazy.objectinspector;
-
-import java.util.ArrayList;
-import java.util.List;
-import java.util.concurrent.ConcurrentHashMap;
-
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.io.Text;
-
-/**
- * ObjectInspectorFactory is the primary way to create new ObjectInspector
- * instances.
- * SerDe classes should call the static functions in this library to create an
- * ObjectInspector to return to the caller of SerDe2.getObjectInspector().
- * The reason for having caches here is that ObjectInspectors do not have any
- * internal state, so ObjectInspectors with the same construction parameters
- * should result in exactly the same ObjectInspector.
- */
-public final class LazyObjectInspectorFactory {
-
-    static ConcurrentHashMap<ArrayList<Object>, LazySimpleStructObjectInspector> cachedLazySimpleStructObjectInspector = new ConcurrentHashMap<ArrayList<Object>, LazySimpleStructObjectInspector>();
-
-    public static LazySimpleStructObjectInspector getLazySimpleStructObjectInspector(List<String> structFieldNames,
-            List<ObjectInspector> structFieldObjectInspectors, byte separator, Text nullSequence,
-            boolean lastColumnTakesRest, boolean escaped, byte escapeChar) {
-        ArrayList<Object> signature = new ArrayList<Object>();
-        signature.add(structFieldNames);
-        signature.add(structFieldObjectInspectors);
-        signature.add(Byte.valueOf(separator));
-        signature.add(nullSequence.toString());
-        signature.add(Boolean.valueOf(lastColumnTakesRest));
-        signature.add(Boolean.valueOf(escaped));
-        signature.add(Byte.valueOf(escapeChar));
-        LazySimpleStructObjectInspector result = cachedLazySimpleStructObjectInspector.get(signature);
-        if (result == null) {
-            result = new LazySimpleStructObjectInspector(structFieldNames, structFieldObjectInspectors, separator,
-                    nullSequence, lastColumnTakesRest, escaped, escapeChar);
-            cachedLazySimpleStructObjectInspector.put(signature, result);
-        }
-        return result;
-    }
-
-    static ConcurrentHashMap<ArrayList<Object>, LazyListObjectInspector> cachedLazySimpleListObjectInspector = new ConcurrentHashMap<ArrayList<Object>, LazyListObjectInspector>();
-
-    public static LazyListObjectInspector getLazySimpleListObjectInspector(ObjectInspector listElementObjectInspector,
-            byte separator, Text nullSequence, boolean escaped, byte escapeChar) {
-        ArrayList<Object> signature = new ArrayList<Object>();
-        signature.add(listElementObjectInspector);
-        signature.add(Byte.valueOf(separator));
-        signature.add(nullSequence.toString());
-        signature.add(Boolean.valueOf(escaped));
-        signature.add(Byte.valueOf(escapeChar));
-        LazyListObjectInspector result = cachedLazySimpleListObjectInspector.get(signature);
-        if (result == null) {
-            result = new LazyListObjectInspector(listElementObjectInspector, separator, nullSequence, escaped,
-                    escapeChar);
-            cachedLazySimpleListObjectInspector.put(signature, result);
-        }
-        return result;
-    }
-
-    static ConcurrentHashMap<ArrayList<Object>, LazyMapObjectInspector> cachedLazySimpleMapObjectInspector = new ConcurrentHashMap<ArrayList<Object>, LazyMapObjectInspector>();
-
-    public static LazyMapObjectInspector getLazySimpleMapObjectInspector(ObjectInspector mapKeyObjectInspector,
-            ObjectInspector mapValueObjectInspector, byte itemSeparator, byte keyValueSeparator, Text nullSequence,
-            boolean escaped, byte escapeChar) {
-        ArrayList<Object> signature = new ArrayList<Object>();
-        signature.add(mapKeyObjectInspector);
-        signature.add(mapValueObjectInspector);
-        signature.add(Byte.valueOf(itemSeparator));
-        signature.add(Byte.valueOf(keyValueSeparator));
-        signature.add(nullSequence.toString());
-        signature.add(Boolean.valueOf(escaped));
-        signature.add(Byte.valueOf(escapeChar));
-        LazyMapObjectInspector result = cachedLazySimpleMapObjectInspector.get(signature);
-        if (result == null) {
-            result = new LazyMapObjectInspector(mapKeyObjectInspector, mapValueObjectInspector, itemSeparator,
-                    keyValueSeparator, nullSequence, escaped, escapeChar);
-            cachedLazySimpleMapObjectInspector.put(signature, result);
-        }
-        return result;
-    }
-
-    static ConcurrentHashMap<List<Object>, LazyUnionObjectInspector> cachedLazyUnionObjectInspector = new ConcurrentHashMap<List<Object>, LazyUnionObjectInspector>();
-
-    public static LazyUnionObjectInspector getLazyUnionObjectInspector(List<ObjectInspector> ois, byte separator,
-            Text nullSequence, boolean escaped, byte escapeChar) {
-        List<Object> signature = new ArrayList<Object>();
-        signature.add(ois);
-        signature.add(Byte.valueOf(separator));
-        signature.add(nullSequence.toString());
-        signature.add(Boolean.valueOf(escaped));
-        signature.add(Byte.valueOf(escapeChar));
-        LazyUnionObjectInspector result = cachedLazyUnionObjectInspector.get(signature);
-        if (result == null) {
-            result = new LazyUnionObjectInspector(ois, separator, nullSequence, escaped, escapeChar);
-            cachedLazyUnionObjectInspector.put(signature, result);
-        }
-        return result;
-    }
-
-    private LazyObjectInspectorFactory() {
-        // prevent instantiation
-    }
-}
\ No newline at end of file
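All four factory methods above follow the same memoization pattern: build an ArrayList<Object> key out of every construction parameter, then check-then-put into a ConcurrentHashMap. The check-then-put is racy, but harmlessly so, because two inspectors built from identical parameters are interchangeable, which is exactly the stateless property the class comment calls out. A compact sketch of the pattern, using Java 8's computeIfAbsent to express the same idea atomically (the names here are illustrative, not from the deleted code):

    import java.util.Arrays;
    import java.util.List;
    import java.util.concurrent.ConcurrentHashMap;
    import java.util.function.Function;

    final class SignatureCache<V> {
        private final ConcurrentHashMap<List<Object>, V> cache = new ConcurrentHashMap<List<Object>, V>();

        // Key = the full construction-parameter list; List equals/hashCode
        // give the value semantics the factory relies on.
        V getOrCreate(Function<List<Object>, V> factory, Object... params) {
            return cache.computeIfAbsent(Arrays.asList(params), factory);
        }

        public static void main(String[] args) {
            SignatureCache<Object> cache = new SignatureCache<Object>();
            Object a = cache.getOrCreate(sig -> new Object(), (byte) 1, "\\N", true);
            Object b = cache.getOrCreate(sig -> new Object(), (byte) 1, "\\N", true);
            System.out.println(a == b); // true: same parameters, same cached instance
        }
    }

The deleted code predates Java 8, hence its get-then-put; the occasional duplicate construction it allows under contention is benign for immutable values.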
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/ffc967fd/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoFactory.java
----------------------------------------------------------------------

diff --git a/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoFactory.java b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoFactory.java
deleted file mode 100644
index e26f477..0000000
--- a/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoFactory.java
+++ /dev/null
@@ -1,145 +0,0 @@
-/*
- * Copyright 2009-2013 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.serde2.typeinfo;
-
-import java.util.ArrayList;
-import java.util.List;
-import java.util.concurrent.ConcurrentHashMap;
-
-import org.apache.hadoop.hive.serde.serdeConstants;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
-
-/**
- * TypeInfoFactory can be used to create the TypeInfo object for any type.
- * TypeInfo objects are all read-only, so we can reuse them easily.
- * TypeInfoFactory has an internal cache to make sure we don't create two
- * TypeInfo objects that represent the same type.
- */
-public final class TypeInfoFactory {
-
-    static ConcurrentHashMap<String, TypeInfo> cachedPrimitiveTypeInfo = new ConcurrentHashMap<String, TypeInfo>();
-
-    private TypeInfoFactory() {
-        // prevent instantiation
-    }
-
-    public static TypeInfo getPrimitiveTypeInfo(String typeName) {
-        if (null == PrimitiveObjectInspectorUtils.getTypeEntryFromTypeName(typeName)) {
-            throw new RuntimeException("Cannot getPrimitiveTypeInfo for " + typeName);
-        }
-        TypeInfo result = cachedPrimitiveTypeInfo.get(typeName);
-        if (result == null) {
-            result = new PrimitiveTypeInfo(typeName);
-            cachedPrimitiveTypeInfo.put(typeName, result);
-        }
-        return result;
-    }
-
-    public static final TypeInfo voidTypeInfo = getPrimitiveTypeInfo(serdeConstants.VOID_TYPE_NAME);
-    public static final TypeInfo booleanTypeInfo = getPrimitiveTypeInfo(serdeConstants.BOOLEAN_TYPE_NAME);
-    public static final TypeInfo intTypeInfo = getPrimitiveTypeInfo(serdeConstants.INT_TYPE_NAME);
-    public static final TypeInfo longTypeInfo = getPrimitiveTypeInfo(serdeConstants.BIGINT_TYPE_NAME);
-    public static final TypeInfo stringTypeInfo = getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME);
-    public static final TypeInfo floatTypeInfo = getPrimitiveTypeInfo(serdeConstants.FLOAT_TYPE_NAME);
-    public static final TypeInfo doubleTypeInfo = getPrimitiveTypeInfo(serdeConstants.DOUBLE_TYPE_NAME);
-    public static final TypeInfo byteTypeInfo = getPrimitiveTypeInfo(serdeConstants.TINYINT_TYPE_NAME);
-    public static final TypeInfo shortTypeInfo = getPrimitiveTypeInfo(serdeConstants.SMALLINT_TYPE_NAME);
-    public static final TypeInfo timestampTypeInfo = getPrimitiveTypeInfo(serdeConstants.TIMESTAMP_TYPE_NAME);
-    public static final TypeInfo binaryTypeInfo = getPrimitiveTypeInfo(serdeConstants.BINARY_TYPE_NAME);
-    public static final TypeInfo decimalTypeInfo = getPrimitiveTypeInfo(serdeConstants.DECIMAL_TYPE_NAME);
-
-    public static final TypeInfo unknownTypeInfo = getPrimitiveTypeInfo("unknown");
-
-    public static TypeInfo getPrimitiveTypeInfoFromPrimitiveWritable(Class<?> clazz) {
-        String typeName = PrimitiveObjectInspectorUtils.getTypeNameFromPrimitiveWritable(clazz);
-        if (typeName == null) {
-            throw new RuntimeException("Internal error: Cannot get typeName for " + clazz);
-        }
-        return getPrimitiveTypeInfo(typeName);
-    }
-
-    public static TypeInfo getPrimitiveTypeInfoFromJavaPrimitive(Class<?> clazz) {
-        return getPrimitiveTypeInfo(PrimitiveObjectInspectorUtils.getTypeNameFromPrimitiveJava(clazz));
-    }
-
-    static ConcurrentHashMap<ArrayList<List<?>>, TypeInfo> cachedStructTypeInfo = new ConcurrentHashMap<ArrayList<List<?>>, TypeInfo>();
-
-    public static TypeInfo getStructTypeInfo(List<String> names, List<TypeInfo> typeInfos) {
-        ArrayList<List<?>> signature = new ArrayList<List<?>>(2);
-        signature.add(names);
-        signature.add(typeInfos);
-        TypeInfo result = cachedStructTypeInfo.get(signature);
-        if (result == null) {
-            result = new StructTypeInfo(names, typeInfos);
-            cachedStructTypeInfo.put(signature, result);
-        }
-        return result;
-    }
-
-    static ConcurrentHashMap<List<?>, TypeInfo> cachedUnionTypeInfo = new ConcurrentHashMap<List<?>, TypeInfo>();
-
-    public static TypeInfo getUnionTypeInfo(List<TypeInfo> typeInfos) {
-        TypeInfo result = cachedUnionTypeInfo.get(typeInfos);
-        if (result == null) {
-            result = new UnionTypeInfo(typeInfos);
-            cachedUnionTypeInfo.put(typeInfos, result);
-        }
-        return result;
-    }
-
-    static ConcurrentHashMap<TypeInfo, TypeInfo> cachedListTypeInfo = new ConcurrentHashMap<TypeInfo, TypeInfo>();
-
-    public static TypeInfo getListTypeInfo(TypeInfo elementTypeInfo) {
-        TypeInfo result = cachedListTypeInfo.get(elementTypeInfo);
-        if (result == null) {
-            result = new ListTypeInfo(elementTypeInfo);
-            cachedListTypeInfo.put(elementTypeInfo, result);
-        }
-        return result;
-    }
-
-    static ConcurrentHashMap<ArrayList<TypeInfo>, TypeInfo> cachedMapTypeInfo = new ConcurrentHashMap<ArrayList<TypeInfo>, TypeInfo>();
-
-    public static TypeInfo getMapTypeInfo(TypeInfo keyTypeInfo, TypeInfo valueTypeInfo) {
-        ArrayList<TypeInfo> signature = new ArrayList<TypeInfo>(2);
-        signature.add(keyTypeInfo);
-        signature.add(valueTypeInfo);
-        TypeInfo result = cachedMapTypeInfo.get(signature);
-        if (result == null) {
-            result = new MapTypeInfo(keyTypeInfo, valueTypeInfo);
-            cachedMapTypeInfo.put(signature, result);
-        }
-        return result;
-    }
-
-}
\ No newline at end of file
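Because every TypeInfo handed out above is read-only and cached, requesting the same type twice, whether primitive or composite, yields the identical object, so callers can compare types by reference instead of with equals(). A short usage sketch (the demo class name is mine, and it assumes this deleted factory is still on the classpath):

    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

    public class TypeInfoCacheDemo {
        public static void main(String[] args) {
            // Build "map<string,bigint>" twice; the factory's cache hands
            // back the identical instance, so reference comparison suffices.
            TypeInfo m1 = TypeInfoFactory.getMapTypeInfo(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.longTypeInfo);
            TypeInfo m2 = TypeInfoFactory.getMapTypeInfo(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.longTypeInfo);
            System.out.println(m1 == m2); // true
        }
    }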
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/ffc967fd/hivesterix/hivesterix-dist/src/main/resources/conf/cluster.properties
----------------------------------------------------------------------

diff --git a/hivesterix/hivesterix-dist/src/main/resources/conf/cluster.properties b/hivesterix/hivesterix-dist/src/main/resources/conf/cluster.properties
deleted file mode 100644
index f5d12a1..0000000
--- a/hivesterix/hivesterix-dist/src/main/resources/conf/cluster.properties
+++ /dev/null
@@ -1,50 +0,0 @@
-#/*
-# Copyright 2009-2013 by The Regents of the University of California
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# you may obtain a copy of the License from
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#*/
-#The CC port for Hyracks clients
-CC_CLIENTPORT=3099
-
-#The CC port for Hyracks cluster management
-CC_CLUSTERPORT=1099
-
-#The tmp directory for cc to install jars
-CCTMP_DIR=/tmp/t1
-
-#The tmp directory for nc to install jars
-NCTMP_DIR=/tmp/t2
-
-#The directory to put cc logs
-CCLOGS_DIR=$CCTMP_DIR/logs
-
-#The directory to put nc logs
-NCLOGS_DIR=$NCTMP_DIR/logs
-
-#Comma separated I/O directories for the spilling of external sort
-IO_DIRS="/tmp/t3,/tmp/t4"
-
-#The JAVA_HOME
-JAVA_HOME=$JAVA_HOME
-
-#The frame size of the internal dataflow engine
-FRAME_SIZE=65536
-
-#CC JAVA_OPTS
-CCJAVA_OPTS="-Xmx1g -Djava.util.logging.config.file=logging.properties"
-# debug option: CCJAVA_OPTS="-Xdebug -Xrunjdwp:transport=dt_socket,address=7001,server=y,suspend=n -Xmx1g -Djava.util.logging.config.file=logging.properties"
-# Yourkit option: -agentpath:/grid/0/dev/vborkar/tools/yjp-10.0.4/bin/linux-x86-64/libyjpagent.so=port=20001"
-
-#NC JAVA_OPTS
-NCJAVA_OPTS="-Xmx1g -Djava.util.logging.config.file=logging.properties"
-# debug option: NCJAVA_OPTS="-Xdebug -Xrunjdwp:transport=dt_socket,address=7002,server=y,suspend=n -Xmx1g -Djava.util.logging.config.file=logging.properties"
-# Yourkit option: -agentpath:/grid/0/dev/vborkar/tools/yjp-10.0.4/bin/linux-x86-64/libyjpagent.so=port=20001"

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/ffc967fd/hivesterix/hivesterix-dist/src/main/resources/conf/configuration.xsl
----------------------------------------------------------------------

diff --git a/hivesterix/hivesterix-dist/src/main/resources/conf/configuration.xsl b/hivesterix/hivesterix-dist/src/main/resources/conf/configuration.xsl
deleted file mode 100644
index 377cdbe..0000000
--- a/hivesterix/hivesterix-dist/src/main/resources/conf/configuration.xsl
+++ /dev/null
@@ -1,24 +0,0 @@
[The 24 deleted lines were XSLT markup that was stripped in transit; the stylesheet rendered a configuration XML file as an HTML table with "name", "value", and "description" columns.]
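One subtlety of the deleted cluster.properties: although it is written in key=value form, entries such as CCLOGS_DIR=$CCTMP_DIR/logs depend on shell variable expansion, so the file is evidently meant to be sourced by a launch script rather than loaded directly as a Java properties file. A small sketch of the difference (the file path and class name are mine):

    import java.io.FileReader;
    import java.util.Properties;

    public class ClusterPropertiesDemo {
        public static void main(String[] args) throws Exception {
            Properties props = new Properties();
            try (FileReader reader = new FileReader("conf/cluster.properties")) {
                props.load(reader); // lines starting with '#' are skipped as comments
            }
            System.out.println(props.getProperty("CC_CLIENTPORT")); // 3099
            System.out.println(props.getProperty("FRAME_SIZE"));    // 65536
            // java.util.Properties performs no shell expansion, so this prints
            // the literal "$CCTMP_DIR/logs" rather than "/tmp/t1/logs":
            System.out.println(props.getProperty("CCLOGS_DIR"));
        }
    }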