hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From j..@apache.org
Subject svn commit: r987360 - in /hadoop/hive/trunk: ./ ql/src/java/org/apache/hadoop/hive/ql/exec/ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/ ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/
Date Fri, 20 Aug 2010 01:03:21 GMT
Author: jvs
Date: Fri Aug 20 01:03:20 2010
New Revision: 987360

URL: http://svn.apache.org/viewvc?rev=987360&view=rev
Log:
HIVE-1549. Add ANSI SQL correlation function CORR(X,Y).
(Pierre Huyn via jvs)


Added:
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCorrelation.java
    hadoop/hive/trunk/ql/src/test/queries/clientpositive/udaf_corr.q
    hadoop/hive/trunk/ql/src/test/results/clientpositive/udaf_corr.q.out
Modified:
    hadoop/hive/trunk/CHANGES.txt
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
    hadoop/hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out

Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=987360&r1=987359&r2=987360&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Fri Aug 20 01:03:20 2010
@@ -50,6 +50,9 @@ Trunk -  Unreleased
     n-grams
     (Mayank Lahiri via jvs)
 
+    HIVE-1549. Add ANSI SQL correlation function CORR(X,Y).
+    (Pierre Huyn via jvs)
+
   IMPROVEMENTS
 
     HIVE-1394. Do not update transient_lastDdlTime if the partition is modified by a housekeeping

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java?rev=987360&r1=987359&r2=987360&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java Fri
Aug 20 01:03:20 2010
@@ -134,6 +134,7 @@ import org.apache.hadoop.hive.ql.udf.gen
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFBridge;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCollectSet;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFContextNGrams;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCorrelation;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCount;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCovariance;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCovarianceSample;
@@ -371,7 +372,7 @@ public final class FunctionRegistry {
     registerGenericUDAF("var_samp", new GenericUDAFVarianceSample());
     registerGenericUDAF("covar_pop", new GenericUDAFCovariance());
     registerGenericUDAF("covar_samp", new GenericUDAFCovarianceSample());
-
+    registerGenericUDAF("corr", new GenericUDAFCorrelation());
     registerGenericUDAF("histogram_numeric", new GenericUDAFHistogramNumeric());
     registerGenericUDAF("percentile_approx", new GenericUDAFPercentileApprox());
     registerGenericUDAF("collect_set", new GenericUDAFCollectSet());

Added: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCorrelation.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCorrelation.java?rev=987360&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCorrelation.java
(added)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCorrelation.java
Fri Aug 20 01:03:20 2010
@@ -0,0 +1,378 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import java.util.ArrayList;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.io.LongWritable;
+
+/**
+ * Compute the Pearson correlation coefficient corr(x, y), using the following
+ * stable one-pass method, based on:
+ * "Formulas for Robust, One-Pass Parallel Computation of Covariances and
+ * Arbitrary-Order Statistical Moments", Philippe Pebay, Sandia Labs
+ * and "The Art of Computer Programming, volume 2: Seminumerical Algorithms",
+ * Donald Knuth.
+ *
+ *  Incremental:
+ *   n : <count>
+ *   mx_n = mx_(n-1) + [x_n - mx_(n-1)]/n : <xavg>
+ *   my_n = my_(n-1) + [y_n - my_(n-1)]/n : <yavg>
+ *   c_n = c_(n-1) + (x_n - mx_(n-1))*(y_n - my_n) : <covariance * n>
+ *   vx_n = vx_(n-1) + (x_n - mx_n)(x_n - mx_(n-1)): <variance * n>
+ *   vy_n = vy_(n-1) + (y_n - my_n)(y_n - my_(n-1)): <variance * n>
+ *
+ *  Merge:
+ *   c_(A,B) = c_A + c_B + (mx_A - mx_B)*(my_A - my_B)*n_A*n_B/(n_A+n_B)
+ *   vx_(A,B) = vx_A + vx_B + (mx_A - mx_B)*(mx_A - mx_B)*n_A*n_B/(n_A+n_B)
+ *   vy_(A,B) = vy_A + vy_B + (my_A - my_B)*(my_A - my_B)*n_A*n_B/(n_A+n_B)
+ *
+ */
+@Description(name = "corr",
+    value = "_FUNC_(x,y) - Returns the Pearson coefficient of correlation\n"
+        + "between a set of number pairs",
+    extended = "The function takes as arguments any pair of numeric types and returns a double.\n"
+        + "Any pair with a NULL is ignored. If the function is applied to an empty set or\n"
+        + "a singleton set, NULL will be returned. Otherwise, it computes the following:\n"
+        + "   COVAR_POP(x,y)/(STDDEV_POP(x)*STDDEV_POP(y))\n"
+        + "where neither x nor y is null,\n"
+        + "COVAR_POP is the population covariance,\n"
+        + "and STDDEV_POP is the population standard deviation.")
+public class GenericUDAFCorrelation extends AbstractGenericUDAFResolver {
+
+  static final Log LOG = LogFactory.getLog(GenericUDAFCorrelation.class.getName());
+
+  @Override
+  public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException
{
+    if (parameters.length != 2) {
+      throw new UDFArgumentTypeException(parameters.length - 1,
+          "Exactly two arguments are expected.");
+    }
+
+    if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
+      throw new UDFArgumentTypeException(0,
+          "Only primitive type arguments are accepted but "
+          + parameters[0].getTypeName() + " is passed.");
+    }
+
+    if (parameters[1].getCategory() != ObjectInspector.Category.PRIMITIVE) {
+        throw new UDFArgumentTypeException(1,
+            "Only primitive type arguments are accepted but "
+            + parameters[1].getTypeName() + " is passed.");
+    }
+
+    switch (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory()) {
+    case BYTE:
+    case SHORT:
+    case INT:
+    case LONG:
+    case FLOAT:
+    case DOUBLE:
+      switch (((PrimitiveTypeInfo) parameters[1]).getPrimitiveCategory()) {
+      case BYTE:
+      case SHORT:
+      case INT:
+      case LONG:
+      case FLOAT:
+      case DOUBLE:
+        return new GenericUDAFCorrelationEvaluator();
+      case STRING:
+      case BOOLEAN:
+      default:
+        throw new UDFArgumentTypeException(1,
+            "Only numeric type arguments are accepted but "
+            + parameters[1].getTypeName() + " is passed.");
+      }
+    case STRING:
+    case BOOLEAN:
+    default:
+      throw new UDFArgumentTypeException(0,
+          "Only numeric type arguments are accepted but "
+          + parameters[0].getTypeName() + " is passed.");
+    }
+  }
+
+  /**
+   * Evaluate the Pearson correlation coefficient using a stable one-pass
+   * algorithm, based on work by Philippe Pébay and Donald Knuth.
+   *
+   *  Incremental:
+   *   n : <count>
+   *   mx_n = mx_(n-1) + [x_n - mx_(n-1)]/n : <xavg>
+   *   my_n = my_(n-1) + [y_n - my_(n-1)]/n : <yavg>
+   *   c_n = c_(n-1) + (x_n - mx_(n-1))*(y_n - my_n) : <covariance * n>
+   *   vx_n = vx_(n-1) + (x_n - mx_n)(x_n - mx_(n-1)): <variance * n>
+   *   vy_n = vy_(n-1) + (y_n - my_n)(y_n - my_(n-1)): <variance * n>
+   *
+   *  Merge:
+   *   c_X = c_A + c_B + (mx_A - mx_B)*(my_A - my_B)*n_A*n_B/n_X
+   *   vx_(A,B) = vx_A + vx_B + (mx_A - mx_B)*(mx_A - mx_B)*n_A*n_B/(n_A+n_B)
+   *   vy_(A,B) = vy_A + vy_B + (my_A - my_B)*(my_A - my_B)*n_A*n_B/(n_A+n_B)
+   *
+   */
+  public static class GenericUDAFCorrelationEvaluator extends GenericUDAFEvaluator {
+
+    // For PARTIAL1 and COMPLETE
+    private PrimitiveObjectInspector xInputOI;
+    private PrimitiveObjectInspector yInputOI;
+
+    // For PARTIAL2 and FINAL
+    private StructObjectInspector soi;
+    private StructField countField;
+    private StructField xavgField;
+    private StructField yavgField;
+    private StructField xvarField;
+    private StructField yvarField;
+    private StructField covarField;
+    private LongObjectInspector countFieldOI;
+    private DoubleObjectInspector xavgFieldOI;
+    private DoubleObjectInspector yavgFieldOI;
+    private DoubleObjectInspector xvarFieldOI;
+    private DoubleObjectInspector yvarFieldOI;
+    private DoubleObjectInspector covarFieldOI;
+
+    // For PARTIAL1 and PARTIAL2
+    private Object[] partialResult;
+
+    // For FINAL and COMPLETE
+    private DoubleWritable result;
+
+    @Override
+    public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException
{
+      super.init(m, parameters);
+
+      // init input
+      if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) {
+        assert (parameters.length == 2);
+        xInputOI = (PrimitiveObjectInspector) parameters[0];
+        yInputOI = (PrimitiveObjectInspector) parameters[1];
+      } else {
+        assert (parameters.length == 1);
+        soi = (StructObjectInspector) parameters[0];
+
+        countField = soi.getStructFieldRef("count");
+        xavgField = soi.getStructFieldRef("xavg");
+        yavgField = soi.getStructFieldRef("yavg");
+        xvarField = soi.getStructFieldRef("xvar");
+        yvarField = soi.getStructFieldRef("yvar");
+        covarField = soi.getStructFieldRef("covar");
+
+        countFieldOI =
+            (LongObjectInspector) countField.getFieldObjectInspector();
+        xavgFieldOI =
+            (DoubleObjectInspector) xavgField.getFieldObjectInspector();
+        yavgFieldOI =
+            (DoubleObjectInspector) yavgField.getFieldObjectInspector();
+        xvarFieldOI =
+            (DoubleObjectInspector) xvarField.getFieldObjectInspector();
+        yvarFieldOI =
+            (DoubleObjectInspector) yvarField.getFieldObjectInspector();
+        covarFieldOI =
+            (DoubleObjectInspector) covarField.getFieldObjectInspector();
+      }
+
+      // init output
+      if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) {
+        // The output of a partial aggregation is a struct containing
+        // a long count, two double averages, two double variances,
+        // and a double covariance.
+
+        ArrayList<ObjectInspector> foi = new ArrayList<ObjectInspector>();
+
+        foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
+        foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
+        foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
+        foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
+        foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
+        foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
+
+        ArrayList<String> fname = new ArrayList<String>();
+        fname.add("count");
+        fname.add("xavg");
+        fname.add("yavg");
+        fname.add("xvar");
+        fname.add("yvar");
+        fname.add("covar");
+
+        partialResult = new Object[6];
+        partialResult[0] = new LongWritable(0);
+        partialResult[1] = new DoubleWritable(0);
+        partialResult[2] = new DoubleWritable(0);
+        partialResult[3] = new DoubleWritable(0);
+        partialResult[4] = new DoubleWritable(0);
+        partialResult[5] = new DoubleWritable(0);
+
+        return ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi);
+
+      } else {
+        setResult(new DoubleWritable(0));
+        return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
+      }
+    }
+
+    static class StdAgg implements AggregationBuffer {
+      long count; // number n of elements
+      double xavg; // average of x elements
+      double yavg; // average of y elements
+      double xvar; // n times the variance of x elements
+      double yvar; // n times the variance of y elements
+      double covar; // n times the covariance
+    };
+
+    @Override
+    public AggregationBuffer getNewAggregationBuffer() throws HiveException {
+      StdAgg result = new StdAgg();
+      reset(result);
+      return result;
+    }
+
+    @Override
+    public void reset(AggregationBuffer agg) throws HiveException {
+      StdAgg myagg = (StdAgg) agg;
+      myagg.count = 0;
+      myagg.xavg = 0;
+      myagg.yavg = 0;
+      myagg.xvar = 0;
+      myagg.yvar = 0;
+      myagg.covar = 0;
+    }
+
+    @Override
+    public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException
{
+      assert (parameters.length == 2);
+      Object px = parameters[0];
+      Object py = parameters[1];
+      if (px != null && py != null) {
+        StdAgg myagg = (StdAgg) agg;
+        double vx = PrimitiveObjectInspectorUtils.getDouble(px, xInputOI);
+        double vy = PrimitiveObjectInspectorUtils.getDouble(py, yInputOI);
+        double xavgOld = myagg.xavg;
+        double yavgOld = myagg.yavg;
+        myagg.count++;
+        myagg.xavg += (vx - xavgOld) / myagg.count;
+        myagg.yavg += (vy - yavgOld) / myagg.count;
+        if (myagg.count > 1) {
+            myagg.covar += (vx - xavgOld) * (vy - myagg.yavg);
+            myagg.xvar += (vx - xavgOld) * (vx - myagg.xavg);
+            myagg.yvar += (vy - yavgOld) * (vy - myagg.yavg);
+        }
+      }
+    }
+
+    @Override
+    public Object terminatePartial(AggregationBuffer agg) throws HiveException {
+      StdAgg myagg = (StdAgg) agg;
+      ((LongWritable) partialResult[0]).set(myagg.count);
+      ((DoubleWritable) partialResult[1]).set(myagg.xavg);
+      ((DoubleWritable) partialResult[2]).set(myagg.yavg);
+      ((DoubleWritable) partialResult[3]).set(myagg.xvar);
+      ((DoubleWritable) partialResult[4]).set(myagg.yvar);
+      ((DoubleWritable) partialResult[5]).set(myagg.covar);
+      return partialResult;
+    }
+
+    @Override
+    public void merge(AggregationBuffer agg, Object partial) throws HiveException {
+      if (partial != null) {
+        StdAgg myagg = (StdAgg) agg;
+
+        Object partialCount = soi.getStructFieldData(partial, countField);
+        Object partialXAvg = soi.getStructFieldData(partial, xavgField);
+        Object partialYAvg = soi.getStructFieldData(partial, yavgField);
+        Object partialXVar = soi.getStructFieldData(partial, xvarField);
+        Object partialYVar = soi.getStructFieldData(partial, yvarField);
+        Object partialCovar = soi.getStructFieldData(partial, covarField);
+
+        long nA = myagg.count;
+        long nB = countFieldOI.get(partialCount);
+
+        if (nA == 0) {
+            // Just copy the information since there is nothing so far
+            myagg.count = countFieldOI.get(partialCount);
+            myagg.xavg = xavgFieldOI.get(partialXAvg);
+            myagg.yavg = yavgFieldOI.get(partialYAvg);
+            myagg.xvar = xvarFieldOI.get(partialXVar);
+            myagg.yvar = yvarFieldOI.get(partialYVar);
+            myagg.covar = covarFieldOI.get(partialCovar);
+        }
+
+        if (nA != 0 && nB != 0) {
+          // Merge the two partials
+          double xavgA = myagg.xavg;
+          double yavgA = myagg.yavg;
+          double xavgB = xavgFieldOI.get(partialXAvg);
+          double yavgB = yavgFieldOI.get(partialYAvg);
+          double xvarB = xvarFieldOI.get(partialXVar);
+          double yvarB = yvarFieldOI.get(partialYVar);
+          double covarB = covarFieldOI.get(partialCovar);
+
+          myagg.count += nB;
+          myagg.xavg = (xavgA * nA + xavgB * nB) / myagg.count;
+          myagg.yavg = (yavgA * nA + yavgB * nB) / myagg.count;
+          myagg.xvar += xvarB + (xavgA - xavgB) * (xavgA - xavgB) * myagg.count;
+          myagg.yvar += yvarB + (yavgA - yavgB) * (yavgA - yavgB) * myagg.count;
+          myagg.covar +=
+              covarB + (xavgA - xavgB) * (yavgA - yavgB) * ((double) (nA * nB) / myagg.count);
+        }
+      }
+    }
+
+    @Override
+    public Object terminate(AggregationBuffer agg) throws HiveException {
+      StdAgg myagg = (StdAgg) agg;
+
+      if (myagg.count < 2) { // SQL standard - return null for zero or one pair
+          return null;
+      } else {
+          getResult().set(
+                  myagg.covar
+                  / java.lang.Math.sqrt(myagg.xvar)
+                  / java.lang.Math.sqrt(myagg.yvar)
+                  );
+          return getResult();
+      }
+    }
+
+    public void setResult(DoubleWritable result) {
+      this.result = result;
+    }
+
+    public DoubleWritable getResult() {
+      return result;
+    }
+  }
+
+}

Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/udaf_corr.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/udaf_corr.q?rev=987360&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/udaf_corr.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/udaf_corr.q Fri Aug 20 01:03:20 2010
@@ -0,0 +1,16 @@
+DROP TABLE covar_tab;
+CREATE TABLE covar_tab (a INT, b INT, c INT)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
+STORED AS TEXTFILE;
+LOAD DATA LOCAL INPATH '../data/files/covar_tab.txt' OVERWRITE
+INTO TABLE covar_tab;
+
+DESCRIBE FUNCTION corr;
+DESCRIBE FUNCTION EXTENDED corr;
+SELECT corr(b, c) FROM covar_tab WHERE a < 1;
+SELECT corr(b, c) FROM covar_tab WHERE a < 3;
+SELECT corr(b, c) FROM covar_tab WHERE a = 3;
+SELECT a, corr(b, c) FROM covar_tab GROUP BY a ORDER BY a;
+SELECT corr(b, c) FROM covar_tab;
+
+DROP TABLE covar_tab;

Modified: hadoop/hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out?rev=987360&r1=987359&r2=987360&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out (original)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out Fri Aug 20 01:03:20
2010
@@ -39,6 +39,7 @@ concat
 concat_ws
 context_ngrams
 conv
+corr
 cos
 count
 covar_pop
@@ -168,6 +169,7 @@ concat
 concat_ws
 context_ngrams
 conv
+corr
 cos
 count
 covar_pop

Added: hadoop/hive/trunk/ql/src/test/results/clientpositive/udaf_corr.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/udaf_corr.q.out?rev=987360&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/udaf_corr.q.out (added)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/udaf_corr.q.out Fri Aug 20 01:03:20
2010
@@ -0,0 +1,97 @@
+PREHOOK: query: DROP TABLE covar_tab
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE covar_tab
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE covar_tab (a INT, b INT, c INT)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
+STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE covar_tab (a INT, b INT, c INT)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
+STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@covar_tab
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/covar_tab.txt' OVERWRITE
+INTO TABLE covar_tab
+PREHOOK: type: LOAD
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/covar_tab.txt' OVERWRITE
+INTO TABLE covar_tab
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@covar_tab
+PREHOOK: query: DESCRIBE FUNCTION corr
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION corr
+POSTHOOK: type: DESCFUNCTION
+corr(x,y) - Returns the Pearson coefficient of correlation
+between a set of number pairs
+PREHOOK: query: DESCRIBE FUNCTION EXTENDED corr
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION EXTENDED corr
+POSTHOOK: type: DESCFUNCTION
+corr(x,y) - Returns the Pearson coefficient of correlation
+between a set of number pairs
+The function takes as arguments any pair of numeric types and returns a double.
+Any pair with a NULL is ignored. If the function is applied to an empty set or
+a singleton set, NULL will be returned. Otherwise, it computes the following:
+   COVAR_POP(x,y)/(STDDEV_POP(x)*STDDEV_POP(y))
+where neither x nor y is null,
+COVAR_POP is the population covariance,
+and STDDEV_POP is the population standard deviation.
+PREHOOK: query: SELECT corr(b, c) FROM covar_tab WHERE a < 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@covar_tab
+PREHOOK: Output: file:/tmp/nhuyn/hive_2010-08-18_14-44-58_681_3541590287509550544/-mr-10000
+POSTHOOK: query: SELECT corr(b, c) FROM covar_tab WHERE a < 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@covar_tab
+POSTHOOK: Output: file:/tmp/nhuyn/hive_2010-08-18_14-44-58_681_3541590287509550544/-mr-10000
+NULL
+PREHOOK: query: SELECT corr(b, c) FROM covar_tab WHERE a < 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@covar_tab
+PREHOOK: Output: file:/tmp/nhuyn/hive_2010-08-18_14-45-03_289_9041510910213817151/-mr-10000
+POSTHOOK: query: SELECT corr(b, c) FROM covar_tab WHERE a < 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@covar_tab
+POSTHOOK: Output: file:/tmp/nhuyn/hive_2010-08-18_14-45-03_289_9041510910213817151/-mr-10000
+NULL
+PREHOOK: query: SELECT corr(b, c) FROM covar_tab WHERE a = 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@covar_tab
+PREHOOK: Output: file:/tmp/nhuyn/hive_2010-08-18_14-45-06_480_8481295633393615585/-mr-10000
+POSTHOOK: query: SELECT corr(b, c) FROM covar_tab WHERE a = 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@covar_tab
+POSTHOOK: Output: file:/tmp/nhuyn/hive_2010-08-18_14-45-06_480_8481295633393615585/-mr-10000
+NULL
+PREHOOK: query: SELECT a, corr(b, c) FROM covar_tab GROUP BY a ORDER BY a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@covar_tab
+PREHOOK: Output: file:/tmp/nhuyn/hive_2010-08-18_14-45-09_655_4572224946166536614/-mr-10000
+POSTHOOK: query: SELECT a, corr(b, c) FROM covar_tab GROUP BY a ORDER BY a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@covar_tab
+POSTHOOK: Output: file:/tmp/nhuyn/hive_2010-08-18_14-45-09_655_4572224946166536614/-mr-10000
+1	NULL
+2	NULL
+3	NULL
+4	NULL
+5	NULL
+6	NULL
+PREHOOK: query: SELECT corr(b, c) FROM covar_tab
+PREHOOK: type: QUERY
+PREHOOK: Input: default@covar_tab
+PREHOOK: Output: file:/tmp/nhuyn/hive_2010-08-18_14-45-20_421_6697314491903913524/-mr-10000
+POSTHOOK: query: SELECT corr(b, c) FROM covar_tab
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@covar_tab
+POSTHOOK: Output: file:/tmp/nhuyn/hive_2010-08-18_14-45-20_421_6697314491903913524/-mr-10000
+0.6633880657639323
+PREHOOK: query: DROP TABLE covar_tab
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@covar_tab
+PREHOOK: Output: default@covar_tab
+POSTHOOK: query: DROP TABLE covar_tab
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@covar_tab
+POSTHOOK: Output: default@covar_tab



Mime
View raw message