tajo-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jihoon...@apache.org
Subject tajo git commit: TAJO-923: Add VAR_SAMP and VAR_POP window functions.
Date Fri, 17 Apr 2015 14:58:12 GMT
Repository: tajo
Updated Branches:
  refs/heads/master d2a4f9b3d -> 14a1e536c


TAJO-923: Add VAR_SAMP and VAR_POP window functions.

Closes #458

Signed-off-by: Jihoon Son <jihoonson@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/tajo/repo
Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/14a1e536
Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/14a1e536
Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/14a1e536

Branch: refs/heads/master
Commit: 14a1e536c2848dbb9e1a714076d833a7ff1f5640
Parents: d2a4f9b
Author: Dongjoon Hyun <dongjoon@apache.org>
Authored: Fri Apr 17 23:55:57 2015 +0900
Committer: Jihoon Son <jihoonson@apache.org>
Committed: Fri Apr 17 23:57:28 2015 +0900

----------------------------------------------------------------------
 CHANGES                                         |   3 +
 .../tajo/engine/function/builtin/StdDev.java    |  94 --------------
 .../tajo/engine/function/builtin/StdDevPop.java |  10 +-
 .../engine/function/builtin/StdDevSamp.java     |   8 +-
 .../tajo/engine/function/builtin/VarPop.java    |  42 +++++++
 .../engine/function/builtin/VarPopDouble.java   |  39 ++++++
 .../engine/function/builtin/VarPopFloat.java    |  39 ++++++
 .../tajo/engine/function/builtin/VarPopInt.java |  39 ++++++
 .../engine/function/builtin/VarPopLong.java     |  39 ++++++
 .../tajo/engine/function/builtin/VarSamp.java   |  40 ++++++
 .../engine/function/builtin/VarSampDouble.java  |  39 ++++++
 .../engine/function/builtin/VarSampFloat.java   |  39 ++++++
 .../engine/function/builtin/VarSampInt.java     |  39 ++++++
 .../engine/function/builtin/VarSampLong.java    |  39 ++++++
 .../tajo/engine/function/builtin/Variance.java  |  94 ++++++++++++++
 tajo-core/src/main/proto/InternalTypes.proto    |   2 +-
 .../engine/function/TestBuiltinFunctions.java   | 123 +++++++++++++++++++
 17 files changed, 624 insertions(+), 104 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tajo/blob/14a1e536/CHANGES
----------------------------------------------------------------------
diff --git a/CHANGES b/CHANGES
index b04358c..4899ed2 100644
--- a/CHANGES
+++ b/CHANGES
@@ -9,6 +9,9 @@ Release 0.11.0 - unreleased
 
     TAJO-921: Add STDDEV_SAMP and STDDEV_POP window functions. (Keuntae Park)
 
+    TAJO-923: Add VAR_SAMP and VAR_POP window functions. 
+    (Contributed by Dongjoon Hyun, Committed by jihoon)
+
     TAJO-1135: Implement queryable virtual table for cluster information.
     (jihun)
 

http://git-wip-us.apache.org/repos/asf/tajo/blob/14a1e536/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/StdDev.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/StdDev.java b/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/StdDev.java
deleted file mode 100644
index e736167..0000000
--- a/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/StdDev.java
+++ /dev/null
@@ -1,94 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.tajo.engine.function.builtin;
-
-import org.apache.tajo.catalog.CatalogUtil;
-import org.apache.tajo.catalog.Column;
-import org.apache.tajo.common.TajoDataTypes.DataType;
-import org.apache.tajo.common.TajoDataTypes.Type;
-import org.apache.tajo.datum.Datum;
-import org.apache.tajo.datum.NullDatum;
-import org.apache.tajo.datum.ProtobufDatum;
-import org.apache.tajo.plan.function.AggFunction;
-import org.apache.tajo.plan.function.FunctionContext;
-import org.apache.tajo.storage.Tuple;
-
-import static org.apache.tajo.InternalTypes.StdDevProto;
-
-public abstract class StdDev extends AggFunction<Datum> {
-
-  public StdDev(Column[] definedArgs) {
-    super(definedArgs);
-  }
-
-  public StdDevContext newContext() {
-    return new StdDevContext();
-  }
-
-  @Override
-  public void eval(FunctionContext ctx, Tuple params) {
-    StdDevContext StdDevCtx = (StdDevContext) ctx;
-    Datum datum = params.get(0);
-    if (datum.isNotNull()) {
-      double delta = datum.asFloat8() - StdDevCtx.avg;
-      StdDevCtx.count++;
-      StdDevCtx.avg += delta/StdDevCtx.count;
-      StdDevCtx.squareSumOfDiff += delta * (datum.asFloat8() - StdDevCtx.avg);
-    }
-  }
-
-  @Override
-  public void merge(FunctionContext ctx, Tuple part) {
-    StdDevContext StdDevCtx = (StdDevContext) ctx;
-    Datum d = part.get(0);
-    if (d instanceof NullDatum) {
-      return;
-    }
-    ProtobufDatum datum = (ProtobufDatum) d;
-    StdDevProto proto = (StdDevProto) datum.get();
-    double delta = proto.getAvg() - StdDevCtx.avg;
-    StdDevCtx.avg += delta * proto.getCount() / (StdDevCtx.count + proto.getCount());
-    StdDevCtx.squareSumOfDiff += proto.getSquareSumOfDiff() + delta * delta * StdDevCtx.count * proto.getCount() / (StdDevCtx.count + proto.getCount());
-    StdDevCtx.count += proto.getCount();
-  }
-
-  @Override
-  public Datum getPartialResult(FunctionContext ctx) {
-    StdDevContext StdDevCtx = (StdDevContext) ctx;
-    if (StdDevCtx.count == 0) {
-      return NullDatum.get();
-    }
-    StdDevProto.Builder builder = StdDevProto.newBuilder();
-    builder.setSquareSumOfDiff(StdDevCtx.squareSumOfDiff);
-    builder.setAvg(StdDevCtx.avg);
-    builder.setCount(StdDevCtx.count);
-    return new ProtobufDatum(builder.build());
-  }
-
-  @Override
-  public DataType getPartialResultType() {
-    return CatalogUtil.newDataType(Type.PROTOBUF, StdDevProto.class.getName());
-  }
-
-  protected static class StdDevContext implements FunctionContext {
-    double squareSumOfDiff = 0.0;
-    double avg = 0.0;
-    long count = 0;
-  }
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/14a1e536/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/StdDevPop.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/StdDevPop.java b/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/StdDevPop.java
index 3403df1..3a5a017 100644
--- a/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/StdDevPop.java
+++ b/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/StdDevPop.java
@@ -24,19 +24,19 @@ import org.apache.tajo.datum.DatumFactory;
 import org.apache.tajo.datum.NullDatum;
 import org.apache.tajo.plan.function.FunctionContext;
 
-public abstract class StdDevPop extends StdDev {
+public abstract class StdDevPop extends Variance {
   public StdDevPop(Column[] definedArgs) {
     super(definedArgs);
   }
 
   @Override
   public Datum terminate(FunctionContext ctx) {
-    StdDevContext StdDevCtx = (StdDevContext) ctx;
-    if (StdDevCtx.count == 0) {
+    VarianceContext varianceCtx = (VarianceContext) ctx;
+    if (varianceCtx.count == 0) {
       return NullDatum.get();
-    } else if (StdDevCtx.count == 1) {
+    } else if (varianceCtx.count == 1) {
       return DatumFactory.createFloat8(0);
     }
-    return DatumFactory.createFloat8(Math.sqrt(StdDevCtx.squareSumOfDiff / StdDevCtx.count));
+    return DatumFactory.createFloat8(Math.sqrt(varianceCtx.squareSumOfDiff / varianceCtx.count));
   }
 }

http://git-wip-us.apache.org/repos/asf/tajo/blob/14a1e536/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/StdDevSamp.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/StdDevSamp.java b/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/StdDevSamp.java
index 60f83a4..f4d56f5 100644
--- a/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/StdDevSamp.java
+++ b/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/StdDevSamp.java
@@ -24,18 +24,18 @@ import org.apache.tajo.datum.DatumFactory;
 import org.apache.tajo.datum.NullDatum;
 import org.apache.tajo.plan.function.FunctionContext;
 
-public abstract class StdDevSamp extends StdDev {
+public abstract class StdDevSamp extends Variance {
   public StdDevSamp(Column[] definedArgs) {
     super(definedArgs);
   }
 
   @Override
   public Datum terminate(FunctionContext ctx) {
-    StdDevContext StdDevCtx = (StdDevContext) ctx;
-    if (StdDevCtx.count <= 1) {
+    VarianceContext varianceCtx = (VarianceContext) ctx;
+    if (varianceCtx.count <= 1) {
       return NullDatum.get();
     }
 
-    return DatumFactory.createFloat8(Math.sqrt(StdDevCtx.squareSumOfDiff / (StdDevCtx.count - 1)));
+    return DatumFactory.createFloat8(Math.sqrt(varianceCtx.squareSumOfDiff / (varianceCtx.count - 1)));
   }
 }

http://git-wip-us.apache.org/repos/asf/tajo/blob/14a1e536/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarPop.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarPop.java b/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarPop.java
new file mode 100644
index 0000000..2420302
--- /dev/null
+++ b/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarPop.java
@@ -0,0 +1,42 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tajo.engine.function.builtin;
+
+import org.apache.tajo.catalog.Column;
+import org.apache.tajo.datum.Datum;
+import org.apache.tajo.datum.DatumFactory;
+import org.apache.tajo.datum.NullDatum;
+import org.apache.tajo.plan.function.FunctionContext;
+
+public abstract class VarPop extends Variance {
+  public VarPop(Column[] definedArgs) {
+    super(definedArgs);
+  }
+
+  @Override
+  public Datum terminate(FunctionContext ctx) {
+    VarianceContext varianceCtx = (VarianceContext) ctx;
+    if (varianceCtx.count == 0) {
+      return NullDatum.get();
+    } else if (varianceCtx.count == 1) {
+      return DatumFactory.createFloat8(0);
+    }
+    return DatumFactory.createFloat8(varianceCtx.squareSumOfDiff / varianceCtx.count);
+  }
+}

http://git-wip-us.apache.org/repos/asf/tajo/blob/14a1e536/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarPopDouble.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarPopDouble.java b/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarPopDouble.java
new file mode 100644
index 0000000..1f6d733
--- /dev/null
+++ b/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarPopDouble.java
@@ -0,0 +1,39 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tajo.engine.function.builtin;
+
+import org.apache.tajo.catalog.Column;
+import org.apache.tajo.common.TajoDataTypes.Type;
+import org.apache.tajo.engine.function.annotation.Description;
+import org.apache.tajo.engine.function.annotation.ParamTypes;
+
+@Description(
+    functionName = "VAR_POP",
+    description = "The variance of a set of numbers.",
+    example = "> SELECT VAR_POP(expr);",
+    returnType = Type.FLOAT8,
+    paramTypes = {@ParamTypes(paramTypes = {Type.FLOAT8})}
+)
+public class VarPopDouble extends VarPop {
+  public VarPopDouble() {
+    super(new Column[] {
+        new Column("expr", Type.FLOAT8)
+    });
+  }
+}

http://git-wip-us.apache.org/repos/asf/tajo/blob/14a1e536/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarPopFloat.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarPopFloat.java b/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarPopFloat.java
new file mode 100644
index 0000000..6351dfe
--- /dev/null
+++ b/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarPopFloat.java
@@ -0,0 +1,39 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tajo.engine.function.builtin;
+
+import org.apache.tajo.catalog.Column;
+import org.apache.tajo.common.TajoDataTypes.Type;
+import org.apache.tajo.engine.function.annotation.Description;
+import org.apache.tajo.engine.function.annotation.ParamTypes;
+
+@Description(
+    functionName = "VAR_POP",
+    description = "The variance of a set of numbers.",
+    example = "> SELECT VAR_POP(expr);",
+    returnType = Type.FLOAT8,
+    paramTypes = {@ParamTypes(paramTypes = {Type.FLOAT4})}
+)
+public class VarPopFloat extends VarPop {
+  public VarPopFloat() {
+    super(new Column[] {
+        new Column("expr", Type.FLOAT4)
+    });
+  }
+}

http://git-wip-us.apache.org/repos/asf/tajo/blob/14a1e536/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarPopInt.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarPopInt.java b/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarPopInt.java
new file mode 100644
index 0000000..35ec4f3
--- /dev/null
+++ b/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarPopInt.java
@@ -0,0 +1,39 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tajo.engine.function.builtin;
+
+import org.apache.tajo.catalog.Column;
+import org.apache.tajo.common.TajoDataTypes.Type;
+import org.apache.tajo.engine.function.annotation.Description;
+import org.apache.tajo.engine.function.annotation.ParamTypes;
+
+@Description(
+    functionName = "VAR_POP",
+    description = "The variance of a set of numbers.",
+    example = "> SELECT VAR_POP(expr);",
+    returnType = Type.FLOAT8,
+    paramTypes = {@ParamTypes(paramTypes = {Type.INT4})}
+)
+public class VarPopInt extends VarPop {
+  public VarPopInt() {
+    super(new Column[] {
+        new Column("expr", Type.INT4)
+    });
+  }
+}

http://git-wip-us.apache.org/repos/asf/tajo/blob/14a1e536/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarPopLong.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarPopLong.java b/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarPopLong.java
new file mode 100644
index 0000000..14d8b55
--- /dev/null
+++ b/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarPopLong.java
@@ -0,0 +1,39 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tajo.engine.function.builtin;
+
+import org.apache.tajo.catalog.Column;
+import org.apache.tajo.common.TajoDataTypes.Type;
+import org.apache.tajo.engine.function.annotation.Description;
+import org.apache.tajo.engine.function.annotation.ParamTypes;
+
+@Description(
+    functionName = "VAR_POP",
+    description = "The variance of a set of numbers.",
+    example = "> SELECT VAR_POP(expr);",
+    returnType = Type.FLOAT8,
+    paramTypes = {@ParamTypes(paramTypes = {Type.INT8})}
+)
+public class VarPopLong extends VarPop {
+  public VarPopLong() {
+    super(new Column[] {
+        new Column("expr", Type.INT8)
+    });
+  }
+}

http://git-wip-us.apache.org/repos/asf/tajo/blob/14a1e536/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarSamp.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarSamp.java b/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarSamp.java
new file mode 100644
index 0000000..2f2ab51
--- /dev/null
+++ b/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarSamp.java
@@ -0,0 +1,40 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tajo.engine.function.builtin;
+
+import org.apache.tajo.catalog.Column;
+import org.apache.tajo.datum.Datum;
+import org.apache.tajo.datum.DatumFactory;
+import org.apache.tajo.datum.NullDatum;
+import org.apache.tajo.plan.function.FunctionContext;
+
+public abstract class VarSamp extends Variance {
+  public VarSamp(Column[] definedArgs) {
+    super(definedArgs);
+  }
+
+  @Override
+  public Datum terminate(FunctionContext ctx) {
+    VarianceContext varianceCtx = (VarianceContext) ctx;
+    if (varianceCtx.count <= 1) {
+      return NullDatum.get();
+    }
+    return DatumFactory.createFloat8(varianceCtx.squareSumOfDiff / (varianceCtx.count - 1));
+  }
+}

http://git-wip-us.apache.org/repos/asf/tajo/blob/14a1e536/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarSampDouble.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarSampDouble.java b/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarSampDouble.java
new file mode 100644
index 0000000..de4ee7f
--- /dev/null
+++ b/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarSampDouble.java
@@ -0,0 +1,39 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tajo.engine.function.builtin;
+
+import org.apache.tajo.catalog.Column;
+import org.apache.tajo.common.TajoDataTypes.Type;
+import org.apache.tajo.engine.function.annotation.Description;
+import org.apache.tajo.engine.function.annotation.ParamTypes;
+
+@Description(
+    functionName = "VAR_SAMP",
+    description = "The unbiased sample variance of a set of numbers.",
+    example = "> SELECT VAR_SAMP(expr);",
+    returnType = Type.FLOAT8,
+    paramTypes = {@ParamTypes(paramTypes = {Type.FLOAT8})}
+)
+public class VarSampDouble extends VarSamp {
+  public VarSampDouble() {
+    super(new Column[] {
+        new Column("expr", Type.FLOAT8)
+    });
+  }
+}

http://git-wip-us.apache.org/repos/asf/tajo/blob/14a1e536/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarSampFloat.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarSampFloat.java b/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarSampFloat.java
new file mode 100644
index 0000000..014affc
--- /dev/null
+++ b/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarSampFloat.java
@@ -0,0 +1,39 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tajo.engine.function.builtin;
+
+import org.apache.tajo.catalog.Column;
+import org.apache.tajo.common.TajoDataTypes.Type;
+import org.apache.tajo.engine.function.annotation.Description;
+import org.apache.tajo.engine.function.annotation.ParamTypes;
+
+@Description(
+    functionName = "VAR_SAMP",
+    description = "The unbiased sample variance of a set of numbers.",
+    example = "> SELECT VAR_SAMP(expr);",
+    returnType = Type.FLOAT8,
+    paramTypes = {@ParamTypes(paramTypes = {Type.FLOAT4})}
+)
+public class VarSampFloat extends VarSamp {
+  public VarSampFloat() {
+    super(new Column[] {
+        new Column("expr", Type.FLOAT4)
+    });
+  }
+}

http://git-wip-us.apache.org/repos/asf/tajo/blob/14a1e536/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarSampInt.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarSampInt.java b/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarSampInt.java
new file mode 100644
index 0000000..1e50b9c
--- /dev/null
+++ b/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarSampInt.java
@@ -0,0 +1,39 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tajo.engine.function.builtin;
+
+import org.apache.tajo.catalog.Column;
+import org.apache.tajo.common.TajoDataTypes.Type;
+import org.apache.tajo.engine.function.annotation.Description;
+import org.apache.tajo.engine.function.annotation.ParamTypes;
+
+@Description(
+    functionName = "VAR_SAMP",
+    description = "The unbiased sample variance of a set of numbers.",
+    example = "> SELECT VAR_SAMP(expr);",
+    returnType = Type.FLOAT8,
+    paramTypes = {@ParamTypes(paramTypes = {Type.INT4})}
+)
+public class VarSampInt extends VarSamp {
+  public VarSampInt() {
+    super(new Column[] {
+        new Column("expr", Type.INT4)
+    });
+  }
+}

http://git-wip-us.apache.org/repos/asf/tajo/blob/14a1e536/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarSampLong.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarSampLong.java b/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarSampLong.java
new file mode 100644
index 0000000..a0a5a58
--- /dev/null
+++ b/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/VarSampLong.java
@@ -0,0 +1,39 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tajo.engine.function.builtin;
+
+import org.apache.tajo.catalog.Column;
+import org.apache.tajo.common.TajoDataTypes.Type;
+import org.apache.tajo.engine.function.annotation.Description;
+import org.apache.tajo.engine.function.annotation.ParamTypes;
+
+@Description(
+    functionName = "VAR_SAMP",
+    description = "The unbiased sample variance of a set of numbers.",
+    example = "> SELECT VAR_SAMP(expr);",
+    returnType = Type.FLOAT8,
+    paramTypes = {@ParamTypes(paramTypes = {Type.INT8})}
+)
+public class VarSampLong extends VarSamp {
+  public VarSampLong() {
+    super(new Column[] {
+        new Column("expr", Type.INT8)
+    });
+  }
+}

http://git-wip-us.apache.org/repos/asf/tajo/blob/14a1e536/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/Variance.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/Variance.java b/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/Variance.java
new file mode 100644
index 0000000..3d925d1
--- /dev/null
+++ b/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/Variance.java
@@ -0,0 +1,94 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tajo.engine.function.builtin;
+
+import org.apache.tajo.catalog.CatalogUtil;
+import org.apache.tajo.catalog.Column;
+import org.apache.tajo.common.TajoDataTypes.DataType;
+import org.apache.tajo.common.TajoDataTypes.Type;
+import org.apache.tajo.datum.Datum;
+import org.apache.tajo.datum.NullDatum;
+import org.apache.tajo.datum.ProtobufDatum;
+import org.apache.tajo.plan.function.AggFunction;
+import org.apache.tajo.plan.function.FunctionContext;
+import org.apache.tajo.storage.Tuple;
+
+import static org.apache.tajo.InternalTypes.VarianceProto;
+
+/**
+ * Shared accumulation logic for variance-style aggregate functions.
+ *
+ * <p>The running state (count, mean, and sum of squared differences from the
+ * mean) lives in a {@link VarianceContext}. Partial state is exchanged as a
+ * {@link VarianceProto} wrapped in a {@link ProtobufDatum}. This class does not
+ * compute a final value itself; presumably concrete subclasses do that from the
+ * accumulated context.
+ */
+public abstract class Variance extends AggFunction<Datum> {
+
+  /**
+   * @param definedArgs the argument columns, forwarded unchanged to the parent constructor
+   */
+  public Variance(Column[] definedArgs) {
+    super(definedArgs);
+  }
+
+  /**
+   * @return a fresh context with count 0, mean 0.0 and squared-difference sum 0.0
+   */
+  public VarianceContext newContext() {
+    return new VarianceContext();
+  }
+
+  /**
+   * Folds one input value into the running state; null inputs are ignored.
+   *
+   * @param ctx    the {@link VarianceContext} to update
+   * @param params tuple whose first field is the value to accumulate
+   */
+  @Override
+  public void eval(FunctionContext ctx, Tuple params) {
+    VarianceContext varianceCtx = (VarianceContext) ctx;
+    Datum datum = params.get(0);
+    if (datum.isNotNull()) {
+      double value = datum.asFloat8();
+      // Incremental update: delta is taken against the mean BEFORE it is moved,
+      // and the second factor against the mean AFTER it is moved.
+      double delta = value - varianceCtx.avg;
+      varianceCtx.count++;
+      varianceCtx.avg += delta / varianceCtx.count;
+      varianceCtx.squareSumOfDiff += delta * (value - varianceCtx.avg);
+    }
+  }
+
+  /**
+   * Combines a partial result into the running state.
+   *
+   * @param ctx  the {@link VarianceContext} to update
+   * @param part tuple whose first field is either a {@link NullDatum} (ignored)
+   *             or a {@link ProtobufDatum} carrying a {@link VarianceProto}
+   */
+  @Override
+  public void merge(FunctionContext ctx, Tuple part) {
+    VarianceContext varianceCtx = (VarianceContext) ctx;
+    Datum d = part.get(0);
+    if (d instanceof NullDatum) {
+      return;
+    }
+    ProtobufDatum datum = (ProtobufDatum) d;
+    VarianceProto proto = (VarianceProto) datum.get();
+
+    long partCount = proto.getCount();
+    if (partCount == 0) {
+      // An empty partial carries no observations. Without this guard, merging it
+      // into an empty context divides 0 by 0 and leaves NaN in the state.
+      return;
+    }
+
+    long totalCount = varianceCtx.count + partCount;
+    double delta = proto.getAvg() - varianceCtx.avg;
+    varianceCtx.avg += delta * partCount / totalCount;
+    // Uses the pre-merge count on purpose: the cross term weights the gap between
+    // the two means by both group sizes.
+    varianceCtx.squareSumOfDiff += proto.getSquareSumOfDiff()
+        + delta * delta * varianceCtx.count * partCount / totalCount;
+    varianceCtx.count = totalCount;
+  }
+
+  /**
+   * Serializes the running state for a later {@link #merge}.
+   *
+   * @param ctx the {@link VarianceContext} to serialize
+   * @return {@link NullDatum} when no value has been accumulated, otherwise a
+   *         {@link ProtobufDatum} holding the squared-difference sum, mean and count
+   */
+  @Override
+  public Datum getPartialResult(FunctionContext ctx) {
+    VarianceContext varianceCtx = (VarianceContext) ctx;
+    if (varianceCtx.count == 0) {
+      return NullDatum.get();
+    }
+    VarianceProto.Builder builder = VarianceProto.newBuilder();
+    builder.setSquareSumOfDiff(varianceCtx.squareSumOfDiff);
+    builder.setAvg(varianceCtx.avg);
+    builder.setCount(varianceCtx.count);
+    return new ProtobufDatum(builder.build());
+  }
+
+  /**
+   * @return a PROTOBUF data type tagged with the {@link VarianceProto} class name
+   */
+  @Override
+  public DataType getPartialResultType() {
+    return CatalogUtil.newDataType(Type.PROTOBUF, VarianceProto.class.getName());
+  }
+
+  /** Mutable running state shared by {@code eval}, {@code merge} and {@code getPartialResult}. */
+  protected static class VarianceContext implements FunctionContext {
+    /** Sum of squared differences from the running mean. */
+    double squareSumOfDiff = 0.0;
+    /** Running mean of the accumulated values. */
+    double avg = 0.0;
+    /** Number of non-null values accumulated so far. */
+    long count = 0;
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/14a1e536/tajo-core/src/main/proto/InternalTypes.proto
----------------------------------------------------------------------
diff --git a/tajo-core/src/main/proto/InternalTypes.proto b/tajo-core/src/main/proto/InternalTypes.proto
index 7108991..13dd107 100644
--- a/tajo-core/src/main/proto/InternalTypes.proto
+++ b/tajo-core/src/main/proto/InternalTypes.proto
@@ -31,7 +31,7 @@ message AvgDoubleProto {
   required int64 count = 2;
 }
 
-message StdDevProto {
+message VarianceProto {
   required double squareSumOfDiff = 1;
   required double avg = 2;
   required int64 count = 3;

http://git-wip-us.apache.org/repos/asf/tajo/blob/14a1e536/tajo-core/src/test/java/org/apache/tajo/engine/function/TestBuiltinFunctions.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/java/org/apache/tajo/engine/function/TestBuiltinFunctions.java
b/tajo-core/src/test/java/org/apache/tajo/engine/function/TestBuiltinFunctions.java
index d9d9b77..6bec3ad 100644
--- a/tajo-core/src/test/java/org/apache/tajo/engine/function/TestBuiltinFunctions.java
+++ b/tajo-core/src/test/java/org/apache/tajo/engine/function/TestBuiltinFunctions.java
@@ -597,6 +597,129 @@ public class TestBuiltinFunctions extends QueryTestCaseBase {
 
   }
 
+  /**
+   * Checks VAR_SAMP over INT4, INT8, FLOAT4 and FLOAT8 columns that each hold
+   * two non-null values and one \N field (presumably read as NULL via the
+   * TEXT_NULL option set below).
+   */
+  @Test
+  public void testVarSamp() throws Exception {
+    KeyValueSet tableOptions = new KeyValueSet();
+    tableOptions.set(StorageConstants.TEXT_DELIMITER, StorageConstants.DEFAULT_FIELD_DELIMITER);
+    tableOptions.set(StorageConstants.TEXT_NULL, "\\\\N");
+
+    Schema schema = new Schema();
+    schema.addColumn("id", TajoDataTypes.Type.INT4);
+    schema.addColumn("value_int", TajoDataTypes.Type.INT4);
+    schema.addColumn("value_long", TajoDataTypes.Type.INT8);
+    schema.addColumn("value_float", TajoDataTypes.Type.FLOAT4);
+    schema.addColumn("value_double", TajoDataTypes.Type.FLOAT8);
+    String[] data = new String[]{
+            "1|\\N|-111|1.2|-50.5",
+            "2|1|\\N|\\N|52.5",
+            "3|2|-333|2.8|\\N" };
+    TajoTestingCluster.createTable("table11", schema, tableOptions, data, 1);
+
+    try {
+      ResultSet res = executeString(
+          "select var_samp(value_int) as vs_int, var_samp(value_long) as vs_long, "
+              + "var_samp(value_float) as vs_float, var_samp(value_double) as vs_double from table11");
+      try {
+        String ascExpected = "vs_int,vs_long,vs_float,vs_double\n" +
+                "-------------------------------\n" +
+                "0.5,24642.0,1.279999847412114,5304.5\n";
+
+        assertEquals(ascExpected, resultSetToString(res));
+      } finally {
+        // Close the result set even when the assertion above fails.
+        res.close();
+      }
+    } finally {
+      executeString("DROP TABLE table11 PURGE");
+    }
+  }
+
+  /**
+   * Checks VAR_SAMP when every column holds at most one non-\N value: the
+   * expected output is null for all four columns.
+   */
+  @Test
+  public void testVarSampWithFewNumbers() throws Exception {
+    KeyValueSet tableOptions = new KeyValueSet();
+    tableOptions.set(StorageConstants.TEXT_DELIMITER, StorageConstants.DEFAULT_FIELD_DELIMITER);
+    tableOptions.set(StorageConstants.TEXT_NULL, "\\\\N");
+
+    Schema schema = new Schema();
+    schema.addColumn("id", TajoDataTypes.Type.INT4);
+    schema.addColumn("value_int", TajoDataTypes.Type.INT4);
+    schema.addColumn("value_long", TajoDataTypes.Type.INT8);
+    schema.addColumn("value_float", TajoDataTypes.Type.FLOAT4);
+    schema.addColumn("value_double", TajoDataTypes.Type.FLOAT8);
+    String[] data = new String[]{
+            "1|\\N|\\N|\\N|-50.5",
+            "2|1|\\N|\\N|\\N",
+            "3|\\N|\\N|\\N|\\N" };
+    TajoTestingCluster.createTable("table11", schema, tableOptions, data, 1);
+
+    try {
+      ResultSet res = executeString(
+          "select var_samp(value_int) as vsamp_int, var_samp(value_long) as vsamp_long, "
+              + "var_samp(value_float) as vsamp_float, var_samp(value_double) as vsamp_double "
+              + "from table11");
+      try {
+        String ascExpected = "vsamp_int,vsamp_long,vsamp_float,vsamp_double\n" +
+                "-------------------------------\n" +
+                "null,null,null,null\n";
+
+        assertEquals(ascExpected, resultSetToString(res));
+      } finally {
+        // Close the result set even when the assertion above fails.
+        res.close();
+      }
+    } finally {
+      executeString("DROP TABLE table11 PURGE");
+    }
+  }
+
+  /**
+   * Checks VAR_POP over INT4, INT8, FLOAT4 and FLOAT8 columns that each hold
+   * two non-null values and one \N field (presumably read as NULL via the
+   * TEXT_NULL option set below).
+   */
+  @Test
+  public void testVarPop() throws Exception {
+    KeyValueSet tableOptions = new KeyValueSet();
+    tableOptions.set(StorageConstants.TEXT_DELIMITER, StorageConstants.DEFAULT_FIELD_DELIMITER);
+    tableOptions.set(StorageConstants.TEXT_NULL, "\\\\N");
+
+    Schema schema = new Schema();
+    schema.addColumn("id", TajoDataTypes.Type.INT4);
+    schema.addColumn("value_int", TajoDataTypes.Type.INT4);
+    schema.addColumn("value_long", TajoDataTypes.Type.INT8);
+    schema.addColumn("value_float", TajoDataTypes.Type.FLOAT4);
+    schema.addColumn("value_double", TajoDataTypes.Type.FLOAT8);
+    String[] data = new String[]{
+            "1|\\N|-111|1.2|-50.5",
+            "2|1|\\N|\\N|52.5",
+            "3|2|-333|2.8|\\N" };
+    TajoTestingCluster.createTable("table11", schema, tableOptions, data, 1);
+
+    try {
+      ResultSet res = executeString(
+          "select var_pop(value_int) as vpop_int, var_pop(value_long) as vpop_long, "
+              + "var_pop(value_float) as vpop_float, var_pop(value_double) as vpop_double from "
+              + "table11");
+      try {
+        String ascExpected = "vpop_int,vpop_long,vpop_float,vpop_double\n" +
+                "-------------------------------\n" +
+                "0.25,12321.0,0.639999923706057,2652.25\n";
+
+        assertEquals(ascExpected, resultSetToString(res));
+      } finally {
+        // Close the result set even when the assertion above fails.
+        res.close();
+      }
+    } finally {
+      executeString("DROP TABLE table11 PURGE");
+    }
+  }
+
+  /**
+   * Checks VAR_POP when every column holds at most one non-\N value: the
+   * expected output is 0.0 for the two columns with a single value
+   * (value_int, value_double) and null for the two with none.
+   */
+  @Test
+  public void testVarPopWithFewNumbers() throws Exception {
+    KeyValueSet tableOptions = new KeyValueSet();
+    tableOptions.set(StorageConstants.TEXT_DELIMITER, StorageConstants.DEFAULT_FIELD_DELIMITER);
+    tableOptions.set(StorageConstants.TEXT_NULL, "\\\\N");
+
+    Schema schema = new Schema();
+    schema.addColumn("id", TajoDataTypes.Type.INT4);
+    schema.addColumn("value_int", TajoDataTypes.Type.INT4);
+    schema.addColumn("value_long", TajoDataTypes.Type.INT8);
+    schema.addColumn("value_float", TajoDataTypes.Type.FLOAT4);
+    schema.addColumn("value_double", TajoDataTypes.Type.FLOAT8);
+    String[] data = new String[]{
+            "1|\\N|\\N|\\N|-50.5",
+            "2|1|\\N|\\N|\\N",
+            "3|\\N|\\N|\\N|\\N" };
+    TajoTestingCluster.createTable("table11", schema, tableOptions, data, 1);
+
+    try {
+      ResultSet res = executeString(
+          "select var_pop(value_int) as vpop_int, var_pop(value_long) as vpop_long, "
+              + "var_pop(value_float) as vpop_float, var_pop(value_double) as vpop_double from "
+              + "table11");
+      try {
+        String ascExpected = "vpop_int,vpop_long,vpop_float,vpop_double\n" +
+                "-------------------------------\n" +
+                "0.0,null,null,0.0\n";
+
+        assertEquals(ascExpected, resultSetToString(res));
+      } finally {
+        // Close the result set even when the assertion above fails.
+        res.close();
+      }
+    } finally {
+      executeString("DROP TABLE table11 PURGE");
+    }
+  }
 
 //  @Test
 //  public void testRandom() throws Exception {


Mime
View raw message