hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From c..@apache.org
Subject svn commit: r1220933 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/exec/ java/org/apache/hadoop/hive/ql/parse/ java/org/apache/hadoop/hive/ql/udf/generic/ test/queries/clientpositive/ test/results/clientpositive/
Date Mon, 19 Dec 2011 20:25:48 GMT
Author: cws
Date: Mon Dec 19 20:25:48 2011
New Revision: 1220933

URL: http://svn.apache.org/viewvc?rev=1220933&view=rev
Log:
HIVE-2005. Implement BETWEEN operator (Navis via cws)

Added:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBetween.java
    hive/trunk/ql/src/test/queries/clientpositive/udf_between.q
    hive/trunk/ql/src/test/results/clientpositive/udf_between.q.out
Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g
    hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java?rev=1220933&r1=1220932&r2=1220933&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java Mon Dec 19
20:25:48 2011
@@ -152,6 +152,7 @@ import org.apache.hadoop.hive.ql.udf.gen
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFArray;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFArrayContains;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFAssertTrue;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFCase;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFCoalesce;
@@ -362,6 +363,7 @@ public final class FunctionRegistry {
     registerGenericUDF(">=", GenericUDFOPEqualOrGreaterThan.class);
     registerGenericUDF("not", GenericUDFOPNot.class);
     registerGenericUDF("!", GenericUDFOPNot.class);
+    registerGenericUDF("between", GenericUDFBetween.class);
 
     registerGenericUDF("ewah_bitmap_and", GenericUDFEWAHBitmapAnd.class);
     registerGenericUDF("ewah_bitmap_or", GenericUDFEWAHBitmapOr.class);

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g?rev=1220933&r1=1220932&r2=1220933&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g Mon Dec 19 20:25:48 2011
@@ -2003,6 +2003,10 @@ precedenceEqualExpression
        -> ^(KW_NOT ^(TOK_FUNCTION KW_IN $precedenceEqualExpression expressions))
     | (KW_IN expressions)
        -> ^(TOK_FUNCTION KW_IN $precedenceEqualExpression expressions)
+    | ( KW_NOT KW_BETWEEN (min=precedenceBitwiseOrExpression) KW_AND (max=precedenceBitwiseOrExpression)
)
+       -> ^(TOK_FUNCTION Identifier["between"] KW_TRUE $left $min $max)
+    | ( KW_BETWEEN (min=precedenceBitwiseOrExpression) KW_AND (max=precedenceBitwiseOrExpression)
)
+       -> ^(TOK_FUNCTION Identifier["between"] KW_FALSE $left $min $max)
     )*
     ;
 
@@ -2106,6 +2110,7 @@ sysFuncNames
     | KW_RLIKE
     | KW_REGEXP
     | KW_IN
+    | KW_BETWEEN
     ;
 
 descFuncNames

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBetween.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBetween.java?rev=1220933&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBetween.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBetween.java Mon
Dec 19 20:25:48 2011
@@ -0,0 +1,84 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.io.BooleanWritable;
+
+@Description(name = "between", value = "_FUNC_ a [NOT] BETWEEN b AND c - evaluate if a is
[not] in between b and c")
+public class GenericUDFBetween extends GenericUDF {
+
+  GenericUDFOPEqualOrGreaterThan egt = new GenericUDFOPEqualOrGreaterThan();
+  GenericUDFOPEqualOrLessThan elt = new GenericUDFOPEqualOrLessThan();
+
+  private ObjectInspector[] argumentOIs;
+  private final BooleanWritable result = new BooleanWritable();
+
+  @Override
+  public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException
{
+    if (!arguments[0].getTypeName().equals("boolean")) {
+      throw new UDFArgumentTypeException(0, "First argument for BETWEEN should be boolean
type");
+    }
+    egt.initialize(new ObjectInspector[] {arguments[1], arguments[2]});
+    elt.initialize(new ObjectInspector[] {arguments[1], arguments[3]});
+
+    argumentOIs = arguments;
+    return PrimitiveObjectInspectorFactory.writableBooleanObjectInspector;
+  }
+
+  @Override
+  public Object evaluate(DeferredObject[] arguments) throws HiveException {
+    boolean invert = (Boolean) ((PrimitiveObjectInspector) argumentOIs[0])
+				.getPrimitiveJavaObject(arguments[0].get());
+
+    BooleanWritable left = ((BooleanWritable)egt.evaluate(new DeferredObject[] {arguments[1],
arguments[2]}));
+    if (left == null) {
+      return null;
+    }
+    if (!invert && !left.get()) {
+      result.set(false);
+      return result;
+    }
+    BooleanWritable right = ((BooleanWritable)elt.evaluate(new DeferredObject[] {arguments[1],
arguments[3]}));
+    if (right == null) {
+      return null;
+    }
+    boolean between = left.get() && right.get();
+    result.set(invert ? !between : between);
+    return result;
+  }
+
+  @Override
+  public String getDisplayString(String[] children) {
+    StringBuilder sb = new StringBuilder();
+    sb.append(children[1]);
+    if (Boolean.valueOf(children[0])) {
+      sb.append(" NOT");
+    }
+    sb.append(" BETWEEN ");
+    sb.append(children[2]).append(" AND ").append(children[3]);
+    return sb.toString();
+  }
+}

Added: hive/trunk/ql/src/test/queries/clientpositive/udf_between.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/udf_between.q?rev=1220933&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/udf_between.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/udf_between.q Mon Dec 19 20:25:48 2011
@@ -0,0 +1,14 @@
+describe function between;
+describe function extended between;
+
+explain SELECT * FROM src where key + 100 between (150 + -50) AND (150 + 50) LIMIT 20;
+SELECT * FROM src where key + 100 between (150 + -50) AND (150 + 50) LIMIT 20;
+
+explain SELECT * FROM src where key + 100 not between (150 + -50) AND (150 + 50) LIMIT 20;
+SELECT * FROM src where key + 100 not between (150 + -50) AND (150 + 50) LIMIT 20;
+
+explain SELECT * FROM src where 'b' between 'a' AND 'c' LIMIT 1;
+SELECT * FROM src where 'b' between 'a' AND 'c' LIMIT 1;
+
+explain SELECT * FROM src where 2 between 2 AND '3' LIMIT 1;
+SELECT * FROM src where 2 between 2 AND '3' LIMIT 1;

Modified: hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out?rev=1220933&r1=1220932&r2=1220933&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out Mon Dec 19 20:25:48
2011
@@ -28,6 +28,7 @@ asin
 assert_true
 atan
 avg
+between
 bin
 case
 ceil

Added: hive/trunk/ql/src/test/results/clientpositive/udf_between.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/udf_between.q.out?rev=1220933&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/udf_between.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/udf_between.q.out Mon Dec 19 20:25:48 2011
@@ -0,0 +1,252 @@
+PREHOOK: query: describe function between
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: describe function between
+POSTHOOK: type: DESCFUNCTION
+between a [NOT] BETWEEN b AND c - evaluate if a is [not] in between b and c
+PREHOOK: query: describe function extended between
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: describe function extended between
+POSTHOOK: type: DESCFUNCTION
+between a [NOT] BETWEEN b AND c - evaluate if a is [not] in between b and c
+PREHOOK: query: explain SELECT * FROM src where key + 100 between (150 + -50) AND (150 +
50) LIMIT 20
+PREHOOK: type: QUERY
+POSTHOOK: query: explain SELECT * FROM src where key + 100 between (150 + -50) AND (150 +
50) LIMIT 20
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR
TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (TOK_FUNCTION between KW_FALSE
(+ (TOK_TABLE_OR_COL key) 100) (+ 150 (- 50)) (+ 150 50))) (TOK_LIMIT 20)))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        src 
+          TableScan
+            alias: src
+            Filter Operator
+              predicate:
+                  expr: (key + 100) BETWEEN (150 + (- 50)) AND (150 + 50)
+                  type: boolean
+              Select Operator
+                expressions:
+                      expr: key
+                      type: string
+                      expr: value
+                      type: string
+                outputColumnNames: _col0, _col1
+                Limit
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 0
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 20
+
+
+PREHOOK: query: SELECT * FROM src where key + 100 between (150 + -50) AND (150 + 50) LIMIT
20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM src where key + 100 between (150 + -50) AND (150 + 50) LIMIT
20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+86	val_86
+27	val_27
+98	val_98
+66	val_66
+37	val_37
+15	val_15
+82	val_82
+17	val_17
+0	val_0
+57	val_57
+20	val_20
+92	val_92
+47	val_47
+72	val_72
+4	val_4
+35	val_35
+54	val_54
+51	val_51
+65	val_65
+83	val_83
+PREHOOK: query: explain SELECT * FROM src where key + 100 not between (150 + -50) AND (150
+ 50) LIMIT 20
+PREHOOK: type: QUERY
+POSTHOOK: query: explain SELECT * FROM src where key + 100 not between (150 + -50) AND (150
+ 50) LIMIT 20
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR
TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (TOK_FUNCTION between KW_TRUE
(+ (TOK_TABLE_OR_COL key) 100) (+ 150 (- 50)) (+ 150 50))) (TOK_LIMIT 20)))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        src 
+          TableScan
+            alias: src
+            Filter Operator
+              predicate:
+                  expr: (key + 100) NOT BETWEEN (150 + (- 50)) AND (150 + 50)
+                  type: boolean
+              Select Operator
+                expressions:
+                      expr: key
+                      type: string
+                      expr: value
+                      type: string
+                outputColumnNames: _col0, _col1
+                Limit
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 0
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 20
+
+
+PREHOOK: query: SELECT * FROM src where key + 100 not between (150 + -50) AND (150 + 50)
LIMIT 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM src where key + 100 not between (150 + -50) AND (150 + 50)
LIMIT 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+238	val_238
+311	val_311
+165	val_165
+409	val_409
+255	val_255
+278	val_278
+484	val_484
+265	val_265
+193	val_193
+401	val_401
+150	val_150
+273	val_273
+224	val_224
+369	val_369
+128	val_128
+213	val_213
+146	val_146
+406	val_406
+429	val_429
+374	val_374
+PREHOOK: query: explain SELECT * FROM src where 'b' between 'a' AND 'c' LIMIT 1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain SELECT * FROM src where 'b' between 'a' AND 'c' LIMIT 1
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR
TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (TOK_FUNCTION between KW_FALSE
'b' 'a' 'c')) (TOK_LIMIT 1)))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        src 
+          TableScan
+            alias: src
+            Filter Operator
+              predicate:
+                  expr: 'b' BETWEEN 'a' AND 'c'
+                  type: boolean
+              Select Operator
+                expressions:
+                      expr: key
+                      type: string
+                      expr: value
+                      type: string
+                outputColumnNames: _col0, _col1
+                Limit
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 0
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 1
+
+
+PREHOOK: query: SELECT * FROM src where 'b' between 'a' AND 'c' LIMIT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM src where 'b' between 'a' AND 'c' LIMIT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+238	val_238
+PREHOOK: query: explain SELECT * FROM src where 2 between 2 AND '3' LIMIT 1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain SELECT * FROM src where 2 between 2 AND '3' LIMIT 1
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR
TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (TOK_FUNCTION between KW_FALSE
2 2 '3')) (TOK_LIMIT 1)))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        src 
+          TableScan
+            alias: src
+            Filter Operator
+              predicate:
+                  expr: 2 BETWEEN 2 AND '3'
+                  type: boolean
+              Select Operator
+                expressions:
+                      expr: key
+                      type: string
+                      expr: value
+                      type: string
+                outputColumnNames: _col0, _col1
+                Limit
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 0
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 1
+
+
+PREHOOK: query: SELECT * FROM src where 2 between 2 AND '3' LIMIT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM src where 2 between 2 AND '3' LIMIT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+238	val_238



Mime
View raw message