hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jd...@apache.org
Subject [1/6] hive git commit: HIVE-15269: Dynamic Min-Max/BloomFilter runtime-filtering for Tez (Deepak Jaiswal via Jason Dere)
Date Tue, 24 Jan 2017 20:02:48 GMT
Repository: hive
Updated Branches:
  refs/heads/master 3040f6e71 -> cc3fd84ee


http://git-wip-us.apache.org/repos/asf/hive/blob/cc3fd84e/ql/src/test/results/clientpositive/perf/query83.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/query83.q.out b/ql/src/test/results/clientpositive/perf/query83.q.out
index 004dc41..ee448d4 100644
--- a/ql/src/test/results/clientpositive/perf/query83.q.out
+++ b/ql/src/test/results/clientpositive/perf/query83.q.out
@@ -190,7 +190,7 @@ Stage-0
                               <-Reducer 16 [SIMPLE_EDGE]
                                 SHUFFLE [RS_76]
                                   PartitionCols:_col0
-                                  Merge Join Operator [MERGEJOIN_224] (rows=63350266 width=77)
+                                  Merge Join Operator [MERGEJOIN_222] (rows=63350266 width=77)
                                     Conds:RS_73._col1=RS_74._col0(Inner),Output:["_col0","_col2","_col4"]
                                   <-Map 15 [SIMPLE_EDGE]
                                     SHUFFLE [RS_73]
@@ -213,7 +213,7 @@ Stage-0
                               <-Reducer 21 [SIMPLE_EDGE]
                                 SHUFFLE [RS_77]
                                   PartitionCols:_col0
-                                  Merge Join Operator [MERGEJOIN_225] (rows=80353 width=1119)
+                                  Merge Join Operator [MERGEJOIN_224] (rows=80353 width=1119)
                                     Conds:RS_69._col1=RS_70._col0(Inner),Output:["_col0"]
                                   <-Map 20 [SIMPLE_EDGE]
                                     SHUFFLE [RS_69]
@@ -234,7 +234,7 @@ Stage-0
                                           PartitionCols:_col0
                                           Group By Operator [GBY_65] (rows=80353 width=1119)
                                             Output:["_col0"],keys:_col0
-                                            Merge Join Operator [MERGEJOIN_220] (rows=80353
width=1119)
+                                            Merge Join Operator [MERGEJOIN_223] (rows=80353
width=1119)
                                               Conds:RS_61._col1=RS_62._col0(Inner),Output:["_col0"]
                                             <-Map 22 [SIMPLE_EDGE]
                                               SHUFFLE [RS_61]
@@ -278,7 +278,7 @@ Stage-0
                               <-Reducer 28 [SIMPLE_EDGE]
                                 SHUFFLE [RS_119]
                                   PartitionCols:_col0
-                                  Merge Join Operator [MERGEJOIN_226] (rows=15838314 width=92)
+                                  Merge Join Operator [MERGEJOIN_225] (rows=15838314 width=92)
                                     Conds:RS_116._col1=RS_117._col0(Inner),Output:["_col0","_col2","_col4"]
                                   <-Map 27 [SIMPLE_EDGE]
                                     SHUFFLE [RS_116]
@@ -322,7 +322,7 @@ Stage-0
                                           PartitionCols:_col0
                                           Group By Operator [GBY_108] (rows=80353 width=1119)
                                             Output:["_col0"],keys:_col0
-                                            Merge Join Operator [MERGEJOIN_221] (rows=80353
width=1119)
+                                            Merge Join Operator [MERGEJOIN_226] (rows=80353
width=1119)
                                               Conds:RS_104._col1=RS_105._col0(Inner),Output:["_col0"]
                                             <-Map 34 [SIMPLE_EDGE]
                                               SHUFFLE [RS_104]
@@ -366,7 +366,7 @@ Stage-0
                               <-Reducer 2 [SIMPLE_EDGE]
                                 SHUFFLE [RS_33]
                                   PartitionCols:_col0
-                                  Merge Join Operator [MERGEJOIN_222] (rows=31678769 width=106)
+                                  Merge Join Operator [MERGEJOIN_219] (rows=31678769 width=106)
                                     Conds:RS_30._col1=RS_31._col0(Inner),Output:["_col0","_col2","_col4"]
                                   <-Map 1 [SIMPLE_EDGE]
                                     SHUFFLE [RS_30]
@@ -389,7 +389,7 @@ Stage-0
                               <-Reducer 9 [SIMPLE_EDGE]
                                 SHUFFLE [RS_34]
                                   PartitionCols:_col0
-                                  Merge Join Operator [MERGEJOIN_223] (rows=80353 width=1119)
+                                  Merge Join Operator [MERGEJOIN_221] (rows=80353 width=1119)
                                     Conds:RS_26._col1=RS_27._col0(Inner),Output:["_col0"]
                                   <-Map 8 [SIMPLE_EDGE]
                                     SHUFFLE [RS_26]
@@ -410,7 +410,7 @@ Stage-0
                                           PartitionCols:_col0
                                           Group By Operator [GBY_22] (rows=80353 width=1119)
                                             Output:["_col0"],keys:_col0
-                                            Merge Join Operator [MERGEJOIN_219] (rows=80353
width=1119)
+                                            Merge Join Operator [MERGEJOIN_220] (rows=80353
width=1119)
                                               Conds:RS_18._col1=RS_19._col0(Inner),Output:["_col0"]
                                             <-Map 10 [SIMPLE_EDGE]
                                               SHUFFLE [RS_18]

http://git-wip-us.apache.org/repos/asf/hive/blob/cc3fd84e/ql/src/test/results/clientpositive/show_functions.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/show_functions.q.out b/ql/src/test/results/clientpositive/show_functions.q.out
index 90b86c3..b8daea9 100644
--- a/ql/src/test/results/clientpositive/show_functions.q.out
+++ b/ql/src/test/results/clientpositive/show_functions.q.out
@@ -36,6 +36,7 @@ avg
 base64
 between
 bin
+bloom_filter
 bround
 case
 cbrt
@@ -109,6 +110,7 @@ histogram_numeric
 hour
 if
 in
+in_bloom_filter
 in_file
 index
 initcap

http://git-wip-us.apache.org/repos/asf/hive/blob/cc3fd84e/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out b/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out
index d15c83f..9f1a401 100644
--- a/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out
+++ b/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out
@@ -820,34 +820,101 @@ POSTHOOK: query: explain analyze
 select a.key, a.value, b.value
 from tab a join tab_part b on a.key = b.key
 POSTHOOK: type: QUERY
-Plan optimized by CBO.
-
-Vertex dependency in root stage
-Map 2 <- Map 1 (CUSTOM_EDGE)
-
-Stage-0
-  Fetch Operator
-    limit:-1
-    Stage-1
-      Map 2
-      File Output Operator [FS_10]
-        Select Operator [SEL_9] (rows=550/480 width=18)
-          Output:["_col0","_col1","_col2"]
-          Map Join Operator [MAPJOIN_15] (rows=550/480 width=18)
-            BucketMapJoin:true,Conds:RS_6._col0=SEL_5._col0(Inner),HybridGraceHashJoin:true,Output:["_col0","_col1","_col3"]
-          <-Map 1 [CUSTOM_EDGE]
-            MULTICAST [RS_6]
-              PartitionCols:_col0
-              Select Operator [SEL_2] (rows=242/242 width=18)
-                Output:["_col0","_col1"]
-                Filter Operator [FIL_13] (rows=242/242 width=18)
-                  predicate:key is not null
-                  TableScan [TS_0] (rows=242/242 width=18)
-                    default@tab,a,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
-          <-Select Operator [SEL_5] (rows=500/500 width=18)
-              Output:["_col0","_col1"]
-              Filter Operator [FIL_14] (rows=500/500 width=18)
-                predicate:key is not null
-                TableScan [TS_3] (rows=500/500 width=18)
-                  default@tab_part,b,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Map 3 <- Map 1 (CUSTOM_EDGE), Reducer 2 (BROADCAST_EDGE)
+        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  filterExpr: key is not null (type: boolean)
+                  Statistics: Num rows: 242/242 Data size: 4502 Basic stats: COMPLETE Column
stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 242/242 Data size: 4502 Basic stats: COMPLETE Column
stats: NONE
+                    Select Operator
+                      expressions: key (type: int), value (type: string)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 242/242 Data size: 4502 Basic stats: COMPLETE
Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 242/242 Data size: 4502 Basic stats: COMPLETE
Column stats: NONE
+                        value expressions: _col1 (type: string)
+                      Select Operator
+                        expressions: _col0 (type: int)
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 242/242 Data size: 4502 Basic stats: COMPLETE
Column stats: NONE
+                        Group By Operator
+                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=242)
+                          mode: hash
+                          outputColumnNames: _col0, _col1, _col2
+                          Statistics: Num rows: 1/3 Data size: 12 Basic stats: COMPLETE Column
stats: NONE
+                          Reduce Output Operator
+                            sort order: 
+                            Statistics: Num rows: 1/3 Data size: 12 Basic stats: COMPLETE
Column stats: NONE
+                            value expressions: _col0 (type: int), _col1 (type: int), _col2
(type: binary)
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  filterExpr: (key is not null and key BETWEEN DynamicValue(RS_6_a_key_min)
AND DynamicValue(RS_6_a_key_max) and in_bloom_filter(key, DynamicValue(RS_6_a_key_bloom_filter)))
(type: boolean)
+                  Statistics: Num rows: 500/500 Data size: 9312 Basic stats: COMPLETE Column
stats: NONE
+                  Filter Operator
+                    predicate: (key is not null and key BETWEEN DynamicValue(RS_6_a_key_min)
AND DynamicValue(RS_6_a_key_max) and in_bloom_filter(key, DynamicValue(RS_6_a_key_bloom_filter)))
(type: boolean)
+                    Statistics: Num rows: 500/244 Data size: 9312 Basic stats: COMPLETE Column
stats: NONE
+                    Select Operator
+                      expressions: key (type: int), value (type: string)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 500/244 Data size: 9312 Basic stats: COMPLETE
Column stats: NONE
+                      Map Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        keys:
+                          0 _col0 (type: int)
+                          1 _col0 (type: int)
+                        outputColumnNames: _col0, _col1, _col3
+                        input vertices:
+                          0 Map 1
+                        Statistics: Num rows: 550/480 Data size: 10243 Basic stats: COMPLETE
Column stats: NONE
+                        HybridGraceHashJoin: true
+                        Select Operator
+                          expressions: _col0 (type: int), _col1 (type: string), _col3 (type:
string)
+                          outputColumnNames: _col0, _col1, _col2
+                          Statistics: Num rows: 550/480 Data size: 10243 Basic stats: COMPLETE
Column stats: NONE
+                          File Output Operator
+                            compressed: false
+                            Statistics: Num rows: 550/480 Data size: 10243 Basic stats: COMPLETE
Column stats: NONE
+                            table:
+                                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2,
expectedEntries=242)
+                mode: final
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1/1 Data size: 12 Basic stats: COMPLETE Column stats:
NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1/1 Data size: 12 Basic stats: COMPLETE Column stats:
NONE
+                  value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary)
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
 

http://git-wip-us.apache.org/repos/asf/hive/blob/cc3fd84e/ql/src/test/results/clientpositive/tez/explainuser_3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/explainuser_3.q.out b/ql/src/test/results/clientpositive/tez/explainuser_3.q.out
index 2db8b5e..fbf61ef 100644
--- a/ql/src/test/results/clientpositive/tez/explainuser_3.q.out
+++ b/ql/src/test/results/clientpositive/tez/explainuser_3.q.out
@@ -31,13 +31,13 @@ Stage-0
     Stage-1
       Reducer 2 vectorized
       File Output Operator [FS_8]
-        Select Operator [SEL_7] (rows=16 width=107)
+        Select Operator [SEL_7] (rows=16 width=106)
           Output:["_col0","_col1"]
         <-Map 1 [SIMPLE_EDGE] vectorized
           SHUFFLE [RS_6]
-            Select Operator [SEL_5] (rows=16 width=107)
+            Select Operator [SEL_5] (rows=16 width=106)
               Output:["_col0","_col1"]
-              TableScan [TS_0] (rows=16 width=107)
+              TableScan [TS_0] (rows=16 width=106)
                 default@acid_vectorized,acid_vectorized, ACID table,Tbl:COMPLETE,Col:NONE,Output:["a","b"]
 
 PREHOOK: query: explain select key, value
@@ -640,34 +640,101 @@ POSTHOOK: query: explain
 select a.key, a.value, b.value
 from tab a join tab_part b on a.key = b.key
 POSTHOOK: type: QUERY
-Plan optimized by CBO.
-
-Vertex dependency in root stage
-Map 2 <- Map 1 (CUSTOM_EDGE)
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
 
-Stage-0
-  Fetch Operator
-    limit:-1
-    Stage-1
-      Map 2
-      File Output Operator [FS_10]
-        Select Operator [SEL_9] (rows=550 width=18)
-          Output:["_col0","_col1","_col2"]
-          Map Join Operator [MAPJOIN_15] (rows=550 width=18)
-            BucketMapJoin:true,Conds:RS_6._col0=SEL_5._col0(Inner),HybridGraceHashJoin:true,Output:["_col0","_col1","_col3"]
-          <-Map 1 [CUSTOM_EDGE]
-            MULTICAST [RS_6]
-              PartitionCols:_col0
-              Select Operator [SEL_2] (rows=242 width=18)
-                Output:["_col0","_col1"]
-                Filter Operator [FIL_13] (rows=242 width=18)
-                  predicate:key is not null
-                  TableScan [TS_0] (rows=242 width=18)
-                    default@tab,a,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
-          <-Select Operator [SEL_5] (rows=500 width=18)
-              Output:["_col0","_col1"]
-              Filter Operator [FIL_14] (rows=500 width=18)
-                predicate:key is not null
-                TableScan [TS_3] (rows=500 width=18)
-                  default@tab_part,b,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Map 3 <- Map 1 (CUSTOM_EDGE), Reducer 2 (BROADCAST_EDGE)
+        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  filterExpr: key is not null (type: boolean)
+                  Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column
stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column
stats: NONE
+                    Select Operator
+                      expressions: key (type: int), value (type: string)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column
stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column
stats: NONE
+                        value expressions: _col1 (type: string)
+                      Select Operator
+                        expressions: _col0 (type: int)
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column
stats: NONE
+                        Group By Operator
+                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=242)
+                          mode: hash
+                          outputColumnNames: _col0, _col1, _col2
+                          Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column
stats: NONE
+                          Reduce Output Operator
+                            sort order: 
+                            Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column
stats: NONE
+                            value expressions: _col0 (type: int), _col1 (type: int), _col2
(type: binary)
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  filterExpr: (key is not null and key BETWEEN DynamicValue(RS_6_a_key_min)
AND DynamicValue(RS_6_a_key_max) and in_bloom_filter(key, DynamicValue(RS_6_a_key_bloom_filter)))
(type: boolean)
+                  Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column
stats: NONE
+                  Filter Operator
+                    predicate: (key is not null and key BETWEEN DynamicValue(RS_6_a_key_min)
AND DynamicValue(RS_6_a_key_max) and in_bloom_filter(key, DynamicValue(RS_6_a_key_bloom_filter)))
(type: boolean)
+                    Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column
stats: NONE
+                    Select Operator
+                      expressions: key (type: int), value (type: string)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column
stats: NONE
+                      Map Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        keys:
+                          0 _col0 (type: int)
+                          1 _col0 (type: int)
+                        outputColumnNames: _col0, _col1, _col3
+                        input vertices:
+                          0 Map 1
+                        Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE
Column stats: NONE
+                        HybridGraceHashJoin: true
+                        Select Operator
+                          expressions: _col0 (type: int), _col1 (type: string), _col3 (type:
string)
+                          outputColumnNames: _col0, _col1, _col2
+                          Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE
Column stats: NONE
+                          File Output Operator
+                            compressed: false
+                            Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE
Column stats: NONE
+                            table:
+                                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2,
expectedEntries=242)
+                mode: final
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats:
NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats:
NONE
+                  value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary)
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
 

http://git-wip-us.apache.org/repos/asf/hive/blob/cc3fd84e/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/LiteralDelegate.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/LiteralDelegate.java b/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/LiteralDelegate.java
new file mode 100644
index 0000000..bd8a5ce
--- /dev/null
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/LiteralDelegate.java
@@ -0,0 +1,31 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io.sarg;
+
+import org.apache.hadoop.conf.Configurable;
+
+/**
+ * Interface to retrieve a literal value
+ */
+public interface LiteralDelegate extends Configurable {
+
+  Object getLiteral();
+
+  String getId();
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/cc3fd84e/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentFactory.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentFactory.java
b/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentFactory.java
index 8fda95c..3c10c83 100644
--- a/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentFactory.java
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentFactory.java
@@ -18,13 +18,20 @@
 
 package org.apache.hadoop.hive.ql.io.sarg;
 
+import org.apache.hadoop.conf.Configuration;
+
 /**
  * A factory for creating SearchArguments, as well as modifying those created by this factory.
  */
 public class SearchArgumentFactory {
   public static SearchArgument.Builder newBuilder() {
-    return new SearchArgumentImpl.BuilderImpl();
+    return newBuilder(null);
+  }
+
+  public static SearchArgument.Builder newBuilder(Configuration conf) {
+    return new SearchArgumentImpl.BuilderImpl(conf);
   }
+
   public static void setPredicateLeafColumn(PredicateLeaf leaf, String newName) {
     SearchArgumentImpl.PredicateLeafImpl.setColumnName(leaf, newName);
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/cc3fd84e/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
b/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
index 10d8c51..db0a582 100644
--- a/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
@@ -31,6 +31,8 @@ import java.util.Map;
 import java.util.Queue;
 import java.util.Set;
 
+import org.apache.hadoop.conf.Configuration;
+
 /**
  * The implementation of SearchArguments. Visible for testing only.
  */
@@ -57,27 +59,17 @@ public final class SearchArgumentImpl implements SearchArgument {
                              Type type,
                              String columnName,
                              Object literal,
-                             List<Object> literalList) {
+                             List<Object> literalList, Configuration conf) {
       this.operator = operator;
       this.type = type;
       this.columnName = columnName;
       this.literal = literal;
-      if (literal != null) {
-        if (literal.getClass() != type.getValueClass()) {
-          throw new IllegalArgumentException("Wrong value class " +
-              literal.getClass().getName() + " for " + type + "." + operator +
-              " leaf");
-        }
-      }
+      checkLiteralType(literal, type, conf);
       this.literalList = literalList;
       if (literalList != null) {
         Class valueCls = type.getValueClass();
         for(Object lit: literalList) {
-          if (lit != null && lit.getClass() != valueCls) {
-            throw new IllegalArgumentException("Wrong value class item " +
-                lit.getClass().getName() + " for " + type + "." + operator +
-                " leaf");
-          }
+          checkLiteralType(lit, type, conf);
         }
       }
     }
@@ -99,6 +91,10 @@ public final class SearchArgumentImpl implements SearchArgument {
 
     @Override
     public Object getLiteral() {
+      if (literal instanceof LiteralDelegate) {
+        return ((LiteralDelegate) literal).getLiteral();
+      }
+
       // To get around a kryo 2.22 bug while deserialize a Timestamp into Date
       // (https://github.com/EsotericSoftware/kryo/issues/88)
       // When we see a Date, convert back into Timestamp
@@ -110,6 +106,13 @@ public final class SearchArgumentImpl implements SearchArgument {
 
     @Override
     public List<Object> getLiteralList() {
+      if (literalList != null && literalList.size() > 0 && literalList.get(0)
instanceof LiteralDelegate) {
+        List<Object> newLiteraList = new ArrayList<Object>();
+        for (Object litertalObj : literalList) {
+          newLiteraList.add(((LiteralDelegate) litertalObj).getLiteral());
+        }
+        return newLiteraList;
+      }
       return literalList;
     }
 
@@ -169,6 +172,23 @@ public final class SearchArgumentImpl implements SearchArgument {
       assert leaf instanceof PredicateLeafImpl;
       ((PredicateLeafImpl)leaf).columnName = newName;
     }
+
+    protected void checkLiteralType(Object literal, Type type, Configuration conf) {
+      if (literal == null) {
+        return;
+      }
+
+      if (literal instanceof LiteralDelegate) {
+        // Give it a pass. Optionally, have LiteralDelegate provide a getLiteralClass() to
check.
+        ((LiteralDelegate) literal).setConf(conf);
+      } else {
+        if (literal.getClass() != type.getValueClass()) {
+          throw new IllegalArgumentException("Wrong value class " +
+              literal.getClass().getName() + " for " + type + "." + operator +
+              " leaf");
+        }
+      }
+    }
   }
 
   private final List<PredicateLeaf> leaves;
@@ -218,6 +238,11 @@ public final class SearchArgumentImpl implements SearchArgument {
 
   static class BuilderImpl implements Builder {
 
+    Configuration conf;
+    public BuilderImpl(Configuration conf) {
+      this.conf = conf;
+    }
+
     // max threshold for CNF conversion. having >8 elements in andList will be
     // converted to maybe
     private static final int CNF_COMBINATIONS_THRESHOLD = 256;
@@ -291,7 +316,7 @@ public final class SearchArgumentImpl implements SearchArgument {
       } else {
         PredicateLeaf leaf =
             new PredicateLeafImpl(PredicateLeaf.Operator.LESS_THAN,
-                type, column, literal, null);
+                type, column, literal, null, conf);
         parent.getChildren().add(new ExpressionTree(addLeaf(leaf)));
       }
       return this;
@@ -306,7 +331,7 @@ public final class SearchArgumentImpl implements SearchArgument {
       } else {
         PredicateLeaf leaf =
             new PredicateLeafImpl(PredicateLeaf.Operator.LESS_THAN_EQUALS,
-                type, column, literal, null);
+                type, column, literal, null, conf);
         parent.getChildren().add(new ExpressionTree(addLeaf(leaf)));
       }
       return this;
@@ -321,7 +346,7 @@ public final class SearchArgumentImpl implements SearchArgument {
       } else {
         PredicateLeaf leaf =
             new PredicateLeafImpl(PredicateLeaf.Operator.EQUALS,
-                type, column, literal, null);
+                type, column, literal, null, conf);
         parent.getChildren().add(new ExpressionTree(addLeaf(leaf)));
       }
       return this;
@@ -336,7 +361,7 @@ public final class SearchArgumentImpl implements SearchArgument {
       } else {
         PredicateLeaf leaf =
             new PredicateLeafImpl(PredicateLeaf.Operator.NULL_SAFE_EQUALS,
-                type, column, literal, null);
+                type, column, literal, null, conf);
         parent.getChildren().add(new ExpressionTree(addLeaf(leaf)));
       }
       return this;
@@ -358,7 +383,7 @@ public final class SearchArgumentImpl implements SearchArgument {
 
         PredicateLeaf leaf =
             new PredicateLeafImpl(PredicateLeaf.Operator.IN,
-                type, column, null, argList);
+                type, column, null, argList, conf);
         parent.getChildren().add(new ExpressionTree(addLeaf(leaf)));
       }
       return this;
@@ -372,7 +397,7 @@ public final class SearchArgumentImpl implements SearchArgument {
       } else {
         PredicateLeaf leaf =
             new PredicateLeafImpl(PredicateLeaf.Operator.IS_NULL,
-                type, column, null, null);
+                type, column, null, null, conf);
         parent.getChildren().add(new ExpressionTree(addLeaf(leaf)));
       }
       return this;
@@ -390,7 +415,7 @@ public final class SearchArgumentImpl implements SearchArgument {
         argList.add(upper);
         PredicateLeaf leaf =
             new PredicateLeafImpl(PredicateLeaf.Operator.BETWEEN,
-                type, column, null, argList);
+                type, column, null, argList, conf);
         parent.getChildren().add(new ExpressionTree(addLeaf(leaf)));
       }
       return this;

http://git-wip-us.apache.org/repos/asf/hive/blob/cc3fd84e/storage-api/src/java/org/apache/hive/common/util/BloomFilter.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hive/common/util/BloomFilter.java b/storage-api/src/java/org/apache/hive/common/util/BloomFilter.java
index e60690d..d44bba8 100644
--- a/storage-api/src/java/org/apache/hive/common/util/BloomFilter.java
+++ b/storage-api/src/java/org/apache/hive/common/util/BloomFilter.java
@@ -18,6 +18,8 @@
 
 package org.apache.hive.common.util;
 
+import java.io.*;
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
 
@@ -242,6 +244,55 @@ public class BloomFilter {
   }
 
   /**
+   * Serialize a bloom filter
+   * @param out output stream to write to
+   * @param bloomFilter BloomFilter that needs to be serialized
+   */
+  public static void serialize(OutputStream out, BloomFilter bloomFilter) throws IOException
{
+    /**
+     * Serialized BloomFilter format:
+     * 1 byte for the number of hash functions.
+     * 1 big endian int(That is how OutputStream works) for the number of longs in the bitset
+     * big endian longs in the BloomFilter bitset
+     */
+    DataOutputStream dataOutputStream = new DataOutputStream(out);
+    dataOutputStream.writeByte(bloomFilter.numHashFunctions);
+    dataOutputStream.writeInt(bloomFilter.numBits);
+    for (long value : bloomFilter.getBitSet()) {
+      dataOutputStream.writeLong(value);
+    }
+  }
+
+  /**
+   * Deserialize a bloom filter
+   * Read a byte stream, which was written by {@linkplain #serialize(OutputStream, BloomFilter)}
+   * into a {@code BloomFilter}
+   * @param in input bytestream
+   * @return deserialized BloomFilter
+   */
+  public static BloomFilter deserialize(InputStream in) throws IOException {
+    if (in == null) {
+      throw new IOException("Input stream is null");
+    }
+
+    try {
+      DataInputStream dataInputStream = new DataInputStream(in);
+      int numHashFunc = dataInputStream.readByte();
+      int numBits = dataInputStream.readInt();
+      int sz = (numBits/Long.SIZE);
+      List<Long> data = new ArrayList<Long>();
+      for (int i = 0; i < sz; i++) {
+        data.add(dataInputStream.readLong());
+      }
+      return new BloomFilter(data, numBits, numHashFunc);
+    } catch (RuntimeException e) {
+      IOException io = new IOException( "Unable to deserialize BloomFilter");
+      io.initCause(e);
+      throw io;
+    }
+  }
+
+  /**
    * Bare metal bit set implementation. For performance reasons, this implementation does
not check
    * for index bounds nor expand the bit set size if the specified index is greater than
the size.
    */


Mime
View raw message