hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From harisan...@apache.org
Subject hive git commit: HIVE-11428: Performance: Struct IN() clauses are extremely slow (Hari Sankar Sivarama Subramaniyan, reviewed by Gopal V)
Date Fri, 07 Aug 2015 21:18:05 GMT
Repository: hive
Updated Branches:
  refs/heads/branch-1 70589b1cf -> 0f944edc9


HIVE-11428: Performance: Struct IN() clauses are extremely slow (Hari Sankar Sivarama Subramaniyan, reviewed by Gopal V)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0f944edc
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0f944edc
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0f944edc

Branch: refs/heads/branch-1
Commit: 0f944edc9a6b2b55dd59130d17d524db9bd9cf72
Parents: 70589b1
Author: Hari Subramaniyan <harisankar@apache.org>
Authored: Fri Aug 7 14:16:43 2015 -0700
Committer: Hari Subramaniyan <harisankar@apache.org>
Committed: Fri Aug 7 14:17:51 2015 -0700

----------------------------------------------------------------------
 .../apache/hadoop/hive/ql/stats/StatsUtils.java | 44 ++++++++++++++++----
 .../hive/ql/udf/generic/GenericUDFIn.java       | 11 +++++
 .../test/results/clientpositive/null_cast.q.out |  4 +-
 .../results/clientpositive/udf_inline.q.out     |  6 +--
 .../objectinspector/ObjectInspectorFactory.java |  7 +++-
 .../objectinspector/ObjectInspectorUtils.java   | 16 +++++++
 6 files changed, 73 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/0f944edc/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
index 0940c4d..a8a2091 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
@@ -60,11 +60,13 @@ import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.StandardConstantListObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.StandardConstantMapObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StandardConstantStructObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.StandardMapObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBinaryObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableByteObjectInspector;
@@ -939,15 +941,21 @@ public class StatsUtils {
       }
       break;
     case STRUCT:
-      StructObjectInspector soi = (StructObjectInspector) oi;
-
-      // add constant object overhead for struct
-      result += JavaDataModel.get().object();
-
-      // add constant struct field names references overhead
-      result += soi.getAllStructFieldRefs().size() * JavaDataModel.get().ref();
-      for (StructField field : soi.getAllStructFieldRefs()) {
-        result += getSizeOfComplexTypes(conf, field.getFieldObjectInspector());
+      if (oi instanceof StandardConstantStructObjectInspector) {
+        // constant map projection of known length
+        StandardConstantStructObjectInspector scsoi = (StandardConstantStructObjectInspector) oi;
+        result += getSizeOfStruct(scsoi);
+      }  else {
+        StructObjectInspector soi = (StructObjectInspector) oi;
+
+        // add constant object overhead for struct
+        result += JavaDataModel.get().object();
+
+        // add constant struct field names references overhead
+        result += soi.getAllStructFieldRefs().size() * JavaDataModel.get().ref();
+        for (StructField field : soi.getAllStructFieldRefs()) {
+          result += getSizeOfComplexTypes(conf, field.getFieldObjectInspector());
+        }
       }
       break;
     case UNION:
@@ -1052,6 +1060,24 @@ public class StatsUtils {
     return result;
   }
 
+  public static long getSizeOfStruct(StandardConstantStructObjectInspector soi) {
+	long result = 0;
+    // add constant object overhead for struct
+    result += JavaDataModel.get().object();
+
+    // add constant struct field names references overhead
+    result += soi.getAllStructFieldRefs().size() * JavaDataModel.get().ref();
+    List<?> value = soi.getWritableConstantValue();
+    List<? extends StructField> fields = soi.getAllStructFieldRefs();
+    if (value == null || value.size() != fields.size()) {
+      return result;
+    }
+    for (int i = 0; i < fields.size(); i++) {
+      result += getWritableSize(fields.get(i).getFieldObjectInspector(), value.get(i));
+    }
+	return result;
+  }
+
   /**
    * Get size of primitive data types based on their respective writable object inspector
    * @param oi

http://git-wip-us.apache.org/repos/asf/hive/blob/0f944edc/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIn.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIn.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIn.java
index 38b1dc4..56ac3e1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIn.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIn.java
@@ -32,9 +32,12 @@ import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
 import org.apache.hadoop.io.BooleanWritable;
 
+import com.esotericsoftware.minlog.Log;
+
 /**
  * GenericUDFIn
  *
@@ -168,6 +171,14 @@ public class GenericUDFIn extends GenericUDF {
         }
         break;
       }
+      case STRUCT: {
+        if (constantInSet.contains(((StructObjectInspector) compareOI).getStructFieldsDataAsList(conversionHelper
+           .convertIfNecessary(arguments[0].get(), argumentOIs[0])))) {
+          bw.set(true);
+          return bw;
+        }
+        break;
+      }
       default:
         throw new RuntimeException("Compare of unsupported constant type: "
             + compareOI.getCategory());

http://git-wip-us.apache.org/repos/asf/hive/blob/0f944edc/ql/src/test/results/clientpositive/null_cast.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/null_cast.q.out b/ql/src/test/results/clientpositive/null_cast.q.out
index 810eacd..b5af69b 100644
--- a/ql/src/test/results/clientpositive/null_cast.q.out
+++ b/ql/src/test/results/clientpositive/null_cast.q.out
@@ -25,10 +25,10 @@ STAGE PLANS:
             Select Operator
               expressions: array(null,0) (type: array<int>), array(null,array()) (type: array<array<string>>), array(null,map()) (type: array<map<string,string>>), array(null,struct(0)) (type: array<struct<col1:int>>)
               outputColumnNames: _col0, _col1, _col2, _col3
-              Statistics: Num rows: 500 Data size: 340000 Basic stats: COMPLETE Column stats: COMPLETE
+              Statistics: Num rows: 500 Data size: 108000 Basic stats: COMPLETE Column stats: COMPLETE
               File Output Operator
                 compressed: false
-                Statistics: Num rows: 500 Data size: 340000 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 500 Data size: 108000 Basic stats: COMPLETE Column stats: COMPLETE
                 table:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/0f944edc/ql/src/test/results/clientpositive/udf_inline.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/udf_inline.q.out b/ql/src/test/results/clientpositive/udf_inline.q.out
index 45bd463..7d372f3 100644
--- a/ql/src/test/results/clientpositive/udf_inline.q.out
+++ b/ql/src/test/results/clientpositive/udf_inline.q.out
@@ -33,13 +33,13 @@ STAGE PLANS:
           Select Operator
             expressions: array(struct(1,'dude!'),struct(2,'Wheres'),struct(3,'my car?')) (type: array<struct<col1:int,col2:string>>)
             outputColumnNames: _col0
-            Statistics: Num rows: 500 Data size: 1220000 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 500 Data size: 32000 Basic stats: COMPLETE Column stats: COMPLETE
             UDTF Operator
-              Statistics: Num rows: 500 Data size: 1220000 Basic stats: COMPLETE Column stats: COMPLETE
+              Statistics: Num rows: 500 Data size: 32000 Basic stats: COMPLETE Column stats: COMPLETE
               function name: inline
               Limit
                 Number of rows: 2
-                Statistics: Num rows: 2 Data size: 4880 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 2 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE
                 ListSink
 
 PREHOOK: query: SELECT inline( 

http://git-wip-us.apache.org/repos/asf/hive/blob/0f944edc/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java
index c35f4e9..97bb715 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java
@@ -215,7 +215,6 @@ public final class ObjectInspectorFactory {
     return new StandardConstantListObjectInspector(listElementObjectInspector, constantValue);
   }
 
-
   static ConcurrentHashMap<List<ObjectInspector>, StandardMapObjectInspector> cachedStandardMapObjectInspector =
       new ConcurrentHashMap<List<ObjectInspector>, StandardMapObjectInspector>();
 
@@ -297,6 +296,12 @@ public final class ObjectInspectorFactory {
     return result;
   }
 
+  public static StandardConstantStructObjectInspector getStandardConstantStructObjectInspector(
+    List<String> structFieldNames,
+    List<ObjectInspector> structFieldObjectInspectors,  List<?> value) {
+    return new StandardConstantStructObjectInspector(structFieldNames, structFieldObjectInspectors, value);
+  }
+
   static ConcurrentHashMap<List<StructObjectInspector>, UnionStructObjectInspector> cachedUnionStructObjectInspector =
       new ConcurrentHashMap<List<StructObjectInspector>, UnionStructObjectInspector>();
 

http://git-wip-us.apache.org/repos/asf/hive/blob/0f944edc/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
index 041d218..a1545fe 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
@@ -1059,6 +1059,21 @@ public final class ObjectInspectorUtils {
               ObjectInspectorCopyOption.WRITABLE
             ),
             (Map<?, ?>)writableValue);
+      case STRUCT:
+          StructObjectInspector soi = (StructObjectInspector) oi;
+          List<? extends StructField> fields = soi.getAllStructFieldRefs();
+          List<String> fieldNames = new ArrayList<String>(fields.size());
+          List<ObjectInspector> fieldObjectInspectors = new ArrayList<ObjectInspector>(
+            fields.size());
+          for (StructField f : fields) {
+            fieldNames.add(f.getFieldName());
+            fieldObjectInspectors.add(getStandardObjectInspector(f
+            .getFieldObjectInspector(), ObjectInspectorCopyOption.WRITABLE));
+          }
+          return ObjectInspectorFactory.getStandardConstantStructObjectInspector(
+            fieldNames,
+            fieldObjectInspectors,
+            (List<?>)writableValue);
       default:
        throw new IllegalArgumentException(
            writableOI.getCategory() + " not yet supported for constant OI");
@@ -1074,6 +1089,7 @@ public final class ObjectInspectorUtils {
       case PRIMITIVE:
       case LIST:
       case MAP:
+      case STRUCT:
         return true;
       default:
         return false;


Mime
View raw message