hadoop-hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From j..@apache.org
Subject svn commit: r985094 - in /hadoop/hive/trunk: ./ ql/src/java/org/apache/hadoop/hive/ql/exec/ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/ ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/
Date Fri, 13 Aug 2010 04:50:34 GMT
Author: jvs
Date: Fri Aug 13 04:50:34 2010
New Revision: 985094

URL: http://svn.apache.org/viewvc?rev=985094&view=rev
Log:
HIVE-1528. JSON UDTF function
(Ning Zhang via jvs)


Added:
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFJSONTuple.java
    hadoop/hive/trunk/ql/src/test/queries/clientpositive/udtf_json_tuple.q
    hadoop/hive/trunk/ql/src/test/results/clientpositive/udtf_json_tuple.q.out
Modified:
    hadoop/hive/trunk/CHANGES.txt
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
    hadoop/hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out

Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=985094&r1=985093&r2=985094&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Fri Aug 13 04:50:34 2010
@@ -39,6 +39,9 @@ Trunk -  Unreleased
     HIVE-1514. API to change fileformat and location of a partition
     (He Yongqiang via namit)
 
+    HIVE-1528. JSON UDTF function
+    (Ning Zhang via jvs)
+
   IMPROVEMENTS
 
     HIVE-1394. Do not update transient_lastDdlTime if the partition is modified by a housekeeping

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java?rev=985094&r1=985093&r2=985094&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java Fri Aug 13 04:50:34 2010
@@ -130,13 +130,12 @@ import org.apache.hadoop.hive.ql.udf.UDF
 import org.apache.hadoop.hive.ql.udf.UDFUpper;
 import org.apache.hadoop.hive.ql.udf.UDFWeekOfYear;
 import org.apache.hadoop.hive.ql.udf.UDFYear;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFnGrams;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFAverage;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFBridge;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCollectSet;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCount;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFHistogramNumeric;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCollectSet;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFMax;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFMin;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFParameterInfo;
@@ -148,6 +147,7 @@ import org.apache.hadoop.hive.ql.udf.gen
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFSum;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFVariance;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFVarianceSample;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFnGrams;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFArray;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFArrayContains;
@@ -173,6 +173,7 @@ import org.apache.hadoop.hive.ql.udf.gen
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFWhen;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDTFExplode;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDTFJSONTuple;
 import org.apache.hadoop.hive.ql.udf.generic.SimpleGenericUDAFParameterInfo;
 import org.apache.hadoop.hive.ql.udf.xml.GenericUDFXPath;
 import org.apache.hadoop.hive.ql.udf.xml.UDFXPathBoolean;
@@ -394,6 +395,7 @@ public final class FunctionRegistry {
 
     // Generic UDTF's
     registerGenericUDTF("explode", GenericUDTFExplode.class);
+    registerGenericUDTF("json_tuple", GenericUDTFJSONTuple.class);
   }
 
   public static void registerTemporaryUDF(String functionName,
@@ -734,8 +736,9 @@ public final class FunctionRegistry {
   }
 
   public static GenericUDAFResolver getGenericUDAFResolver(String functionName) {
-    if (LOG.isDebugEnabled())
+    if (LOG.isDebugEnabled()) {
       LOG.debug("Looking up GenericUDAF: " + functionName);
+    }
     FunctionInfo finfo = mFunctions.get(functionName.toLowerCase());
     if (finfo == null) {
       return null;
@@ -873,10 +876,11 @@ public final class FunctionRegistry {
           conversionCost += cost;
         }
       }
-      if (LOG.isDebugEnabled())
+      if (LOG.isDebugEnabled()) {
         LOG.debug("Method " + (match ? "did" : "didn't") + " match: passed = "
                   + argumentsPassed + " accepted = " + argumentsAccepted +
                   " method = " + m);
+      }
       if (match) {
         // Always choose the function with least implicit conversions.
         if (conversionCost < leastConversionCost) {

Added: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFJSONTuple.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFJSONTuple.java?rev=985094&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFJSONTuple.java (added)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFJSONTuple.java Fri Aug 13 04:50:34 2010
@@ -0,0 +1,153 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import java.util.ArrayList;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde.Constants;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
+import org.apache.hadoop.io.Text;
+import org.json.JSONException;
+import org.json.JSONObject;
+/**
+ * GenericUDTFJSONTuple: this
+ *
+ */
+@Description(name = "json_tuple",
+    value = "_FUNC_(jsonStr, p1, p2, ..., pn) - like get_json_object, but it takes multiple names and return a tuple. " +
+    		"All the input parameters and output column types are string.")
+
+public class GenericUDTFJSONTuple extends GenericUDTF {
+
+  private static Log LOG = LogFactory.getLog(GenericUDTFJSONTuple.class.getName());
+
+  int numCols;    // number of output columns
+  String[] paths; // array of path expressions, each of which corresponds to a column
+  Text[] retCols; // array of returned column values
+  Text[] cols;    // object pool of non-null Text, avoid creating objects all the time
+  Object[] nullCols; // array of null column values
+  ObjectInspector[] inputOIs; // input ObjectInspectors
+  boolean pathParsed = false;
+  boolean seenErrors = false;
+
+  @Override
+  public void close() throws HiveException {
+  }
+
+  @Override
+  public StructObjectInspector initialize(ObjectInspector[] args)
+      throws UDFArgumentException {
+
+    inputOIs = args;
+    numCols = args.length - 1;
+
+    if (numCols < 1) {
+      throw new UDFArgumentException("json_tuple() takes at least two arguments: " +
+      		"the json string and a path expression");
+    }
+
+    for (int i = 0; i < args.length; ++i) {
+      if (args[i].getCategory() != ObjectInspector.Category.PRIMITIVE ||
+          !args[i].getTypeName().equals(Constants.STRING_TYPE_NAME)) {
+        throw new UDFArgumentException("json_tuple()'s arguments have to be string type");
+      }
+    }
+
+    seenErrors = false;
+    pathParsed = false;
+    paths = new String[numCols];
+    cols = new Text[numCols];
+    retCols = new Text[numCols];
+    nullCols = new Object[numCols];
+
+    for (int i = 0; i < numCols; ++i) {
+      cols[i] = new Text();
+      retCols[i] = cols[i];
+      nullCols[i] = null;
+    }
+
+    // construct output object inspector
+    ArrayList<String> fieldNames = new ArrayList<String>(numCols);
+    ArrayList<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>(numCols);
+    for (int i = 0; i < numCols; ++i) {
+      // column name can be anything since it will be named by UDTF as clause
+      fieldNames.add("c" + i);
+      // all returned type will be Text
+      fieldOIs.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector);
+    }
+    return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
+  }
+
+  @Override
+  public void process(Object[] o) throws HiveException {
+
+    if (o[0] == null) {
+      forward(nullCols);
+      return;
+    }
+    // get the path expression for the 1st row only
+    if (!pathParsed) {
+      for (int i = 0;i < numCols; ++i) {
+        paths[i] = ((StringObjectInspector) inputOIs[i+1]).getPrimitiveJavaObject(o[i+1]);
+      }
+      pathParsed = true;
+    }
+
+    String jsonStr = ((StringObjectInspector) inputOIs[0]).getPrimitiveJavaObject(o[0]);
+    if (jsonStr == null) {
+      forward(nullCols);
+      return;
+    }
+    try {
+      JSONObject jsonObj = new JSONObject(jsonStr);
+
+      for (int i = 0; i < numCols; ++i) {
+        if (jsonObj.isNull(paths[i])) {
+          retCols[i] = null;
+        } else {
+          if (retCols[i] == null) {
+            retCols[i] = cols[i]; // use the object pool rather than creating a new object
+          }
+          retCols[i].set(jsonObj.getString(paths[i]));
+        }
+      }
+      forward(retCols);
+    } catch (JSONException e) {
+      // parsing error, invalid JSON string
+      if (!seenErrors) {
+        LOG.error("The input is not a valid JSON string: " + jsonStr + ". Skipping such error messages in the future.");
+        seenErrors = true;
+      }
+    }
+  }
+
+  @Override
+  public String toString() {
+    return "json_tuple";
+  }
+}

Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/udtf_json_tuple.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/udtf_json_tuple.q?rev=985094&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/udtf_json_tuple.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/udtf_json_tuple.q Fri Aug 13 04:50:34 2010
@@ -0,0 +1,36 @@
+create table json_t (key string, jstring string);
+
+insert overwrite table json_t
+select * from (
+  select '1', '{"f1": "value1", "f2": "value2", "f3": 3, "f5": 5.23}' from src limit 1
+  union all
+  select '2', '{"f1": "value12", "f3": "value3", "f2": 2, "f4": 4.01}' from src limit 1
+  union all
+  select '3', '{"f1": "value13", "f4": "value44", "f3": "value33", "f2": 2, "f5": 5.01}' from src limit 1
+  union all
+  select '4', cast(null as string) from src limit 1
+  union all
+  select '5', '{"f1": "", "f5": null}' from src limit 1
+  union all
+  select '6', '[invalid JSON string]' from src limit 1
+) s;
+
+explain 
+select a.key, b.* from json_t a lateral view json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') b as f1, f2, f3, f4, f5;
+
+select a.key, b.* from json_t a lateral view json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') b as f1, f2, f3, f4, f5;
+
+explain 
+select json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') as (f1, f2, f3, f4, f5) from json_t a;
+
+select json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') as (f1, f2, f3, f4, f5) from json_t a;
+
+explain 
+select a.key, b.f2, b.f5 from json_t a lateral view json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') b as f1, f2, f3, f4, f5;
+
+select a.key, b.f2, b.f5 from json_t a lateral view json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') b as f1, f2, f3, f4, f5;
+
+explain 
+select f2, count(*) from json_t a lateral view json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') b as f1, f2, f3, f4, f5 where f1 is not null group by f2;
+
+select f2, count(*) from json_t a lateral view json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') b as f1, f2, f3, f4, f5 where f1 is not null group by f2;

Modified: hadoop/hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out?rev=985094&r1=985093&r2=985094&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out (original)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out Fri Aug 13 04:50:34 2010
@@ -69,6 +69,7 @@ instr
 int
 isnotnull
 isnull
+json_tuple
 lcase
 length
 like
@@ -175,6 +176,7 @@ double
 e
 explode
 from_unixtime
+json_tuple
 lcase
 like
 locate

Added: hadoop/hive/trunk/ql/src/test/results/clientpositive/udtf_json_tuple.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/udtf_json_tuple.q.out?rev=985094&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/udtf_json_tuple.q.out (added)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/udtf_json_tuple.q.out Fri Aug 13 04:50:34 2010
@@ -0,0 +1,461 @@
+PREHOOK: query: create table json_t (key string, jstring string)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table json_t (key string, jstring string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@json_t
+PREHOOK: query: insert overwrite table json_t
+select * from (
+  select '1', '{"f1": "value1", "f2": "value2", "f3": 3, "f5": 5.23}' from src limit 1
+  union all
+  select '2', '{"f1": "value12", "f3": "value3", "f2": 2, "f4": 4.01}' from src limit 1
+  union all
+  select '3', '{"f1": "value13", "f4": "value44", "f3": "value33", "f2": 2, "f5": 5.01}' from src limit 1
+  union all
+  select '4', cast(null as string) from src limit 1
+  union all
+  select '5', '{"f1": "", "f5": null}' from src limit 1
+  union all
+  select '6', '[invalid JSON string]' from src limit 1
+) s
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@json_t
+POSTHOOK: query: insert overwrite table json_t
+select * from (
+  select '1', '{"f1": "value1", "f2": "value2", "f3": 3, "f5": 5.23}' from src limit 1
+  union all
+  select '2', '{"f1": "value12", "f3": "value3", "f2": 2, "f4": 4.01}' from src limit 1
+  union all
+  select '3', '{"f1": "value13", "f4": "value44", "f3": "value33", "f2": 2, "f5": 5.01}' from src limit 1
+  union all
+  select '4', cast(null as string) from src limit 1
+  union all
+  select '5', '{"f1": "", "f5": null}' from src limit 1
+  union all
+  select '6', '[invalid JSON string]' from src limit 1
+) s
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@json_t
+POSTHOOK: Lineage: json_t.jstring EXPRESSION []
+POSTHOOK: Lineage: json_t.key EXPRESSION []
+PREHOOK: query: explain 
+select a.key, b.* from json_t a lateral view json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4',
'f5') b as f1, f2, f3, f4, f5
+PREHOOK: type: QUERY
+POSTHOOK: query: explain 
+select a.key, b.* from json_t a lateral view json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4',
'f5') b as f1, f2, f3, f4, f5
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: json_t.jstring EXPRESSION []
+POSTHOOK: Lineage: json_t.key EXPRESSION []
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION json_tuple
(. (TOK_TABLE_OR_COL a) jstring) 'f1' 'f2' 'f3' 'f4' 'f5') f1 f2 f3 f4 f5 (TOK_TABALIAS b)))
(TOK_TABREF json_t a))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR
(. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (TOK_ALLCOLREF b)))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        a 
+          TableScan
+            alias: a
+            Lateral View Forward
+              Select Operator
+                SELECT * : (no compute)
+                Lateral View Join Operator
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+                  Select Operator
+                    expressions:
+                          expr: _col0
+                          type: string
+                          expr: _col2
+                          type: string
+                          expr: _col3
+                          type: string
+                          expr: _col4
+                          type: string
+                          expr: _col5
+                          type: string
+                          expr: _col6
+                          type: string
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                    File Output Operator
+                      compressed: false
+                      GlobalTableId: 0
+                      table:
+                          input format: org.apache.hadoop.mapred.TextInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              Select Operator
+                expressions:
+                      expr: jstring
+                      type: string
+                      expr: 'f1'
+                      type: string
+                      expr: 'f2'
+                      type: string
+                      expr: 'f3'
+                      type: string
+                      expr: 'f4'
+                      type: string
+                      expr: 'f5'
+                      type: string
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                UDTF Operator
+                  function name: json_tuple
+                  Lateral View Join Operator
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+                    Select Operator
+                      expressions:
+                            expr: _col0
+                            type: string
+                            expr: _col2
+                            type: string
+                            expr: _col3
+                            type: string
+                            expr: _col4
+                            type: string
+                            expr: _col5
+                            type: string
+                            expr: _col6
+                            type: string
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                      File Output Operator
+                        compressed: false
+                        GlobalTableId: 0
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: select a.key, b.* from json_t a lateral view json_tuple(a.jstring, 'f1',
'f2', 'f3', 'f4', 'f5') b as f1, f2, f3, f4, f5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@json_t
+PREHOOK: Output: file:/tmp/nzhang/hive_2010-08-12_17-58-09_840_8200365876234855592/-mr-10000
+POSTHOOK: query: select a.key, b.* from json_t a lateral view json_tuple(a.jstring, 'f1',
'f2', 'f3', 'f4', 'f5') b as f1, f2, f3, f4, f5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@json_t
+POSTHOOK: Output: file:/tmp/nzhang/hive_2010-08-12_17-58-09_840_8200365876234855592/-mr-10000
+POSTHOOK: Lineage: json_t.jstring EXPRESSION []
+POSTHOOK: Lineage: json_t.key EXPRESSION []
+4	NULL	NULL	NULL	NULL	NULL
+3	value13	2	value33	value44	5.01
+2	value12	2	value3	4.01	NULL
+1	value1	value2	3	NULL	5.23
+5		NULL	NULL	NULL	NULL
+PREHOOK: query: explain 
+select json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') as (f1, f2, f3, f4, f5) from json_t
a
+PREHOOK: type: QUERY
+POSTHOOK: query: explain 
+select json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') as (f1, f2, f3, f4, f5) from json_t
a
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: json_t.jstring EXPRESSION []
+POSTHOOK: Lineage: json_t.key EXPRESSION []
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF json_t a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE))
(TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION json_tuple (. (TOK_TABLE_OR_COL a) jstring) 'f1' 'f2'
'f3' 'f4' 'f5') f1 f2 f3 f4 f5))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        a 
+          TableScan
+            alias: a
+            Select Operator
+              expressions:
+                    expr: jstring
+                    type: string
+                    expr: 'f1'
+                    type: string
+                    expr: 'f2'
+                    type: string
+                    expr: 'f3'
+                    type: string
+                    expr: 'f4'
+                    type: string
+                    expr: 'f5'
+                    type: string
+              outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+              UDTF Operator
+                function name: json_tuple
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: select json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') as (f1, f2, f3,
f4, f5) from json_t a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@json_t
+PREHOOK: Output: file:/tmp/nzhang/hive_2010-08-12_17-58-13_794_8613454555471840841/-mr-10000
+POSTHOOK: query: select json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') as (f1, f2, f3,
f4, f5) from json_t a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@json_t
+POSTHOOK: Output: file:/tmp/nzhang/hive_2010-08-12_17-58-13_794_8613454555471840841/-mr-10000
+POSTHOOK: Lineage: json_t.jstring EXPRESSION []
+POSTHOOK: Lineage: json_t.key EXPRESSION []
+NULL	NULL	NULL	NULL	NULL
+value13	2	value33	value44	5.01
+value12	2	value3	4.01	NULL
+value1	value2	3	NULL	5.23
+	NULL	NULL	NULL	NULL
+PREHOOK: query: explain 
+select a.key, b.f2, b.f5 from json_t a lateral view json_tuple(a.jstring, 'f1', 'f2', 'f3',
'f4', 'f5') b as f1, f2, f3, f4, f5
+PREHOOK: type: QUERY
+POSTHOOK: query: explain 
+select a.key, b.f2, b.f5 from json_t a lateral view json_tuple(a.jstring, 'f1', 'f2', 'f3',
'f4', 'f5') b as f1, f2, f3, f4, f5
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: json_t.jstring EXPRESSION []
+POSTHOOK: Lineage: json_t.key EXPRESSION []
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION json_tuple
(. (TOK_TABLE_OR_COL a) jstring) 'f1' 'f2' 'f3' 'f4' 'f5') f1 f2 f3 f4 f5 (TOK_TABALIAS b)))
(TOK_TABREF json_t a))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR
(. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) f2)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL
b) f5)))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        a 
+          TableScan
+            alias: a
+            Lateral View Forward
+              Select Operator
+                SELECT * : (no compute)
+                Lateral View Join Operator
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+                  Select Operator
+                    expressions:
+                          expr: _col0
+                          type: string
+                          expr: _col3
+                          type: string
+                          expr: _col6
+                          type: string
+                    outputColumnNames: _col0, _col1, _col2
+                    File Output Operator
+                      compressed: false
+                      GlobalTableId: 0
+                      table:
+                          input format: org.apache.hadoop.mapred.TextInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              Select Operator
+                expressions:
+                      expr: jstring
+                      type: string
+                      expr: 'f1'
+                      type: string
+                      expr: 'f2'
+                      type: string
+                      expr: 'f3'
+                      type: string
+                      expr: 'f4'
+                      type: string
+                      expr: 'f5'
+                      type: string
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                UDTF Operator
+                  function name: json_tuple
+                  Lateral View Join Operator
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+                    Select Operator
+                      expressions:
+                            expr: _col0
+                            type: string
+                            expr: _col3
+                            type: string
+                            expr: _col6
+                            type: string
+                      outputColumnNames: _col0, _col1, _col2
+                      File Output Operator
+                        compressed: false
+                        GlobalTableId: 0
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: select a.key, b.f2, b.f5 from json_t a lateral view json_tuple(a.jstring,
'f1', 'f2', 'f3', 'f4', 'f5') b as f1, f2, f3, f4, f5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@json_t
+PREHOOK: Output: file:/tmp/nzhang/hive_2010-08-12_17-58-17_260_2655690577880014370/-mr-10000
+POSTHOOK: query: select a.key, b.f2, b.f5 from json_t a lateral view json_tuple(a.jstring,
'f1', 'f2', 'f3', 'f4', 'f5') b as f1, f2, f3, f4, f5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@json_t
+POSTHOOK: Output: file:/tmp/nzhang/hive_2010-08-12_17-58-17_260_2655690577880014370/-mr-10000
+POSTHOOK: Lineage: json_t.jstring EXPRESSION []
+POSTHOOK: Lineage: json_t.key EXPRESSION []
+4	NULL	NULL
+3	2	5.01
+2	2	NULL
+1	value2	5.23
+5	NULL	NULL
+PREHOOK: query: explain 
+select f2, count(*) from json_t a lateral view json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4',
'f5') b as f1, f2, f3, f4, f5 where f1 is not null group by f2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain 
+select f2, count(*) from json_t a lateral view json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4',
'f5') b as f1, f2, f3, f4, f5 where f1 is not null group by f2
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: json_t.jstring EXPRESSION []
+POSTHOOK: Lineage: json_t.key EXPRESSION []
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION json_tuple
(. (TOK_TABLE_OR_COL a) jstring) 'f1' 'f2' 'f3' 'f4' 'f5') f1 f2 f3 f4 f5 (TOK_TABALIAS b)))
(TOK_TABREF json_t a))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR
(TOK_TABLE_OR_COL f2)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_WHERE (TOK_FUNCTION TOK_ISNOTNULL
(TOK_TABLE_OR_COL f1))) (TOK_GROUPBY (TOK_TABLE_OR_COL f2))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        a 
+          TableScan
+            alias: a
+            Lateral View Forward
+              Select Operator
+                SELECT * : (no compute)
+                Lateral View Join Operator
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+                  Filter Operator
+                    predicate:
+                        expr: _col2 is not null
+                        type: boolean
+                    Select Operator
+                      expressions:
+                            expr: _col3
+                            type: string
+                      outputColumnNames: _col3
+                      Group By Operator
+                        aggregations:
+                              expr: count()
+                        bucketGroup: false
+                        keys:
+                              expr: _col3
+                              type: string
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Reduce Output Operator
+                          key expressions:
+                                expr: _col0
+                                type: string
+                          sort order: +
+                          Map-reduce partition columns:
+                                expr: _col0
+                                type: string
+                          tag: -1
+                          value expressions:
+                                expr: _col1
+                                type: bigint
+              Select Operator
+                expressions:
+                      expr: jstring
+                      type: string
+                      expr: 'f1'
+                      type: string
+                      expr: 'f2'
+                      type: string
+                      expr: 'f3'
+                      type: string
+                      expr: 'f4'
+                      type: string
+                      expr: 'f5'
+                      type: string
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                UDTF Operator
+                  function name: json_tuple
+                  Lateral View Join Operator
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+                    Filter Operator
+                      predicate:
+                          expr: _col2 is not null
+                          type: boolean
+                      Select Operator
+                        expressions:
+                              expr: _col3
+                              type: string
+                        outputColumnNames: _col3
+                        Group By Operator
+                          aggregations:
+                                expr: count()
+                          bucketGroup: false
+                          keys:
+                                expr: _col3
+                                type: string
+                          mode: hash
+                          outputColumnNames: _col0, _col1
+                          Reduce Output Operator
+                            key expressions:
+                                  expr: _col0
+                                  type: string
+                            sort order: +
+                            Map-reduce partition columns:
+                                  expr: _col0
+                                  type: string
+                            tag: -1
+                            value expressions:
+                                  expr: _col1
+                                  type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(VALUE._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: bigint
+            outputColumnNames: _col0, _col1
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: select f2, count(*) from json_t a lateral view json_tuple(a.jstring, 'f1',
'f2', 'f3', 'f4', 'f5') b as f1, f2, f3, f4, f5 where f1 is not null group by f2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@json_t
+PREHOOK: Output: file:/tmp/nzhang/hive_2010-08-12_17-58-20_835_55486591128179740/-mr-10000
+POSTHOOK: query: select f2, count(*) from json_t a lateral view json_tuple(a.jstring, 'f1',
'f2', 'f3', 'f4', 'f5') b as f1, f2, f3, f4, f5 where f1 is not null group by f2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@json_t
+POSTHOOK: Output: file:/tmp/nzhang/hive_2010-08-12_17-58-20_835_55486591128179740/-mr-10000
+POSTHOOK: Lineage: json_t.jstring EXPRESSION []
+POSTHOOK: Lineage: json_t.key EXPRESSION []
+NULL	1
+2	2
+value2	1



Mime
View raw message