hadoop-hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From nzh...@apache.org
Subject svn commit: r1001072 - in /hadoop/hive/trunk: ./ ql/src/java/org/apache/hadoop/hive/ql/exec/ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/ ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/
Date Fri, 24 Sep 2010 21:01:07 GMT
Author: nzhang
Date: Fri Sep 24 21:01:07 2010
New Revision: 1001072

URL: http://svn.apache.org/viewvc?rev=1001072&view=rev
Log:
HIVE-1659. parse_url_tuple: a UDTF version of parse_url (Xing Jin via Ning Zhang)

Added:
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFParseUrlTuple.java
    hadoop/hive/trunk/ql/src/test/queries/clientpositive/udtf_parse_url_tuple.q
    hadoop/hive/trunk/ql/src/test/results/clientpositive/udtf_parse_url_tuple.q.out
Modified:
    hadoop/hive/trunk/CHANGES.txt
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
    hadoop/hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out

Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=1001072&r1=1001071&r2=1001072&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Fri Sep 24 21:01:07 2010
@@ -80,6 +80,9 @@ Trunk -  Unreleased
     HIVE-1661. Default values for parameters
     (Siying Dong via He Yongqiang) 
 
+    HIVE-1659. parse_url_tuple: a UDTF version of parse_url
+    (Xing Jin via Ning Zhang)
+
   IMPROVEMENTS
 
     HIVE-1394. Do not update transient_lastDdlTime if the partition is modified by a housekeeping

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java?rev=1001072&r1=1001071&r2=1001072&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java Fri
Sep 24 21:01:07 2010
@@ -179,6 +179,7 @@ import org.apache.hadoop.hive.ql.udf.gen
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDTFExplode;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDTFJSONTuple;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDTFParseUrlTuple;
 import org.apache.hadoop.hive.ql.udf.generic.SimpleGenericUDAFParameterInfo;
 import org.apache.hadoop.hive.ql.udf.xml.GenericUDFXPath;
 import org.apache.hadoop.hive.ql.udf.xml.UDFXPathBoolean;
@@ -406,6 +407,7 @@ public final class FunctionRegistry {
     // Generic UDTF's
     registerGenericUDTF("explode", GenericUDTFExplode.class);
     registerGenericUDTF("json_tuple", GenericUDTFJSONTuple.class);
+    registerGenericUDTF("parse_url_tuple", GenericUDTFParseUrlTuple.class);
   }
 
   public static void registerTemporaryUDF(String functionName,

Added: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFParseUrlTuple.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFParseUrlTuple.java?rev=1001072&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFParseUrlTuple.java
(added)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFParseUrlTuple.java
Fri Sep 24 21:01:07 2010
@@ -0,0 +1,243 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import java.net.URL;
+import java.net.MalformedURLException;
+import java.util.ArrayList;
+import java.util.regex.Pattern;
+import java.util.regex.Matcher;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde.Constants;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
+import org.apache.hadoop.io.Text;
+/**
+ * GenericUDTFParseUrlTuple: a UDTF version of parse_url that extracts several parts of a URL in one call.
+ *
+ */
+@Description(name = "parse_url_tuple",
+    value = "_FUNC_(url, partname1, partname2, ..., partnameN) - extracts N (N>=1) parts
from a URL.\n"
+          + "It takes a URL and one or multiple partnames, and returns a tuple. "
+          + "All the input parameters and output column types are string.",
+    extended = "Partname: HOST, PATH, QUERY, REF, PROTOCOL, AUTHORITY, FILE, USERINFO, QUERY:<KEY_NAME>\n"
+             + "Note: Partnames are case-sensitive, and should not contain unnecessary white
spaces.\n"
+             + "Example:\n"
+             + "  > SELECT b.* FROM src LATERAL VIEW _FUNC_(fullurl, 'HOST', 'PATH', 'QUERY',
'QUERY:id') "
+             + "b as host, path, query, query_id LIMIT 1;\n"
+             + "  > SELECT _FUNC_(a.fullurl, 'HOST', 'PATH', 'QUERY', 'REF', 'PROTOCOL',
'FILE', "
+             + " 'AUTHORITY', 'USERINFO', 'QUERY:k1') as (ho, pa, qu, re, pr, fi, au, us,
qk1) from src a;")
+
+public class GenericUDTFParseUrlTuple extends GenericUDTF {
+
+  enum PARTNAME {
+    HOST, PATH, QUERY, REF, PROTOCOL, AUTHORITY, FILE, USERINFO, QUERY_WITH_KEY, NULLNAME
+  };
+
+  private static Log LOG = LogFactory.getLog(GenericUDTFParseUrlTuple.class.getName());
+
+  int numCols;    // number of output columns
+  String[] paths; // array of part names (or the key, for QUERY:<key>), one per output column
+  PARTNAME[] partnames; // enum PARTNAME resolved from each entry of paths
+  Text[] retCols; // array of returned column values
+  Text[] cols;    // object pool of non-null Text, avoid creating objects all the time
+  Object[] nullCols; // array of null column values
+  ObjectInspector[] inputOIs; // input ObjectInspectors
+  boolean pathParsed = false;
+  boolean seenErrors = false;
+  URL url = null;
+  Pattern p = null;
+  String lastKey = null;
+
+  @Override
+  public void close() throws HiveException {
+  }
+
+  @Override
+  public StructObjectInspector initialize(ObjectInspector[] args)
+      throws UDFArgumentException {
+
+    inputOIs = args;
+    numCols = args.length - 1;
+
+    if (numCols < 1) {
+      throw new UDFArgumentException("parse_url_tuple() takes at least two arguments: " +
+      		"the url string and a part name");
+    }
+
+    for (int i = 0; i < args.length; ++i) {
+      if (args[i].getCategory() != ObjectInspector.Category.PRIMITIVE ||
+          !args[i].getTypeName().equals(Constants.STRING_TYPE_NAME)) {
+        throw new UDFArgumentException("parse_url_tuple()'s arguments have to be string type");
+      }
+    }
+
+    seenErrors = false;
+    pathParsed = false;
+    url = null;
+    p = null;
+    lastKey = null;
+    paths = new String[numCols];
+    partnames = new PARTNAME[numCols];
+    cols = new Text[numCols];
+    retCols = new Text[numCols];
+    nullCols = new Object[numCols];
+
+    for (int i = 0; i < numCols; ++i) {
+      cols[i] = new Text();
+      retCols[i] = cols[i];
+      nullCols[i] = null;
+    }
+
+    // construct output object inspector
+    ArrayList<String> fieldNames = new ArrayList<String>(numCols);
+    ArrayList<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>(numCols);
+    for (int i = 0; i < numCols; ++i) {
+      // the column name can be anything, since it will be renamed by the UDTF's AS clause
+      fieldNames.add("c" + i);
+      // all returned type will be Text
+      fieldOIs.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector);
+    }
+    
+    return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
+  }
+
+  @Override
+  public void process(Object[] o) throws HiveException {
+
+    if (o[0] == null) {
+      forward(nullCols);
+      return;
+    }
+    // resolve the part names only once, on the first row whose o[0] is non-null
+    if (!pathParsed) {
+        for (int i = 0;i < numCols; ++i) {
+          paths[i] = ((StringObjectInspector) inputOIs[i+1]).getPrimitiveJavaObject(o[i+1]);
+          
+          if (paths[i] == null) {
+            partnames[i] = PARTNAME.NULLNAME;
+          } else if (paths[i].equals("HOST")) {
+            partnames[i] = PARTNAME.HOST;
+          } else if (paths[i].equals("PATH")) {
+            partnames[i] = PARTNAME.PATH;
+          } else if (paths[i].equals("QUERY")) {
+            partnames[i] = PARTNAME.QUERY;
+          } else if (paths[i].equals("REF")) {
+            partnames[i] = PARTNAME.REF;
+          } else if (paths[i].equals("PROTOCOL")) {
+            partnames[i] = PARTNAME.PROTOCOL;
+          } else if (paths[i].equals("FILE")) {
+            partnames[i] = PARTNAME.FILE;
+          } else if (paths[i].equals("AUTHORITY")) {
+            partnames[i] = PARTNAME.AUTHORITY;
+          } else if (paths[i].equals("USERINFO")) {
+            partnames[i] = PARTNAME.USERINFO;
+          } else if (paths[i].startsWith("QUERY:")) {
+            partnames[i] = PARTNAME.QUERY_WITH_KEY;
+            paths[i] = paths[i].substring(6); // update paths[i], e.g., from "QUERY:id" to
"id" 
+          } else {
+            partnames[i] = PARTNAME.NULLNAME;
+          } 
+      }    
+      pathParsed = true;
+    }
+
+    String urlStr = ((StringObjectInspector) inputOIs[0]).getPrimitiveJavaObject(o[0]);
+    if (urlStr == null) {
+      forward(nullCols);
+      return;
+    }
+    
+    try {
+      String ret = null; 
+      url = new URL(urlStr);
+      for (int i = 0; i < numCols; ++i) {
+        ret = evaluate(url, i);
+        if (ret == null) {
+          retCols[i] = null;
+        } else {
+          if (retCols[i] == null) {
+            retCols[i] = cols[i]; // use the object pool rather than creating a new object
+          }
+          retCols[i].set(ret);
+        }
+      }
+
+      forward(retCols);
+      return;    
+    } catch (MalformedURLException e) {
+      // parsing error, invalid url string
+      if (!seenErrors) {
+        LOG.error("The input is not a valid url string: " + urlStr + ". Skipping such error
messages in the future.");
+        seenErrors = true;
+      }
+      forward(nullCols);
+      return;
+    }
+  }
+
+  @Override
+  public String toString() {
+    return "parse_url_tuple";
+  }
+  
+  private String evaluate(URL url, int index) {
+    if (url == null || index < 0 || index >= partnames.length)
+      return null;
+
+    switch (partnames[index]) {
+      case HOST          : return url.getHost();
+      case PATH          : return url.getPath();
+      case QUERY         : return url.getQuery();
+      case REF           : return url.getRef();
+      case PROTOCOL      : return url.getProtocol();
+      case FILE          : return url.getFile();
+      case AUTHORITY     : return url.getAuthority();
+      case USERINFO      : return url.getUserInfo();
+      case QUERY_WITH_KEY: return evaluateQuery(url.getQuery(), paths[index]);
+      case NULLNAME:
+      default            : return null;
+    }
+  }
+
+  private String evaluateQuery(String query, String key) {
+    if (query == null || key == null) {
+      return null;
+    }
+
+    if (!key.equals(lastKey)) {
+      p = Pattern.compile("(&|^)" + key + "=([^&]*)");
+    }
+
+    lastKey = key;
+    Matcher m = p.matcher(query);
+    if (m.find()) {
+      return m.group(2);
+    }
+    return null;
+  } 
+}

Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/udtf_parse_url_tuple.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/udtf_parse_url_tuple.q?rev=1001072&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/udtf_parse_url_tuple.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/udtf_parse_url_tuple.q Fri Sep 24
21:01:07 2010
@@ -0,0 +1,42 @@
+create table url_t (key string, fullurl string);
+
+insert overwrite table url_t
+select * from (
+  select '1', 'http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1' from src limit 1
+  union all
+  select '2', 'https://www.socs.uts.edu.au:80/MosaicDocs-old/url-primer.html?k1=tps#chapter1'
from src limit 1
+  union all
+  select '3', 'ftp://sites.google.com/a/example.com/site/page' from src limit 1
+  union all
+  select '4', cast(null as string) from src limit 1
+  union all
+  select '5', 'htttp://' from src limit 1
+  union all
+  select '6', '[invalid url string]' from src limit 1
+) s;
+
+describe function parse_url_tuple;
+describe function extended parse_url_tuple;
+
+explain 
+select a.key, b.* from url_t a lateral view parse_url_tuple(a.fullurl, 'HOST', 'PATH', 'QUERY',
'REF', 'PROTOCOL', 'FILE', 'AUTHORITY', 'USERINFO', 'QUERY:k1') b as ho, pa, qu, re, pr, fi,
au, us, qk1 order by a.key;
+
+select a.key, b.* from url_t a lateral view parse_url_tuple(a.fullurl, 'HOST', 'PATH', 'QUERY',
'REF', 'PROTOCOL', 'FILE', 'AUTHORITY', 'USERINFO', 'QUERY:k1') b as ho, pa, qu, re, pr, fi,
au, us, qk1 order by a.key;
+
+explain 
+select parse_url_tuple(a.fullurl, 'HOST', 'PATH', 'QUERY', 'REF', 'PROTOCOL', 'FILE', 'AUTHORITY',
'USERINFO', 'QUERY:k1') as (ho, pa, qu, re, pr, fi, au, us, qk1) from url_t a order by ho,
pa, qu;
+
+select parse_url_tuple(a.fullurl, 'HOST', 'PATH', 'QUERY', 'REF', 'PROTOCOL', 'FILE', 'AUTHORITY',
'USERINFO', 'QUERY:k1') as (ho, pa, qu, re, pr, fi, au, us, qk1) from url_t a order by ho,
pa, qu;
+
+-- should return null for 'host', 'query', 'QUERY:nonExistCol' 
+explain
+select a.key, b.ho, b.qu, b.qk1, b.err1, b.err2, b.err3 from url_t a lateral view parse_url_tuple(a.fullurl,
'HOST', 'PATH', 'QUERY', 'REF', 'PROTOCOL', 'FILE', 'AUTHORITY', 'USERINFO', 'QUERY:k1', 'host',
'query', 'QUERY:nonExistCol') b as ho, pa, qu, re, pr, fi, au, us, qk1, err1, err2, err3 order
by a.key;
+
+select a.key, b.ho, b.qu, b.qk1, b.err1, b.err2, b.err3 from url_t a lateral view parse_url_tuple(a.fullurl,
'HOST', 'PATH', 'QUERY', 'REF', 'PROTOCOL', 'FILE', 'AUTHORITY', 'USERINFO', 'QUERY:k1', 'host',
'query', 'QUERY:nonExistCol') b as ho, pa, qu, re, pr, fi, au, us, qk1, err1, err2, err3 order
by a.key;
+
+
+explain
+select ho, count(*) from url_t a lateral view parse_url_tuple(a.fullurl, 'HOST', 'PATH',
'QUERY', 'REF', 'PROTOCOL', 'FILE', 'AUTHORITY', 'USERINFO', 'QUERY:k1') b as ho, pa, qu,
re, pr, fi, au, us, qk1 where qk1 is not null group by ho;
+
+select ho, count(*) from url_t a lateral view parse_url_tuple(a.fullurl, 'HOST', 'PATH',
'QUERY', 'REF', 'PROTOCOL', 'FILE', 'AUTHORITY', 'USERINFO', 'QUERY:k1') b as ho, pa, qu,
re, pr, fi, au, us, qk1 where qk1 is not null group by ho;
+

Modified: hadoop/hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out?rev=1001072&r1=1001071&r2=1001072&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out (original)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out Fri Sep 24 21:01:07
2010
@@ -95,6 +95,7 @@ ngrams
 not
 or
 parse_url
+parse_url_tuple
 percentile
 percentile_approx
 pi
@@ -191,6 +192,7 @@ like
 locate
 minute
 negative
+parse_url_tuple
 percentile
 positive
 regexp_replace

Added: hadoop/hive/trunk/ql/src/test/results/clientpositive/udtf_parse_url_tuple.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/udtf_parse_url_tuple.q.out?rev=1001072&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/udtf_parse_url_tuple.q.out (added)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/udtf_parse_url_tuple.q.out Fri Sep
24 21:01:07 2010
@@ -0,0 +1,675 @@
+PREHOOK: query: create table url_t (key string, fullurl string)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table url_t (key string, fullurl string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@url_t
+PREHOOK: query: insert overwrite table url_t
+select * from (
+  select '1', 'http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1' from src limit 1
+  union all
+  select '2', 'https://www.socs.uts.edu.au:80/MosaicDocs-old/url-primer.html?k1=tps#chapter1'
from src limit 1
+  union all
+  select '3', 'ftp://sites.google.com/a/example.com/site/page' from src limit 1
+  union all
+  select '4', cast(null as string) from src limit 1
+  union all
+  select '5', 'htttp://' from src limit 1
+  union all
+  select '6', '[invalid url string]' from src limit 1
+) s
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@url_t
+POSTHOOK: query: insert overwrite table url_t
+select * from (
+  select '1', 'http://facebook.com/path1/p.php?k1=v1&k2=v2#Ref1' from src limit 1
+  union all
+  select '2', 'https://www.socs.uts.edu.au:80/MosaicDocs-old/url-primer.html?k1=tps#chapter1'
from src limit 1
+  union all
+  select '3', 'ftp://sites.google.com/a/example.com/site/page' from src limit 1
+  union all
+  select '4', cast(null as string) from src limit 1
+  union all
+  select '5', 'htttp://' from src limit 1
+  union all
+  select '6', '[invalid url string]' from src limit 1
+) s
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@url_t
+POSTHOOK: Lineage: url_t.fullurl EXPRESSION []
+POSTHOOK: Lineage: url_t.key EXPRESSION []
+PREHOOK: query: describe function parse_url_tuple
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: describe function parse_url_tuple
+POSTHOOK: type: DESCFUNCTION
+POSTHOOK: Lineage: url_t.fullurl EXPRESSION []
+POSTHOOK: Lineage: url_t.key EXPRESSION []
+parse_url_tuple(url, partname1, partname2, ..., partnameN) - extracts N (N>=1) parts from
a URL.
+It takes a URL and one or multiple partnames, and returns a tuple. All the input parameters
and output column types are string.
+PREHOOK: query: describe function extended parse_url_tuple
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: describe function extended parse_url_tuple
+POSTHOOK: type: DESCFUNCTION
+POSTHOOK: Lineage: url_t.fullurl EXPRESSION []
+POSTHOOK: Lineage: url_t.key EXPRESSION []
+parse_url_tuple(url, partname1, partname2, ..., partnameN) - extracts N (N>=1) parts from
a URL.
+It takes a URL and one or multiple partnames, and returns a tuple. All the input parameters
and output column types are string.
+Partname: HOST, PATH, QUERY, REF, PROTOCOL, AUTHORITY, FILE, USERINFO, QUERY:<KEY_NAME>
+Note: Partnames are case-sensitive, and should not contain unnecessary white spaces.
+Example:
+  > SELECT b.* FROM src LATERAL VIEW parse_url_tuple(fullurl, 'HOST', 'PATH', 'QUERY',
'QUERY:id') b as host, path, query, query_id LIMIT 1;
+  > SELECT parse_url_tuple(a.fullurl, 'HOST', 'PATH', 'QUERY', 'REF', 'PROTOCOL', 'FILE',
 'AUTHORITY', 'USERINFO', 'QUERY:k1') as (ho, pa, qu, re, pr, fi, au, us, qk1) from src a;
+PREHOOK: query: explain 
+select a.key, b.* from url_t a lateral view parse_url_tuple(a.fullurl, 'HOST', 'PATH', 'QUERY',
'REF', 'PROTOCOL', 'FILE', 'AUTHORITY', 'USERINFO', 'QUERY:k1') b as ho, pa, qu, re, pr, fi,
au, us, qk1 order by a.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain 
+select a.key, b.* from url_t a lateral view parse_url_tuple(a.fullurl, 'HOST', 'PATH', 'QUERY',
'REF', 'PROTOCOL', 'FILE', 'AUTHORITY', 'USERINFO', 'QUERY:k1') b as ho, pa, qu, re, pr, fi,
au, us, qk1 order by a.key
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: url_t.fullurl EXPRESSION []
+POSTHOOK: Lineage: url_t.key EXPRESSION []
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION parse_url_tuple
(. (TOK_TABLE_OR_COL a) fullurl) 'HOST' 'PATH' 'QUERY' 'REF' 'PROTOCOL' 'FILE' 'AUTHORITY'
'USERINFO' 'QUERY:k1') ho pa qu re pr fi au us qk1 (TOK_TABALIAS b))) (TOK_TABREF url_t a)))
(TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL
a) key)) (TOK_SELEXPR (TOK_ALLCOLREF b))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL
a) key)))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        a 
+          TableScan
+            alias: a
+            Lateral View Forward
+              Select Operator
+                SELECT * : (no compute)
+                Lateral View Join Operator
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7,
_col8, _col9, _col10
+                  Select Operator
+                    expressions:
+                          expr: _col0
+                          type: string
+                          expr: _col2
+                          type: string
+                          expr: _col3
+                          type: string
+                          expr: _col4
+                          type: string
+                          expr: _col5
+                          type: string
+                          expr: _col6
+                          type: string
+                          expr: _col7
+                          type: string
+                          expr: _col8
+                          type: string
+                          expr: _col9
+                          type: string
+                          expr: _col10
+                          type: string
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7,
_col8, _col9
+                    Reduce Output Operator
+                      key expressions:
+                            expr: _col0
+                            type: string
+                      sort order: +
+                      tag: -1
+                      value expressions:
+                            expr: _col0
+                            type: string
+                            expr: _col1
+                            type: string
+                            expr: _col2
+                            type: string
+                            expr: _col3
+                            type: string
+                            expr: _col4
+                            type: string
+                            expr: _col5
+                            type: string
+                            expr: _col6
+                            type: string
+                            expr: _col7
+                            type: string
+                            expr: _col8
+                            type: string
+                            expr: _col9
+                            type: string
+              Select Operator
+                expressions:
+                      expr: fullurl
+                      type: string
+                      expr: 'HOST'
+                      type: string
+                      expr: 'PATH'
+                      type: string
+                      expr: 'QUERY'
+                      type: string
+                      expr: 'REF'
+                      type: string
+                      expr: 'PROTOCOL'
+                      type: string
+                      expr: 'FILE'
+                      type: string
+                      expr: 'AUTHORITY'
+                      type: string
+                      expr: 'USERINFO'
+                      type: string
+                      expr: 'QUERY:k1'
+                      type: string
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7,
_col8, _col9
+                UDTF Operator
+                  function name: parse_url_tuple
+                  Lateral View Join Operator
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7,
_col8, _col9, _col10
+                    Select Operator
+                      expressions:
+                            expr: _col0
+                            type: string
+                            expr: _col2
+                            type: string
+                            expr: _col3
+                            type: string
+                            expr: _col4
+                            type: string
+                            expr: _col5
+                            type: string
+                            expr: _col6
+                            type: string
+                            expr: _col7
+                            type: string
+                            expr: _col8
+                            type: string
+                            expr: _col9
+                            type: string
+                            expr: _col10
+                            type: string
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6,
_col7, _col8, _col9
+                      Reduce Output Operator
+                        key expressions:
+                              expr: _col0
+                              type: string
+                        sort order: +
+                        tag: -1
+                        value expressions:
+                              expr: _col0
+                              type: string
+                              expr: _col1
+                              type: string
+                              expr: _col2
+                              type: string
+                              expr: _col3
+                              type: string
+                              expr: _col4
+                              type: string
+                              expr: _col5
+                              type: string
+                              expr: _col6
+                              type: string
+                              expr: _col7
+                              type: string
+                              expr: _col8
+                              type: string
+                              expr: _col9
+                              type: string
+      Reduce Operator Tree:
+        Extract
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: select a.key, b.* from url_t a lateral view parse_url_tuple(a.fullurl, 'HOST',
'PATH', 'QUERY', 'REF', 'PROTOCOL', 'FILE', 'AUTHORITY', 'USERINFO', 'QUERY:k1') b as ho,
pa, qu, re, pr, fi, au, us, qk1 order by a.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@url_t
+PREHOOK: Output: file:/tmp/xjin/hive_2010-09-23_14-33-06_345_6003355090197452651/-mr-10000
+POSTHOOK: query: select a.key, b.* from url_t a lateral view parse_url_tuple(a.fullurl, 'HOST',
'PATH', 'QUERY', 'REF', 'PROTOCOL', 'FILE', 'AUTHORITY', 'USERINFO', 'QUERY:k1') b as ho,
pa, qu, re, pr, fi, au, us, qk1 order by a.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@url_t
+POSTHOOK: Output: file:/tmp/xjin/hive_2010-09-23_14-33-06_345_6003355090197452651/-mr-10000
+POSTHOOK: Lineage: url_t.fullurl EXPRESSION []
+POSTHOOK: Lineage: url_t.key EXPRESSION []
+1	facebook.com	/path1/p.php	k1=v1&k2=v2	Ref1	http	/path1/p.php?k1=v1&k2=v2	facebook.com
NULL	v1
+2	www.socs.uts.edu.au	/MosaicDocs-old/url-primer.html	k1=tps	chapter1	https	/MosaicDocs-old/url-primer.html?k1=tps
www.socs.uts.edu.au:80	NULL	tps
+3	sites.google.com	/a/example.com/site/page	NULL	NULL	ftp	/a/example.com/site/page	sites.google.com
NULL	NULL
+4	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL
+5	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL
+6	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL
+PREHOOK: query: explain 
+select parse_url_tuple(a.fullurl, 'HOST', 'PATH', 'QUERY', 'REF', 'PROTOCOL', 'FILE', 'AUTHORITY',
'USERINFO', 'QUERY:k1') as (ho, pa, qu, re, pr, fi, au, us, qk1) from url_t a order by ho,
pa, qu
+PREHOOK: type: QUERY
+POSTHOOK: query: explain 
+select parse_url_tuple(a.fullurl, 'HOST', 'PATH', 'QUERY', 'REF', 'PROTOCOL', 'FILE', 'AUTHORITY',
'USERINFO', 'QUERY:k1') as (ho, pa, qu, re, pr, fi, au, us, qk1) from url_t a order by ho,
pa, qu
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: url_t.fullurl EXPRESSION []
+POSTHOOK: Lineage: url_t.key EXPRESSION []
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF url_t a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE))
(TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION parse_url_tuple (. (TOK_TABLE_OR_COL a) fullurl) 'HOST'
'PATH' 'QUERY' 'REF' 'PROTOCOL' 'FILE' 'AUTHORITY' 'USERINFO' 'QUERY:k1') ho pa qu re pr fi
au us qk1)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL ho)) (TOK_TABSORTCOLNAMEASC
(TOK_TABLE_OR_COL pa)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL qu)))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        a 
+          TableScan
+            alias: a
+            Select Operator
+              expressions:
+                    expr: fullurl
+                    type: string
+                    expr: 'HOST'
+                    type: string
+                    expr: 'PATH'
+                    type: string
+                    expr: 'QUERY'
+                    type: string
+                    expr: 'REF'
+                    type: string
+                    expr: 'PROTOCOL'
+                    type: string
+                    expr: 'FILE'
+                    type: string
+                    expr: 'AUTHORITY'
+                    type: string
+                    expr: 'USERINFO'
+                    type: string
+                    expr: 'QUERY:k1'
+                    type: string
+              outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7,
_col8, _col9
+              UDTF Operator
+                function name: parse_url_tuple
+                Reduce Output Operator
+                  key expressions:
+                        expr: c0
+                        type: string
+                        expr: c1
+                        type: string
+                        expr: c2
+                        type: string
+                  sort order: +++
+                  tag: -1
+                  value expressions:
+                        expr: c0
+                        type: string
+                        expr: c1
+                        type: string
+                        expr: c2
+                        type: string
+                        expr: c3
+                        type: string
+                        expr: c4
+                        type: string
+                        expr: c5
+                        type: string
+                        expr: c6
+                        type: string
+                        expr: c7
+                        type: string
+                        expr: c8
+                        type: string
+      Reduce Operator Tree:
+        Extract
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: select parse_url_tuple(a.fullurl, 'HOST', 'PATH', 'QUERY', 'REF', 'PROTOCOL',
'FILE', 'AUTHORITY', 'USERINFO', 'QUERY:k1') as (ho, pa, qu, re, pr, fi, au, us, qk1) from
url_t a order by ho, pa, qu
+PREHOOK: type: QUERY
+PREHOOK: Input: default@url_t
+PREHOOK: Output: file:/tmp/xjin/hive_2010-09-23_14-33-09_351_1128977597067341681/-mr-10000
+POSTHOOK: query: select parse_url_tuple(a.fullurl, 'HOST', 'PATH', 'QUERY', 'REF', 'PROTOCOL',
'FILE', 'AUTHORITY', 'USERINFO', 'QUERY:k1') as (ho, pa, qu, re, pr, fi, au, us, qk1) from
url_t a order by ho, pa, qu
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@url_t
+POSTHOOK: Output: file:/tmp/xjin/hive_2010-09-23_14-33-09_351_1128977597067341681/-mr-10000
+POSTHOOK: Lineage: url_t.fullurl EXPRESSION []
+POSTHOOK: Lineage: url_t.key EXPRESSION []
+NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL
+NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL
+NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL
+facebook.com	/path1/p.php	k1=v1&k2=v2	Ref1	http	/path1/p.php?k1=v1&k2=v2	facebook.com
NULL	v1
+sites.google.com	/a/example.com/site/page	NULL	NULL	ftp	/a/example.com/site/page	sites.google.com
NULL	NULL
+www.socs.uts.edu.au	/MosaicDocs-old/url-primer.html	k1=tps	chapter1	https	/MosaicDocs-old/url-primer.html?k1=tps
www.socs.uts.edu.au:80	NULL	tps
+PREHOOK: query: -- should return null for 'host', 'query', 'QUERY:nonExistCol' 
+explain
+select a.key, b.ho, b.qu, b.qk1, b.err1, b.err2, b.err3 from url_t a lateral view parse_url_tuple(a.fullurl,
'HOST', 'PATH', 'QUERY', 'REF', 'PROTOCOL', 'FILE', 'AUTHORITY', 'USERINFO', 'QUERY:k1', 'host',
'query', 'QUERY:nonExistCol') b as ho, pa, qu, re, pr, fi, au, us, qk1, err1, err2, err3 order
by a.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- should return null for 'host', 'query', 'QUERY:nonExistCol' 
+explain
+select a.key, b.ho, b.qu, b.qk1, b.err1, b.err2, b.err3 from url_t a lateral view parse_url_tuple(a.fullurl,
'HOST', 'PATH', 'QUERY', 'REF', 'PROTOCOL', 'FILE', 'AUTHORITY', 'USERINFO', 'QUERY:k1', 'host',
'query', 'QUERY:nonExistCol') b as ho, pa, qu, re, pr, fi, au, us, qk1, err1, err2, err3 order
by a.key
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: url_t.fullurl EXPRESSION []
+POSTHOOK: Lineage: url_t.key EXPRESSION []
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION parse_url_tuple
(. (TOK_TABLE_OR_COL a) fullurl) 'HOST' 'PATH' 'QUERY' 'REF' 'PROTOCOL' 'FILE' 'AUTHORITY'
'USERINFO' 'QUERY:k1' 'host' 'query' 'QUERY:nonExistCol') ho pa qu re pr fi au us qk1 err1
err2 err3 (TOK_TABALIAS b))) (TOK_TABREF url_t a))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR
TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL
b) ho)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) qu)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) qk1))
(TOK_SELEXPR (. (TOK_TABLE_OR_COL b) err1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) err2)) (TOK_SELEXPR
(. (TOK_TABLE_OR_COL b) err3))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a)
key)))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        a 
+          TableScan
+            alias: a
+            Lateral View Forward
+              Select Operator
+                SELECT * : (no compute)
+                Lateral View Join Operator
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7,
_col8, _col9, _col10, _col11, _col12, _col13
+                  Select Operator
+                    expressions:
+                          expr: _col0
+                          type: string
+                          expr: _col2
+                          type: string
+                          expr: _col4
+                          type: string
+                          expr: _col10
+                          type: string
+                          expr: _col11
+                          type: string
+                          expr: _col12
+                          type: string
+                          expr: _col13
+                          type: string
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+                    Reduce Output Operator
+                      key expressions:
+                            expr: _col0
+                            type: string
+                      sort order: +
+                      tag: -1
+                      value expressions:
+                            expr: _col0
+                            type: string
+                            expr: _col1
+                            type: string
+                            expr: _col2
+                            type: string
+                            expr: _col3
+                            type: string
+                            expr: _col4
+                            type: string
+                            expr: _col5
+                            type: string
+                            expr: _col6
+                            type: string
+              Select Operator
+                expressions:
+                      expr: fullurl
+                      type: string
+                      expr: 'HOST'
+                      type: string
+                      expr: 'PATH'
+                      type: string
+                      expr: 'QUERY'
+                      type: string
+                      expr: 'REF'
+                      type: string
+                      expr: 'PROTOCOL'
+                      type: string
+                      expr: 'FILE'
+                      type: string
+                      expr: 'AUTHORITY'
+                      type: string
+                      expr: 'USERINFO'
+                      type: string
+                      expr: 'QUERY:k1'
+                      type: string
+                      expr: 'host'
+                      type: string
+                      expr: 'query'
+                      type: string
+                      expr: 'QUERY:nonExistCol'
+                      type: string
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7,
_col8, _col9, _col10, _col11, _col12
+                UDTF Operator
+                  function name: parse_url_tuple
+                  Lateral View Join Operator
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7,
_col8, _col9, _col10, _col11, _col12, _col13
+                    Select Operator
+                      expressions:
+                            expr: _col0
+                            type: string
+                            expr: _col2
+                            type: string
+                            expr: _col4
+                            type: string
+                            expr: _col10
+                            type: string
+                            expr: _col11
+                            type: string
+                            expr: _col12
+                            type: string
+                            expr: _col13
+                            type: string
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+                      Reduce Output Operator
+                        key expressions:
+                              expr: _col0
+                              type: string
+                        sort order: +
+                        tag: -1
+                        value expressions:
+                              expr: _col0
+                              type: string
+                              expr: _col1
+                              type: string
+                              expr: _col2
+                              type: string
+                              expr: _col3
+                              type: string
+                              expr: _col4
+                              type: string
+                              expr: _col5
+                              type: string
+                              expr: _col6
+                              type: string
+      Reduce Operator Tree:
+        Extract
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: select a.key, b.ho, b.qu, b.qk1, b.err1, b.err2, b.err3 from url_t a lateral
view parse_url_tuple(a.fullurl, 'HOST', 'PATH', 'QUERY', 'REF', 'PROTOCOL', 'FILE', 'AUTHORITY',
'USERINFO', 'QUERY:k1', 'host', 'query', 'QUERY:nonExistCol') b as ho, pa, qu, re, pr, fi,
au, us, qk1, err1, err2, err3 order by a.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@url_t
+PREHOOK: Output: file:/tmp/xjin/hive_2010-09-23_14-33-12_205_3636054115754495430/-mr-10000
+POSTHOOK: query: select a.key, b.ho, b.qu, b.qk1, b.err1, b.err2, b.err3 from url_t a lateral
view parse_url_tuple(a.fullurl, 'HOST', 'PATH', 'QUERY', 'REF', 'PROTOCOL', 'FILE', 'AUTHORITY',
'USERINFO', 'QUERY:k1', 'host', 'query', 'QUERY:nonExistCol') b as ho, pa, qu, re, pr, fi,
au, us, qk1, err1, err2, err3 order by a.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@url_t
+POSTHOOK: Output: file:/tmp/xjin/hive_2010-09-23_14-33-12_205_3636054115754495430/-mr-10000
+POSTHOOK: Lineage: url_t.fullurl EXPRESSION []
+POSTHOOK: Lineage: url_t.key EXPRESSION []
+1	facebook.com	k1=v1&k2=v2	v1	NULL	NULL	NULL
+2	www.socs.uts.edu.au	k1=tps	tps	NULL	NULL	NULL
+3	sites.google.com	NULL	NULL	NULL	NULL	NULL
+4	NULL	NULL	NULL	NULL	NULL	NULL
+5	NULL	NULL	NULL	NULL	NULL	NULL
+6	NULL	NULL	NULL	NULL	NULL	NULL
+PREHOOK: query: explain
+select ho, count(*) from url_t a lateral view parse_url_tuple(a.fullurl, 'HOST', 'PATH',
'QUERY', 'REF', 'PROTOCOL', 'FILE', 'AUTHORITY', 'USERINFO', 'QUERY:k1') b as ho, pa, qu,
re, pr, fi, au, us, qk1 where qk1 is not null group by ho
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select ho, count(*) from url_t a lateral view parse_url_tuple(a.fullurl, 'HOST', 'PATH',
'QUERY', 'REF', 'PROTOCOL', 'FILE', 'AUTHORITY', 'USERINFO', 'QUERY:k1') b as ho, pa, qu,
re, pr, fi, au, us, qk1 where qk1 is not null group by ho
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: url_t.fullurl EXPRESSION []
+POSTHOOK: Lineage: url_t.key EXPRESSION []
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION parse_url_tuple
(. (TOK_TABLE_OR_COL a) fullurl) 'HOST' 'PATH' 'QUERY' 'REF' 'PROTOCOL' 'FILE' 'AUTHORITY'
'USERINFO' 'QUERY:k1') ho pa qu re pr fi au us qk1 (TOK_TABALIAS b))) (TOK_TABREF url_t a)))
(TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL
ho)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_WHERE (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL
qk1))) (TOK_GROUPBY (TOK_TABLE_OR_COL ho))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        a 
+          TableScan
+            alias: a
+            Lateral View Forward
+              Select Operator
+                SELECT * : (no compute)
+                Lateral View Join Operator
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7,
_col8, _col9, _col10
+                  Filter Operator
+                    predicate:
+                        expr: _col10 is not null
+                        type: boolean
+                    Select Operator
+                      expressions:
+                            expr: _col2
+                            type: string
+                      outputColumnNames: _col2
+                      Group By Operator
+                        aggregations:
+                              expr: count()
+                        bucketGroup: false
+                        keys:
+                              expr: _col2
+                              type: string
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Reduce Output Operator
+                          key expressions:
+                                expr: _col0
+                                type: string
+                          sort order: +
+                          Map-reduce partition columns:
+                                expr: _col0
+                                type: string
+                          tag: -1
+                          value expressions:
+                                expr: _col1
+                                type: bigint
+              Select Operator
+                expressions:
+                      expr: fullurl
+                      type: string
+                      expr: 'HOST'
+                      type: string
+                      expr: 'PATH'
+                      type: string
+                      expr: 'QUERY'
+                      type: string
+                      expr: 'REF'
+                      type: string
+                      expr: 'PROTOCOL'
+                      type: string
+                      expr: 'FILE'
+                      type: string
+                      expr: 'AUTHORITY'
+                      type: string
+                      expr: 'USERINFO'
+                      type: string
+                      expr: 'QUERY:k1'
+                      type: string
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7,
_col8, _col9
+                UDTF Operator
+                  function name: parse_url_tuple
+                  Lateral View Join Operator
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7,
_col8, _col9, _col10
+                    Filter Operator
+                      predicate:
+                          expr: _col10 is not null
+                          type: boolean
+                      Select Operator
+                        expressions:
+                              expr: _col2
+                              type: string
+                        outputColumnNames: _col2
+                        Group By Operator
+                          aggregations:
+                                expr: count()
+                          bucketGroup: false
+                          keys:
+                                expr: _col2
+                                type: string
+                          mode: hash
+                          outputColumnNames: _col0, _col1
+                          Reduce Output Operator
+                            key expressions:
+                                  expr: _col0
+                                  type: string
+                            sort order: +
+                            Map-reduce partition columns:
+                                  expr: _col0
+                                  type: string
+                            tag: -1
+                            value expressions:
+                                  expr: _col1
+                                  type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(VALUE._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: bigint
+            outputColumnNames: _col0, _col1
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: select ho, count(*) from url_t a lateral view parse_url_tuple(a.fullurl,
'HOST', 'PATH', 'QUERY', 'REF', 'PROTOCOL', 'FILE', 'AUTHORITY', 'USERINFO', 'QUERY:k1') b
as ho, pa, qu, re, pr, fi, au, us, qk1 where qk1 is not null group by ho
+PREHOOK: type: QUERY
+PREHOOK: Input: default@url_t
+PREHOOK: Output: file:/tmp/xjin/hive_2010-09-23_14-33-15_195_8128654165273206766/-mr-10000
+POSTHOOK: query: select ho, count(*) from url_t a lateral view parse_url_tuple(a.fullurl,
'HOST', 'PATH', 'QUERY', 'REF', 'PROTOCOL', 'FILE', 'AUTHORITY', 'USERINFO', 'QUERY:k1') b
as ho, pa, qu, re, pr, fi, au, us, qk1 where qk1 is not null group by ho
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@url_t
+POSTHOOK: Output: file:/tmp/xjin/hive_2010-09-23_14-33-15_195_8128654165273206766/-mr-10000
+POSTHOOK: Lineage: url_t.fullurl EXPRESSION []
+POSTHOOK: Lineage: url_t.key EXPRESSION []
+facebook.com	1
+www.socs.uts.edu.au	1



Mime
View raw message