Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id B9FBB200BA6 for ; Tue, 4 Oct 2016 07:08:39 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id B8904160AE5; Tue, 4 Oct 2016 05:08:39 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id A0B83160ADC for ; Tue, 4 Oct 2016 07:08:38 +0200 (CEST) Received: (qmail 41330 invoked by uid 500); 4 Oct 2016 05:08:37 -0000 Mailing-List: contact commits-help@hive.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: hive-dev@hive.apache.org Delivered-To: mailing list commits@hive.apache.org Received: (qmail 41319 invoked by uid 99); 4 Oct 2016 05:08:37 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 04 Oct 2016 05:08:37 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 76406DFB89; Tue, 4 Oct 2016 05:08:37 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: pxiong@apache.org To: commits@hive.apache.org Message-Id: <4b2d9eee5fb242f69fc1d00aa1c1a471@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: hive git commit: HIVE-14768: Add a new UDTF Replicate_Rows (Pengcheng Xiong, reviewed by Ashutosh Chauhan) Date: Tue, 4 Oct 2016 05:08:37 +0000 (UTC) archived-at: Tue, 04 Oct 2016 05:08:39 -0000 Repository: hive Updated Branches: refs/heads/master 0a4b3d8ff -> e19f0e35e HIVE-14768: Add a new UDTF Replicate_Rows (Pengcheng Xiong, reviewed by Ashutosh Chauhan) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: 
http://git-wip-us.apache.org/repos/asf/hive/commit/e19f0e35 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e19f0e35 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e19f0e35 Branch: refs/heads/master Commit: e19f0e35e09ca283e5de46ae7e2db1e11396335e Parents: 0a4b3d8 Author: Pengcheng Xiong Authored: Mon Oct 3 22:07:24 2016 -0700 Committer: Pengcheng Xiong Committed: Mon Oct 3 22:07:24 2016 -0700 ---------------------------------------------------------------------- .../hadoop/hive/ql/exec/FunctionRegistry.java | 1 + .../udf/generic/GenericUDTFReplicateRows.java | 88 +++++++++++++++ .../clientpositive/udtf_replicate_rows.q | 23 ++++ .../results/clientpositive/show_functions.q.out | 1 + .../clientpositive/udtf_replicate_rows.q.out | 107 +++++++++++++++++++ 5 files changed, 220 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/e19f0e35/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java index 6870dfa..6b29be1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java @@ -468,6 +468,7 @@ public final class FunctionRegistry { // Generic UDTF's system.registerGenericUDTF("explode", GenericUDTFExplode.class); + system.registerGenericUDTF("replicate_rows", GenericUDTFReplicateRows.class); system.registerGenericUDTF("inline", GenericUDTFInline.class); system.registerGenericUDTF("json_tuple", GenericUDTFJSONTuple.class); system.registerGenericUDTF("parse_url_tuple", GenericUDTFParseUrlTuple.class); http://git-wip-us.apache.org/repos/asf/hive/blob/e19f0e35/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFReplicateRows.java 
---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFReplicateRows.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFReplicateRows.java new file mode 100644 index 0000000..164445d --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFReplicateRows.java @@ -0,0 +1,88 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.udf.generic; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils.ReturnObjectInspectorResolver; +import org.apache.hadoop.hive.serde2.lazy.LazyLong; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.io.LongWritable; + +/** + * Takes a row of data and repeats n times. + */ +@Description(name = "replicate_rows", value = "_FUNC_(n, cols...) 
- turns 1 row into n rows") +public class GenericUDTFReplicateRows extends GenericUDTF { + @Override + public void close() throws HiveException { + } + + private transient List<ObjectInspector> argOIs = new ArrayList<ObjectInspector>(); + + @Override + public StructObjectInspector initialize(ObjectInspector[] args) throws UDFArgumentException { + if (args.length < 2) { + throw new UDFArgumentException("UDTFReplicateRows() expects at least two arguments."); + } + if (!(args[0] instanceof LongObjectInspector)) { + throw new UDFArgumentException( + "The first argument to UDTFReplicateRows() must be a long (got " + + args[0].getTypeName() + " instead)."); + } + + ArrayList<String> fieldNames = new ArrayList<String>(); + ArrayList<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>(); + for (int index = 0; index < args.length; ++index) { + fieldNames.add("col" + index); + fieldOIs.add(args[index]); + } + argOIs = fieldOIs; + return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs); + } + + @Override + public void process(Object[] args) throws HiveException, UDFArgumentException { + + long numRows = ((LongObjectInspector) argOIs.get(0)).get(args[0]); + + for (long n = 0; n < numRows; n++) { + forward(args); + } + } + + @Override + public String toString() { + return "UDTFReplicateRows"; + } + +} http://git-wip-us.apache.org/repos/asf/hive/blob/e19f0e35/ql/src/test/queries/clientpositive/udtf_replicate_rows.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/udtf_replicate_rows.q b/ql/src/test/queries/clientpositive/udtf_replicate_rows.q new file mode 100644 index 0000000..a074a78 --- /dev/null +++ b/ql/src/test/queries/clientpositive/udtf_replicate_rows.q @@ -0,0 +1,23 @@ +set hive.mapred.mode=nonstrict; +set hive.cbo.enable=false; + +DESCRIBE FUNCTION replicate_rows; +DESCRIBE FUNCTION EXTENDED replicate_rows; + +create table t (x bigint, y string, z int); + +insert into table t values (3,'2',0),(2,'3',1),(0,'2',2),(-1,'k',3); + +SELECT 
replicate_rows(x,y) FROM t; + +SELECT replicate_rows(x,y,y) FROM t; + +SELECT replicate_rows(x,y,y,y,z) FROM t; + +select y,x from (SELECT replicate_rows(x,y) as (x,y) FROM t)subq; + +select z,y,x from(SELECT replicate_rows(x,y,y) as (z,y,x) FROM t)subq; + +SELECT replicate_rows(x,concat(y,'...'),y) FROM t; + + http://git-wip-us.apache.org/repos/asf/hive/blob/e19f0e35/ql/src/test/results/clientpositive/show_functions.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/show_functions.q.out b/ql/src/test/results/clientpositive/show_functions.q.out index 4a40094..7b746ff 100644 --- a/ql/src/test/results/clientpositive/show_functions.q.out +++ b/ql/src/test/results/clientpositive/show_functions.q.out @@ -186,6 +186,7 @@ regexp_extract regexp_replace repeat replace +replicate_rows reverse rlike round http://git-wip-us.apache.org/repos/asf/hive/blob/e19f0e35/ql/src/test/results/clientpositive/udtf_replicate_rows.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/udtf_replicate_rows.q.out b/ql/src/test/results/clientpositive/udtf_replicate_rows.q.out new file mode 100644 index 0000000..f76a584 --- /dev/null +++ b/ql/src/test/results/clientpositive/udtf_replicate_rows.q.out @@ -0,0 +1,107 @@ +PREHOOK: query: DESCRIBE FUNCTION replicate_rows +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION replicate_rows +POSTHOOK: type: DESCFUNCTION +replicate_rows(n, cols...) - turns 1 row into n rows +PREHOOK: query: DESCRIBE FUNCTION EXTENDED replicate_rows +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION EXTENDED replicate_rows +POSTHOOK: type: DESCFUNCTION +replicate_rows(n, cols...) 
- turns 1 row into n rows +PREHOOK: query: create table t (x bigint, y string, z int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t +POSTHOOK: query: create table t (x bigint, y string, z int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t +PREHOOK: query: insert into table t values (3,'2',0),(2,'3',1),(0,'2',2),(-1,'k',3) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@t +POSTHOOK: query: insert into table t values (3,'2',0),(2,'3',1),(0,'2',2),(-1,'k',3) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.x EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: t.y SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: t.z EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +PREHOOK: query: SELECT replicate_rows(x,y) FROM t +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: SELECT replicate_rows(x,y) FROM t +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +3 2 +3 2 +3 2 +2 3 +2 3 +PREHOOK: query: SELECT replicate_rows(x,y,y) FROM t +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: SELECT replicate_rows(x,y,y) FROM t +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +3 2 2 +3 2 2 +3 2 2 +2 3 3 +2 3 3 +PREHOOK: query: SELECT replicate_rows(x,y,y,y,z) FROM t +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: SELECT replicate_rows(x,y,y,y,z) FROM t +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A 
masked pattern was here #### +3 2 2 2 0 +3 2 2 2 0 +3 2 2 2 0 +2 3 3 3 1 +2 3 3 3 1 +PREHOOK: query: select y,x from (SELECT replicate_rows(x,y) as (x,y) FROM t)subq +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: select y,x from (SELECT replicate_rows(x,y) as (x,y) FROM t)subq +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +2 3 +2 3 +2 3 +3 2 +3 2 +PREHOOK: query: select z,y,x from(SELECT replicate_rows(x,y,y) as (z,y,x) FROM t)subq +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: select z,y,x from(SELECT replicate_rows(x,y,y) as (z,y,x) FROM t)subq +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +3 2 2 +3 2 2 +3 2 2 +2 3 3 +2 3 3 +PREHOOK: query: SELECT replicate_rows(x,concat(y,'...'),y) FROM t +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: SELECT replicate_rows(x,concat(y,'...'),y) FROM t +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +3 2... 2 +3 2... 2 +3 2... 2 +2 3... 3 +2 3... 3