hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jd...@apache.org
Subject svn commit: r1658589 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/udf/generic/ test/queries/clientpositive/ test/results/clientpositive/
Date Mon, 09 Feb 2015 21:58:55 GMT
Author: jdere
Date: Mon Feb  9 21:58:55 2015
New Revision: 1658589

URL: http://svn.apache.org/r1658589
Log:
HIVE-9587: UDF decode should accept STRING_GROUP types for the second parameter (Alexander Pivovarov via Jason Dere)

Added:
    hive/trunk/ql/src/test/queries/clientpositive/udf_decode.q
    hive/trunk/ql/src/test/results/clientpositive/udf_decode.q.out
Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDecode.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFEncode.java

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDecode.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDecode.java?rev=1658589&r1=1658588&r2=1658589&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDecode.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDecode.java Mon Feb  9 21:58:55 2015
@@ -38,8 +38,8 @@ import org.apache.hadoop.hive.serde2.obj
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
-import org.apache.hadoop.io.Text;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping;
 
 @Description(name = "decode",
     value = "_FUNC_(bin, str) - Decode the first argument using the second argument character set",
@@ -47,35 +47,42 @@ import org.apache.hadoop.io.Text;
         "'UTF-8', 'UTF-16BE', 'UTF-16LE', and 'UTF-16'. If either argument\n" +
         "is null, the result will also be null")
 public class GenericUDFDecode extends GenericUDF {
-  private transient CharsetDecoder decoder = null;
-  private transient BinaryObjectInspector bytesOI = null;
-  private transient StringObjectInspector charsetOI = null;
+  private transient CharsetDecoder decoder;
+  private transient BinaryObjectInspector bytesOI;
+  private transient PrimitiveObjectInspector charsetOI;
 
   @Override
   public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
     if (arguments.length != 2) {
-      throw new UDFArgumentLengthException("Encode() requires exactly two arguments");
+      throw new UDFArgumentLengthException("Decode() requires exactly two arguments");
     }
 
     if (arguments[0].getCategory() != Category.PRIMITIVE ||
         ((PrimitiveObjectInspector)arguments[0]).getPrimitiveCategory() != PrimitiveCategory.BINARY){
-      throw new UDFArgumentTypeException(0, "The first argument to Encode() must be a binary");
+      throw new UDFArgumentTypeException(0, "The first argument to Decode() must be a binary");
     }
 
     bytesOI = (BinaryObjectInspector) arguments[0];
 
-    if (arguments[1].getCategory() != Category.PRIMITIVE ||
-        ((PrimitiveObjectInspector)arguments[1]).getPrimitiveCategory() != PrimitiveCategory.STRING){
-      throw new UDFArgumentTypeException(1, "The second argument to Encode() must be a string");
+    if (arguments[1].getCategory() != Category.PRIMITIVE) {
+      throw new UDFArgumentTypeException(1, "The second argument to Decode() must be primitive");
     }
 
-    charsetOI = (StringObjectInspector) arguments[1];
-
-    // If the character set for encoding is constant, we can optimize that
-    StringObjectInspector charSetOI = (StringObjectInspector) arguments[1];
-    if (charSetOI instanceof ConstantObjectInspector){
-      String charSetName = ((Text) ((ConstantObjectInspector) charSetOI).getWritableConstantValue()).toString();
-      decoder = Charset.forName(charSetName).newDecoder().onMalformedInput(CodingErrorAction.REPORT).onUnmappableCharacter(CodingErrorAction.REPORT);
+    charsetOI = (PrimitiveObjectInspector) arguments[1];
+
+    if (PrimitiveGrouping.STRING_GROUP != PrimitiveObjectInspectorUtils
+        .getPrimitiveGrouping(charsetOI.getPrimitiveCategory())) {
+      throw new UDFArgumentTypeException(1,
+          "The second argument to Decode() must be from string group");
+    }
+
+    // If the character set for decoding is constant, we can optimize that
+    if (arguments[1] instanceof ConstantObjectInspector) {
+      String charSetName = ((ConstantObjectInspector) arguments[1]).getWritableConstantValue()
+          .toString();
+      decoder = Charset.forName(charSetName).newDecoder()
+          .onMalformedInput(CodingErrorAction.REPORT)
+          .onUnmappableCharacter(CodingErrorAction.REPORT);
     }
 
     return (ObjectInspector) PrimitiveObjectInspectorFactory.javaStringObjectInspector;
@@ -97,7 +104,8 @@ public class GenericUDFDecode extends Ge
         throw new HiveException(e);
       }
     } else {
-      decoded = Charset.forName(charsetOI.getPrimitiveJavaObject(arguments[1].get())).decode(wrappedBytes);
+      String charSetName = PrimitiveObjectInspectorUtils.getString(arguments[1].get(), charsetOI);
+      decoded = Charset.forName(charSetName).decode(wrappedBytes);
     }
     return decoded.toString();
   }
@@ -106,7 +114,7 @@ public class GenericUDFDecode extends Ge
   public String getDisplayString(String[] children) {
     assert (children.length == 2);
     StringBuilder sb = new StringBuilder();
-    sb.append("encode(");
+    sb.append("decode(");
     sb.append(children[0]).append(",");
     sb.append(children[1]).append(")");
     return sb.toString();

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFEncode.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFEncode.java?rev=1658589&r1=1658588&r2=1658589&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFEncode.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFEncode.java Mon Feb  9 21:58:55 2015
@@ -35,13 +35,10 @@ import org.apache.hadoop.hive.serde2.obj
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
 import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.io.Text;
 
 @Description(name = "encode",
 value = "_FUNC_(str, str) - Encode the first argument using the second argument character set",

Added: hive/trunk/ql/src/test/queries/clientpositive/udf_decode.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/udf_decode.q?rev=1658589&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/udf_decode.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/udf_decode.q Mon Feb  9 21:58:55 2015
@@ -0,0 +1,10 @@
+DESCRIBE FUNCTION decode;
+DESC FUNCTION EXTENDED decode;
+
+explain select decode(binary('TestDecode1'), 'UTF-8');
+
+select
+decode(binary('TestDecode1'), 'UTF-8'),
+decode(binary('TestDecode2'), cast('UTF-8' as varchar(10))),
+decode(binary('TestDecode3'), cast('UTF-8' as char(5))),
+decode(cast(null as binary), 'UTF-8');
\ No newline at end of file

Added: hive/trunk/ql/src/test/results/clientpositive/udf_decode.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/udf_decode.q.out?rev=1658589&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/udf_decode.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/udf_decode.q.out Mon Feb  9 21:58:55 2015
@@ -0,0 +1,52 @@
+PREHOOK: query: DESCRIBE FUNCTION decode
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION decode
+POSTHOOK: type: DESCFUNCTION
+decode(bin, str) - Decode the first argument using the second argument character set
+PREHOOK: query: DESC FUNCTION EXTENDED decode
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESC FUNCTION EXTENDED decode
+POSTHOOK: type: DESCFUNCTION
+decode(bin, str) - Decode the first argument using the second argument character set
+Possible options for the character set are 'US_ASCII', 'ISO-8859-1',
+'UTF-8', 'UTF-16BE', 'UTF-16LE', and 'UTF-16'. If either argument
+is null, the result will also be null
+PREHOOK: query: explain select decode(binary('TestDecode1'), 'UTF-8')
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select decode(binary('TestDecode1'), 'UTF-8')
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: _dummy_table
+          Row Limit Per Split: 1
+          Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE
+          Select Operator
+            expressions: 'TestDecode1' (type: string)
+            outputColumnNames: _col0
+            Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE
+            ListSink
+
+PREHOOK: query: select
+decode(binary('TestDecode1'), 'UTF-8'),
+decode(binary('TestDecode2'), cast('UTF-8' as varchar(10))),
+decode(binary('TestDecode3'), cast('UTF-8' as char(5))),
+decode(cast(null as binary), 'UTF-8')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: select
+decode(binary('TestDecode1'), 'UTF-8'),
+decode(binary('TestDecode2'), cast('UTF-8' as varchar(10))),
+decode(binary('TestDecode3'), cast('UTF-8' as char(5))),
+decode(cast(null as binary), 'UTF-8')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+TestDecode1	TestDecode2	TestDecode3	NULL



Mime
View raw message