hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From na...@apache.org
Subject svn commit: r1398654 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/udf/ java/org/apache/hadoop/hive/ql/udf/generic/ test/queries/clientpositive/ test/results/clientpositive/
Date Tue, 16 Oct 2012 05:35:54 GMT
Author: namit
Date: Tue Oct 16 05:35:53 2012
New Revision: 1398654

URL: http://svn.apache.org/viewvc?rev=1398654&view=rev
Log:
HIVE-3581 get_json_object and json_tuple return null in the presence of new line characters
(Kevin Wilfong via namit)


Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFJSONTuple.java
    hive/trunk/ql/src/test/queries/clientpositive/udf_get_json_object.q
    hive/trunk/ql/src/test/queries/clientpositive/udtf_json_tuple.q
    hive/trunk/ql/src/test/results/clientpositive/udf_get_json_object.q.out
    hive/trunk/ql/src/test/results/clientpositive/udtf_json_tuple.q.out

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java?rev=1398654&r1=1398653&r2=1398654&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java Tue Oct 16 05:35:53
2012
@@ -29,6 +29,8 @@ import java.util.regex.Pattern;
 import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.UDF;
 import org.apache.hadoop.io.Text;
+import org.codehaus.jackson.JsonFactory;
+import org.codehaus.jackson.JsonParser.Feature;
 import org.codehaus.jackson.map.ObjectMapper;
 import org.codehaus.jackson.map.type.TypeFactory;
 import org.codehaus.jackson.type.JavaType;
@@ -59,7 +61,12 @@ public class UDFJson extends UDF {
   private final Pattern patternKey = Pattern.compile("^([a-zA-Z0-9_\\-\\:\\s]+).*");
   private final Pattern patternIndex = Pattern.compile("\\[([0-9]+|\\*)\\]");
 
-  private static final ObjectMapper MAPPER = new ObjectMapper();
+  private static final JsonFactory JSON_FACTORY = new JsonFactory();
+  static {
+    // Allows for unescaped ASCII control characters in JSON values
+    JSON_FACTORY.enable(Feature.ALLOW_UNQUOTED_CONTROL_CHARS);
+  }
+  private static final ObjectMapper MAPPER = new ObjectMapper(JSON_FACTORY);
   private static final JavaType MAP_TYPE = TypeFactory.fromClass(Map.class);
 
   // An LRU cache using a linked hash map

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFJSONTuple.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFJSONTuple.java?rev=1398654&r1=1398653&r2=1398654&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFJSONTuple.java
(original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFJSONTuple.java
Tue Oct 16 05:35:53 2012
@@ -35,6 +35,8 @@ import org.apache.hadoop.hive.serde2.obj
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
 import org.apache.hadoop.io.Text;
+import org.codehaus.jackson.JsonFactory;
+import org.codehaus.jackson.JsonParser.Feature;
 import org.codehaus.jackson.map.ObjectMapper;
 import org.codehaus.jackson.map.type.TypeFactory;
 import org.codehaus.jackson.type.JavaType;
@@ -51,7 +53,12 @@ public class GenericUDTFJSONTuple extend
 
   private static Log LOG = LogFactory.getLog(GenericUDTFJSONTuple.class.getName());
 
-  private static final ObjectMapper MAPPER = new ObjectMapper();
+  private static final JsonFactory JSON_FACTORY = new JsonFactory();
+  static {
+    // Allows for unescaped ASCII control characters in JSON values
+    JSON_FACTORY.enable(Feature.ALLOW_UNQUOTED_CONTROL_CHARS);
+  }
+  private static final ObjectMapper MAPPER = new ObjectMapper(JSON_FACTORY);
   private static final JavaType MAP_TYPE = TypeFactory.fromClass(Map.class);
 
   int numCols;    // number of output columns

Modified: hive/trunk/ql/src/test/queries/clientpositive/udf_get_json_object.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/udf_get_json_object.q?rev=1398654&r1=1398653&r2=1398654&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/udf_get_json_object.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/udf_get_json_object.q Tue Oct 16 05:35:53
2012
@@ -26,4 +26,15 @@ SELECT get_json_object(src_json.json, '$
 
 SELECT get_json_object(src_json.json, '$.zip code') FROM src_json;
 
-SELECT get_json_object(src_json.json, '$.fb:testid') FROM src_json;
\ No newline at end of file
+SELECT get_json_object(src_json.json, '$.fb:testid') FROM src_json;
+
+
+-- Verify that get_json_object can handle new lines in JSON values
+
+CREATE TABLE dest2(c1 STRING) STORED AS RCFILE;
+
+INSERT OVERWRITE TABLE dest2 SELECT '{"a":"b\nc"}' FROM src LIMIT 1;
+
+SELECT * FROM dest2;
+
+SELECT get_json_object(c1, '$.a') FROM dest2;
\ No newline at end of file

Modified: hive/trunk/ql/src/test/queries/clientpositive/udtf_json_tuple.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/udtf_json_tuple.q?rev=1398654&r1=1398653&r2=1398654&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/udtf_json_tuple.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/udtf_json_tuple.q Tue Oct 16 05:35:53 2012
@@ -34,3 +34,14 @@ explain 
 select f2, count(*) from json_t a lateral view json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4',
'f5') b as f1, f2, f3, f4, f5 where f1 is not null group by f2 order by f2;
 
 select f2, count(*) from json_t a lateral view json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4',
'f5') b as f1, f2, f3, f4, f5 where f1 is not null group by f2 order by f2;
+
+
+-- Verify that json_tuple can handle new lines in JSON values
+
+CREATE TABLE dest1(c1 STRING) STORED AS RCFILE;
+
+INSERT OVERWRITE TABLE dest1 SELECT '{"a":"b\nc"}' FROM src LIMIT 1;
+
+SELECT * FROM dest1;
+
+SELECT json FROM dest1 a LATERAL VIEW json_tuple(c1, 'a') b AS json;
\ No newline at end of file

Modified: hive/trunk/ql/src/test/results/clientpositive/udf_get_json_object.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/udf_get_json_object.q.out?rev=1398654&r1=1398653&r2=1398654&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/udf_get_json_object.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/udf_get_json_object.q.out Tue Oct 16 05:35:53
2012
@@ -173,3 +173,47 @@ POSTHOOK: Input: default@src_json
 #### A masked pattern was here ####
 POSTHOOK: Lineage: dest1.c1 SIMPLE []
 1234
+PREHOOK: query: -- Verify that get_json_object can handle new lines in JSON values
+
+CREATE TABLE dest2(c1 STRING) STORED AS RCFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- Verify that get_json_object can handle new lines in JSON values
+
+CREATE TABLE dest2(c1 STRING) STORED AS RCFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@dest2
+POSTHOOK: Lineage: dest1.c1 SIMPLE []
+PREHOOK: query: INSERT OVERWRITE TABLE dest2 SELECT '{"a":"b\nc"}' FROM src LIMIT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dest2
+POSTHOOK: query: INSERT OVERWRITE TABLE dest2 SELECT '{"a":"b\nc"}' FROM src LIMIT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest2
+POSTHOOK: Lineage: dest1.c1 SIMPLE []
+POSTHOOK: Lineage: dest2.c1 SIMPLE []
+PREHOOK: query: SELECT * FROM dest2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM dest2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest2
+#### A masked pattern was here ####
+POSTHOOK: Lineage: dest1.c1 SIMPLE []
+POSTHOOK: Lineage: dest2.c1 SIMPLE []
+{"a":"b
+c"}
+PREHOOK: query: SELECT get_json_object(c1, '$.a') FROM dest2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT get_json_object(c1, '$.a') FROM dest2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest2
+#### A masked pattern was here ####
+POSTHOOK: Lineage: dest1.c1 SIMPLE []
+POSTHOOK: Lineage: dest2.c1 SIMPLE []
+b
+c

Modified: hive/trunk/ql/src/test/results/clientpositive/udtf_json_tuple.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/udtf_json_tuple.q.out?rev=1398654&r1=1398653&r2=1398654&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/udtf_json_tuple.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/udtf_json_tuple.q.out Tue Oct 16 05:35:53
2012
@@ -571,3 +571,51 @@ POSTHOOK: Lineage: json_t.key EXPRESSION
 NULL	1
 2	2
 value2	1
+PREHOOK: query: -- Verify that json_tuple can handle new lines in JSON values
+
+CREATE TABLE dest1(c1 STRING) STORED AS RCFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- Verify that json_tuple can handle new lines in JSON values
+
+CREATE TABLE dest1(c1 STRING) STORED AS RCFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@dest1
+POSTHOOK: Lineage: json_t.jstring EXPRESSION []
+POSTHOOK: Lineage: json_t.key EXPRESSION []
+PREHOOK: query: INSERT OVERWRITE TABLE dest1 SELECT '{"a":"b\nc"}' FROM src LIMIT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dest1
+POSTHOOK: query: INSERT OVERWRITE TABLE dest1 SELECT '{"a":"b\nc"}' FROM src LIMIT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest1
+POSTHOOK: Lineage: dest1.c1 SIMPLE []
+POSTHOOK: Lineage: json_t.jstring EXPRESSION []
+POSTHOOK: Lineage: json_t.key EXPRESSION []
+PREHOOK: query: SELECT * FROM dest1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM dest1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: dest1.c1 SIMPLE []
+POSTHOOK: Lineage: json_t.jstring EXPRESSION []
+POSTHOOK: Lineage: json_t.key EXPRESSION []
+{"a":"b
+c"}
+PREHOOK: query: SELECT json FROM dest1 a LATERAL VIEW json_tuple(c1, 'a') b AS json
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT json FROM dest1 a LATERAL VIEW json_tuple(c1, 'a') b AS json
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: dest1.c1 SIMPLE []
+POSTHOOK: Lineage: json_t.jstring EXPRESSION []
+POSTHOOK: Lineage: json_t.key EXPRESSION []
+b
+c



Mime
View raw message