hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ct...@apache.org
Subject hive git commit: HIVE-14205: Hive doesn't support union type with AVRO file format (Yibing Shi, via Chaoyu Tang)
Date Wed, 27 Jul 2016 03:43:56 GMT
Repository: hive
Updated Branches:
  refs/heads/master 7b6516d7f -> c922546e8


HIVE-14205: Hive doesn't support union type with AVRO file format (Yibing Shi, via Chaoyu Tang)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/c922546e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/c922546e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/c922546e

Branch: refs/heads/master
Commit: c922546e850b25797f38e83e3f05e55ee5ea58f3
Parents: 7b6516d
Author: ctang <ctang@cloudera.com>
Authored: Tue Jul 26 23:43:36 2016 -0400
Committer: ctang <ctang@cloudera.com>
Committed: Tue Jul 26 23:43:36 2016 -0400

----------------------------------------------------------------------
 .../clientnegative/avro_non_nullable_union.q    |  19 +++
 .../clientpositive/avro_nullable_union.q        |  39 +++++
 .../avro_non_nullable_union.q.out               |  42 ++++++
 .../clientpositive/avro_nullable_union.q.out    | 145 +++++++++++++++++++
 .../hive/serde2/avro/AvroDeserializer.java      |  22 ++-
 .../hadoop/hive/serde2/avro/AvroSerdeUtils.java |  39 ++++-
 .../hive/serde2/avro/TestAvroDeserializer.java  |   6 +-
 .../hive/serde2/avro/TestAvroSerdeUtils.java    |  36 ++++-
 8 files changed, 329 insertions(+), 19 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/c922546e/ql/src/test/queries/clientnegative/avro_non_nullable_union.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientnegative/avro_non_nullable_union.q b/ql/src/test/queries/clientnegative/avro_non_nullable_union.q
new file mode 100644
index 0000000..16e7cfa
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/avro_non_nullable_union.q
@@ -0,0 +1,19 @@
+-- verify that Hive fails to read a union type column from avro file with null union data if AVRO schema definition is not nullable
+
+DROP TABLE IF EXISTS union_nullable_test_text;
+
+CREATE TABLE union_nullable_test_text (id int, value uniontype<int,double>) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' COLLECTION ITEMS TERMINATED BY ':' STORED AS textfile;
+
+LOAD DATA LOCAL INPATH '../../data/files/union_nullable.txt' INTO TABLE union_nullable_test_text;
+
+DROP TABLE IF EXISTS union_non_nullable_test_avro;
+
+CREATE TABLE union_non_nullable_test_avro STORED AS avro tblproperties('avro.schema.literal'='{"name":"nullable", "type":"record", "fields":[{"name":"id", "type":"int"}, {"name":"value", "type":["int", "double"]}]}');
+
+INSERT OVERWRITE TABLE union_non_nullable_test_avro SELECT * FROM union_nullable_test_text;
+
+SELECT * FROM union_non_nullable_test_avro;
+
+DROP TABLE union_nullable_test_text;
+DROP TABLE union_non_nullable_test_avro;
+

http://git-wip-us.apache.org/repos/asf/hive/blob/c922546e/ql/src/test/queries/clientpositive/avro_nullable_union.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/avro_nullable_union.q b/ql/src/test/queries/clientpositive/avro_nullable_union.q
new file mode 100644
index 0000000..8db815a
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/avro_nullable_union.q
@@ -0,0 +1,39 @@
+-- verify that we can write a nullable union type column with both nullable and non-nullable data
+
+DROP TABLE IF EXISTS union_nullable_test_text;
+
+CREATE TABLE union_nullable_test_text (id int, value uniontype<int,double>) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' COLLECTION ITEMS TERMINATED BY ':' STORED AS textfile;
+
+LOAD DATA LOCAL INPATH '../../data/files/union_nullable.txt' INTO TABLE union_nullable_test_text;
+
+DROP TABLE IF EXISTS union_nullable_test_avro;
+
+CREATE TABLE union_nullable_test_avro STORED AS avro tblproperties('avro.schema.literal'='{"name":"nullable", "type":"record", "fields":[{"name":"id", "type":"int"}, {"name":"value", "type":["null", "int", "double"]}]}');
+
+INSERT OVERWRITE TABLE union_nullable_test_avro SELECT * FROM union_nullable_test_text;
+
+SELECT * FROM union_nullable_test_avro;
+
+DROP TABLE union_nullable_test_avro;
+DROP TABLE union_nullable_test_text;
+
+
+-- verify that we can write a non nullable union type column with non-nullable data
+
+DROP TABLE IF EXISTS union_non_nullable_test_text;
+
+CREATE TABLE union_non_nullable_test_text (id int, value uniontype<int,double>) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' COLLECTION ITEMS TERMINATED BY ':' STORED AS textfile;
+
+LOAD DATA LOCAL INPATH '../../data/files/union_non_nullable.txt' INTO TABLE union_non_nullable_test_text;
+
+DROP TABLE IF EXISTS union_non_nullable_test_avro;
+
+CREATE TABLE union_non_nullable_test_avro STORED AS avro tblproperties('avro.schema.literal'='{"name":"nullable", "type":"record", "fields":[{"name":"id", "type":"int"}, {"name":"value", "type":["int", "double"]}]}');
+
+INSERT OVERWRITE TABLE union_non_nullable_test_avro SELECT * FROM union_non_nullable_test_text;
+
+SELECT * FROM union_non_nullable_test_avro;
+
+DROP TABLE union_non_nullable_test_text;
+DROP TABLE union_non_nullable_test_avro;
+

http://git-wip-us.apache.org/repos/asf/hive/blob/c922546e/ql/src/test/results/clientnegative/avro_non_nullable_union.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/avro_non_nullable_union.q.out b/ql/src/test/results/clientnegative/avro_non_nullable_union.q.out
new file mode 100644
index 0000000..6c087b8
--- /dev/null
+++ b/ql/src/test/results/clientnegative/avro_non_nullable_union.q.out
@@ -0,0 +1,42 @@
+PREHOOK: query: -- verify that Hive fails to read a union type column from avro file with
null union data if AVRO schema definition is not nullable
+
+DROP TABLE IF EXISTS union_nullable_test_text
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: -- verify that Hive fails to read a union type column from avro file with
null union data if AVRO schema definition is not nullable
+
+DROP TABLE IF EXISTS union_nullable_test_text
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE union_nullable_test_text (id int, value uniontype<int,double>)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' COLLECTION ITEMS TERMINATED BY ':' STORED AS
textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@union_nullable_test_text
+POSTHOOK: query: CREATE TABLE union_nullable_test_text (id int, value uniontype<int,double>)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' COLLECTION ITEMS TERMINATED BY ':' STORED AS
textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@union_nullable_test_text
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/union_nullable.txt' INTO TABLE union_nullable_test_text
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@union_nullable_test_text
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/union_nullable.txt' INTO TABLE
union_nullable_test_text
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@union_nullable_test_text
+PREHOOK: query: DROP TABLE IF EXISTS union_non_nullable_test_avro
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS union_non_nullable_test_avro
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE union_non_nullable_test_avro STORED AS avro tblproperties('avro.schema.literal'='{"name":"nullable",
"type":"record", "fields":[{"name":"id", "type":"int"}, {"name":"value", "type":["int", "double"]}]}')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@union_non_nullable_test_avro
+POSTHOOK: query: CREATE TABLE union_non_nullable_test_avro STORED AS avro tblproperties('avro.schema.literal'='{"name":"nullable",
"type":"record", "fields":[{"name":"id", "type":"int"}, {"name":"value", "type":["int", "double"]}]}')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@union_non_nullable_test_avro
+PREHOOK: query: INSERT OVERWRITE TABLE union_non_nullable_test_avro SELECT * FROM union_nullable_test_text
+PREHOOK: type: QUERY
+PREHOOK: Input: default@union_nullable_test_text
+PREHOOK: Output: default@union_non_nullable_test_avro
+#### A masked pattern was here ####
+FAILED: Execution Error, return code 2 from org.apache.hadoop.hive.ql.exec.mr.MapRedTask

http://git-wip-us.apache.org/repos/asf/hive/blob/c922546e/ql/src/test/results/clientpositive/avro_nullable_union.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/avro_nullable_union.q.out b/ql/src/test/results/clientpositive/avro_nullable_union.q.out
new file mode 100644
index 0000000..b80182e
--- /dev/null
+++ b/ql/src/test/results/clientpositive/avro_nullable_union.q.out
@@ -0,0 +1,145 @@
+PREHOOK: query: -- verify that we can write a nullable union type column with both nullable
and non-nullable data
+
+DROP TABLE IF EXISTS union_nullable_test_text
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: -- verify that we can write a nullable union type column with both nullable
and non-nullable data
+
+DROP TABLE IF EXISTS union_nullable_test_text
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE union_nullable_test_text (id int, value uniontype<int,double>)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' COLLECTION ITEMS TERMINATED BY ':' STORED AS
textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@union_nullable_test_text
+POSTHOOK: query: CREATE TABLE union_nullable_test_text (id int, value uniontype<int,double>)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' COLLECTION ITEMS TERMINATED BY ':' STORED AS
textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@union_nullable_test_text
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/union_nullable.txt' INTO TABLE union_nullable_test_text
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@union_nullable_test_text
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/union_nullable.txt' INTO TABLE
union_nullable_test_text
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@union_nullable_test_text
+PREHOOK: query: DROP TABLE IF EXISTS union_nullable_test_avro
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS union_nullable_test_avro
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE union_nullable_test_avro STORED AS avro tblproperties('avro.schema.literal'='{"name":"nullable",
"type":"record", "fields":[{"name":"id", "type":"int"}, {"name":"value", "type":["null", "int",
"double"]}]}')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@union_nullable_test_avro
+POSTHOOK: query: CREATE TABLE union_nullable_test_avro STORED AS avro tblproperties('avro.schema.literal'='{"name":"nullable",
"type":"record", "fields":[{"name":"id", "type":"int"}, {"name":"value", "type":["null", "int",
"double"]}]}')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@union_nullable_test_avro
+PREHOOK: query: INSERT OVERWRITE TABLE union_nullable_test_avro SELECT * FROM union_nullable_test_text
+PREHOOK: type: QUERY
+PREHOOK: Input: default@union_nullable_test_text
+PREHOOK: Output: default@union_nullable_test_avro
+POSTHOOK: query: INSERT OVERWRITE TABLE union_nullable_test_avro SELECT * FROM union_nullable_test_text
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@union_nullable_test_text
+POSTHOOK: Output: default@union_nullable_test_avro
+POSTHOOK: Lineage: union_nullable_test_avro.id SIMPLE [(union_nullable_test_text)union_nullable_test_text.FieldSchema(name:id,
type:int, comment:null), ]
+POSTHOOK: Lineage: union_nullable_test_avro.value SIMPLE [(union_nullable_test_text)union_nullable_test_text.FieldSchema(name:value,
type:uniontype<int,double>, comment:null), ]
+PREHOOK: query: SELECT * FROM union_nullable_test_avro
+PREHOOK: type: QUERY
+PREHOOK: Input: default@union_nullable_test_avro
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM union_nullable_test_avro
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@union_nullable_test_avro
+#### A masked pattern was here ####
+1	{0:1}
+2	{1:2.3}
+3	NULL
+PREHOOK: query: DROP TABLE union_nullable_test_avro
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@union_nullable_test_avro
+PREHOOK: Output: default@union_nullable_test_avro
+POSTHOOK: query: DROP TABLE union_nullable_test_avro
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@union_nullable_test_avro
+POSTHOOK: Output: default@union_nullable_test_avro
+PREHOOK: query: DROP TABLE union_nullable_test_text
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@union_nullable_test_text
+PREHOOK: Output: default@union_nullable_test_text
+POSTHOOK: query: DROP TABLE union_nullable_test_text
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@union_nullable_test_text
+POSTHOOK: Output: default@union_nullable_test_text
+PREHOOK: query: -- verify that we can write a non nullable union type column with non-nullable
data
+
+DROP TABLE IF EXISTS union_non_nullable_test_text
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: -- verify that we can write a non nullable union type column with non-nullable
data
+
+DROP TABLE IF EXISTS union_non_nullable_test_text
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE union_non_nullable_test_text (id int, value uniontype<int,double>)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' COLLECTION ITEMS TERMINATED BY ':' STORED AS
textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@union_non_nullable_test_text
+POSTHOOK: query: CREATE TABLE union_non_nullable_test_text (id int, value uniontype<int,double>)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' COLLECTION ITEMS TERMINATED BY ':' STORED AS
textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@union_non_nullable_test_text
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/union_non_nullable.txt' INTO TABLE
union_non_nullable_test_text
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@union_non_nullable_test_text
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/union_non_nullable.txt' INTO TABLE
union_non_nullable_test_text
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@union_non_nullable_test_text
+PREHOOK: query: DROP TABLE IF EXISTS union_non_nullable_test_avro
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS union_non_nullable_test_avro
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE union_non_nullable_test_avro STORED AS avro tblproperties('avro.schema.literal'='{"name":"nullable",
"type":"record", "fields":[{"name":"id", "type":"int"}, {"name":"value", "type":["int", "double"]}]}')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@union_non_nullable_test_avro
+POSTHOOK: query: CREATE TABLE union_non_nullable_test_avro STORED AS avro tblproperties('avro.schema.literal'='{"name":"nullable",
"type":"record", "fields":[{"name":"id", "type":"int"}, {"name":"value", "type":["int", "double"]}]}')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@union_non_nullable_test_avro
+PREHOOK: query: INSERT OVERWRITE TABLE union_non_nullable_test_avro SELECT * FROM union_non_nullable_test_text
+PREHOOK: type: QUERY
+PREHOOK: Input: default@union_non_nullable_test_text
+PREHOOK: Output: default@union_non_nullable_test_avro
+POSTHOOK: query: INSERT OVERWRITE TABLE union_non_nullable_test_avro SELECT * FROM union_non_nullable_test_text
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@union_non_nullable_test_text
+POSTHOOK: Output: default@union_non_nullable_test_avro
+POSTHOOK: Lineage: union_non_nullable_test_avro.id SIMPLE [(union_non_nullable_test_text)union_non_nullable_test_text.FieldSchema(name:id,
type:int, comment:null), ]
+POSTHOOK: Lineage: union_non_nullable_test_avro.value SIMPLE [(union_non_nullable_test_text)union_non_nullable_test_text.FieldSchema(name:value,
type:uniontype<int,double>, comment:null), ]
+PREHOOK: query: SELECT * FROM union_non_nullable_test_avro
+PREHOOK: type: QUERY
+PREHOOK: Input: default@union_non_nullable_test_avro
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM union_non_nullable_test_avro
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@union_non_nullable_test_avro
+#### A masked pattern was here ####
+1	{0:1}
+2	{1:2.3}
+PREHOOK: query: DROP TABLE union_non_nullable_test_text
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@union_non_nullable_test_text
+PREHOOK: Output: default@union_non_nullable_test_text
+POSTHOOK: query: DROP TABLE union_non_nullable_test_text
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@union_non_nullable_test_text
+POSTHOOK: Output: default@union_non_nullable_test_text
+PREHOOK: query: DROP TABLE union_non_nullable_test_avro
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@union_non_nullable_test_avro
+PREHOOK: Output: default@union_non_nullable_test_avro
+POSTHOOK: query: DROP TABLE union_non_nullable_test_avro
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@union_non_nullable_test_avro
+POSTHOOK: Output: default@union_non_nullable_test_avro

http://git-wip-us.apache.org/repos/asf/hive/blob/c922546e/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java
index 6165138..88c3da9 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java
@@ -306,9 +306,28 @@ class AvroDeserializer {
    */
   private Object deserializeNullableUnion(Object datum, Schema fileSchema, Schema recordSchema)
                                             throws AvroSerdeException {
+    if (recordSchema.getTypes().size() == 2) {
+      // A type like [NULL, T]
+      return deserializeSingleItemNullableUnion(datum, fileSchema, recordSchema);
+    } else {
+      // Types like [NULL, T1, T2, ...]
+      if (datum == null) {
+        return null;
+      } else {
+        Schema newRecordSchema = AvroSerdeUtils.getOtherTypeFromNullableType(recordSchema);
+        return worker(datum, fileSchema, newRecordSchema,
+            SchemaToTypeInfo.generateTypeInfo(newRecordSchema, null));
+      }
+    }
+  }
+
+  private Object deserializeSingleItemNullableUnion(Object datum,
+                                                    Schema fileSchema,
+                                                    Schema recordSchema)
+      throws AvroSerdeException {
     int tag = GenericData.get().resolveUnion(recordSchema, datum); // Determine index of value
     Schema schema = recordSchema.getTypes().get(tag);
-    if (schema.getType().equals(Schema.Type.NULL)) {
+    if (schema.getType().equals(Type.NULL)) {
       return null;
     }
 
@@ -344,7 +363,6 @@ class AvroDeserializer {
     }
     return worker(datum, currentFileSchema, schema,
       SchemaToTypeInfo.generateTypeInfo(schema, null));
-
   }
 
   private Object deserializeStruct(GenericData.Record datum, Schema fileSchema, StructTypeInfo columnType)

http://git-wip-us.apache.org/repos/asf/hive/blob/c922546e/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java
index 08ee62b..3a539b2 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java
@@ -43,6 +43,7 @@ import java.nio.Buffer;
 import java.nio.ByteBuffer;
 import java.util.Arrays;
 import java.util.List;
+import java.util.ArrayList;
 import java.util.Properties;
 
 /**
@@ -174,14 +175,29 @@ public class AvroSerdeUtils {
    * types via a union of type T and null.  This is a very common use case.
    * As such, we want to silently convert it to just T and allow the value to be null.
    *
+   * When a Hive union type is used with AVRO, the schema type becomes
+   * Union[NULL, T1, T2, ...]. The NULL in the union should be silently removed
+   *
    * @return true if type represents Union[T, Null], false otherwise
    */
   public static boolean isNullableType(Schema schema) {
-    return schema.getType().equals(Schema.Type.UNION) &&
-           schema.getTypes().size() == 2 &&
-             (schema.getTypes().get(0).getType().equals(Schema.Type.NULL) ||
-              schema.getTypes().get(1).getType().equals(Schema.Type.NULL));
-      // [null, null] not allowed, so this check is ok.
+    if (!schema.getType().equals(Schema.Type.UNION)) {
+      return false;
+    }
+
+    List<Schema> itemSchemas = schema.getTypes();
+    if (itemSchemas.size() < 2) {
+      return false;
+    }
+
+    for (Schema itemSchema : itemSchemas) {
+      if (Schema.Type.NULL.equals(itemSchema.getType())) {
+        return true;
+      }
+    }
+
+    // [null, null] not allowed, so this check is ok.
+    return false;
   }
 
   /**
@@ -189,9 +205,18 @@ public class AvroSerdeUtils {
    * does no checking that the provides Schema is nullable.
    */
   public static Schema getOtherTypeFromNullableType(Schema schema) {
-    List<Schema> types = schema.getTypes();
+    List<Schema> itemSchemas = new ArrayList<>();
+    for (Schema itemSchema : schema.getTypes()) {
+      if (!Schema.Type.NULL.equals(itemSchema.getType())) {
+        itemSchemas.add(itemSchema);
+      }
+    }
 
-    return types.get(0).getType().equals(Schema.Type.NULL) ? types.get(1) : types.get(0);
+    if (itemSchemas.size() > 1) {
+      return Schema.createUnion(itemSchemas);
+    } else {
+      return itemSchemas.get(0);
+    }
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/hive/blob/c922546e/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroDeserializer.java
----------------------------------------------------------------------
diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroDeserializer.java b/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroDeserializer.java
index 986b803..3dc3331 100644
--- a/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroDeserializer.java
+++ b/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroDeserializer.java
@@ -291,7 +291,8 @@ public class TestAvroDeserializer {
     assertTrue(result.value instanceof String);
     assertEquals("this is a string", result.value);
     UnionObjectInspector uoi = (UnionObjectInspector)result.oi;
-    assertEquals(2, uoi.getTag(result.unionObject));
+    // The null in union type should be removed
+    assertEquals(1, uoi.getTag(result.unionObject));
 
     // Now the other enum possibility
     record = new GenericData.Record(ws);
@@ -300,7 +301,8 @@ public class TestAvroDeserializer {
     assertTrue(result.value instanceof Integer);
     assertEquals(99, result.value);
     uoi = (UnionObjectInspector)result.oi;
-    assertEquals(1, uoi.getTag(result.unionObject));
+    // The null in union type should be removed
+    assertEquals(0, uoi.getTag(result.unionObject));
   }
 
   @Test

http://git-wip-us.apache.org/repos/asf/hive/blob/c922546e/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroSerdeUtils.java
----------------------------------------------------------------------
diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroSerdeUtils.java b/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroSerdeUtils.java
index 0013b78..4e1ee69 100644
--- a/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroSerdeUtils.java
+++ b/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroSerdeUtils.java
@@ -26,6 +26,8 @@ import org.junit.Test;
 
 import java.io.IOException;
 import java.net.URISyntaxException;
+import java.util.Arrays;
+import java.util.List;
 import java.util.Properties;
 
 import static org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils.EXCEPTION_MESSAGE;
@@ -68,14 +70,32 @@ public class TestAvroSerdeUtils {
 
   @Test
   public void isNullableTypeIdentifiesUnionsOfMoreThanTwoTypes() {
-    String schemaString = "{\n" +
-      "  \"type\": \"record\", \n" +
-      "  \"name\": \"shouldNotPass\",\n" +
-      "  \"fields\" : [\n" +
-      "    {\"name\":\"mayBeNull\", \"type\":[\"string\", \"int\", \"null\"]}\n" +
-      "  ]\n" +
-      "}";
-    testField(schemaString, "mayBeNull", false);
+    List<String> schemaStrings = Arrays.asList(
+        "{\n" +
+            "  \"type\": \"record\", \n" +
+            "  \"name\": \"shouldNotPass\",\n" +
+            "  \"fields\" : [\n" +
+            "    {\"name\":\"mayBeNull\", \"type\":[\"string\", \"int\", \"null\"]}\n" +
+            "  ]\n" +
+            "}",
+        "{\n" +
+            "  \"type\": \"record\", \n" +
+            "  \"name\": \"shouldNotPass\",\n" +
+            "  \"fields\" : [\n" +
+            "    {\"name\":\"mayBeNull\", \"type\":[\"string\", \"null\", \"int\"]}\n" +
+            "  ]\n" +
+            "}",
+        "{\n" +
+            "  \"type\": \"record\", \n" +
+            "  \"name\": \"shouldNotPass\",\n" +
+            "  \"fields\" : [\n" +
+            "    {\"name\":\"mayBeNull\", \"type\":[\"null\", \"string\", \"int\"]}\n" +
+            "  ]\n" +
+            "}"
+    );
+    for (String schemaString : schemaStrings) {
+      testField(schemaString, "mayBeNull", true);
+    }
   }
 
   @Test


Mime
View raw message