hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From x..@apache.org
Subject hive git commit: HIVE-18209: Fix API call in VectorizedListColumnReader to get value from BytesColumnVector (Colin Ma, reviewed by Ferdinand Xu)
Date Mon, 18 Dec 2017 02:16:03 GMT
Repository: hive
Updated Branches:
  refs/heads/master 7acc4ce1b -> 11227ebab


HIVE-18209: Fix API call in VectorizedListColumnReader to get value from BytesColumnVector
(Colin Ma, reviewed by Ferdinand Xu)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/11227eba
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/11227eba
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/11227eba

Branch: refs/heads/master
Commit: 11227ebab390df10970fb8ef61f3e24421d6c66e
Parents: 7acc4ce
Author: Ferdinand Xu <cheng.a.xu@intel.com>
Authored: Mon Dec 18 10:01:13 2017 +0800
Committer: Ferdinand Xu <cheng.a.xu@intel.com>
Committed: Mon Dec 18 10:01:13 2017 +0800

----------------------------------------------------------------------
 .../vector/VectorizedListColumnReader.java      |  3 +-
 .../parquet/TestVectorizedListColumnReader.java | 34 +++++++++++++++++++-
 .../parquet/VectorizedColumnReaderTestBase.java |  1 +
 3 files changed, 36 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/11227eba/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedListColumnReader.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedListColumnReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedListColumnReader.java
index ea4f2f2..12af77c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedListColumnReader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedListColumnReader.java
@@ -258,7 +258,8 @@ public class VectorizedListColumnReader extends BaseVectorizedColumnReader {
         lcv.child = new BytesColumnVector(total);
         lcv.child.init();
         for (int i = 0; i < valueList.size(); i++) {
-          ((BytesColumnVector)lcv.child).setVal(i, ((List<byte[]>)valueList).get(i));
+          byte[] src = ((List<byte[]>)valueList).get(i);
+          ((BytesColumnVector)lcv.child).setRef(i, src, 0, src.length);
         }
         break;
       case FLOAT:

http://git-wip-us.apache.org/repos/asf/hive/blob/11227eba/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestVectorizedListColumnReader.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestVectorizedListColumnReader.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestVectorizedListColumnReader.java
index de19615..8ea5d25 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestVectorizedListColumnReader.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestVectorizedListColumnReader.java
@@ -72,8 +72,9 @@ public class TestVectorizedListColumnReader extends VectorizedColumnReaderTestBa
         }
       }
       for (int j = 0; j < listMaxSize; j++) {
-        group.append("list_int32_field_for_repeat_test", getIntValue(isDictionaryEncoding, j));
+        group.append("list_binary_field_for_repeat_test", getBinaryValue(isDictionaryEncoding, i));
       }
+
       writer.write(group);
     }
     writer.close();
@@ -157,6 +158,14 @@ public class TestVectorizedListColumnReader extends VectorizedColumnReaderTestBa
     removeFile();
   }
 
+  @Test
+  public void testUnrepeatedStringWithoutNullListRead() throws Exception {
+    removeFile();
+    writeListData(initWriterFromFile(), false, 1025);
+    testUnRepeateStringWithoutNullListRead();
+    removeFile();
+  }
+
   private void testListReadAllType(boolean isDictionaryEncoding, int elementNum) throws Exception {
     testListRead(isDictionaryEncoding, "int", elementNum);
     testListRead(isDictionaryEncoding, "long", elementNum);
@@ -250,6 +259,10 @@ public class TestVectorizedListColumnReader extends VectorizedColumnReaderTestBa
     try {
       while (reader.next(NullWritable.get(), previous)) {
         ListColumnVector vector = (ListColumnVector) previous.cols[0];
+
+        //since Repeating only happens when offset length is 1.
+        assertEquals((vector.offsets.length == 1),vector.isRepeating);
+
         for (int i = 0; i < vector.offsets.length; i++) {
           if (row == elementNum) {
             assertEquals(i, vector.offsets.length - 1);
@@ -305,4 +318,23 @@ public class TestVectorizedListColumnReader extends VectorizedColumnReaderTestBa
       reader.close();
     }
   }
+
+  private void testUnRepeateStringWithoutNullListRead() throws Exception {
+    Configuration conf = new Configuration();
+    conf.set(IOConstants.COLUMNS, "list_binary_field_for_repeat_test");
+    conf.set(IOConstants.COLUMNS_TYPES, "array<string>");
+    conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
+    conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0");
+    VectorizedParquetRecordReader reader = createTestParquetReader(
+        "message hive_schema {repeated binary list_binary_field_for_repeat_test;}", conf);
+    VectorizedRowBatch previous = reader.createValue();
+    try {
+      while (reader.next(NullWritable.get(), previous)) {
+        ListColumnVector vector = (ListColumnVector) previous.cols[0];
+        assertEquals((vector.offsets.length == 1),vector.isRepeating);
+      }
+    } finally {
+      reader.close();
+    }
+  }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/11227eba/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/VectorizedColumnReaderTestBase.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/VectorizedColumnReaderTestBase.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/VectorizedColumnReaderTestBase.java
index 33c5c82..db7777d 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/VectorizedColumnReaderTestBase.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/VectorizedColumnReaderTestBase.java
@@ -124,6 +124,7 @@ public class VectorizedColumnReaderTestBase {
       + "repeated fixed_len_byte_array(3) list_byte_array_field;"
       + "repeated binary list_binary_field;"
       + "repeated binary list_decimal_field (DECIMAL(5,2));"
+      + "repeated binary list_binary_field_for_repeat_test;"
       + "repeated int32 list_int32_field_for_repeat_test;"
       + "repeated group map_int32 (MAP_KEY_VALUE) {\n"
       + "  required int32 key;\n"


Mime
View raw message