impala-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tarmstr...@apache.org
Subject impala git commit: IMPALA-6370: fix partitioned parquet tables with nested types
Date Fri, 05 Jan 2018 21:14:34 GMT
Repository: impala
Updated Branches:
  refs/heads/master eea8ade36 -> d3ff67b8b


IMPALA-6370: fix partitioned parquet tables with nested types

When materialising a nested collection, has_template_tuple() should use
the template tuple for the collection, not the top-level tuple.

Testing:
Added tests based on nested-types-basic.test that operate on a simple
partitioned table. The tests reliably crashed Impala before the fix.

Change-Id: Ic808b824ce3b31af0539036d8ca23d17b18deab4
Reviewed-on: http://gerrit.cloudera.org:8080/8947
Reviewed-by: Tim Armstrong <tarmstrong@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/d3ff67b8
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/d3ff67b8
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/d3ff67b8

Branch: refs/heads/master
Commit: d3ff67b8b3d7f1ba598ff6a66355562a840d2075
Parents: eea8ade
Author: Tim Armstrong <tarmstrong@cloudera.com>
Authored: Thu Jan 4 16:19:02 2018 -0800
Committer: Impala Public Jenkins <impala-public-jenkins@gerrit.cloudera.org>
Committed: Fri Jan 5 20:44:21 2018 +0000

----------------------------------------------------------------------
 be/src/exec/hdfs-scanner.h                      |   8 +-
 .../nested-types-basic-partitioned.test         | 351 +++++++++++++++++++
 tests/query_test/test_nested_types.py           |  29 ++
 3 files changed, 385 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/d3ff67b8/be/src/exec/hdfs-scanner.h
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-scanner.h b/be/src/exec/hdfs-scanner.h
index 9b80d6c..95bb58a 100644
--- a/be/src/exec/hdfs-scanner.h
+++ b/be/src/exec/hdfs-scanner.h
@@ -428,7 +428,7 @@ class HdfsScanner {
   /// Initialize a tuple. Inlined into the convenience version below for codegen.
   void IR_ALWAYS_INLINE InitTuple(
       const TupleDescriptor* desc, Tuple* template_tuple, Tuple* tuple) {
-    if (has_template_tuple()) {
+    if (has_template_tuple(template_tuple)) {
       InitTupleFromTemplate(template_tuple, tuple, tuple_byte_size());
     } else {
       tuple->ClearNullBits(desc->null_bytes_offset(), desc->num_null_bytes());
@@ -481,9 +481,11 @@ class HdfsScanner {
   /// Not inlined in IR so it can be replaced with a constant.
   int IR_NO_INLINE tuple_byte_size() const { return tuple_byte_size_; }
 
-  /// Returns true iff there is a template tuple with partition key values.
+  /// Returns true iff 'template_tuple' is non-NULL.
   /// Not inlined in IR so it can be replaced with a constant.
-  bool IR_NO_INLINE has_template_tuple() const { return template_tuple_ != nullptr; }
+  static bool IR_NO_INLINE has_template_tuple(Tuple* template_tuple) {
+    return template_tuple != nullptr;
+  }
 
   inline Tuple* next_tuple(int tuple_byte_size, Tuple* t) const {
     uint8_t* mem = reinterpret_cast<uint8_t*>(t);

http://git-wip-us.apache.org/repos/asf/impala/blob/d3ff67b8/testdata/workloads/functional-query/queries/QueryTest/nested-types-basic-partitioned.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/nested-types-basic-partitioned.test b/testdata/workloads/functional-query/queries/QueryTest/nested-types-basic-partitioned.test
new file mode 100644
index 0000000..75779b6
--- /dev/null
+++ b/testdata/workloads/functional-query/queries/QueryTest/nested-types-basic-partitioned.test
@@ -0,0 +1,351 @@
+====
+---- QUERY
+select part, id from complextypes_partitioned
+order by part, id
+---- RESULTS
+1,1
+1,2
+1,3
+1,4
+1,5
+1,6
+1,7
+1,8
+2,1
+2,2
+2,3
+2,4
+2,5
+2,6
+2,7
+2,8
+---- TYPES
+int,bigint
+====
+---- QUERY
+select part, count(*) from complextypes_partitioned group by part
+order by part
+---- RESULTS
+1,8
+2,8
+---- TYPES
+int,bigint
+====
+---- QUERY
+select part, id from complextypes_partitioned where id > 3
+order by part, id
+---- RESULTS
+1,4
+1,5
+1,6
+1,7
+1,8
+2,4
+2,5
+2,6
+2,7
+2,8
+---- TYPES
+int,bigint
+====
+---- QUERY
+select part, item from complextypes_partitioned t, t.int_array
+order by part, item
+---- RESULTS
+1,-1
+1,1
+1,1
+1,2
+1,2
+1,3
+1,3
+1,NULL
+1,NULL
+1,NULL
+2,-1
+2,1
+2,1
+2,2
+2,2
+2,3
+2,3
+2,NULL
+2,NULL
+2,NULL
+---- TYPES
+int,int
+====
+---- QUERY
+select item from complextypes_partitioned.int_array
+---- RESULTS: VERIFY_IS_EQUAL_SORTED
+-1
+1
+2
+3
+NULL
+1
+2
+NULL
+3
+NULL
+-1
+1
+2
+3
+NULL
+1
+2
+NULL
+3
+NULL
+---- TYPES
+int
+====
+---- QUERY
+select count(*) from complextypes_partitioned.int_array
+---- RESULTS
+20
+---- TYPES
+bigint
+====
+---- QUERY
+select count(item) from complextypes_partitioned.int_array
+---- RESULTS
+14
+---- TYPES
+bigint
+====
+---- QUERY
+select item from complextypes_partitioned.int_array_array.item
+---- RESULTS: VERIFY_IS_EQUAL_SORTED
+-1
+-2
+1
+2
+3
+4
+NULL
+1
+2
+NULL
+3
+NULL
+4
+5
+6
+-1
+-2
+1
+2
+3
+4
+NULL
+1
+2
+NULL
+3
+NULL
+4
+5
+6
+---- TYPES
+int
+====
+---- QUERY
+select item from complextypes_partitioned.int_array_array.item where item > 3
+---- RESULTS: VERIFY_IS_EQUAL_SORTED
+4
+4
+5
+6
+4
+4
+5
+6
+---- TYPES
+int
+====
+---- QUERY
+select count(*) from complextypes_partitioned.int_array_array.item
+---- RESULTS
+30
+---- TYPES
+bigint
+====
+---- QUERY
+select count(item) from complextypes_partitioned.int_array_array.item
+---- RESULTS
+24
+---- TYPES
+bigint
+====
+---- QUERY
+select count(*) from complextypes_partitioned.int_array_array
+---- RESULTS
+22
+---- TYPES
+bigint
+====
+---- QUERY
+select nested_struct.a from complextypes_partitioned
+order by nested_struct.a
+---- RESULTS
+-1
+-1
+1
+1
+7
+7
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+---- TYPES
+int
+====
+---- QUERY
+select count(nested_struct.a) from complextypes_partitioned
+---- RESULTS
+6
+---- TYPES
+bigint
+====
+---- QUERY
+select item from complextypes_partitioned.nested_struct.b
+---- RESULTS
+-1
+1
+NULL
+2
+3
+NULL
+-1
+1
+NULL
+2
+3
+NULL
+---- TYPES
+int
+====
+---- QUERY
+select count(*) from complextypes_partitioned.nested_struct.b
+---- RESULTS
+12
+---- TYPES
+bigint
+====
+---- QUERY
+select count(item) from complextypes_partitioned.nested_struct.b
+---- RESULTS
+8
+---- TYPES
+bigint
+====
+---- QUERY
+select item from complextypes_partitioned.nested_struct.b where item is null;
+---- RESULTS
+NULL
+NULL
+NULL
+NULL
+---- TYPES
+int
+====
+---- QUERY
+select inner_array.item.e from complextypes_partitioned.nested_struct.c.d.item inner_array
+---- RESULTS: VERIFY_IS_EQUAL_SORTED
+-1
+10
+-10
+11
+NULL
+10
+NULL
+-10
+NULL
+11
+NULL
+NULL
+-1
+10
+-10
+11
+NULL
+10
+NULL
+-10
+NULL
+11
+NULL
+NULL
+---- TYPES
+int
+====
+---- QUERY
+select count(inner_array.item.e) from complextypes_partitioned.nested_struct.c.d.item inner_array
+---- RESULTS
+14
+---- TYPES
+bigint
+====
+---- QUERY
+select count(*) from complextypes_partitioned.nested_struct.c.d.item inner_array
+---- RESULTS
+24
+---- TYPES
+bigint
+====
+---- QUERY
+select count(*) from complextypes_partitioned.nested_struct.c.d.item inner_array
+where inner_array.item.f = 'bbb'
+---- RESULTS
+4
+---- TYPES
+bigint
+====
+---- QUERY
+select inner_array.item.e, inner_array.item.f
+from complextypes_partitioned t, t.nested_struct.c.d.item inner_array
+order by t.part, 1, 2
+---- RESULTS: VERIFY_IS_EQUAL_SORTED
+-1,'nonnullable'
+10,'aaa'
+-10,'bbb'
+11,'c'
+NULL,'NULL'
+10,'aaa'
+NULL,'NULL'
+-10,'bbb'
+NULL,'NULL'
+11,'c'
+NULL,'NULL'
+NULL,'NULL'
+-1,'nonnullable'
+10,'aaa'
+-10,'bbb'
+11,'c'
+NULL,'NULL'
+10,'aaa'
+NULL,'NULL'
+-10,'bbb'
+NULL,'NULL'
+11,'c'
+NULL,'NULL'
+NULL,'NULL'
+---- TYPES
+int,string
+====
+---- QUERY
+select count(*) from complextypes_partitioned.nested_struct.c.d
+---- RESULTS
+20
+---- TYPES
+bigint
+====

http://git-wip-us.apache.org/repos/asf/impala/blob/d3ff67b8/tests/query_test/test_nested_types.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_nested_types.py b/tests/query_test/test_nested_types.py
index 68021d5..85ed3a4 100644
--- a/tests/query_test/test_nested_types.py
+++ b/tests/query_test/test_nested_types.py
@@ -97,6 +97,35 @@ class TestNestedTypes(ImpalaTestSuite):
     self.client.execute("select s.UppercasenamE from %s" % table_name)
     self.client.execute("select s.* from %s" % table_name)
 
+  def test_partitioned_table(self, vector, unique_database):
+    """IMPALA-6370: Test that a partitioned table with nested types can be scanned."""
+    table = "complextypes_partitioned"
+    db_table = "{0}.{1}".format(unique_database, table)
+    self.client.execute("""
+        CREATE EXTERNAL TABLE {0} (
+          id BIGINT,
+          int_array ARRAY<INT>,
+          int_array_array ARRAY<ARRAY<INT>>,
+          int_map MAP<STRING,INT>,
+          int_map_array ARRAY<MAP<STRING,INT>>,
+          nested_struct STRUCT<
+              a:INT,
+              b:ARRAY<INT>,
+              c:STRUCT<d:ARRAY<ARRAY<STRUCT<e:INT,f:STRING>>>>,
+              g:MAP<STRING,STRUCT<h:STRUCT<i:ARRAY<DOUBLE>>>>>
+        )
+        PARTITIONED BY (
+          part int
+        )
+        STORED AS PARQUET""".format(db_table))
+    # Add multiple partitions pointing to the complextypes_tbl data.
+    for partition in [1, 2]:
+      self.client.execute("ALTER TABLE {0} ADD PARTITION(part={1}) LOCATION '{2}'".format(
+          db_table, partition,
+          self._get_table_location("functional_parquet.complextypestbl", vector)))
+    self.run_test_case('QueryTest/nested-types-basic-partitioned', vector,
+        unique_database)
+
 class TestParquetArrayEncodings(ImpalaTestSuite):
   TESTFILE_DIR = os.path.join(os.environ['IMPALA_HOME'],
                               "testdata/parquet_nested_types_encodings")


Mime
View raw message