impala-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sail...@apache.org
Subject [7/9] incubator-impala git commit: IMPALA-4372: 'Describe formatted' returns types in upper case
Date Tue, 15 Nov 2016 18:50:35 GMT
IMPALA-4372: 'Describe formatted' returns types in upper case

A recent change caused 'describe formatted' to display the types
in all upper case, but we want 'describe formatted' to match Hive's
'describe' output, which displays the types in lower case.

This patch also fixes several problems with test_describe_formatted,
which was encountering an error but reporting success.

Change-Id: I274b97d4d1247244247fb38a5ca7f4c10bba8d22
Reviewed-on: http://gerrit.cloudera.org:8080/4861
Reviewed-by: Dimitris Tsirogiannis <dtsirogiannis@cloudera.com>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/e6e2baea
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/e6e2baea
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/e6e2baea

Branch: refs/heads/master
Commit: e6e2baea33eca0fbad9958c7c2f6087ee1019c46
Parents: 0ea4a66
Author: Thomas Tauber-Marshall <tmarshall@cloudera.com>
Authored: Wed Oct 26 19:41:19 2016 -0700
Committer: Internal Jenkins <cloudera-hudson@gerrit.cloudera.org>
Committed: Tue Nov 15 05:38:12 2016 +0000

----------------------------------------------------------------------
 .../java/org/apache/impala/catalog/Column.java  |  2 +-
 .../queries/QueryTest/avro-schema-changes.test  |  6 +--
 tests/common/impala_test_suite.py               |  9 ++--
 .../metadata/test_metadata_query_statements.py  | 47 +++++++++++++++-----
 tests/performance/query_exec_functions.py       | 10 ++---
 tests/performance/query_executor.py             |  1 +
 6 files changed, 49 insertions(+), 26 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/e6e2baea/fe/src/main/java/org/apache/impala/catalog/Column.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/catalog/Column.java b/fe/src/main/java/org/apache/impala/catalog/Column.java
index ab064e7..91928aa 100644
--- a/fe/src/main/java/org/apache/impala/catalog/Column.java
+++ b/fe/src/main/java/org/apache/impala/catalog/Column.java
@@ -124,7 +124,7 @@ public class Column {
     return Lists.transform(columns, new Function<Column, FieldSchema>() {
       public FieldSchema apply(Column column) {
         Preconditions.checkNotNull(column.getType());
-        return new FieldSchema(column.getName(), column.getType().toSql(),
+        return new FieldSchema(column.getName(), column.getType().toSql().toLowerCase(),
             column.getComment());
       }
     });

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/e6e2baea/testdata/workloads/functional-query/queries/QueryTest/avro-schema-changes.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/avro-schema-changes.test b/testdata/workloads/functional-query/queries/QueryTest/avro-schema-changes.test
index 1e3eac9..8233a02 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/avro-schema-changes.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/avro-schema-changes.test
@@ -101,8 +101,8 @@ DESCRIBE FORMATTED avro_alter_schema_add_new_column;
 ---- TYPES
 string,string,string
 ---- RESULTS: VERIFY_IS_SUBSET
-'old_col','STRING','from deserializer'
-'new_col','STRING','from deserializer'
+'old_col','string','from deserializer'
+'new_col','string','from deserializer'
 ====
 ---- QUERY
 # IMPALA-3776: Create an Avro table, remove a column from the Avro schema and make sure
@@ -134,5 +134,5 @@ DESCRIBE FORMATTED avro_alter_schema_remove_column;
 ---- TYPES
 string,string,string
 ---- RESULTS: VERIFY_IS_SUBSET
-'col1','STRING','from deserializer'
+'col1','string','from deserializer'
 ====

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/e6e2baea/tests/common/impala_test_suite.py
----------------------------------------------------------------------
diff --git a/tests/common/impala_test_suite.py b/tests/common/impala_test_suite.py
index afa8e2c..e287360 100644
--- a/tests/common/impala_test_suite.py
+++ b/tests/common/impala_test_suite.py
@@ -467,24 +467,23 @@ class ImpalaTestSuite(BaseTestSuite):
     assert len(result.data) <= 1, 'Multiple values returned from scalar'
     return result.data[0] if len(result.data) == 1 else None
 
-  def exec_and_compare_hive_and_impala_hs2(self, stmt):
+  def exec_and_compare_hive_and_impala_hs2(self, stmt, compare = lambda x, y: x == y):
     """Compare Hive and Impala results when executing the same statment over HS2"""
     # execute_using_jdbc expects a Query object. Convert the query string into a Query
     # object
     query = Query()
     query.query_str = stmt
     # Run the statement targeting Hive
-    exec_opts = JdbcQueryExecConfig(impalad=HIVE_HS2_HOST_PORT)
+    exec_opts = JdbcQueryExecConfig(impalad=HIVE_HS2_HOST_PORT, transport='SASL')
     hive_results = execute_using_jdbc(query, exec_opts).data
 
     # Run the statement targeting Impala
-    exec_opts = JdbcQueryExecConfig(impalad=IMPALAD_HS2_HOST_PORT)
+    exec_opts = JdbcQueryExecConfig(impalad=IMPALAD_HS2_HOST_PORT, transport='NOSASL')
     impala_results = execute_using_jdbc(query, exec_opts).data
 
     # Compare the results
     assert (impala_results is not None) and (hive_results is not None)
-    for impala, hive in zip(impala_results, hive_results):
-      assert impala == hive
+    assert compare(impala_results, hive_results)
 
   def load_query_test_file(self, workload, file_name, valid_section_names=None,
       encoding=None):

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/e6e2baea/tests/metadata/test_metadata_query_statements.py
----------------------------------------------------------------------
diff --git a/tests/metadata/test_metadata_query_statements.py b/tests/metadata/test_metadata_query_statements.py
index 0a51acb..8e25a2a 100644
--- a/tests/metadata/test_metadata_query_statements.py
+++ b/tests/metadata/test_metadata_query_statements.py
@@ -18,6 +18,7 @@
 # Impala tests for queries that query metadata and set session settings
 
 import pytest
+import re
 
 from tests.beeswax.impala_beeswax import ImpalaBeeswaxException
 from tests.common.impala_test_suite import ImpalaTestSuite
@@ -75,13 +76,36 @@ class TestMetadataQueryStatements(ImpalaTestSuite):
   @SkipIfS3.hive
   @SkipIfLocal.hive
   def test_describe_formatted(self, vector, unique_database):
+    # For describe formatted, we try to match Hive's output as closely as possible.
+    # However, we're inconsistent with our handling of NULLs vs theirs - Impala sometimes
+    # specifies 'NULL' where Hive uses an empty string, and Hive sometimes specifies 'null'
+    # with padding where Impala uses a sequence of blank spaces - and for now
+    # we want to leave it that way to not affect users who rely on this output.
+    def compare_describe_formatted(impala_results, hive_results):
+      for impala, hive in zip(re.split(',|\n', impala_results),
+          re.split(',|\n', hive_results)):
+
+        if impala != hive:
+          # If they don't match, check if it's because of the inconsistent null handling.
+          impala = impala.replace(' ', '').lower()
+          hive = hive.replace(' ', '').lower()
+          if not ((impala == "'null'" and hive ==  "''") or
+              (impala == "''" and hive == "'null'")):
+            return False
+      return True
+
     # Describe a partitioned table.
-    self.exec_and_compare_hive_and_impala_hs2("describe formatted functional.alltypes")
+    self.exec_and_compare_hive_and_impala_hs2("describe formatted functional.alltypes",
+        compare=compare_describe_formatted)
     self.exec_and_compare_hive_and_impala_hs2(
-        "describe formatted functional_text_lzo.alltypes")
+        "describe formatted functional_text_lzo.alltypes",
+        compare=compare_describe_formatted)
+
     # Describe an unpartitioned table.
-    self.exec_and_compare_hive_and_impala_hs2("describe formatted tpch.lineitem")
-    self.exec_and_compare_hive_and_impala_hs2("describe formatted functional.jointbl")
+    self.exec_and_compare_hive_and_impala_hs2("describe formatted tpch.lineitem",
+        compare=compare_describe_formatted)
+    self.exec_and_compare_hive_and_impala_hs2("describe formatted functional.jointbl",
+        compare=compare_describe_formatted)
 
     # Create and describe an unpartitioned and partitioned Avro table created
     # by Impala without any column definitions.
@@ -91,20 +115,19 @@ class TestMetadataQueryStatements(ImpalaTestSuite):
     self.client.execute((
         "create table %s.%s with serdeproperties ('avro.schema.url'='%s') stored as avro"
         % (unique_database, "avro_alltypes_nopart", self.AVRO_SCHEMA_LOC)))
-    self.exec_and_compare_hive_and_impala_hs2("describe formatted avro_alltypes_nopart")
+    self.exec_and_compare_hive_and_impala_hs2("describe formatted avro_alltypes_nopart",
+        compare=compare_describe_formatted)
 
     self.client.execute((
         "create table %s.%s partitioned by (year int, month int) "
         "with serdeproperties ('avro.schema.url'='%s') stored as avro"
         % (unique_database, "avro_alltypes_part", self.AVRO_SCHEMA_LOC)))
-    self.exec_and_compare_hive_and_impala_hs2("describe formatted avro_alltypes_part")
+    self.exec_and_compare_hive_and_impala_hs2("describe formatted avro_alltypes_part",
+        compare=compare_describe_formatted)
 
-    try:
-      # Describe a view
-      self.exec_and_compare_hive_and_impala_hs2(\
-          "describe formatted functional.alltypes_view_sub")
-    except AssertionError:
-      pytest.xfail("Investigate minor difference in displaying null vs empty values")
+    self.exec_and_compare_hive_and_impala_hs2(\
+        "describe formatted functional.alltypes_view_sub",
+        compare=compare_describe_formatted)
 
   @pytest.mark.execute_serially # due to data src setup/teardown
   def test_show_data_sources(self, vector):

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/e6e2baea/tests/performance/query_exec_functions.py
----------------------------------------------------------------------
diff --git a/tests/performance/query_exec_functions.py b/tests/performance/query_exec_functions.py
index 3abcae2..f7d00ec 100644
--- a/tests/performance/query_exec_functions.py
+++ b/tests/performance/query_exec_functions.py
@@ -245,7 +245,7 @@ def execute_using_jdbc(query, query_config):
   cmd = query_config.jdbc_client_cmd + " -q \"%s\"" % query_string
   return run_query_capture_results(cmd, query, exit_on_error=False)
 
-def parse_jdbc_query_results(stdout, stderr):
+def parse_jdbc_query_results(stdout, stderr, query):
   """
   Parse query execution results for the Impala JDBC client
 
@@ -260,10 +260,10 @@ def parse_jdbc_query_results(stdout, stderr):
       time_taken = float(('%s.%s') % (match.group(1), match.group(2)))
       break
   result_data = re.findall(r'\[START\]----\n(.*?)\n----\[END\]', stdout, re.DOTALL)[0]
-  return create_exec_result(time_taken, result_data)
+  return create_exec_result(time_taken, result_data, query)
 
-def create_exec_result(time_taken, result_data):
-  exec_result = HiveQueryResult()
+def create_exec_result(time_taken, result_data, query):
+  exec_result = HiveQueryResult(query)
   if result_data:
     LOG.debug('Data:\n%s\n' % result_data)
     exec_result.data = result_data
@@ -296,7 +296,7 @@ def run_query_capture_results(cmd, query, exit_on_error):
     exec_result.query_error = msg
     return exec_result
   # The command completed
-  exec_result = parse_jdbc_query_results(stdout, stderr)
+  exec_result = parse_jdbc_query_results(stdout, stderr, query)
   exec_result.query = query
   exec_result.start_time = start_time
   return exec_result

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/e6e2baea/tests/performance/query_executor.py
----------------------------------------------------------------------
diff --git a/tests/performance/query_executor.py b/tests/performance/query_executor.py
index 56065b8..5d9fc61 100644
--- a/tests/performance/query_executor.py
+++ b/tests/performance/query_executor.py
@@ -91,6 +91,7 @@ class JdbcQueryExecConfig(ImpalaQueryExecConfig):
 
     Constructed on the fly, since the impalad it points to can change.
     """
+    assert self.transport is not None
     return JdbcQueryExecConfig.JDBC_CLIENT_PATH + ' -i "%s" -t %s' % (self._impalad,
                                                                       self.transport)
 


Mime
View raw message