superset-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From johnbod...@apache.org
Subject [incubator-superset] branch master updated: [DB Engine] Support old and new Presto syntax (#7977)
Date Mon, 05 Aug 2019 18:57:07 GMT
This is an automated email from the ASF dual-hosted git repository.

johnbodley pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-superset.git


The following commit(s) were added to refs/heads/master by this push:
     new d58dbad  [DB Engine] Support old and new Presto syntax (#7977)
d58dbad is described below

commit d58dbad076f61cfd05fa8280ad13442d29360873
Author: Erik Ritter <erik.ritter@airbnb.com>
AuthorDate: Mon Aug 5 11:56:56 2019 -0700

    [DB Engine] Support old and new Presto syntax (#7977)
---
 docs/installation.rst               | 12 ++++++++++++
 superset/db_engine_specs/presto.py  | 27 +++++++++++++++++++++------
 superset/views/database/__init__.py |  4 +++-
 tests/db_engine_specs_test.py       |  2 ++
 4 files changed, 38 insertions(+), 7 deletions(-)

diff --git a/docs/installation.rst b/docs/installation.rst
index 68bc5ce..919d6ed 100644
--- a/docs/installation.rst
+++ b/docs/installation.rst
@@ -647,6 +647,18 @@ Note that you can run the ``superset refresh_druid`` command to refresh
the
 metadata from your Druid cluster(s)
 
 
+Presto
+------
+
+By default Superset assumes the most recent version of Presto is being used when
+querying the datasource. If you're using an older version of presto, you can configure
+it in the ``extra`` parameter::
+
+    {
+        "version": "0.123"
+    }
+
+
 CORS
 ----
 
diff --git a/superset/db_engine_specs/presto.py b/superset/db_engine_specs/presto.py
index 4f4b03e..6708b06 100644
--- a/superset/db_engine_specs/presto.py
+++ b/superset/db_engine_specs/presto.py
@@ -16,6 +16,7 @@
 # under the License.
 # pylint: disable=C,R,W
 from collections import OrderedDict
+from distutils.version import StrictVersion
 import logging
 import re
 import textwrap
@@ -797,7 +798,7 @@ class PrestoEngineSpec(BaseEngineSpec):
         full_table_name = table_name
         if schema_name and "." not in table_name:
             full_table_name = "{}.{}".format(schema_name, table_name)
-        pql = cls._partition_query(full_table_name)
+        pql = cls._partition_query(full_table_name, database)
         col_names, latest_parts = cls.latest_partition(
             table_name, schema_name, database, show_first=True
         )
@@ -872,7 +873,9 @@ class PrestoEngineSpec(BaseEngineSpec):
         return utils.error_msg_from_exception(e)
 
     @classmethod
-    def _partition_query(cls, table_name, limit=0, order_by=None, filters=None):
+    def _partition_query(
+        cls, table_name, database, limit=0, order_by=None, filters=None
+    ):
         """Returns a partition query
 
         :param table_name: the name of the table to get partitions from
@@ -900,10 +903,20 @@ class PrestoEngineSpec(BaseEngineSpec):
                 l.append(f"{field} = '{value}'")
             where_clause = "WHERE " + " AND ".join(l)
 
+        presto_version = database.get_extra().get("version")
+
+        # Partition select syntax changed in v0.199, so check here.
+        # Default to the new syntax if version is unset.
+        partition_select_clause = (
+            f'SELECT * FROM "{table_name}$partitions"'
+            if not presto_version
+            or StrictVersion(presto_version) >= StrictVersion("0.199")
+            else f"SHOW PARTITIONS FROM {table_name}"
+        )
+
         sql = textwrap.dedent(
             f"""\
-            SELECT * FROM "{table_name}$partitions"
-
+            {partition_select_clause}
             {where_clause}
             {order_by_clause}
             {limit_clause}
@@ -965,7 +978,7 @@ class PrestoEngineSpec(BaseEngineSpec):
             )
         column_names = indexes[0]["column_names"]
         part_fields = [(column_name, True) for column_name in column_names]
-        sql = cls._partition_query(table_name, 1, part_fields)
+        sql = cls._partition_query(table_name, database, 1, part_fields)
         df = database.get_df(sql, schema)
         return column_names, cls._latest_partition_from_df(df)
 
@@ -1012,7 +1025,9 @@ class PrestoEngineSpec(BaseEngineSpec):
             if field not in kwargs.keys():
                 field_to_return = field
 
-        sql = cls._partition_query(table_name, 1, [(field_to_return, True)], kwargs)
+        sql = cls._partition_query(
+            table_name, database, 1, [(field_to_return, True)], kwargs
+        )
         df = database.get_df(sql, schema)
         if df.empty:
             return ""
diff --git a/superset/views/database/__init__.py b/superset/views/database/__init__.py
index 15b0d52..9b7c8cf 100644
--- a/superset/views/database/__init__.py
+++ b/superset/views/database/__init__.py
@@ -143,7 +143,9 @@ class DatabaseMixin:  # noqa
             'Specify it as **"schemas_allowed_for_csv_upload": '
             '["public", "csv_upload"]**. '
             "If database flavor does not support schema or any schema is allowed "
-            "to be accessed, just leave the list empty",
+            "to be accessed, just leave the list empty"
+            "4. the ``version`` field is a string specifying the this db's version. "
+            "This should be used with Presto DBs so that the syntax is correct",
             True,
         ),
         "impersonate_user": _(
diff --git a/tests/db_engine_specs_test.py b/tests/db_engine_specs_test.py
index c9c398f..a70c8cd 100644
--- a/tests/db_engine_specs_test.py
+++ b/tests/db_engine_specs_test.py
@@ -766,6 +766,7 @@ class DbEngineSpecsTestCase(SupersetTestCase):
     def test_presto_extra_table_metadata(self):
         db = mock.Mock()
         db.get_indexes = mock.Mock(return_value=[{"column_names": ["ds", "hour"]}])
+        db.get_extra = mock.Mock(return_value={})
         df = pd.DataFrame({"ds": ["01-01-19"], "hour": [1]})
         db.get_df = mock.Mock(return_value=df)
         result = PrestoEngineSpec.extra_table_metadata(db, "test_table", "test_schema")
@@ -774,6 +775,7 @@ class DbEngineSpecsTestCase(SupersetTestCase):
     def test_presto_where_latest_partition(self):
         db = mock.Mock()
         db.get_indexes = mock.Mock(return_value=[{"column_names": ["ds", "hour"]}])
+        db.get_extra = mock.Mock(return_value={})
         df = pd.DataFrame({"ds": ["01-01-19"], "hour": [1]})
         db.get_df = mock.Mock(return_value=df)
         columns = [{"name": "ds"}, {"name": "hour"}]


Mime
View raw message