superset-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From maximebeauche...@apache.org
Subject [incubator-superset] branch master updated: Making thrift, pyhive and tableschema as extra_requires (#6696)
Date Sat, 19 Jan 2019 22:27:24 GMT
This is an automated email from the ASF dual-hosted git repository.

maximebeauchemin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-superset.git


The following commit(s) were added to refs/heads/master by this push:
     new f742b98  Making thrift, pyhive and tableschema as extra_requires (#6696)
f742b98 is described below

commit f742b9876bc2787b7b2228fa3ce9a515da92f275
Author: Maxime Beauchemin <maximebeauchemin@gmail.com>
AuthorDate: Sat Jan 19 14:27:18 2019 -0800

    Making thrift, pyhive and tableschema as extra_requires (#6696)
    
    * Making thrift, pyhive and tableschema as extra_requires
    
    Looking at the dependency tree for license related questions, I noticed
    that tableschema had a huge tree, and only people running Hive really
    need it. Making this as well as pyhive and thrift optional.
    
    Also bumping some python dependencies
    
    * Run pip-compile
    
    * Removing refs to past.builtins (from future lib)
    
    * Add thrift
---
 UPDATING.md                        |  6 ++++++
 requirements-dev.txt               |  2 ++
 requirements.txt                   | 34 +++-------------------------------
 setup.py                           | 14 ++++++++------
 superset/connectors/base/models.py |  3 +--
 superset/connectors/sqla/views.py  |  3 +--
 superset/dataframe.py              |  5 ++---
 superset/db_engine_specs.py        |  8 ++++----
 superset/db_engines/hive.py        |  9 +++++----
 superset/utils/core.py             | 10 ++++------
 superset/viz.py                    |  5 ++---
 tests/celery_tests.py              |  4 +---
 12 files changed, 39 insertions(+), 64 deletions(-)

diff --git a/UPDATING.md b/UPDATING.md
index 97ed710..03ae1da 100644
--- a/UPDATING.md
+++ b/UPDATING.md
@@ -3,6 +3,12 @@
 This file documents any backwards-incompatible changes in Superset and
 assists people when migrating to a new version.
 
+## Superset 0.32.0
+* If you use `Hive` or `Presto`, we've moved some dependencies that were
+  in the main package as optional now. To get these packages,
+  run `pip install superset[presto]` and/or `pip install superset[hive]` as
+  required.
+
 ## Superset 0.31.0
 * boto3 / botocore was removed from the dependency list. If you use s3
 as a place to store your SQL Lab result set or Hive uploads, you may
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 0ac1c57..dc564fb 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -9,8 +9,10 @@ mysqlclient==1.3.13
 pip-tools==3.1.0
 psycopg2-binary==2.7.5
 pycodestyle==2.4.0
+pyhive==0.6.1
 pylint==1.9.2
 python-dotenv==0.10.1
 redis==2.10.6
 statsd==3.3.0
+thrift==0.11.0
 tox==3.5.3
diff --git a/requirements.txt b/requirements.txt
index c503e27..9767f28 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,11 +7,9 @@
 alembic==1.0.0            # via flask-migrate
 amqp==2.3.2               # via kombu
 asn1crypto==0.24.0        # via cryptography
-babel==2.6.0              # via flask-babel, flower
+babel==2.6.0              # via flask-babel
 billiard==3.5.0.4         # via celery
 bleach==3.0.2
-cachetools==3.0.0         # via google-auth
-cchardet==1.0.0           # via tabulator
 celery==4.2.0
 certifi==2018.8.24        # via requests
 cffi==1.11.5              # via cryptography
@@ -23,7 +21,6 @@ croniter==0.3.26
 cryptography==2.4.2
 decorator==4.3.0          # via retry
 defusedxml==0.5.0         # via python3-openid
-et-xmlfile==1.0.1         # via openpyxl
 flask-appbuilder==1.12.1
 flask-babel==0.11.1       # via flask-appbuilder
 flask-caching==1.4.0
@@ -34,67 +31,42 @@ flask-openid==1.2.5       # via flask-appbuilder
 flask-sqlalchemy==2.3.2   # via flask-appbuilder, flask-migrate
 flask-wtf==0.14.2
 flask==1.0.2
-flower==0.9.2
-future==0.16.0            # via pyhive
 geopy==1.11.0
-google-auth==1.6.1        # via gsheetsdb
-gsheetsdb==0.1.9
 gunicorn==19.8.0
 humanize==0.5.1
 idna==2.6
-ijson==2.3                # via tabulator
 isodate==0.6.0
 itsdangerous==0.24        # via flask
-jdcal==1.4                # via openpyxl
 jinja2==2.10              # via flask, flask-babel
-jsonlines==1.2.0          # via tabulator
-jsonschema==2.6.0         # via tableschema
 kombu==4.2.1              # via celery
-linear-tsv==1.1.0         # via tabulator
 mako==1.0.7               # via alembic
 markdown==3.0
 markupsafe==1.0           # via jinja2, mako
-mo-future==2.20.18317     # via moz-sql-parser
-moz-sql-parser==2.19.18318  # via gsheetsdb
 numpy==1.15.2             # via pandas
-openpyxl==2.4.11          # via tabulator
 pandas==0.23.1
 parsedatetime==2.0.0
 pathlib2==2.3.0
 polyline==1.3.2
 py==1.7.0                 # via retry
-pyasn1-modules==0.2.2     # via google-auth
-pyasn1==0.4.4             # via pyasn1-modules, rsa
 pycparser==2.19           # via cffi
 pydruid==0.5.0
-pyhive==0.5.1
-pyparsing==2.3.0          # via moz-sql-parser
 python-dateutil==2.6.1
 python-editor==1.0.3      # via alembic
 python-geohash==0.8.5
 python3-openid==3.1.0     # via flask-openid
-pytz==2018.5              # via babel, celery, flower, pandas
+pytz==2018.5              # via babel, celery, pandas
 pyyaml==3.13
 requests==2.20.0
 retry==0.9.2
-rfc3986==1.1.0            # via tableschema
-rsa==4.0                  # via google-auth
-sasl==0.2.1               # via thrift-sasl
 selenium==3.141.0
 simplejson==3.15.0
-six==1.11.0               # via bleach, cryptography, google-auth, gsheetsdb, isodate, jsonlines, linear-tsv, pathlib2, polyline, pydruid, python-dateutil, sasl, sqlalchemy-utils, tableschema, tabulator, thrift
+six==1.11.0               # via bleach, cryptography, isodate, pathlib2, polyline, pydruid, python-dateutil, sqlalchemy-utils
 sqlalchemy-utils==0.32.21
 sqlalchemy==1.2.2
 sqlparse==0.2.4
-tableschema==1.1.0
-tabulator==1.15.0         # via tableschema
-thrift-sasl==0.3.0
-thrift==0.11.0
-tornado==5.1.1            # via flower
 unicodecsv==0.14.1
 urllib3==1.22             # via requests, selenium
 vine==1.1.4               # via amqp
 webencodings==0.5.1       # via bleach
 werkzeug==0.14.1          # via flask
 wtforms==2.2.1            # via flask-wtf
-xlrd==1.1.0               # via tabulator
diff --git a/setup.py b/setup.py
index 4b05139..01e4d3d 100644
--- a/setup.py
+++ b/setup.py
@@ -82,9 +82,7 @@ setup(
         'flask-compress',
         'flask-migrate',
         'flask-wtf',
-        'flower',  # deprecated
         'geopy',
-        'gsheetsdb>=0.1.9',
         'gunicorn',  # deprecated
         'humanize',
         'idna',
@@ -95,7 +93,6 @@ setup(
         'pathlib2',
         'polyline',
         'pydruid>=0.4.3',
-        'pyhive>=0.4.0',
         'python-dateutil',
         'python-geohash',
         'pyyaml>=3.13',
@@ -106,14 +103,19 @@ setup(
         'sqlalchemy',
         'sqlalchemy-utils',
         'sqlparse',
-        'tableschema',
-        'thrift>=0.9.3',
-        'thrift-sasl>=0.2.1',
         'unicodecsv',
     ],
     extras_require={
         'cors': ['flask-cors>=2.0.0'],
         'console_log': ['console_log==0.2.10'],
+        'hive': [
+            'pyhive>=0.4.0',
+            'tableschema',
+            'thrift-sasl>=0.2.1',
+            'thrift>=0.9.3',
+        ],
+        'presto': ['pyhive>=0.4.0'],
+        'gsheets': ['gsheetsdb>=0.1.9'],
     },
     author='Apache Software Foundation',
     author_email='dev@superset.incubator.apache.org',
diff --git a/superset/connectors/base/models.py b/superset/connectors/base/models.py
index 50ef6d8..39cc585 100644
--- a/superset/connectors/base/models.py
+++ b/superset/connectors/base/models.py
@@ -17,7 +17,6 @@
 # pylint: disable=C,R,W
 import json
 
-from past.builtins import basestring
 from sqlalchemy import (
     and_, Boolean, Column, Integer, String, Text,
 )
@@ -218,7 +217,7 @@ class BaseDatasource(AuditMixinNullable, ImportMixin):
             values, target_column_is_numeric=False, is_list_target=False):
         def handle_single_value(v):
             # backward compatibility with previous <select> components
-            if isinstance(v, basestring):
+            if isinstance(v, str):
                 v = v.strip('\t\n \'"')
                 if target_column_is_numeric:
                     # For backwards compatibility and edge cases
diff --git a/superset/connectors/sqla/views.py b/superset/connectors/sqla/views.py
index 2a14fa3..212d551 100644
--- a/superset/connectors/sqla/views.py
+++ b/superset/connectors/sqla/views.py
@@ -23,7 +23,6 @@ from flask_appbuilder.models.sqla.interface import SQLAInterface
 from flask_appbuilder.security.decorators import has_access
 from flask_babel import gettext as __
 from flask_babel import lazy_gettext as _
-from past.builtins import basestring
 
 from superset import appbuilder, db, security_manager
 from superset.connectors.base.views import DatasourceModelView
@@ -301,7 +300,7 @@ class TableModelView(DatasourceModelView, DeleteMixin, YamlExportMixin):  # noqa
     def edit(self, pk):
         """Simple hack to redirect to explore view after saving"""
         resp = super(TableModelView, self).edit(pk)
-        if isinstance(resp, basestring):
+        if isinstance(resp, str):
             return resp
         return redirect('/superset/explore/table/{}/'.format(pk))
 
diff --git a/superset/dataframe.py b/superset/dataframe.py
index 69fcc53..5cd8ba9 100644
--- a/superset/dataframe.py
+++ b/superset/dataframe.py
@@ -29,7 +29,6 @@ import numpy as np
 import pandas as pd
 from pandas.core.common import _maybe_box_datetimelike
 from pandas.core.dtypes.dtypes import ExtensionDtype
-from past.builtins import basestring
 
 from superset.utils.core import JS_MAX_INTEGER
 
@@ -144,7 +143,7 @@ class SupersetDataFrame(object):
     def is_date(np_dtype, db_type_str):
 
         def looks_daty(s):
-            if isinstance(s, basestring):
+            if isinstance(s, str):
                 return any([s.lower().startswith(ss) for ss in ('time', 'date')])
             return False
 
@@ -203,7 +202,7 @@ class SupersetDataFrame(object):
 
             if not db_type_str or db_type_str.upper() == 'OBJECT':
                 v = sample[col].iloc[0] if not sample[col].empty else None
-                if isinstance(v, basestring):
+                if isinstance(v, str):
                     column['type'] = 'STRING'
                 elif isinstance(v, int):
                     column['type'] = 'INT'
diff --git a/superset/db_engine_specs.py b/superset/db_engine_specs.py
index 00643c0..f724781 100644
--- a/superset/db_engine_specs.py
+++ b/superset/db_engine_specs.py
@@ -40,7 +40,6 @@ import time
 from flask import g
 from flask_babel import lazy_gettext as _
 import pandas
-from past.builtins import basestring
 import sqlalchemy as sqla
 from sqlalchemy import Column, select
 from sqlalchemy.engine import create_engine
@@ -48,7 +47,6 @@ from sqlalchemy.engine.url import make_url
 from sqlalchemy.sql import quoted_name, text
 from sqlalchemy.sql.expression import TextAsFrom
 import sqlparse
-from tableschema import Table
 from werkzeug.utils import secure_filename
 
 from superset import app, conf, db, sql_parse
@@ -143,7 +141,7 @@ class BaseEngineSpec(object):
 
     @classmethod
     def get_datatype(cls, type_code):
-        if isinstance(type_code, basestring) and len(type_code):
+        if isinstance(type_code, str) and len(type_code):
             return type_code.upper()
 
     @classmethod
@@ -709,7 +707,7 @@ class MySQLEngineSpec(BaseEngineSpec):
         datatype = type_code
         if isinstance(type_code, int):
             datatype = cls.type_code_map.get(type_code)
-        if datatype and isinstance(datatype, basestring) and len(datatype):
+        if datatype and isinstance(datatype, str) and len(datatype):
             return datatype
 
     @classmethod
@@ -1123,6 +1121,8 @@ class HiveEngineSpec(PrestoEngineSpec):
         upload_path = config['UPLOAD_FOLDER'] + \
             secure_filename(filename)
 
+        # Optional dependency
+        from tableschema import Table  # pylint: disable=import-error
         hive_table_schema = Table(upload_path).infer()
         column_name_and_type = []
         for column_info in hive_table_schema['fields']:
diff --git a/superset/db_engines/hive.py b/superset/db_engines/hive.py
index f0f8995..6334257 100644
--- a/superset/db_engines/hive.py
+++ b/superset/db_engines/hive.py
@@ -15,14 +15,11 @@
 # specific language governing permissions and limitations
 # under the License.
 # pylint: disable=C,R,W
-from pyhive import hive  # pylint: disable=no-name-in-module
-from TCLIService import ttypes
-from thrift import Thrift
 
 
 # TODO: contribute back to pyhive.
 def fetch_logs(self, max_rows=1024,
-               orientation=ttypes.TFetchOrientation.FETCH_NEXT):
+               orientation=None):
     """Mocked. Retrieve the logs produced by the execution of the query.
     Can be called multiple times to fetch the logs produced after
     the previous call.
@@ -31,6 +28,10 @@ def fetch_logs(self, max_rows=1024,
     .. note::
         This is not a part of DB-API.
     """
+    from pyhive import hive
+    from TCLIService import ttypes
+    from thrift import Thrift
+    orientation = orientation or ttypes.TFetchOrientation.FETCH_NEXT
     try:
         req = ttypes.TGetLogReq(operationHandle=self._operationHandle)
         logs = self._connection.client.GetLog(req).log
diff --git a/superset/utils/core.py b/superset/utils/core.py
index 2fb3bd6..3e38ea5 100644
--- a/superset/utils/core.py
+++ b/superset/utils/core.py
@@ -16,7 +16,6 @@
 # under the License.
 # pylint: disable=C,R,W
 """Utility functions used across Superset"""
-from builtins import object
 from datetime import date, datetime, time, timedelta
 import decimal
 from email.mime.application import MIMEApplication
@@ -48,7 +47,6 @@ import markdown as md
 import numpy
 import pandas as pd
 import parsedatetime
-from past.builtins import basestring
 from pydruid.utils.having import Having
 import sqlalchemy as sa
 from sqlalchemy import event, exc, select, Text
@@ -88,7 +86,7 @@ def flasher(msg, severity=None):
             logging.info(msg)
 
 
-class _memoized(object):  # noqa
+class _memoized:  # noqa
     """Decorator that caches a function's return value each time it is called
 
     If called later with the same arguments, the cached value is returned, and
@@ -503,7 +501,7 @@ def table_has_constraint(table, name, db):
     return False
 
 
-class timeout(object):
+class timeout:
     """
     To be used in a ``with`` block and timeout its content.
     """
@@ -569,7 +567,7 @@ def pessimistic_connection_handling(some_engine):
             connection.should_close_with_result = save_should_close_with_result
 
 
-class QueryStatus(object):
+class QueryStatus:
     """Enum-type class for query statuses"""
 
     STOPPED = 'stopped'
@@ -678,7 +676,7 @@ def send_MIME_email(e_from, e_to, mime_msg, config, dryrun=False):
 
 
 def get_email_address_list(address_string):
-    if isinstance(address_string, basestring):
+    if isinstance(address_string, str):
         if ',' in address_string:
             address_string = address_string.split(',')
         elif '\n' in address_string:
diff --git a/superset/viz.py b/superset/viz.py
index 3bdeb79..a548bc0 100644
--- a/superset/viz.py
+++ b/superset/viz.py
@@ -43,7 +43,6 @@ from markdown import markdown
 import numpy as np
 import pandas as pd
 from pandas.tseries.frequencies import to_offset
-from past.builtins import basestring
 import polyline
 import simplejson as json
 
@@ -1612,8 +1611,8 @@ class SankeyViz(BaseViz):
 
     def get_data(self, df):
         df.columns = ['source', 'target', 'value']
-        df['source'] = df['source'].astype(basestring)
-        df['target'] = df['target'].astype(basestring)
+        df['source'] = df['source'].astype(str)
+        df['target'] = df['target'].astype(str)
         recs = df.to_dict(orient='records')
 
         hierarchy = defaultdict(set)
diff --git a/tests/celery_tests.py b/tests/celery_tests.py
index dbbdf08..80b4101 100644
--- a/tests/celery_tests.py
+++ b/tests/celery_tests.py
@@ -20,8 +20,6 @@ import subprocess
 import time
 import unittest
 
-from past.builtins import basestring
-
 from superset import app, db
 from superset.models.helpers import QueryStatus
 from superset.models.sql_lab import Query
@@ -239,7 +237,7 @@ class CeleryTestCase(SupersetTestCase):
     @staticmethod
     def de_unicode_dict(d):
         def str_if_basestring(o):
-            if isinstance(o, basestring):
+            if isinstance(o, str):
                 return str(o)
             return o
         return {str_if_basestring(k): str_if_basestring(d[k]) for k in d}


Mime
View raw message