superset-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From maximebeauche...@apache.org
Subject [incubator-superset] branch master updated: Fix regression around low row limit for CSV exports (#5866)
Date Wed, 19 Sep 2018 20:30:34 GMT
This is an automated email from the ASF dual-hosted git repository.

maximebeauchemin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-superset.git


The following commit(s) were added to refs/heads/master by this push:
     new b9e3159  Fix regression around low row limit for CSV exports (#5866)
b9e3159 is described below

commit b9e3159f7c7fc5631e3bf6afc025281e5d27205b
Author: Maxime Beauchemin <maximebeauchemin@gmail.com>
AuthorDate: Wed Sep 19 13:30:25 2018 -0700

    Fix regression around low row limit for CSV exports (#5866)
    
    * Fix regression around low row limit for CSV exports
    
    * fix tests
    
    * Still trying to fix tests
---
 superset/config.py            | 11 ++++---
 superset/views/core.py        | 15 +++++++--
 tests/base_tests.py           |  2 +-
 tests/celery_tests.py         | 74 +++++++++++++++++++------------------------
 tests/superset_test_config.py |  2 --
 5 files changed, 53 insertions(+), 51 deletions(-)

diff --git a/superset/config.py b/superset/config.py
index a5e4f29..80f6f85 100644
--- a/superset/config.py
+++ b/superset/config.py
@@ -275,8 +275,13 @@ BACKUP_COUNT = 30
 # Set this API key to enable Mapbox visualizations
 MAPBOX_API_KEY = os.environ.get('MAPBOX_API_KEY', '')
 
-# Maximum number of rows returned in the SQL editor
-SQL_MAX_ROW = 1000
+# Maximum number of rows returned from a database
+# in async mode, no more than SQL_MAX_ROW will be returned and stored
+# in the results backend. This also becomes the limit when exporting CSVs
+SQL_MAX_ROW = 100000
+
+# Limit to be returned to the frontend.
+DISPLAY_MAX_ROW = 1000
 
 # Maximum number of tables/views displayed in the dropdown window in SQL Lab.
 MAX_TABLE_NAMES = 3000
@@ -302,8 +307,6 @@ class CeleryConfig(object):
 CELERY_CONFIG = CeleryConfig
 """
 CELERY_CONFIG = None
-SQL_CELERY_DB_FILE_PATH = os.path.join(DATA_DIR, 'celerydb.sqlite')
-SQL_CELERY_RESULTS_DB_FILE_PATH = os.path.join(DATA_DIR, 'celery_results.sqlite')
 
 # static http headers to be served by your Superset server.
 # This header prevents iFrames from other domains and
diff --git a/superset/views/core.py b/superset/views/core.py
index e021709..7a6a058 100755
--- a/superset/views/core.py
+++ b/superset/views/core.py
@@ -2354,7 +2354,18 @@ class Superset(BaseSupersetView):
             return json_error_response(security_manager.get_table_access_error_msg(
                 '{}'.format(rejected_tables)), status=403)
 
-        return json_success(utils.zlib_decompress_to_string(blob))
+        payload = utils.zlib_decompress_to_string(blob)
+        display_limit = app.config.get('DISPLAY_MAX_ROW', None)
+        if display_limit:
+            payload_json = json.loads(payload)
+            payload_json['data'] = payload_json['data'][:display_limit]
+        return json_success(
+            json.dumps(
+                payload_json,
+                default=utils.json_iso_dttm_ser,
+                ignore_nan=True,
+            ),
+        )
 
     @has_access_api
     @expose('/stop_query/', methods=['POST'])
@@ -2407,7 +2418,7 @@ class Superset(BaseSupersetView):
                 tmp_table_name,
             )
 
-        client_id = request.form.get('client_id') or utils.shortid()
+        client_id = request.form.get('client_id') or utils.shortid()[:10]
 
         query = Query(
             database_id=int(database_id),
diff --git a/tests/base_tests.py b/tests/base_tests.py
index 051e7c4..62d3938 100644
--- a/tests/base_tests.py
+++ b/tests/base_tests.py
@@ -172,7 +172,7 @@ class SupersetTestCase(unittest.TestCase):
                     perm.view_menu and table.perm in perm.view_menu.name):
                 security_manager.del_permission_role(public_role, perm)
 
-    def run_sql(self, sql, client_id, user_name=None, raise_on_error=False):
+    def run_sql(self, sql, client_id=None, user_name=None, raise_on_error=False):
         if user_name:
             self.logout()
             self.login(username=(user_name if user_name else 'admin'))
diff --git a/tests/celery_tests.py b/tests/celery_tests.py
index 58302db..06b1003 100644
--- a/tests/celery_tests.py
+++ b/tests/celery_tests.py
@@ -6,7 +6,6 @@ from __future__ import print_function
 from __future__ import unicode_literals
 
 import json
-import os
 import subprocess
 import time
 import unittest
@@ -14,7 +13,7 @@ import unittest
 import pandas as pd
 from past.builtins import basestring
 
-from superset import app, cli, db, security_manager
+from superset import app, db
 from superset.models.helpers import QueryStatus
 from superset.models.sql_lab import Query
 from superset.sql_parse import SupersetQuery
@@ -23,13 +22,12 @@ from .base_tests import SupersetTestCase
 
 
 BASE_DIR = app.config.get('BASE_DIR')
+CELERY_SLEEP_TIME = 5
 
 
 class CeleryConfig(object):
-    BROKER_URL = 'sqla+sqlite:///' + app.config.get('SQL_CELERY_DB_FILE_PATH')
+    BROKER_URL = app.config.get('CELERY_RESULT_BACKEND')
     CELERY_IMPORTS = ('superset.sql_lab', )
-    CELERY_RESULT_BACKEND = (
-        'db+sqlite:///' + app.config.get('SQL_CELERY_RESULTS_DB_FILE_PATH'))
     CELERY_ANNOTATIONS = {'sql_lab.add': {'rate_limit': '10/s'}}
     CONCURRENCY = 1
 
@@ -89,29 +87,13 @@ class CeleryTestCase(SupersetTestCase):
 
     @classmethod
     def setUpClass(cls):
-        try:
-            os.remove(app.config.get('SQL_CELERY_DB_FILE_PATH'))
-        except OSError as e:
-            app.logger.warn(str(e))
-        try:
-            os.remove(app.config.get('SQL_CELERY_RESULTS_DB_FILE_PATH'))
-        except OSError as e:
-            app.logger.warn(str(e))
-
-        security_manager.sync_role_definitions()
-
-        worker_command = BASE_DIR + '/bin/superset worker'
+        db.session.query(Query).delete()
+        db.session.commit()
+
+        worker_command = BASE_DIR + '/bin/superset worker -w 2'
         subprocess.Popen(
             worker_command, shell=True, stdout=subprocess.PIPE)
 
-        admin = security_manager.find_user('admin')
-        if not admin:
-            security_manager.add_user(
-                'admin', 'admin', ' user', 'admin@fab.org',
-                security_manager.find_role('Admin'),
-                password='general')
-        cli.load_examples_run(load_test_data=True)
-
     @classmethod
     def tearDownClass(cls):
         subprocess.call(
@@ -123,7 +105,7 @@ class CeleryTestCase(SupersetTestCase):
             shell=True,
         )
 
-    def run_sql(self, db_id, sql, client_id, cta='false', tmp_table='tmp',
+    def run_sql(self, db_id, sql, client_id=None, cta='false', tmp_table='tmp',
                 async_='false'):
         self.login()
         resp = self.client.post(
@@ -151,11 +133,13 @@ class CeleryTestCase(SupersetTestCase):
         main_db = get_main_database(db.session)
         db_id = main_db.id
         eng = main_db.get_sqla_engine()
+        tmp_table_name = 'tmp_async_22'
+        self.drop_table_if_exists(tmp_table_name, main_db)
         perm_name = 'can_sql_json'
         sql_where = (
             "SELECT name FROM ab_permission WHERE name='{}'".format(perm_name))
         result2 = self.run_sql(
-            db_id, sql_where, '2', tmp_table='tmp_table_2', cta='true')
+            db_id, sql_where, '2', tmp_table=tmp_table_name, cta='true')
         self.assertEqual(QueryStatus.SUCCESS, result2['query']['state'])
         self.assertEqual([], result2['data'])
         self.assertEqual([], result2['columns'])
@@ -170,8 +154,7 @@ class CeleryTestCase(SupersetTestCase):
         main_db = get_main_database(db.session)
         db_id = main_db.id
         sql_empty_result = 'SELECT * FROM ab_user WHERE id=666'
-        result3 = self.run_sql(
-            db_id, sql_empty_result, '3', tmp_table='tmp_table_3', cta='true')
+        result3 = self.run_sql(db_id, sql_empty_result, '3')
         self.assertEqual(QueryStatus.SUCCESS, result3['query']['state'])
         self.assertEqual([], result3['data'])
         self.assertEqual([], result3['columns'])
@@ -179,22 +162,31 @@ class CeleryTestCase(SupersetTestCase):
         query3 = self.get_query_by_id(result3['query']['serverId'])
         self.assertEqual(QueryStatus.SUCCESS, query3.status)
 
+    def drop_table_if_exists(self, table_name, database=None):
+        """Drop table if it exists, works on any DB"""
+        sql = 'DROP TABLE {}'.format(table_name)
+        db_id = database.id
+        if database:
+            database.allow_dml = True
+            db.session.flush()
+        return self.run_sql(db_id, sql)
+
     def test_run_async_query(self):
         main_db = get_main_database(db.session)
-        eng = main_db.get_sqla_engine()
+        db_id = main_db.id
+
+        self.drop_table_if_exists('tmp_async_1', main_db)
+
         sql_where = "SELECT name FROM ab_role WHERE name='Admin'"
         result = self.run_sql(
-            main_db.id, sql_where, '4', async_='true', tmp_table='tmp_async_1',
+            db_id, sql_where, '4', async_='true', tmp_table='tmp_async_1',
             cta='true')
         assert result['query']['state'] in (
             QueryStatus.PENDING, QueryStatus.RUNNING, QueryStatus.SUCCESS)
 
-        time.sleep(1)
+        time.sleep(CELERY_SLEEP_TIME)
 
         query = self.get_query_by_id(result['query']['serverId'])
-        df = pd.read_sql_query(query.select_sql, con=eng)
-        self.assertEqual(QueryStatus.SUCCESS, query.status)
-        self.assertEqual([{'name': 'Admin'}], df.to_dict(orient='records'))
         self.assertEqual(QueryStatus.SUCCESS, query.status)
         self.assertTrue('FROM tmp_async_1' in query.select_sql)
         self.assertEqual(
@@ -202,27 +194,25 @@ class CeleryTestCase(SupersetTestCase):
             "WHERE name='Admin' LIMIT 666", query.executed_sql)
         self.assertEqual(sql_where, query.sql)
         self.assertEqual(0, query.rows)
-        self.assertEqual(666, query.limit)
         self.assertEqual(False, query.limit_used)
         self.assertEqual(True, query.select_as_cta)
         self.assertEqual(True, query.select_as_cta_used)
 
     def test_run_async_query_with_lower_limit(self):
         main_db = get_main_database(db.session)
-        eng = main_db.get_sqla_engine()
+        db_id = main_db.id
+        self.drop_table_if_exists('tmp_async_2', main_db)
+
         sql_where = "SELECT name FROM ab_role WHERE name='Alpha' LIMIT 1"
         result = self.run_sql(
-            main_db.id, sql_where, '5', async_='true', tmp_table='tmp_async_2',
+            db_id, sql_where, '5', async_='true', tmp_table='tmp_async_2',
             cta='true')
         assert result['query']['state'] in (
             QueryStatus.PENDING, QueryStatus.RUNNING, QueryStatus.SUCCESS)
 
-        time.sleep(1)
+        time.sleep(CELERY_SLEEP_TIME)
 
         query = self.get_query_by_id(result['query']['serverId'])
-        df = pd.read_sql_query(query.select_sql, con=eng)
-        self.assertEqual(QueryStatus.SUCCESS, query.status)
-        self.assertEqual([{'name': 'Alpha'}], df.to_dict(orient='records'))
         self.assertEqual(QueryStatus.SUCCESS, query.status)
         self.assertTrue('FROM tmp_async_2' in query.select_sql)
         self.assertEqual(
diff --git a/tests/superset_test_config.py b/tests/superset_test_config.py
index 3076a05..aacbd6a 100644
--- a/tests/superset_test_config.py
+++ b/tests/superset_test_config.py
@@ -12,7 +12,6 @@ SUPERSET_WEBSERVER_PORT = 8081
 if 'SUPERSET__SQLALCHEMY_DATABASE_URI' in os.environ:
     SQLALCHEMY_DATABASE_URI = os.environ.get('SUPERSET__SQLALCHEMY_DATABASE_URI')
 
-SQL_CELERY_RESULTS_DB_FILE_PATH = os.path.join(DATA_DIR, 'celery_results.sqlite')
 SQL_SELECT_AS_CTA = True
 SQL_MAX_ROW = 666
 
@@ -28,7 +27,6 @@ CACHE_CONFIG = {'CACHE_TYPE': 'simple'}
 class CeleryConfig(object):
     BROKER_URL = 'redis://localhost'
     CELERY_IMPORTS = ('superset.sql_lab', )
-    CELERY_RESULT_BACKEND = 'db+sqlite:///' + SQL_CELERY_RESULTS_DB_FILE_PATH
     CELERY_ANNOTATIONS = {'sql_lab.add': {'rate_limit': '10/s'}}
     CONCURRENCY = 1
 


Mime
View raw message