superset-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From maximebeauche...@apache.org
Subject [incubator-superset] branch master updated: [BUGFIX]: Check datatype of results before converting to DataFrame (#4108)
Date Wed, 24 Jan 2018 04:58:09 GMT
This is an automated email from the ASF dual-hosted git repository.

maximebeauchemin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-superset.git


The following commit(s) were added to refs/heads/master by this push:
     new 4bc5fe5  [BUGFIX]: Check datatype of results before converting to DataFrame (#4108)
4bc5fe5 is described below

commit 4bc5fe549574a74b574a7ec101099b1455cd4a24
Author: Marcus Levine <marcusianl@gmail.com>
AuthorDate: Tue Jan 23 23:58:06 2018 -0500

    [BUGFIX]: Check datatype of results before converting to DataFrame (#4108)
    
    * conditional check on datatype of results before converting to df
    
    fix type checking
    
    fix conditional checks
    
    remove trailing whitespace and fix df_data fallback def
    
    actually remove trailing whitespace
    
    generalized type check to check all columns for dict
    
    refactor dict col check
    
    * move df conversion to helper and add unit test
    
    add missing newlines
    
    another missing newline
    
    fix quotes
    
    more quote fixes
---
 superset/sql_lab.py   | 27 ++++++++++++++++++++++-----
 tests/sqllab_tests.py | 17 +++++++++++++++++
 2 files changed, 39 insertions(+), 5 deletions(-)

diff --git a/superset/sql_lab.py b/superset/sql_lab.py
index 63225f3..87a6b44 100644
--- a/superset/sql_lab.py
+++ b/superset/sql_lab.py
@@ -10,6 +10,7 @@ from time import sleep
 import uuid
 
 from celery.exceptions import SoftTimeLimitExceeded
+import numpy as np
 import pandas as pd
 import sqlalchemy
 from sqlalchemy.orm import sessionmaker
@@ -85,6 +86,26 @@ def get_session(nullpool):
     return session
 
 
+def convert_results_to_df(cursor_description, data):
+    """Convert raw query results to a DataFrame."""
+    column_names = (
+        [col[0] for col in cursor_description] if cursor_description else [])
+    column_names = dedup(column_names)
+
+    # check whether the result set has any nested dict columns
+    if data:
+        first_row = data[0]
+        has_dict_col = any([isinstance(c, dict) for c in first_row])
+        df_data = list(data) if has_dict_col else np.array(data)
+    else:
+        df_data = []
+
+    cdf = dataframe.SupersetDataFrame(
+        pd.DataFrame(df_data, columns=column_names))
+
+    return cdf
+
+
 @celery_app.task(bind=True, soft_time_limit=SQLLAB_TIMEOUT)
 def get_sql_results(
         ctask, query_id, return_results=True, store_results=False,
@@ -224,11 +245,7 @@ def execute_sql(
             },
             default=utils.json_iso_dttm_ser)
 
-    column_names = (
-        [col[0] for col in cursor_description] if cursor_description else [])
-    column_names = dedup(column_names)
-    cdf = dataframe.SupersetDataFrame(
-        pd.DataFrame(list(data), columns=column_names))
+    cdf = convert_results_to_df(cursor_description, data)
 
     query.rows = cdf.size
     query.progress = 100
diff --git a/tests/sqllab_tests.py b/tests/sqllab_tests.py
index 2caf4c2..53144ea 100644
--- a/tests/sqllab_tests.py
+++ b/tests/sqllab_tests.py
@@ -12,6 +12,7 @@ from flask_appbuilder.security.sqla import models as ab_models
 
 from superset import appbuilder, db, sm, utils
 from superset.models.sql_lab import Query
+from superset.sql_lab import convert_results_to_df
 from .base_tests import SupersetTestCase
 
 
@@ -200,6 +201,22 @@ class SqlLabTests(SupersetTestCase):
             user_name='admin',
             raise_on_error=True)
 
+    def test_df_conversion_no_dict(self):
+        cols = [['string_col'], ['int_col']]
+        data = [['a', 4]]
+        cdf = convert_results_to_df(cols, data)
+
+        self.assertEquals(len(data), cdf.size)
+        self.assertEquals(len(cols), len(cdf.columns))
+
+    def test_df_conversion_dict(self):
+        cols = [['string_col'], ['dict_col'], ['int_col']]
+        data = [['a', {'c1': 1, 'c2': 2, 'c3': 3}, 4]]
+        cdf = convert_results_to_df(cols, data)
+
+        self.assertEquals(len(data), cdf.size)
+        self.assertEquals(len(cols), len(cdf.columns))
+
 
 if __name__ == '__main__':
     unittest.main()

-- 
To stop receiving notification emails like this one, please contact
maximebeauchemin@apache.org.

Mime
View raw message