superset-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From maximebeauche...@apache.org
Subject [incubator-superset] branch master updated: [bugfix] convert metrics to numeric in dataframe (#4726)
Date Tue, 03 Apr 2018 04:48:16 GMT
This is an automated email from the ASF dual-hosted git repository.

maximebeauchemin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-superset.git


The following commit(s) were added to refs/heads/master by this push:
     new f6fe11f  [bugfix] convert metrics to numeric in dataframe (#4726)
f6fe11f is described below

commit f6fe11f76e0c890184ac37b25e1a4baf30bc94d1
Author: Maxime Beauchemin <maximebeauchemin@gmail.com>
AuthorDate: Mon Apr 2 21:48:14 2018 -0700

    [bugfix] convert metrics to numeric in dataframe (#4726)
    
    * [bugfix] convert metrics to numeric in dataframe
    
    It appears that the dbapi driver and pandas' read_sql sometimes fail to
    return the proper numeric types for metrics, which then show up as
    `object` in the dataframe. This results in "No numeric types to
    aggregate" errors when trying to perform aggregations or pivoting in
    pandas.
    
    This PR looks for metric columns in dataframes that are typed as "object"
    and converts them to numeric using pandas' to_numeric.
    
    * Fix tests
    
    * Remove all iteritems
---
 superset/models/core.py |  2 +-
 superset/viz.py         | 15 ++++++++++++---
 tests/viz_tests.py      |  2 ++
 3 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/superset/models/core.py b/superset/models/core.py
index c32ac5c..6eef48c 100644
--- a/superset/models/core.py
+++ b/superset/models/core.py
@@ -702,7 +702,7 @@ class Database(Model, AuditMixinNullable, ImportMixin):
                 return True
             return False
 
-        for k, v in df.dtypes.iteritems():
+        for k, v in df.dtypes.items():
             if v.type == numpy.object_ and needs_conversion(df[k]):
                 df[k] = df[k].apply(utils.json_dumps_w_dates)
         return df
diff --git a/superset/viz.py b/superset/viz.py
index 5a92cd3..cb20e11 100644
--- a/superset/viz.py
+++ b/superset/viz.py
@@ -170,11 +170,21 @@ class BaseViz(object):
                 if self.datasource.offset:
                     df[DTTM_ALIAS] += timedelta(hours=self.datasource.offset)
                 df[DTTM_ALIAS] += self.time_shift
+
+            self.df_metrics_to_num(df, query_obj.get('metrics') or [])
+
             df.replace([np.inf, -np.inf], np.nan)
             fillna = self.get_fillna_for_columns(df.columns)
             df = df.fillna(fillna)
         return df
 
+    @staticmethod
+    def df_metrics_to_num(df, metrics):
+        """Converting metrics to numeric when pandas.read_sql cannot"""
+        for col, dtype in df.dtypes.items():
+            if dtype.type == np.object_ and col in metrics:
+                df[col] = pd.to_numeric(df[col])
+
     def query_obj(self):
         """Building a query object"""
         form_data = self.form_data
@@ -1060,7 +1070,6 @@ class NVD3TimeSeriesViz(NVD3Viz):
         df = df.fillna(0)
         if fd.get('granularity') == 'all':
             raise Exception(_('Pick a time granularity for your time series'))
-
         if not aggregate:
             df = df.pivot_table(
                 index=DTTM_ALIAS,
@@ -1384,7 +1393,7 @@ class DistributionBarViz(DistributionPieViz):
             pt = (pt / pt.sum()).T
         pt = pt.reindex(row.index)
         chart_data = []
-        for name, ys in pt.iteritems():
+        for name, ys in pt.items():
             if pt[name].dtype.kind not in 'biufc' or name in self.groupby:
                 continue
             if isinstance(name, string_types):
@@ -1395,7 +1404,7 @@ class DistributionBarViz(DistributionPieViz):
                 l = [str(s) for s in name[1:]]  # noqa: E741
                 series_title = ', '.join(l)
             values = []
-            for i, v in ys.iteritems():
+            for i, v in ys.items():
                 x = i
                 if isinstance(x, (tuple, list)):
                     x = ', '.join([text_type(s) for s in x])
diff --git a/tests/viz_tests.py b/tests/viz_tests.py
index 6822837..a5adfc1 100644
--- a/tests/viz_tests.py
+++ b/tests/viz_tests.py
@@ -77,6 +77,8 @@ class BaseVizTestCase(unittest.TestCase):
         results.df.empty = False
         datasource.query = Mock(return_value=results)
         test_viz = viz.BaseViz(datasource, form_data)
+
+        test_viz.df_metrics_to_num = Mock()
         test_viz.get_fillna_for_columns = Mock(return_value=0)
         test_viz.get_df(query_obj)
         mock_call = df.__setitem__.mock_calls[0]

-- 
To stop receiving notification emails like this one, please contact
maximebeauchemin@apache.org.

Mime
View raw message