superset-commits mailing list archives

From maximebeauche...@apache.org
Subject [incubator-superset] branch master updated: Deprecate auto-generated metrics (#5461)
Date Thu, 17 Jan 2019 06:32:20 GMT
This is an automated email from the ASF dual-hosted git repository.

maximebeauchemin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-superset.git


The following commit(s) were added to refs/heads/master by this push:
     new 88964b7  Deprecate auto-generated metrics (#5461)
88964b7 is described below

commit 88964b7dfcc8355ef4e42b1599197bdff156e8f5
Author: Maxime Beauchemin <maximebeauchemin@gmail.com>
AuthorDate: Wed Jan 16 22:32:13 2019 -0800

    Deprecate auto-generated metrics (#5461)
    
    * [WiP] deprecate auto-generated metrics & fix examples
    
    Picking up some leftover pieces that are no longer needed since the
    introduction of the MetricsControl
    
    * db merge
    
    * db merge
    
    * fix migration
    
    * Creating metrics required for tests
---
 superset/connectors/base/models.py                 |  9 +--
 superset/connectors/base/views.py                  |  2 +-
 superset/connectors/druid/models.py                | 80 +---------------------
 superset/connectors/druid/views.py                 | 12 +---
 superset/connectors/sqla/models.py                 | 40 +----------
 superset/data/birth_names.py                       |  8 ++-
 superset/data/country_map.py                       | 17 ++++-
 superset/data/energy.py                            |  8 +++
 superset/data/misc_dashboard.py                    |  4 +-
 superset/data/multi_line.py                        |  5 +-
 superset/data/multiformat_time_series.py           |  8 +--
 superset/data/world_bank.py                        | 13 ++++
 .../versions/7467e77870e4_remove_aggs.py           | 44 ++++++++++++
 superset/migrations/versions/c829ff0b37d0_.py      | 22 ++++++
 superset/migrations/versions/fbd55e0f83eb_.py      | 22 ++++++
 superset/views/core.py                             |  2 +-
 tests/druid_tests.py                               |  5 --
 17 files changed, 150 insertions(+), 151 deletions(-)
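
With the per-column aggregate flags (sum, avg, min, max, count_distinct) removed,
the example data loaders below register the metrics they need explicitly as
SqlMetric objects. A minimal sketch of that pattern, assuming an existing
SqlaTable instance `tbl` already bound to the main database (the helper name
ensure_metric is illustrative and not part of this commit):

    # Sketch of the explicit-metric pattern used by superset/data/energy.py below.
    # Assumes `tbl` is an existing SqlaTable; ensure_metric is an illustrative helper.
    from superset import db
    from superset.connectors.sqla.models import SqlMetric

    def ensure_metric(table, name, expression):
        # Only append the metric if one with that name is not already defined.
        if not any(m.metric_name == name for m in table.metrics):
            table.metrics.append(SqlMetric(metric_name=name, expression=expression))

    ensure_metric(tbl, 'sum__value', 'SUM(value)')
    db.session.merge(tbl)
    db.session.commit()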

diff --git a/superset/connectors/base/models.py b/superset/connectors/base/models.py
index 8ed9e78..50ef6d8 100644
--- a/superset/connectors/base/models.py
+++ b/superset/connectors/base/models.py
@@ -350,13 +350,8 @@ class BaseColumn(AuditMixinNullable, ImportMixin):
     verbose_name = Column(String(1024))
     is_active = Column(Boolean, default=True)
     type = Column(String(32))
-    groupby = Column(Boolean, default=False)
-    count_distinct = Column(Boolean, default=False)
-    sum = Column(Boolean, default=False)
-    avg = Column(Boolean, default=False)
-    max = Column(Boolean, default=False)
-    min = Column(Boolean, default=False)
-    filterable = Column(Boolean, default=False)
+    groupby = Column(Boolean, default=True)
+    filterable = Column(Boolean, default=True)
     description = Column(Text)
     is_dttm = None
 
diff --git a/superset/connectors/base/views.py b/superset/connectors/base/views.py
index b2f203e..d73dc19 100644
--- a/superset/connectors/base/views.py
+++ b/superset/connectors/base/views.py
@@ -27,4 +27,4 @@ class DatasourceModelView(SupersetModelView):
             raise SupersetException(Markup(
                 'Cannot delete a datasource that has slices attached to it.'
                 "Here's the list of associated charts: " +
-                ''.join([o.slice_link for o in obj.slices])))
+                ''.join([o.slice_name for o in obj.slices])))
diff --git a/superset/connectors/druid/models.py b/superset/connectors/druid/models.py
index 105dfba..5776790 100644
--- a/superset/connectors/druid/models.py
+++ b/superset/connectors/druid/models.py
@@ -237,12 +237,6 @@ class DruidCluster(Model, AuditMixinNullable, ImportMixin):
                     if col_obj.type == 'STRING':
                         col_obj.groupby = True
                         col_obj.filterable = True
-                    if col_obj.type == 'hyperUnique' or col_obj.type == 'thetaSketch':
-                        col_obj.count_distinct = True
-                    if col_obj.is_num:
-                        col_obj.sum = True
-                        col_obj.min = True
-                        col_obj.max = True
                 datasource.refresh_metrics()
         session.commit()
 
@@ -280,8 +274,7 @@ class DruidColumn(Model, BaseColumn):
 
     export_fields = (
         'datasource_id', 'column_name', 'is_active', 'type', 'groupby',
-        'count_distinct', 'sum', 'avg', 'max', 'min', 'filterable',
-        'description', 'dimension_spec_json', 'verbose_name',
+        'filterable', 'description', 'dimension_spec_json', 'verbose_name',
     )
     update_from_object_fields = export_fields
     export_parent = 'datasource'
@@ -306,77 +299,6 @@ class DruidColumn(Model, BaseColumn):
             metric_type='count',
             json=json.dumps({'type': 'count', 'name': 'count'}),
         )
-        # Somehow we need to reassign this for UDAFs
-        if self.type in ('DOUBLE', 'FLOAT'):
-            corrected_type = 'DOUBLE'
-        else:
-            corrected_type = self.type
-
-        if self.sum and self.is_num:
-            mt = corrected_type.lower() + 'Sum'
-            name = 'sum__' + self.column_name
-            metrics[name] = DruidMetric(
-                metric_name=name,
-                metric_type='sum',
-                verbose_name='SUM({})'.format(self.column_name),
-                json=json.dumps({
-                    'type': mt, 'name': name, 'fieldName': self.column_name}),
-            )
-
-        if self.avg and self.is_num:
-            mt = corrected_type.lower() + 'Avg'
-            name = 'avg__' + self.column_name
-            metrics[name] = DruidMetric(
-                metric_name=name,
-                metric_type='avg',
-                verbose_name='AVG({})'.format(self.column_name),
-                json=json.dumps({
-                    'type': mt, 'name': name, 'fieldName': self.column_name}),
-            )
-
-        if self.min and self.is_num:
-            mt = corrected_type.lower() + 'Min'
-            name = 'min__' + self.column_name
-            metrics[name] = DruidMetric(
-                metric_name=name,
-                metric_type='min',
-                verbose_name='MIN({})'.format(self.column_name),
-                json=json.dumps({
-                    'type': mt, 'name': name, 'fieldName': self.column_name}),
-            )
-        if self.max and self.is_num:
-            mt = corrected_type.lower() + 'Max'
-            name = 'max__' + self.column_name
-            metrics[name] = DruidMetric(
-                metric_name=name,
-                metric_type='max',
-                verbose_name='MAX({})'.format(self.column_name),
-                json=json.dumps({
-                    'type': mt, 'name': name, 'fieldName': self.column_name}),
-            )
-        if self.count_distinct:
-            name = 'count_distinct__' + self.column_name
-            if self.type == 'hyperUnique' or self.type == 'thetaSketch':
-                metrics[name] = DruidMetric(
-                    metric_name=name,
-                    verbose_name='COUNT(DISTINCT {})'.format(self.column_name),
-                    metric_type=self.type,
-                    json=json.dumps({
-                        'type': self.type,
-                        'name': name,
-                        'fieldName': self.column_name,
-                    }),
-                )
-            else:
-                metrics[name] = DruidMetric(
-                    metric_name=name,
-                    verbose_name='COUNT(DISTINCT {})'.format(self.column_name),
-                    metric_type='count_distinct',
-                    json=json.dumps({
-                        'type': 'cardinality',
-                        'name': name,
-                        'fieldNames': [self.column_name]}),
-                )
         return metrics
 
     def refresh_metrics(self):
diff --git a/superset/connectors/druid/views.py b/superset/connectors/druid/views.py
index 3191043..703ea39 100644
--- a/superset/connectors/druid/views.py
+++ b/superset/connectors/druid/views.py
@@ -50,11 +50,9 @@ class DruidColumnInlineView(CompactCRUDMixin, SupersetModelView):  # noqa
 
     edit_columns = [
         'column_name', 'verbose_name', 'description', 'dimension_spec_json', 'datasource',
-        'groupby', 'filterable', 'count_distinct', 'sum', 'min', 'max']
+        'groupby', 'filterable']
     add_columns = edit_columns
-    list_columns = [
-        'column_name', 'verbose_name', 'type', 'groupby', 'filterable', 'count_distinct',
-        'sum', 'min', 'max']
+    list_columns = ['column_name', 'verbose_name', 'type', 'groupby', 'filterable']
     can_delete = False
     page_size = 500
     label_columns = {
@@ -63,12 +61,6 @@ class DruidColumnInlineView(CompactCRUDMixin, SupersetModelView):  # noqa
         'datasource': _('Datasource'),
         'groupby': _('Groupable'),
         'filterable': _('Filterable'),
-        'count_distinct': _('Count Distinct'),
-        'sum': _('Sum'),
-        'min': _('Min'),
-        'max': _('Max'),
-        'verbose_name': _('Verbose Name'),
-        'description': _('Description'),
     }
     description_columns = {
         'filterable': _(
diff --git a/superset/connectors/sqla/models.py b/superset/connectors/sqla/models.py
index 1b65be7..10591d3 100644
--- a/superset/connectors/sqla/models.py
+++ b/superset/connectors/sqla/models.py
@@ -102,7 +102,7 @@ class TableColumn(Model, BaseColumn):
 
     export_fields = (
         'table_id', 'column_name', 'verbose_name', 'is_dttm', 'is_active',
-        'type', 'groupby', 'count_distinct', 'sum', 'avg', 'max', 'min',
+        'type', 'groupby',
         'filterable', 'expression', 'description', 'python_date_format',
         'database_expression',
     )
@@ -185,43 +185,6 @@ class TableColumn(Model, BaseColumn):
                 self.type or '', dttm)
             return s or "'{}'".format(dttm.strftime('%Y-%m-%d %H:%M:%S.%f'))
 
-    def get_metrics(self):
-        # TODO deprecate, this is not needed since MetricsControl
-        metrics = []
-        M = SqlMetric  # noqa
-        quoted = self.column_name
-        if self.sum:
-            metrics.append(M(
-                metric_name='sum__' + self.column_name,
-                metric_type='sum',
-                expression='SUM({})'.format(quoted),
-            ))
-        if self.avg:
-            metrics.append(M(
-                metric_name='avg__' + self.column_name,
-                metric_type='avg',
-                expression='AVG({})'.format(quoted),
-            ))
-        if self.max:
-            metrics.append(M(
-                metric_name='max__' + self.column_name,
-                metric_type='max',
-                expression='MAX({})'.format(quoted),
-            ))
-        if self.min:
-            metrics.append(M(
-                metric_name='min__' + self.column_name,
-                metric_type='min',
-                expression='MIN({})'.format(quoted),
-            ))
-        if self.count_distinct:
-            metrics.append(M(
-                metric_name='count_distinct__' + self.column_name,
-                metric_type='count_distinct',
-                expression='COUNT(DISTINCT {})'.format(quoted),
-            ))
-        return {m.metric_name: m for m in metrics}
-
 
 class SqlMetric(Model, BaseMetric):
 
@@ -878,7 +841,6 @@ class SqlaTable(Model, BaseDatasource):
             self.columns.append(dbcol)
             if not any_date_col and dbcol.is_time:
                 any_date_col = col.name
-            metrics += dbcol.get_metrics().values()
 
         metrics.append(M(
             metric_name='count',
diff --git a/superset/data/birth_names.py b/superset/data/birth_names.py
index 05cbb34..379fdc8 100644
--- a/superset/data/birth_names.py
+++ b/superset/data/birth_names.py
@@ -23,7 +23,7 @@ import pandas as pd
 from sqlalchemy import DateTime, String
 
 from superset import db, security_manager
-from superset.connectors.sqla.models import TableColumn
+from superset.connectors.sqla.models import SqlMetric, TableColumn
 from superset.utils.core import get_or_create_main_db
 from .helpers import (
     config,
@@ -71,6 +71,12 @@ def load_birth_names():
             expression="CASE WHEN state = 'CA' THEN num ELSE 0 END",
         ))
 
+    if not any(col.metric_name == 'sum__num' for col in obj.metrics):
+        obj.metrics.append(SqlMetric(
+            metric_name='sum__num',
+            expression='SUM(num)',
+        ))
+
     db.session.merge(obj)
     db.session.commit()
     obj.fetch_metadata()
diff --git a/superset/data/country_map.py b/superset/data/country_map.py
index 207d50d..c1c2b41 100644
--- a/superset/data/country_map.py
+++ b/superset/data/country_map.py
@@ -21,6 +21,7 @@ import pandas as pd
 from sqlalchemy import BigInteger, Date, String
 
 from superset import db
+from superset.connectors.sqla.models import SqlMetric
 from superset.utils import core as utils
 from .helpers import (
     DATA_FOLDER,
@@ -67,6 +68,11 @@ def load_country_map_data():
         obj = TBL(table_name='birth_france_by_region')
     obj.main_dttm_col = 'dttm'
     obj.database = utils.get_or_create_main_db()
+    if not any(col.metric_name == 'avg__2004' for col in obj.metrics):
+        obj.metrics.append(SqlMetric(
+            metric_name='avg__2004',
+            expression='AVG(2004)',
+        ))
     db.session.merge(obj)
     db.session.commit()
     obj.fetch_metadata()
@@ -79,7 +85,16 @@ def load_country_map_data():
         'where': '',
         'viz_type': 'country_map',
         'entity': 'DEPT_ID',
-        'metric': 'avg__2004',
+        'metric': {
+            'expressionType': 'SIMPLE',
+            'column': {
+                'type': 'INT',
+                'column_name': '2004',
+            },
+            'aggregate': 'AVG',
+            'label': 'Boys',
+            'optionName': 'metric_112342',
+        },
         'row_limit': 500000,
     }
 
diff --git a/superset/data/energy.py b/superset/data/energy.py
index d8ca325..c04eb46 100644
--- a/superset/data/energy.py
+++ b/superset/data/energy.py
@@ -24,6 +24,7 @@ import pandas as pd
 from sqlalchemy import Float, String
 
 from superset import db
+from superset.connectors.sqla.models import SqlMetric
 from superset.utils import core as utils
 from .helpers import DATA_FOLDER, merge_slice, misc_dash_slices, Slice, TBL
 
@@ -51,6 +52,13 @@ def load_energy():
         tbl = TBL(table_name=tbl_name)
     tbl.description = 'Energy consumption'
     tbl.database = utils.get_or_create_main_db()
+
+    if not any(col.metric_name == 'sum__value' for col in tbl.metrics):
+        tbl.metrics.append(SqlMetric(
+            metric_name='sum__value',
+            expression='SUM(value)',
+        ))
+
     db.session.merge(tbl)
     db.session.commit()
     tbl.fetch_metadata()
diff --git a/superset/data/misc_dashboard.py b/superset/data/misc_dashboard.py
index 31028c6..bfecc59 100644
--- a/superset/data/misc_dashboard.py
+++ b/superset/data/misc_dashboard.py
@@ -88,9 +88,9 @@ def load_misc_dashboard():
         "children": [],
         "id": "CHART-r19KVMNCE7",
         "meta": {
-            "chartId": 3978,
+            "chartId": 3971,
             "height": 34,
-            "sliceName": "Calendar Heatmap multiformat 7",
+            "sliceName": "Calendar Heatmap multiformat 0",
             "width": 4
         },
         "type": "CHART"
diff --git a/superset/data/multi_line.py b/superset/data/multi_line.py
index 3bd3126..d6c0ac8 100644
--- a/superset/data/multi_line.py
+++ b/superset/data/multi_line.py
@@ -45,8 +45,11 @@ def load_multi_line():
             'viz_type': 'line_multi',
             'line_charts': [ids[0]],
             'line_charts_2': [ids[1]],
-            'since': '1960-01-01',
+            'since': '1970',
+            'until': '1995',
             'prefix_metric_with_slice_name': True,
+            'show_legend': False,
+            'x_axis_format': '%Y',
         }),
     )
 
diff --git a/superset/data/multiformat_time_series.py b/superset/data/multiformat_time_series.py
index edf68e8..5dec85a 100644
--- a/superset/data/multiformat_time_series.py
+++ b/superset/data/multiformat_time_series.py
@@ -89,8 +89,8 @@ def load_multiformat_time_series():
             'metrics': ['count'],
             'granularity_sqla': col.column_name,
             'row_limit': config.get('ROW_LIMIT'),
-            'since': '1 year ago',
-            'until': 'now',
+            'since': '2015',
+            'until': '2016',
             'where': '',
             'viz_type': 'cal_heatmap',
             'domain_granularity': 'month',
@@ -98,11 +98,11 @@ def load_multiformat_time_series():
         }
 
         slc = Slice(
-            slice_name='Calendar Heatmap multiformat ' + str(i),
+            slice_name=f'Calendar Heatmap multiformat {i}',
             viz_type='cal_heatmap',
             datasource_type='table',
             datasource_id=tbl.id,
             params=get_slice_json(slice_data),
         )
         merge_slice(slc)
-    misc_dash_slices.add(slc.slice_name)
+    misc_dash_slices.add('Calendar Heatmap multiformat 0')
diff --git a/superset/data/world_bank.py b/superset/data/world_bank.py
index c9d8c1f..9ad28f0 100644
--- a/superset/data/world_bank.py
+++ b/superset/data/world_bank.py
@@ -25,6 +25,7 @@ import pandas as pd
 from sqlalchemy import DateTime, String
 
 from superset import db
+from superset.connectors.sqla.models import SqlMetric
 from superset.utils import core as utils
 from .helpers import (
     config,
@@ -67,6 +68,18 @@ def load_world_bank_health_n_pop():
     tbl.main_dttm_col = 'year'
     tbl.database = utils.get_or_create_main_db()
     tbl.filter_select_enabled = True
+
+    metrics = [
+        'sum__SP_POP_TOTL', 'sum__SH_DYN_AIDS', 'sum__SH_DYN_AIDS',
+        'sum__SP_RUR_TOTL_ZS', 'sum__SP_DYN_LE00_IN',
+    ]
+    for m in metrics:
+        if not any(col.metric_name == m for col in tbl.metrics):
+            tbl.metrics.append(SqlMetric(
+                metric_name=m,
+                expression=f'{m[:3]}({m[5:]})',
+            ))
+
     db.session.merge(tbl)
     db.session.commit()
     tbl.fetch_metadata()
diff --git a/superset/migrations/versions/7467e77870e4_remove_aggs.py b/superset/migrations/versions/7467e77870e4_remove_aggs.py
new file mode 100644
index 0000000..5e28a11
--- /dev/null
+++ b/superset/migrations/versions/7467e77870e4_remove_aggs.py
@@ -0,0 +1,44 @@
+"""remove_aggs
+
+Revision ID: 7467e77870e4
+Revises: c829ff0b37d0
+Create Date: 2018-07-22 08:50:01.078218
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+# revision identifiers, used by Alembic.
+revision = '7467e77870e4'
+down_revision = 'c829ff0b37d0'
+
+
+def upgrade():
+    with op.batch_alter_table('table_columns') as batch_op:
+        batch_op.drop_column('avg')
+        batch_op.drop_column('max')
+        batch_op.drop_column('sum')
+        batch_op.drop_column('count_distinct')
+        batch_op.drop_column('min')
+
+    with op.batch_alter_table('columns') as batch_op:
+        batch_op.drop_column('avg')
+        batch_op.drop_column('max')
+        batch_op.drop_column('sum')
+        batch_op.drop_column('count_distinct')
+        batch_op.drop_column('min')
+
+
+def downgrade():
+    op.add_column('table_columns', sa.Column('min', sa.Boolean(), nullable=True))
+    op.add_column(
+        'table_columns', sa.Column('count_distinct', sa.Boolean(), nullable=True))
+    op.add_column('table_columns', sa.Column('sum', sa.Boolean(), nullable=True))
+    op.add_column('table_columns', sa.Column('max', sa.Boolean(), nullable=True))
+    op.add_column('table_columns', sa.Column('avg', sa.Boolean(), nullable=True))
+
+    op.add_column('columns', sa.Column('min', sa.Boolean(), nullable=True))
+    op.add_column('columns', sa.Column('count_distinct', sa.Boolean(), nullable=True))
+    op.add_column('columns', sa.Column('sum', sa.Boolean(), nullable=True))
+    op.add_column('columns', sa.Column('max', sa.Boolean(), nullable=True))
+    op.add_column('columns', sa.Column('avg', sa.Boolean(), nullable=True))
diff --git a/superset/migrations/versions/c829ff0b37d0_.py b/superset/migrations/versions/c829ff0b37d0_.py
new file mode 100644
index 0000000..939dddb
--- /dev/null
+++ b/superset/migrations/versions/c829ff0b37d0_.py
@@ -0,0 +1,22 @@
+"""empty message
+
+Revision ID: c829ff0b37d0
+Revises: ('4451805bbaa1', '1d9e835a84f9')
+Create Date: 2018-07-22 08:49:48.936117
+
+"""
+
+# revision identifiers, used by Alembic.
+revision = 'c829ff0b37d0'
+down_revision = ('4451805bbaa1', '1d9e835a84f9')
+
+from alembic import op
+import sqlalchemy as sa
+
+
+def upgrade():
+    pass
+
+
+def downgrade():
+    pass
diff --git a/superset/migrations/versions/fbd55e0f83eb_.py b/superset/migrations/versions/fbd55e0f83eb_.py
new file mode 100644
index 0000000..2ca1c18
--- /dev/null
+++ b/superset/migrations/versions/fbd55e0f83eb_.py
@@ -0,0 +1,22 @@
+"""empty message
+
+Revision ID: fbd55e0f83eb
+Revises: ('7467e77870e4', 'de021a1ca60d')
+Create Date: 2018-12-22 17:26:16.113317
+
+"""
+
+# revision identifiers, used by Alembic.
+revision = 'fbd55e0f83eb'
+down_revision = ('7467e77870e4', 'de021a1ca60d')
+
+from alembic import op
+import sqlalchemy as sa
+
+
+def upgrade():
+    pass
+
+
+def downgrade():
+    pass
diff --git a/superset/views/core.py b/superset/views/core.py
index 94a18da..02ff66a 100755
--- a/superset/views/core.py
+++ b/superset/views/core.py
@@ -1410,7 +1410,7 @@ class Superset(BaseSupersetView):
                 form_data.pop('slice_id')  # don't save old slice_id
             slc = models.Slice(owners=[g.user] if g.user else [])
 
-        slc.params = json.dumps(form_data)
+        slc.params = json.dumps(form_data, indent=2, sort_keys=True)
         slc.datasource_name = datasource_name
         slc.viz_type = form_data['viz_type']
         slc.datasource_type = datasource_type
diff --git a/tests/druid_tests.py b/tests/druid_tests.py
index e4109b1..ab26a98 100644
--- a/tests/druid_tests.py
+++ b/tests/druid_tests.py
@@ -372,11 +372,6 @@ class DruidTests(SupersetTestCase):
             .filter(DruidMetric.metric_name.like('%__metric1'))
         )
 
-        self.assertEqual(
-            {metric.metric_name for metric in metrics},
-            {'max__metric1', 'min__metric1', 'sum__metric1'},
-        )
-
         for metric in metrics:
             agg, _ = metric.metric_name.split('__')
 


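For Druid datasources, only the plain count metric is still auto-created after
this change; aggregations that the removed get_metrics() code derived from the
sum/avg/min/max/count_distinct flags now have to be defined explicitly, for
example through the MetricsControl or the metrics CRUD. A minimal sketch that
mirrors what the removed code generated for a numeric column, with `metric1`
chosen purely for illustration:

    # Sketch: explicitly defining the Druid sum metric that the removed
    # get_metrics() code used to derive from the `sum` flag. `metric1` is an
    # illustrative column name; session handling is assumed to happen elsewhere.
    import json
    from superset.connectors.druid.models import DruidMetric

    name = 'sum__metric1'
    sum_metric = DruidMetric(
        metric_name=name,
        metric_type='sum',
        verbose_name='SUM(metric1)',
        json=json.dumps({'type': 'doubleSum', 'name': name, 'fieldName': 'metric1'}),
    )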