superset-commits mailing list archives

From maximebeauche...@apache.org
Subject [incubator-superset] branch master updated: [New Viz] Partition Diagram (#3642)
Date Fri, 13 Oct 2017 04:55:01 GMT
This is an automated email from the ASF dual-hosted git repository.

maximebeauchemin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-superset.git


The following commit(s) were added to refs/heads/master by this push:
     new bad6938  [New Viz] Partition Diagram (#3642)
bad6938 is described below

commit bad6938d1a65453b94f7bfe4e9936e6e066b0775
Author: Jeff Niu <jeffniu22@gmail.com>
AuthorDate: Thu Oct 12 21:54:59 2017 -0700

    [New Viz] Partition Diagram (#3642)
    
    * Added Partition Visualization
    
    * added unit tests
---
 .../assets/images/viz_thumbnails/partition.png     | Bin 0 -> 198125 bytes
 .../javascripts/components/OptionDescription.jsx   |  28 ++
 .../assets/javascripts/explore/stores/controls.jsx | 101 +++++++
 .../assets/javascripts/explore/stores/visTypes.js  |  27 ++
 superset/assets/package.json                       |   1 +
 superset/assets/visualizations/main.js             |   1 +
 superset/assets/visualizations/partition.css       |  27 ++
 superset/assets/visualizations/partition.js        | 333 +++++++++++++++++++++
 superset/viz.py                                    | 157 +++++++++-
 tests/viz_tests.py                                 | 225 ++++++++++++++
 10 files changed, 894 insertions(+), 6 deletions(-)

diff --git a/superset/assets/images/viz_thumbnails/partition.png b/superset/assets/images/viz_thumbnails/partition.png
new file mode 100644
index 0000000..7cf6e13
Binary files /dev/null and b/superset/assets/images/viz_thumbnails/partition.png differ
diff --git a/superset/assets/javascripts/components/OptionDescription.jsx b/superset/assets/javascripts/components/OptionDescription.jsx
new file mode 100644
index 0000000..60cc731
--- /dev/null
+++ b/superset/assets/javascripts/components/OptionDescription.jsx
@@ -0,0 +1,28 @@
+import React from 'react';
+import PropTypes from 'prop-types';
+
+import InfoTooltipWithTrigger from './InfoTooltipWithTrigger';
+
+const propTypes = {
+  option: PropTypes.object.isRequired,
+};
+
+// This component provides a general tooltip for options
+// in a SelectControl
+export default function OptionDescription({ option }) {
+  return (
+    <span>
+      <span className="m-r-5 option-label">
+        {option.label}
+      </span>
+      {option.description &&
+        <InfoTooltipWithTrigger
+          className="m-r-5 text-muted"
+          icon="question-circle-o"
+          tooltip={option.description}
+          label={`descr-${option.label}`}
+        />
+      }
+    </span>);
+}
+OptionDescription.propTypes = propTypes;
diff --git a/superset/assets/javascripts/explore/stores/controls.jsx b/superset/assets/javascripts/explore/stores/controls.jsx
index 78ef33c..da8f22d 100644
--- a/superset/assets/javascripts/explore/stores/controls.jsx
+++ b/superset/assets/javascripts/explore/stores/controls.jsx
@@ -4,6 +4,7 @@ import * as v from '../validators';
 import { ALL_COLOR_SCHEMES, spectrums } from '../../modules/colors';
 import MetricOption from '../../components/MetricOption';
 import ColumnOption from '../../components/ColumnOption';
+import OptionDescription from '../../components/OptionDescription';
 import { t } from '../../locales';
 
 const D3_FORMAT_DOCS = 'D3 format syntax: https://github.com/d3/d3-format';
@@ -98,6 +99,7 @@ export const controls = {
     }),
     description: t('One or many metrics to display'),
   },
+
   y_axis_bounds: {
     type: 'BoundsControl',
     label: t('Y Axis Bounds'),
@@ -108,6 +110,7 @@ export const controls = {
     "this feature will only expand the axis range. It won't " +
     "narrow the data's extent."),
   },
+
   order_by_cols: {
     type: 'SelectControl',
     multi: true,
@@ -909,6 +912,16 @@ export const controls = {
     description: D3_FORMAT_DOCS,
   },
 
+  date_time_format: {
+    type: 'SelectControl',
+    freeForm: true,
+    label: t('Date Time Format'),
+    renderTrigger: true,
+    default: 'smart_date',
+    choices: D3_TIME_FORMAT_OPTIONS,
+    description: D3_FORMAT_DOCS,
+  },
+
   markup_type: {
     type: 'SelectControl',
     label: t('Markup Type'),
@@ -1136,6 +1149,14 @@ export const controls = {
     description: t('Use a log scale for the X axis'),
   },
 
+  log_scale: {
+    type: 'CheckboxControl',
+    label: t('Log Scale'),
+    default: false,
+    renderTrigger: true,
+    description: t('Use a log scale'),
+  },
+
   donut: {
     type: 'CheckboxControl',
     label: t('Donut'),
@@ -1456,5 +1477,85 @@ export const controls = {
     controlName: 'TimeSeriesColumnControl',
   },
 
+  time_series_option: {
+    type: 'SelectControl',
+    label: t('Options'),
+    validators: [v.nonEmpty],
+    default: 'not_time',
+    valueKey: 'value',
+    options: [
+      {
+        label: t('Not Time Series'),
+        value: 'not_time',
+        description: t('Ignore time'),
+      },
+      {
+        label: t('Time Series'),
+        value: 'time_series',
+        description: t('Standard time series'),
+      },
+      {
+        label: t('Aggregate Mean'),
+        value: 'agg_mean',
+        description: t('Mean of values over specified period'),
+      },
+      {
+        label: t('Aggregate Sum'),
+        value: 'agg_sum',
+        description: t('Sum of values over specified period'),
+      },
+      {
+        label: t('Difference'),
+        value: 'point_diff',
+        description: t('Metric change in value from `since` to `until`'),
+      },
+      {
+        label: t('Percent Change'),
+        value: 'point_percent',
+        description: t('Metric percent change in value from `since` to `until`'),
+      },
+      {
+        label: t('Factor'),
+        value: 'point_factor',
+        description: t('Metric factor change from `since` to `until`'),
+      },
+      {
+        label: t('Advanced Analytics'),
+        value: 'adv_anal',
+        description: t('Use the Advanced Analytics options below'),
+      },
+    ],
+    optionRenderer: op => <OptionDescription option={op} />,
+    valueRenderer: op => <OptionDescription option={op} />,
+    description: t('Settings for time series'),
+  },
+
+  equal_date_size: {
+    type: 'CheckboxControl',
+    label: t('Equal Date Sizes'),
+    default: true,
+    renderTrigger: true,
+    description: t('Check to force date partitions to have the same height'),
+  },
+
+  partition_limit: {
+    type: 'TextControl',
+    label: t('Partition Limit'),
+    isInt: true,
+    default: '5',
+    description:
+      t('The maximum number of subdivisions of each group; ' +
+      'lower values are pruned first'),
+  },
+
+  partition_threshold: {
+    type: 'TextControl',
+    label: t('Partition Threshold'),
+    isFloat: true,
+    default: '0.05',
+    description:
+      t('Partitions whose height to parent height proportions are ' +
+      'below this value are pruned'),
+  },
 };
 export default controls;
diff --git a/superset/assets/javascripts/explore/stores/visTypes.js b/superset/assets/javascripts/explore/stores/visTypes.js
index da142ae..0975555 100644
--- a/superset/assets/javascripts/explore/stores/visTypes.js
+++ b/superset/assets/javascripts/explore/stores/visTypes.js
@@ -1155,6 +1155,33 @@ export const visTypes = {
       },
     ],
   },
+
+  partition: {
+    label: 'Partition Diagram',
+    showOnExplore: true,
+    controlPanelSections: [
+      sections.NVD3TimeSeries[0],
+      {
+        label: t('Time Series Options'),
+        expanded: true,
+        controlSetRows: [
+          ['time_series_option'],
+        ],
+      },
+      {
+        label: t('Chart Options'),
+        expanded: true,
+        controlSetRows: [
+          ['color_scheme'],
+          ['number_format', 'date_time_format'],
+          ['partition_limit', 'partition_threshold'],
+          ['log_scale', 'equal_date_size'],
+          ['rich_tooltip'],
+        ],
+      },
+      sections.NVD3TimeSeries[1],
+    ],
+  },
 };
 
 export default visTypes;
diff --git a/superset/assets/package.json b/superset/assets/package.json
index 3dfdb78..06ae765 100644
--- a/superset/assets/package.json
+++ b/superset/assets/package.json
@@ -52,6 +52,7 @@
     "d3-sankey": "^0.4.2",
     "d3-svg-legend": "^1.x",
     "d3-tip": "^0.6.7",
+    "d3-hierarchy": "^1.1.5",
     "datamaps": "^0.5.8",
     "datatables.net-bs": "^1.10.15",
     "distributions": "^1.0.0",
diff --git a/superset/assets/visualizations/main.js b/superset/assets/visualizations/main.js
index dc5ee30..78e81ab 100644
--- a/superset/assets/visualizations/main.js
+++ b/superset/assets/visualizations/main.js
@@ -35,5 +35,6 @@ const vizMap = {
   dual_line: require('./nvd3_vis.js'),
   event_flow: require('./EventFlow.jsx'),
   paired_ttest: require('./paired_ttest.jsx'),
+  partition: require('./partition.js'),
 };
 export default vizMap;
diff --git a/superset/assets/visualizations/partition.css b/superset/assets/visualizations/partition.css
new file mode 100644
index 0000000..e23cca7
--- /dev/null
+++ b/superset/assets/visualizations/partition.css
@@ -0,0 +1,27 @@
+.partition .chart {
+  display: block;
+  margin: auto;
+  font-size: 11px;
+}
+
+.partition rect {
+  stroke: #eee;
+  fill: #aaa;
+  fill-opacity: .8;
+  transition: fill-opacity 180ms linear;
+  cursor: pointer;
+}
+
+.partition rect:hover {
+  fill-opacity: 1;
+}
+
+.partition g text {
+  font-weight: bold;
+  pointer-events: none;
+  fill: rgba(0, 0, 0, 0.8);
+}
+
+.partition g:hover text {
+  fill: rgba(0, 0, 0, 1);
+}
diff --git a/superset/assets/visualizations/partition.js b/superset/assets/visualizations/partition.js
new file mode 100644
index 0000000..a91611c
--- /dev/null
+++ b/superset/assets/visualizations/partition.js
@@ -0,0 +1,333 @@
+/* eslint no-param-reassign: [2, {"props": false}] */
+/* eslint no-use-before-define: ["error", { "functions": false }] */
+import d3 from 'd3';
+import {
+  d3TimeFormatPreset,
+} from '../javascripts/modules/utils';
+import { getColorFromScheme } from '../javascripts/modules/colors';
+
+import './partition.css';
+
+d3.hierarchy = require('d3-hierarchy').hierarchy;
+d3.partition = require('d3-hierarchy').partition;
+
+function init(root) {
+  // Compute dx, dy, x, y for each node and
+  // return an array of nodes in breadth-first order
+  const flat = [];
+  const dy = 1.0 / (root.height + 1);
+  let prev = null;
+  root.each((n) => {
+    n.y = dy * n.depth;
+    n.dy = dy;
+    if (!n.parent) {
+      n.x = 0;
+      n.dx = 1;
+    } else {
+      n.x = prev.depth === n.parent.depth ? 0 : prev.x + prev.dx;
+      n.dx = n.weight / n.parent.sum * n.parent.dx;
+    }
+    prev = n;
+    flat.push(n);
+  });
+  return flat;
+}
+
+// This vis is based on
+// http://mbostock.github.io/d3/talk/20111018/partition.html
+function partitionVis(slice, payload) {
+  const data = payload.data;
+  const fd = slice.formData;
+  const div = d3.select(slice.selector);
+  const metrics = fd.metrics || [];
+
+  // Chart options
+  const logScale = fd.log_scale || false;
+  const chartType = fd.time_series_option || 'not_time';
+  const hasTime = ['adv_anal', 'time_series'].indexOf(chartType) >= 0;
+  const format = d3.format(fd.number_format);
+  const timeFormat = d3TimeFormatPreset(fd.date_time_format);
+
+  div.selectAll('*').remove();
+  d3.selectAll('.nvtooltip').remove();
+  const tooltip = d3
+    .select('body')
+    .append('div')
+    .attr('class', 'nvtooltip')
+    .style('opacity', 0)
+    .style('top', 0)
+    .style('left', 0)
+    .style('position', 'fixed');
+
+  function drawVis(i, dat) {
+    const datum = dat[i];
+    const w = slice.width();
+    const h = slice.height() / data.length;
+    const x = d3.scale.linear().range([0, w]);
+    const y = d3.scale.linear().range([0, h]);
+
+    const viz = div
+      .append('div')
+      .attr('class', 'chart')
+      .style('width', w + 'px')
+      .style('height', h + 'px')
+      .append('svg:svg')
+      .attr('width', w)
+      .attr('height', h);
+
+    // Add padding between multiple visualizations
+    if (i !== data.length - 1 && data.length > 1) {
+      viz.style('padding-bottom', '3px');
+    }
+    if (i !== 0 && data.length > 1) {
+      viz.style('padding-top', '3px');
+    }
+
+    const root = d3.hierarchy(datum);
+
+    function hasDateNode(n) {
+      return metrics.indexOf(n.data.name) >= 0 && hasTime;
+    }
+
+    // node.name is the metric/group name
+    // node.disp is the display value
+    // node.value determines sorting order
+    // node.weight determines partition height
+    // node.sum is the sum of children weights
+    root.eachAfter((n) => {
+      n.disp = n.data.val;
+      n.value = n.disp < 0 ? -n.disp : n.disp;
+      n.weight = n.value;
+      n.name = n.data.name;
+      // If the parent is a metric and we still have
+      // the time column, perform a date-time format
+      if (n.parent && hasDateNode(n.parent)) {
+        // Format timestamp values
+        n.weight = fd.equal_date_size ? 1 : n.value;
+        n.value = n.name;
+        n.name = timeFormat(n.name);
+      }
+      if (logScale) n.weight = Math.log(n.weight + 1);
+    n.disp = n.disp && !isNaN(n.disp) && isFinite(n.disp) ? format(n.disp) : '';
+    });
+    // Perform sort by weight
+    root.sort((a, b) => {
+      const v = b.value - a.value;
+      if (v === 0) {
+        return b.name > a.name ? 1 : -1;
+      }
+      return v;
+    });
+
+    // Prune data based on partition limit and threshold
+    // both are applied at the same time
+    if (fd.partition_threshold && fd.partition_threshold >= 0) {
+      // Compute weight sums as we go
+      root.each((n) => {
+        n.sum = n.children ? n.children.reduce((a, v) => a + v.weight, 0) || 1 : 1;
+        if (n.children) {
+          // Dates are not ordered by weight
+          if (hasDateNode(n)) {
+            if (fd.equal_date_size) {
+              return;
+            }
+            const removeIndices = [];
+            // Keep at least one child
+            for (let j = 1; j < n.children.length; j++) {
+              if (n.children[j].weight / n.sum < fd.partition_threshold) {
+                removeIndices.push(j);
+              }
+            }
+            for (let j = removeIndices.length - 1; j >= 0; j--) {
+              n.children.splice(removeIndices[j], 1);
+            }
+          } else {
+            // Find first child that falls below the threshold
+            let j;
+            for (j = 1; j < n.children.length; j++) {
+              if (n.children[j].weight / n.sum < fd.partition_threshold) {
+                break;
+              }
+            }
+            n.children = n.children.slice(0, j);
+          }
+        }
+      });
+    }
+    if (fd.partition_limit && fd.partition_limit >= 0) {
+      root.each((n) => {
+        if (n.children && n.children.length > fd.partition_limit) {
+          if (!hasDateNode(n)) {
+            n.children = n.children.slice(0, fd.partition_limit);
+          }
+        }
+      });
+    }
+    // Compute final weight sums
+    root.eachAfter((n) => {
+      n.sum = n.children ? n.children.reduce((a, v) => a + v.weight, 0) || 1 : 1;
+    });
+
+    const verboseMap = slice.datasource.verbose_map;
+    function getCategory(depth) {
+      if (!depth) {
+        return 'Metric';
+      }
+      if (hasTime && depth === 1) {
+        return 'Date';
+      }
+      const col = fd.groupby[depth - (hasTime ? 2 : 1)];
+      return verboseMap[col] || col;
+    }
+
+    function getAncestors(d) {
+      const ancestors = [d];
+      let node = d;
+      while (node.parent) {
+        ancestors.push(node.parent);
+        node = node.parent;
+      }
+      return ancestors;
+    }
+
+    function positionAndPopulate(tip, d) {
+      let t = '<table>';
+      if (!fd.rich_tooltip) {
+        t += (
+          '<thead><tr><td colspan="3">' +
+            `<strong class='x-value'>${getCategory(d.depth)}</strong>` +
+            '</td></tr></thead><tbody>'
+        );
+        t += (
+          '<tr class="emph">' +
+            '<td class="legend-color-guide" style="opacity: 0.75">' +
+              `<div style='border: thin solid grey; background-color: ${d.color};'` +
+              '></div>' +
+            '</td>' +
+            `<td>${d.name}</td>` +
+            `<td>${d.disp}</td>` +
+          '</tr>'
+        );
+      } else {
+        const nodes = getAncestors(d);
+        nodes.forEach((n) => {
+          const atNode = n.depth === d.depth;
+          t += '<tbody>';
+          t += (
+            `<tr class='${atNode ? 'emph' : ''}'>` +
+              `<td class='legend-color-guide' style='opacity: ${atNode ? '1' : '0.75'}'>`
+
+                '<div ' +
+                  `style='border: 2px solid ${atNode ? 'black' : 'transparent'};` +
+                    `background-color: ${n.color};'` +
+                '></div>' +
+              '</td>' +
+              `<td>${n.name}</td>` +
+              `<td>${n.disp}</td>` +
+              `<td>${getCategory(n.depth)}</td>` +
+            '</tr>'
+          );
+        });
+      }
+      t += '</tbody></table>';
+      tip.html(t)
+        .style('left', (d3.event.pageX + 13) + 'px')
+        .style('top', (d3.event.pageY - 10) + 'px');
+    }
+
+    const g = viz
+      .selectAll('g')
+      .data(init(root))
+      .enter()
+      .append('svg:g')
+      .attr('transform', d => `translate(${x(d.y)},${y(d.x)})`)
+      .on('click', click)
+      .on('mouseover', (d) => {
+        tooltip
+          .interrupt()
+          .transition()
+          .duration(100)
+          .style('opacity', 0.9);
+        positionAndPopulate(tooltip, d);
+      })
+      .on('mousemove', (d) => {
+        positionAndPopulate(tooltip, d);
+      })
+      .on('mouseout', () => {
+        tooltip
+          .interrupt()
+          .transition()
+          .duration(250)
+          .style('opacity', 0);
+      });
+
+    let kx = w / root.dx;
+    let ky = h / 1;
+
+    g.append('svg:rect')
+      .attr('width', root.dy * kx)
+      .attr('height', d => d.dx * ky);
+
+    g.append('svg:text')
+      .attr('transform', transform)
+      .attr('dy', '0.35em')
+      .style('opacity', d => d.dx * ky > 12 ? 1 : 0)
+      .text((d) => {
+        if (!d.disp) {
+          return d.name;
+        }
+        return `${d.name}: ${d.disp}`;
+      });
+
+    // Apply color scheme
+    g.selectAll('rect')
+      .style('fill', (d) => {
+        d.color = getColorFromScheme(d.name, fd.color_scheme);
+        return d.color;
+      });
+
+    // Zoom out when clicking outside vis
+    // d3.select(window)
+    // .on('click', () => click(root));
+
+    // Keep text centered in its division
+    function transform(d) {
+      return `translate(8,${d.dx * ky / 2})`;
+    }
+
+    // When clicking a subdivision, the vis will zoom in to it
+    function click(d) {
+      if (!d.children) {
+        if (d.parent) {
+          // Clicking on the rightmost level should zoom in
+          return click(d.parent);
+        }
+        return false;
+      }
+      kx = (d.y ? w - 40 : w) / (1 - d.y);
+      ky = h / d.dx;
+      x.domain([d.y, 1]).range([d.y ? 40 : 0, w]);
+      y.domain([d.x, d.x + d.dx]);
+
+      const t = g
+        .transition()
+        .duration(d3.event.altKey ? 7500 : 750)
+        .attr('transform', nd => `translate(${x(nd.y)},${y(nd.x)})`);
+
+      t.select('rect')
+        .attr('width', d.dy * kx)
+        .attr('height', nd => nd.dx * ky);
+
+      t.select('text')
+      .attr('transform', transform)
+      .style('opacity', nd => nd.dx * ky > 12 ? 1 : 0);
+
+      d3.event.stopPropagation();
+      return true;
+    }
+  }
+  for (let i = 0; i < data.length; i++) {
+    drawVis(i, data);
+  }
+}
+
+module.exports = partitionVis;
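
For reference, the new chart is driven entirely by the slice form data, using the control names added in controls.jsx and visTypes.js above. The dictionary below is a hypothetical sketch of those keys as partition.js (fd.*) and superset/viz.py (form_data.get(...)) read them; the values are illustrative placeholders except where the diff above defines a default.

    # Hypothetical form_data for a Partition Diagram slice (illustrative only).
    # Keys are the control names added in this commit; values marked "default"
    # come from controls.jsx above, everything else is a placeholder.
    form_data = {
        'viz_type': 'partition',
        'metrics': ['count'],              # placeholder metric
        'groupby': ['region', 'product'],  # placeholder grouping columns
        'time_series_option': 'not_time',  # default
        'number_format': '.3s',            # any D3 number format string
        'date_time_format': 'smart_date',  # default
        'partition_limit': '5',            # default
        'partition_threshold': '0.05',     # default
        'log_scale': False,                # default
        'equal_date_size': True,           # default
        'rich_tooltip': True,              # placeholder
        'color_scheme': 'bnbColors',       # placeholder scheme name
    }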
diff --git a/superset/viz.py b/superset/viz.py
index a800bc0..1d701b0 100644
--- a/superset/viz.py
+++ b/superset/viz.py
@@ -27,6 +27,7 @@ from flask_babel import lazy_gettext as _
 from markdown import markdown
 import simplejson as json
 from six import string_types, PY3
+from six.moves import reduce
 from dateutil import relativedelta as rdelta
 
 from superset import app, utils, cache, get_manifest_file
@@ -915,7 +916,7 @@ class NVD3TimeSeriesViz(NVD3Viz):
             if isinstance(series_title, string_types):
                 series_title += title_suffix
             elif title_suffix and isinstance(series_title, (list, tuple)):
-                series_title.append(title_suffix)
+                series_title = series_title + (title_suffix,)
 
             d = {
                 "key": series_title,
@@ -928,16 +929,24 @@ class NVD3TimeSeriesViz(NVD3Viz):
             chart_data.append(d)
         return chart_data
 
-    def process_data(self, df):
+    def process_data(self, df, aggregate=False):
         fd = self.form_data
         df = df.fillna(0)
         if fd.get("granularity") == "all":
             raise Exception(_("Pick a time granularity for your time series"))
 
-        df = df.pivot_table(
-            index=DTTM_ALIAS,
-            columns=fd.get('groupby'),
-            values=fd.get('metrics'))
+        if not aggregate:
+            df = df.pivot_table(
+                index=DTTM_ALIAS,
+                columns=fd.get('groupby'),
+                values=fd.get('metrics'))
+        else:
+            df = df.pivot_table(
+                index=DTTM_ALIAS,
+                columns=fd.get('groupby'),
+                values=fd.get('metrics'),
+                fill_value=0,
+                aggfunc=sum)
 
         fm = fd.get("resample_fillmethod")
         if not fm:
@@ -1782,6 +1791,142 @@ class PairedTTestViz(BaseViz):
         return data
 
 
+class PartitionViz(NVD3TimeSeriesViz):
+
+    """
+    A hierarchical data visualization with support for time series.
+    """
+
+    viz_type = 'partition'
+    verbose_name = _("Partition Diagram")
+
+    def query_obj(self):
+        query_obj = super(PartitionViz, self).query_obj()
+        time_op = self.form_data.get('time_series_option', 'not_time')
+        # Return time series data if the user specifies so
+        query_obj['is_timeseries'] = time_op != 'not_time'
+        return query_obj
+
+    def levels_for(self, time_op, groups, df):
+        """
+        Compute the partition at each `level` from the dataframe.
+        """
+        levels = {}
+        for i in range(0, len(groups) + 1):
+            agg_df = df.groupby(groups[:i]) if i else df
+            levels[i] = (
+                agg_df.mean() if time_op == 'agg_mean'
+                else agg_df.sum(numeric_only=True))
+        return levels
+
+    def levels_for_diff(self, time_op, groups, df):
+        # Obtain a unique list of the time grains
+        times = list(set(df[DTTM_ALIAS]))
+        times.sort()
+        until = times[len(times) - 1]
+        since = times[0]
+        # Function describing how to calculate the difference
+        func = {
+            'point_diff': [
+                pd.Series.sub,
+                lambda a, b, fill_value: a - b,
+            ],
+            'point_factor': [
+                pd.Series.div,
+                lambda a, b, fill_value: a / float(b),
+            ],
+            'point_percent': [
+                lambda a, b, fill_value=0: a.div(b, fill_value=fill_value) - 1,
+                lambda a, b, fill_value: a / float(b) - 1,
+            ],
+        }[time_op]
+        agg_df = df.groupby(DTTM_ALIAS).sum()
+        levels = {0: pd.Series({
+            m: func[1](agg_df[m][until], agg_df[m][since], 0)
+            for m in agg_df.columns})}
+        for i in range(1, len(groups) + 1):
+            agg_df = df.groupby([DTTM_ALIAS] + groups[:i]).sum()
+            levels[i] = pd.DataFrame({
+                m: func[0](agg_df[m][until], agg_df[m][since], fill_value=0)
+                for m in agg_df.columns})
+        return levels
+
+    def levels_for_time(self, groups, df):
+        procs = {}
+        for i in range(0, len(groups) + 1):
+            self.form_data['groupby'] = groups[:i]
+            df_drop = df.drop(groups[i:], 1)
+            procs[i] = self.process_data(df_drop, aggregate=True).fillna(0)
+        self.form_data['groupby'] = groups
+        return procs
+
+    def nest_values(self, levels, level=0, metric=None, dims=()):
+        """
+        Nest values at each level on the back-end with
+        access and setting, instead of summing from the bottom.
+        """
+        if not level:
+            return [{
+                'name': m,
+                'val': levels[0][m],
+                'children': self.nest_values(levels, 1, m),
+            } for m in levels[0].index]
+        if level == 1:
+            return [{
+                'name': i,
+                'val': levels[1][metric][i],
+                'children': self.nest_values(levels, 2, metric, (i,)),
+            } for i in levels[1][metric].index]
+        if level >= len(levels):
+            return []
+        return [{
+            'name': i,
+            'val': levels[level][metric][dims][i],
+            'children': self.nest_values(
+                levels, level + 1, metric, dims + (i,)
+            ),
+        } for i in levels[level][metric][dims].index]
+
+    def nest_procs(self, procs, level=-1, dims=(), time=None):
+        if level == -1:
+            return [{
+                'name': m,
+                'children': self.nest_procs(procs, 0, (m,)),
+            } for m in procs[0].columns]
+        if not level:
+            return [{
+                'name': t,
+                'val': procs[0][dims[0]][t],
+                'children': self.nest_procs(procs, 1, dims, t),
+            } for t in procs[0].index]
+        if level >= len(procs):
+            return []
+        return [{
+            'name': i,
+            'val': procs[level][dims][i][time],
+            'children': self.nest_procs(procs, level + 1, dims + (i,), time)
+        } for i in procs[level][dims].columns]
+
+    def get_data(self, df):
+        fd = self.form_data
+        groups = fd.get('groupby', [])
+        time_op = fd.get('time_series_option', 'not_time')
+        if not len(groups):
+            raise ValueError('Please choose at least one groupby')
+        if time_op == 'not_time':
+            levels = self.levels_for('agg_sum', groups, df)
+        elif time_op in ['agg_sum', 'agg_mean']:
+            levels = self.levels_for(time_op, groups, df)
+        elif time_op in ['point_diff', 'point_factor', 'point_percent']:
+            levels = self.levels_for_diff(time_op, groups, df)
+        elif time_op == 'adv_anal':
+            procs = self.levels_for_time(groups, df)
+            return self.nest_procs(procs)
+        else:
+            levels = self.levels_for('agg_sum', [DTTM_ALIAS] + groups, df)
+        return self.nest_values(levels)
+
+
 viz_types = {
     o.viz_type: o for o in globals().values()
     if (
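
The contract between the back end and partition.js: PartitionViz.get_data() returns a list of nested {name, val, children} dictionaries, one root per metric, which the front end turns into a d3.hierarchy. The snippet below is a minimal, self-contained sketch (not part of the commit) that reproduces by hand what levels_for('agg_sum', ...) and nest_values() yield for a toy dataframe.

    # A minimal sketch, not part of the commit: build the levels that
    # levels_for('agg_sum', ['groupA'], df) would compute, then nest them the
    # way nest_values() does, to show the payload.data shape partition.js reads.
    import pandas as pd

    df = pd.DataFrame({
        'groupA': ['a1', 'a1', 'b1'],
        'metric1': [1, 2, 3],
    })

    level0 = df.sum(numeric_only=True)   # level 0: grand total per metric -> metric1: 6
    level1 = df.groupby('groupA').sum()  # level 1: totals per groupA value -> a1: 3, b1: 3

    # nest_values() nests the levels top-down instead of summing bottom-up:
    tree = [{
        'name': 'metric1',
        'val': int(level0['metric1']),
        'children': [
            {'name': idx, 'val': int(val), 'children': []}
            for idx, val in level1['metric1'].items()
        ],
    }]
    # tree == [{'name': 'metric1', 'val': 6, 'children': [
    #     {'name': 'a1', 'val': 3, 'children': []},
    #     {'name': 'b1', 'val': 3, 'children': []}]}]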
diff --git a/tests/viz_tests.py b/tests/viz_tests.py
index a4beab3..fec424a 100644
--- a/tests/viz_tests.py
+++ b/tests/viz_tests.py
@@ -3,6 +3,7 @@ import pandas as pd
 import superset.viz as viz
 
 from superset.utils import DTTM_ALIAS
+from mock import Mock, patch
 
 
 class PairedTTestTestCase(unittest.TestCase):
@@ -135,3 +136,227 @@ class PairedTTestTestCase(unittest.TestCase):
             ],
         }
         self.assertEquals(data, expected)
+
+
+class PartitionVizTestCase(unittest.TestCase):
+
+    @patch('superset.viz.BaseViz.query_obj')
+    def test_query_obj_time_series_option(self, super_query_obj):
+        datasource = Mock()
+        form_data = {}
+        test_viz = viz.PartitionViz(datasource, form_data)
+        super_query_obj.return_value = {}
+        query_obj = test_viz.query_obj()
+        self.assertFalse(query_obj['is_timeseries'])
+        test_viz.form_data['time_series_option'] = 'agg_sum'
+        query_obj = test_viz.query_obj()
+        self.assertTrue(query_obj['is_timeseries'])
+
+    def test_levels_for_computes_levels(self):
+        raw = {}
+        raw[DTTM_ALIAS] = [100, 200, 300, 100, 200, 300, 100, 200, 300]
+        raw['groupA'] = ['a1', 'a1', 'a1', 'b1', 'b1', 'b1', 'c1', 'c1', 'c1']
+        raw['groupB'] = ['a2', 'a2', 'a2', 'b2', 'b2', 'b2', 'c2', 'c2', 'c2']
+        raw['groupC'] = ['a3', 'a3', 'a3', 'b3', 'b3', 'b3', 'c3', 'c3', 'c3']
+        raw['metric1'] = [1, 2, 3, 4, 5, 6, 7, 8, 9]
+        raw['metric2'] = [10, 20, 30, 40, 50, 60, 70, 80, 90]
+        raw['metric3'] = [100, 200, 300, 400, 500, 600, 700, 800, 900]
+        df = pd.DataFrame(raw)
+        groups = ['groupA', 'groupB', 'groupC']
+        time_op = 'agg_sum'
+        test_viz = viz.PartitionViz(Mock(), {})
+        levels = test_viz.levels_for(time_op, groups, df)
+        self.assertEqual(4, len(levels))
+        expected = {
+            DTTM_ALIAS: 1800,
+            'metric1': 45,
+            'metric2': 450,
+            'metric3': 4500,
+        }
+        self.assertEqual(expected, levels[0].to_dict())
+        expected = {
+            DTTM_ALIAS: {'a1': 600, 'b1': 600, 'c1': 600},
+            'metric1': {'a1': 6, 'b1': 15, 'c1': 24},
+            'metric2': {'a1': 60, 'b1': 150, 'c1': 240},
+            'metric3': {'a1': 600, 'b1': 1500, 'c1': 2400},
+        }
+        self.assertEqual(expected, levels[1].to_dict())
+        self.assertEqual(['groupA', 'groupB'], levels[2].index.names)
+        self.assertEqual(
+            ['groupA', 'groupB', 'groupC'],
+            levels[3].index.names,
+        )
+        time_op = 'agg_mean'
+        levels = test_viz.levels_for(time_op, groups, df)
+        self.assertEqual(4, len(levels))
+        expected = {
+            DTTM_ALIAS: 200.0,
+            'metric1': 5.0,
+            'metric2': 50.0,
+            'metric3': 500.0,
+        }
+        self.assertEqual(expected, levels[0].to_dict())
+        expected = {
+            DTTM_ALIAS: {'a1': 200, 'c1': 200, 'b1': 200},
+            'metric1': {'a1': 2, 'b1': 5, 'c1': 8},
+            'metric2': {'a1': 20, 'b1': 50, 'c1': 80},
+            'metric3': {'a1': 200, 'b1': 500, 'c1': 800},
+        }
+        self.assertEqual(expected, levels[1].to_dict())
+        self.assertEqual(['groupA', 'groupB'], levels[2].index.names)
+        self.assertEqual(
+            ['groupA', 'groupB', 'groupC'],
+            levels[3].index.names,
+        )
+
+    def test_levels_for_diff_computes_difference(self):
+        raw = {}
+        raw[DTTM_ALIAS] = [100, 200, 300, 100, 200, 300, 100, 200, 300]
+        raw['groupA'] = ['a1', 'a1', 'a1', 'b1', 'b1', 'b1', 'c1', 'c1', 'c1']
+        raw['groupB'] = ['a2', 'a2', 'a2', 'b2', 'b2', 'b2', 'c2', 'c2', 'c2']
+        raw['groupC'] = ['a3', 'a3', 'a3', 'b3', 'b3', 'b3', 'c3', 'c3', 'c3']
+        raw['metric1'] = [1, 2, 3, 4, 5, 6, 7, 8, 9]
+        raw['metric2'] = [10, 20, 30, 40, 50, 60, 70, 80, 90]
+        raw['metric3'] = [100, 200, 300, 400, 500, 600, 700, 800, 900]
+        df = pd.DataFrame(raw)
+        groups = ['groupA', 'groupB', 'groupC']
+        test_viz = viz.PartitionViz(Mock(), {})
+        time_op = 'point_diff'
+        levels = test_viz.levels_for_diff(time_op, groups, df)
+        expected = {
+            'metric1': 6,
+            'metric2': 60,
+            'metric3': 600,
+        }
+        self.assertEqual(expected, levels[0].to_dict())
+        expected = {
+            'metric1': {'a1': 2, 'b1': 2, 'c1': 2},
+            'metric2': {'a1': 20, 'b1': 20, 'c1': 20},
+            'metric3': {'a1': 200, 'b1': 200, 'c1': 200},
+        }
+        self.assertEqual(expected, levels[1].to_dict())
+        self.assertEqual(4, len(levels))
+        self.assertEqual(['groupA', 'groupB', 'groupC'], levels[3].index.names)
+
+    def test_levels_for_time_calls_process_data_and_drops_cols(self):
+        raw = {}
+        raw[DTTM_ALIAS] = [100, 200, 300, 100, 200, 300, 100, 200, 300]
+        raw['groupA'] = ['a1', 'a1', 'a1', 'b1', 'b1', 'b1', 'c1', 'c1', 'c1']
+        raw['groupB'] = ['a2', 'a2', 'a2', 'b2', 'b2', 'b2', 'c2', 'c2', 'c2']
+        raw['groupC'] = ['a3', 'a3', 'a3', 'b3', 'b3', 'b3', 'c3', 'c3', 'c3']
+        raw['metric1'] = [1, 2, 3, 4, 5, 6, 7, 8, 9]
+        raw['metric2'] = [10, 20, 30, 40, 50, 60, 70, 80, 90]
+        raw['metric3'] = [100, 200, 300, 400, 500, 600, 700, 800, 900]
+        df = pd.DataFrame(raw)
+        groups = ['groupA', 'groupB', 'groupC']
+        test_viz = viz.PartitionViz(Mock(), {'groupby': groups})
+
+        def return_args(df_drop, aggregate):
+            return df_drop
+        test_viz.process_data = Mock(side_effect=return_args)
+        levels = test_viz.levels_for_time(groups, df)
+        self.assertEqual(4, len(levels))
+        cols = [DTTM_ALIAS, 'metric1', 'metric2', 'metric3']
+        self.assertEqual(sorted(cols), sorted(levels[0].columns.tolist()))
+        cols += ['groupA']
+        self.assertEqual(sorted(cols), sorted(levels[1].columns.tolist()))
+        cols += ['groupB']
+        self.assertEqual(sorted(cols), sorted(levels[2].columns.tolist()))
+        cols += ['groupC']
+        self.assertEqual(sorted(cols), sorted(levels[3].columns.tolist()))
+        self.assertEqual(4, len(test_viz.process_data.mock_calls))
+
+    def test_nest_values_returns_hierarchy(self):
+        raw = {}
+        raw['groupA'] = ['a1', 'a1', 'a1', 'b1', 'b1', 'b1', 'c1', 'c1', 'c1']
+        raw['groupB'] = ['a2', 'a2', 'a2', 'b2', 'b2', 'b2', 'c2', 'c2', 'c2']
+        raw['groupC'] = ['a3', 'a3', 'a3', 'b3', 'b3', 'b3', 'c3', 'c3', 'c3']
+        raw['metric1'] = [1, 2, 3, 4, 5, 6, 7, 8, 9]
+        raw['metric2'] = [10, 20, 30, 40, 50, 60, 70, 80, 90]
+        raw['metric3'] = [100, 200, 300, 400, 500, 600, 700, 800, 900]
+        df = pd.DataFrame(raw)
+        test_viz = viz.PartitionViz(Mock(), {})
+        groups = ['groupA', 'groupB', 'groupC']
+        levels = test_viz.levels_for('agg_sum', groups, df)
+        nest = test_viz.nest_values(levels)
+        self.assertEqual(3, len(nest))
+        for i in range(0, 3):
+            self.assertEqual('metric' + str(i + 1), nest[i]['name'])
+        self.assertEqual(3, len(nest[0]['children']))
+        self.assertEqual(1, len(nest[0]['children'][0]['children']))
+        self.assertEqual(1, len(nest[0]['children'][0]['children'][0]['children']))
+
+    def test_nest_procs_returns_hierarchy(self):
+        raw = {}
+        raw[DTTM_ALIAS] = [100, 200, 300, 100, 200, 300, 100, 200, 300]
+        raw['groupA'] = ['a1', 'a1', 'a1', 'b1', 'b1', 'b1', 'c1', 'c1', 'c1']
+        raw['groupB'] = ['a2', 'a2', 'a2', 'b2', 'b2', 'b2', 'c2', 'c2', 'c2']
+        raw['groupC'] = ['a3', 'a3', 'a3', 'b3', 'b3', 'b3', 'c3', 'c3', 'c3']
+        raw['metric1'] = [1, 2, 3, 4, 5, 6, 7, 8, 9]
+        raw['metric2'] = [10, 20, 30, 40, 50, 60, 70, 80, 90]
+        raw['metric3'] = [100, 200, 300, 400, 500, 600, 700, 800, 900]
+        df = pd.DataFrame(raw)
+        test_viz = viz.PartitionViz(Mock(), {})
+        groups = ['groupA', 'groupB', 'groupC']
+        metrics = ['metric1', 'metric2', 'metric3']
+        procs = {}
+        for i in range(0, 4):
+            df_drop = df.drop(groups[i:], 1)
+            pivot = df_drop.pivot_table(
+                index=DTTM_ALIAS,
+                columns=groups[:i],
+                values=metrics,
+            )
+            procs[i] = pivot
+        nest = test_viz.nest_procs(procs)
+        self.assertEqual(3, len(nest))
+        for i in range(0, 3):
+            self.assertEqual('metric' + str(i + 1), nest[i]['name'])
+            self.assertEqual(None, nest[i].get('val'))
+        self.assertEqual(3, len(nest[0]['children']))
+        self.assertEqual(3, len(nest[0]['children'][0]['children']))
+        self.assertEqual(1, len(nest[0]['children'][0]['children'][0]['children']))
+        self.assertEqual(1,
+            len(nest[0]['children']
+                [0]['children']
+                [0]['children']
+                [0]['children'])
+        )
+
+    def test_get_data_calls_correct_method(self):
+        test_viz = viz.PartitionViz(Mock(), {})
+        df = Mock()
+        with self.assertRaises(ValueError):
+            test_viz.get_data(df)
+        test_viz.levels_for = Mock(return_value=1)
+        test_viz.nest_values = Mock(return_value=1)
+        test_viz.form_data['groupby'] = ['groups']
+        test_viz.form_data['time_series_option'] = 'not_time'
+        test_viz.get_data(df)
+        self.assertEqual('agg_sum', test_viz.levels_for.mock_calls[0][1][0])
+        test_viz.form_data['time_series_option'] = 'agg_sum'
+        test_viz.get_data(df)
+        self.assertEqual('agg_sum', test_viz.levels_for.mock_calls[1][1][0])
+        test_viz.form_data['time_series_option'] = 'agg_mean'
+        test_viz.get_data(df)
+        self.assertEqual('agg_mean', test_viz.levels_for.mock_calls[2][1][0])
+        test_viz.form_data['time_series_option'] = 'point_diff'
+        test_viz.levels_for_diff = Mock(return_value=1)
+        test_viz.get_data(df)
+        self.assertEqual('point_diff', test_viz.levels_for_diff.mock_calls[0][1][0])
+        test_viz.form_data['time_series_option'] = 'point_percent'
+        test_viz.get_data(df)
+        self.assertEqual('point_percent', test_viz.levels_for_diff.mock_calls[1][1][0])
+        test_viz.form_data['time_series_option'] = 'point_factor'
+        test_viz.get_data(df)
+        self.assertEqual('point_factor', test_viz.levels_for_diff.mock_calls[2][1][0])
+        test_viz.levels_for_time = Mock(return_value=1)
+        test_viz.nest_procs = Mock(return_value=1)
+        test_viz.form_data['time_series_option'] = 'adv_anal'
+        test_viz.get_data(df)
+        self.assertEqual(1, len(test_viz.levels_for_time.mock_calls))
+        self.assertEqual(1, len(test_viz.nest_procs.mock_calls))
+        test_viz.form_data['time_series_option'] = 'time_series'
+        test_viz.get_data(df)
+        self.assertEqual('agg_sum', test_viz.levels_for.mock_calls[3][1][0])
+        self.assertEqual(7, len(test_viz.nest_values.mock_calls))

-- 
To stop receiving notification emails like this one, please contact
['"commits@superset.apache.org" <commits@superset.apache.org>'].
