superset-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From maximebeauche...@apache.org
Subject [incubator-superset] branch master updated: [load_examples] download data at runtime (#7314)
Date Wed, 17 Apr 2019 20:19:22 GMT
This is an automated email from the ASF dual-hosted git repository.

maximebeauchemin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-superset.git


The following commit(s) were added to refs/heads/master by this push:
     new 3d08266  [load_examples] download data at runtime (#7314)
3d08266 is described below

commit 3d082667140fdd3c9c8918e3a0408b8339d80405
Author: Maxime Beauchemin <maximebeauchemin@gmail.com>
AuthorDate: Wed Apr 17 13:19:14 2019 -0700

    [load_examples] download data at runtime (#7314)
    
    * [load_examples] download data at runtime
    
    When running `superset load_examples` to load example data sets,
    Superset used to load from the local package. This created a few issues
    notably around licensing (what are these datasets licensed as?) and
    around package size.
    
    For now, I moved the data sets here:
    https://github.com/apache-superset/examples-data
    
    Altered the logic to download the data from where it is stored.
    
    * flakes
---
 superset/data/airports.csv.gz                      | Bin 9836 -> 0 bytes
 superset/data/bart-lines.json.gz                   | Bin 1267 -> 0 bytes
 superset/data/bart_lines.py                        |  15 ++--
 .../data/birth_france_data_for_country_map.csv     |  97 ---------------------
 superset/data/birth_names.json.gz                  | Bin 734913 -> 0 bytes
 superset/data/birth_names.py                       |   8 +-
 superset/data/countries.json.gz                    | Bin 14752439 -> 0 bytes
 superset/data/country_map.py                       |   8 +-
 superset/data/energy.json.gz                       | Bin 985 -> 0 bytes
 superset/data/energy.py                            |  10 +--
 superset/data/flight_data.csv.gz                   | Bin 1897423 -> 0 bytes
 superset/data/flights.py                           |  13 ++-
 superset/data/helpers.py                           |  15 ++++
 superset/data/long_lat.py                          |   8 +-
 superset/data/multiformat_time_series.json.gz      | Bin 38387 -> 0 bytes
 superset/data/multiformat_time_series.py           |   9 +-
 superset/data/paris.py                             |  10 +--
 superset/data/paris_iris.json.gz                   | Bin 4427142 -> 0 bytes
 superset/data/random_time_series.json.gz           | Bin 264250 -> 0 bytes
 superset/data/random_time_series.py                |   8 +-
 superset/data/san_francisco.csv.gz                 | Bin 2103726 -> 0 bytes
 superset/data/sf_population.json.gz                | Bin 47217 -> 0 bytes
 superset/data/sf_population_polygons.py            |  10 +--
 superset/data/unicode_test_data.py                 |   8 +-
 superset/data/unicode_utf8_unixnl_test.csv         |  42 ---------
 superset/data/world_bank.py                        |   6 +-
 26 files changed, 64 insertions(+), 203 deletions(-)

diff --git a/superset/data/airports.csv.gz b/superset/data/airports.csv.gz
deleted file mode 100644
index 3043486..0000000
Binary files a/superset/data/airports.csv.gz and /dev/null differ
diff --git a/superset/data/bart-lines.json.gz b/superset/data/bart-lines.json.gz
deleted file mode 100644
index 91f50fb..0000000
Binary files a/superset/data/bart-lines.json.gz and /dev/null differ
diff --git a/superset/data/bart_lines.py b/superset/data/bart_lines.py
index 3244a0a..f4e0b1f 100644
--- a/superset/data/bart_lines.py
+++ b/superset/data/bart_lines.py
@@ -14,9 +14,7 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
-import gzip
 import json
-import os
 
 import pandas as pd
 import polyline
@@ -24,16 +22,17 @@ from sqlalchemy import String, Text
 
 from superset import db
 from superset.utils.core import get_or_create_main_db
-from .helpers import DATA_FOLDER, TBL
+from .helpers import TBL, get_example_data
 
 
 def load_bart_lines():
     tbl_name = 'bart_lines'
-    with gzip.open(os.path.join(DATA_FOLDER, 'bart-lines.json.gz')) as f:
-        df = pd.read_json(f, encoding='latin-1')
-        df['path_json'] = df.path.map(json.dumps)
-        df['polyline'] = df.path.map(polyline.encode)
-        del df['path']
+    content = get_example_data('bart-lines.json.gz')
+    df = pd.read_json(content, encoding='latin-1')
+    df['path_json'] = df.path.map(json.dumps)
+    df['polyline'] = df.path.map(polyline.encode)
+    del df['path']
+
     df.to_sql(
         tbl_name,
         db.engine,
diff --git a/superset/data/birth_france_data_for_country_map.csv b/superset/data/birth_france_data_for_country_map.csv
deleted file mode 100644
index 5de8d45..0000000
--- a/superset/data/birth_france_data_for_country_map.csv
+++ /dev/null
@@ -1,97 +0,0 @@
-DEPT_ID,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014
-FR-01,6866,6706,6976,7228,6949,7323,7157,7282,7265,7242,7296,7354
-FR-02,6841,6761,6889,7041,6847,7012,6941,7050,6939,6755,6559,6468
-FR-03,3391,3335,3363,3503,3277,3289,3308,3402,3196,3288,3198,3152
-FR-04,1460,1522,1514,1536,1569,1569,1513,1547,1578,1561,1629,1538
-FR-05,1408,1403,1395,1461,1448,1441,1513,1470,1399,1441,1406,1383
-FR-06,11144,11514,11631,11754,11633,12275,11949,12257,11999,12087,12149,12170
-FR-07,3367,3176,3414,3484,3484,3447,3307,3380,3360,3405,3179,3254
-FR-08,3532,3422,3420,3343,3552,3522,3312,3254,3137,3258,3021,2966
-FR-09,1350,1412,1389,1499,1570,1493,1452,1473,1404,1425,1413,1364
-FR-10,3428,3553,3692,3685,3619,3721,3745,3722,3635,3587,3436,3377
-FR-11,3421,3321,3502,3661,3723,3778,3797,3770,3789,3669,3618,3516
-FR-12,2558,2614,2701,2829,2769,2748,2640,2694,2682,2615,2475,2555
-FR-13,23908,24056,24411,25371,25126,25412,25547,26410,25889,26328,26762,26384
-FR-14,8231,8257,8251,8531,8310,8183,8304,8111,8041,7833,7644,7466
-FR-15,1344,1396,1391,1398,1357,1300,1377,1274,1237,1230,1290,1214
-FR-16,3401,3514,3570,3653,3618,3666,3408,3564,3459,3490,3472,3378
-FR-17,5935,5900,6069,6089,5903,6136,6209,6185,6065,5916,5778,5846
-FR-18,3301,3271,3313,3231,3341,3303,3229,3341,3159,3120,3128,3097
-FR-19,2133,2250,2319,2327,2245,2263,2231,2247,2196,2163,2055,2094
-FR-21,6079,6052,5844,5986,6015,5960,5852,5963,5906,5905,5769,5779
-FR-22,6413,6317,6287,6743,6473,6494,6559,6438,6221,6184,5927,5790
-FR-23,1011,957,1054,1038,1013,1029,1044,919,967,998,897,879
-FR-24,3607,3690,3662,3758,3760,3832,3672,3665,3645,3547,3486,3479
-FR-25,6529,6798,6782,6993,6804,7097,6914,7105,6826,6778,6732,6659
-FR-26,5525,5703,5579,5945,5833,5927,5846,5915,5978,5912,6026,5965
-FR-27,7213,7220,7386,7402,7471,7717,7714,7715,7738,7676,7352,7242
-FR-28,5370,5363,5585,5632,5440,5677,5573,5716,5540,5548,5312,5295
-FR-29,9900,9963,9851,10184,9962,10040,9733,9823,9615,9597,9277,9088
-FR-2A,1232,1228,1348,1337,1284,1370,1422,1408,1422,1398,1317,1371
-FR-2B,1455,1444,1525,1474,1564,1569,1580,1591,1662,1612,1599,1616
-FR-30,7446,7777,7901,8384,8190,8449,8354,8494,8467,8196,8427,8216
-FR-31,13989,13900,14233,14957,14968,15415,15317,15770,16031,16347,16290,16641
-FR-32,1635,1625,1666,1580,1669,1689,1718,1671,1587,1668,1648,1643
-FR-33,15610,15819,15722,16539,16514,16636,17072,17271,17098,17097,17265,17303
-FR-34,11380,11562,11636,12191,12252,12564,12531,12658,13000,12902,12899,13008
-FR-35,12134,12072,12405,12687,12606,12837,12917,12876,13033,12892,12729,12555
-FR-36,2312,2314,2394,2283,2341,2371,2178,2221,2137,2136,2006,2030
-FR-37,6620,6594,6644,6813,6434,6811,6828,6886,6696,6796,6594,6718
-FR-38,14885,15356,15447,15830,15646,15999,15916,16136,15739,15948,15724,15664
-FR-39,2964,3017,2924,3021,3037,3045,2897,2865,2758,2741,2675,2637
-FR-40,3477,3621,3574,3755,3953,3862,3914,3993,3853,3880,3864,3696
-FR-41,3617,3678,3724,3815,3752,3847,3786,3777,3667,3704,3581,3517
-FR-42,8804,8906,8975,9184,9222,9357,9174,9403,9357,9473,9086,9183
-FR-43,2458,2416,2485,2426,2301,2398,2390,2348,2300,2244,2247,2157
-FR-44,15795,15988,16301,16530,16664,16763,16766,17159,16747,16821,16822,16700
-FR-45,8265,8424,8200,8635,8644,8524,8499,8757,8686,8689,8526,8355
-FR-46,1537,1430,1477,1563,1511,1555,1435,1506,1423,1487,1345,1415
-FR-47,3173,3245,3341,3426,3399,3378,3445,3359,3397,3332,3361,3347
-FR-48,768,772,760,784,781,779,798,736,695,711,663,651
-FR-49,10018,10085,10148,10548,10227,10270,10165,10312,10320,10061,10016,9781
-FR-50,5490,5487,5538,5448,5356,5384,5231,5238,5193,5282,4998,4911
-FR-51,6916,6979,7108,7118,6932,7065,7061,7182,7070,6761,7000,6887
-FR-52,2100,2095,2029,2104,2062,2037,1944,1889,1916,1847,1923,1881
-FR-53,3846,3932,3981,4118,3835,3912,3897,3962,3733,3750,3656,3456
-FR-54,8398,8671,8542,8743,8421,8559,8487,8536,8499,8387,8197,8135
-FR-55,2218,2287,2158,2294,2296,2220,2122,2221,2119,2107,2070,1928
-FR-56,7817,8036,7802,8221,7968,8288,7942,8029,7894,7909,7645,7554
-FR-57,11710,11970,12048,12114,11853,12012,11831,11856,11474,11579,11421,11385
-FR-58,2123,2181,2115,2137,2151,2049,1986,1982,1999,1942,1850,1801
-FR-59,36099,36257,35960,36858,36531,36572,36508,36703,36678,36513,36354,35923
-FR-60,10696,10630,10753,11144,11097,11162,11013,10960,11032,10941,10814,10802
-FR-61,3323,3243,3117,3276,3316,3185,3248,3192,3105,2933,2834,2810
-FR-62,18888,19304,19407,19780,19668,19902,19661,19784,19720,19017,19054,18809
-FR-63,6576,6632,6701,6902,6896,6865,6774,7131,6828,6933,6699,6908
-FR-64,6436,6338,6395,6680,6288,6455,6652,6569,6459,6490,6269,6497
-FR-65,2144,2186,2095,2284,2266,2095,2161,2149,2110,2201,2057,2111
-FR-66,4456,4320,4563,4779,4638,4756,4837,4869,4843,4943,4914,4800
-FR-67,13024,12828,13195,13388,13152,13231,13218,13346,13030,12895,13043,13262
-FR-68,9045,8945,8912,9324,8941,8909,8938,9177,8927,8818,8713,8826
-FR-69,23376,23796,24270,24808,24465,25120,25528,25973,25921,26294,25914,26712
-FR-70,2675,2773,2827,2975,2888,2755,2785,2761,2643,2609,2510,2458
-FR-71,5717,5709,5789,5876,5736,5860,5838,5865,5811,5752,5514,5552
-FR-72,6871,6935,6770,7133,6808,6909,6957,6942,6810,6703,6645,6664
-FR-73,4687,4736,4795,4903,5000,4971,4863,5074,4917,4786,4762,4798
-FR-74,8839,8753,8967,9124,8939,9333,9271,9521,9476,9829,9893,9982
-FR-75,31493,31817,31378,31748,30820,30623,31063,31447,30094,29291,28945,29134
-FR-76,15862,15650,15691,16004,16066,16041,15947,16338,16146,16014,15574,15199
-FR-77,17501,17729,18317,18986,18978,19240,19331,19712,19824,19678,19331,19708
-FR-78,19937,19431,19766,20438,19899,19895,19868,20312,19886,19827,19886,19525
-FR-79,3994,4100,4191,4057,4037,4331,4157,4060,4006,4029,3986,3718
-FR-80,7134,7035,7024,7021,6939,7094,6838,7103,6989,6843,6743,6506
-FR-81,3579,3611,3837,3933,3869,4056,4030,3925,4006,3939,3829,3831
-FR-82,2398,2591,2590,2823,2858,2932,2935,2926,2978,2940,2827,2829
-FR-83,10388,10622,10646,10889,10938,11131,10955,11159,11146,11240,10917,11123
-FR-84,6547,6629,6608,6805,6694,7000,7014,6967,7008,7107,7171,7058
-FR-85,6874,7062,7299,7589,7647,7629,7718,7601,7442,7436,7164,7070
-FR-86,4594,4568,4725,4850,4753,4909,4953,5006,4885,4880,4708,4686
-FR-87,3449,3659,3834,3754,3829,3891,3985,3848,3907,3825,3723,3724
-FR-88,4291,4264,4310,4416,4274,4215,4252,4057,3883,3715,3796,3679
-FR-89,3710,3844,3821,3929,3917,4045,3991,3842,3699,3729,3780,3621
-FR-90,1896,1766,1837,1888,1880,1818,1822,1802,1794,1763,1675,1707
-FR-91,17122,17614,17753,18281,17932,18134,18040,18509,18493,18506,18510,18903
-FR-92,24607,24649,24588,25426,24937,25217,25192,25194,25083,24790,24614,24675
-FR-93,25868,26313,26760,27916,27743,28062,28313,28513,28362,28675,28687,29471
-FR-94,19637,19866,19947,20948,20331,20736,21022,21391,20991,20967,20748,21566
-FR-95,17346,17863,18012,19015,18624,18761,18728,19506,19551,19495,19550,19737
\ No newline at end of file
diff --git a/superset/data/birth_names.json.gz b/superset/data/birth_names.json.gz
deleted file mode 100644
index 2652cf7..0000000
Binary files a/superset/data/birth_names.json.gz and /dev/null differ
diff --git a/superset/data/birth_names.py b/superset/data/birth_names.py
index 379fdc8..4f11ac5 100644
--- a/superset/data/birth_names.py
+++ b/superset/data/birth_names.py
@@ -14,9 +14,7 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
-import gzip
 import json
-import os
 import textwrap
 
 import pandas as pd
@@ -28,7 +26,7 @@ from superset.utils.core import get_or_create_main_db
 from .helpers import (
     config,
     Dash,
-    DATA_FOLDER,
+    get_example_data,
     get_slice_json,
     merge_slice,
     Slice,
@@ -39,8 +37,8 @@ from .helpers import (
 
 def load_birth_names():
     """Loading birth name dataset from a zip file in the repo"""
-    with gzip.open(os.path.join(DATA_FOLDER, 'birth_names.json.gz')) as f:
-        pdf = pd.read_json(f)
+    data = get_example_data('birth_names.json.gz')
+    pdf = pd.read_json(data)
     pdf.ds = pd.to_datetime(pdf.ds, unit='ms')
     pdf.to_sql(
         'birth_names',
diff --git a/superset/data/countries.json.gz b/superset/data/countries.json.gz
deleted file mode 100644
index 6c71c0c..0000000
Binary files a/superset/data/countries.json.gz and /dev/null differ
diff --git a/superset/data/country_map.py b/superset/data/country_map.py
index c1c2b41..e74638b 100644
--- a/superset/data/country_map.py
+++ b/superset/data/country_map.py
@@ -15,7 +15,6 @@
 # specific language governing permissions and limitations
 # under the License.
 import datetime
-import os
 
 import pandas as pd
 from sqlalchemy import BigInteger, Date, String
@@ -24,7 +23,7 @@ from superset import db
 from superset.connectors.sqla.models import SqlMetric
 from superset.utils import core as utils
 from .helpers import (
-    DATA_FOLDER,
+    get_example_data,
     get_slice_json,
     merge_slice,
     misc_dash_slices,
@@ -35,8 +34,9 @@ from .helpers import (
 
 def load_country_map_data():
     """Loading data for map with country map"""
-    csv_path = os.path.join(DATA_FOLDER, 'birth_france_data_for_country_map.csv')
-    data = pd.read_csv(csv_path, encoding='utf-8')
+    csv_bytes = get_example_data(
+        'birth_france_data_for_country_map.csv', is_gzip=False, make_bytes=True)
+    data = pd.read_csv(csv_bytes, encoding='utf-8')
     data['dttm'] = datetime.datetime.now().date()
     data.to_sql(  # pylint: disable=no-member
         'birth_france_by_region',
diff --git a/superset/data/energy.json.gz b/superset/data/energy.json.gz
deleted file mode 100644
index 624d71d..0000000
Binary files a/superset/data/energy.json.gz and /dev/null differ
diff --git a/superset/data/energy.py b/superset/data/energy.py
index c04eb46..e1d48e7 100644
--- a/superset/data/energy.py
+++ b/superset/data/energy.py
@@ -16,8 +16,6 @@
 # under the License.
 """Loads datasets, dashboards and slices in a new superset instance"""
 # pylint: disable=C,R,W
-import gzip
-import os
 import textwrap
 
 import pandas as pd
@@ -26,14 +24,16 @@ from sqlalchemy import Float, String
 from superset import db
 from superset.connectors.sqla.models import SqlMetric
 from superset.utils import core as utils
-from .helpers import DATA_FOLDER, merge_slice, misc_dash_slices, Slice, TBL
+from .helpers import (
+    DATA_FOLDER, get_example_data, merge_slice, misc_dash_slices, Slice, TBL,
+)
 
 
 def load_energy():
     """Loads an energy related dataset to use with sankey and graphs"""
     tbl_name = 'energy_usage'
-    with gzip.open(os.path.join(DATA_FOLDER, 'energy.json.gz')) as f:
-        pdf = pd.read_json(f)
+    data = get_example_data('energy.json.gz')
+    pdf = pd.read_json(data)
     pdf.to_sql(
         tbl_name,
         db.engine,
diff --git a/superset/data/flight_data.csv.gz b/superset/data/flight_data.csv.gz
deleted file mode 100644
index bbdebdf..0000000
Binary files a/superset/data/flight_data.csv.gz and /dev/null differ
diff --git a/superset/data/flights.py b/superset/data/flights.py
index 1ed575b..2511244 100644
--- a/superset/data/flights.py
+++ b/superset/data/flights.py
@@ -14,26 +14,23 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
-import gzip
-import os
-
 import pandas as pd
 from sqlalchemy import DateTime
 
 from superset import db
 from superset.utils import core as utils
-from .helpers import DATA_FOLDER, TBL
+from .helpers import get_example_data, TBL
 
 
 def load_flights():
     """Loading random time series data from a zip file in the repo"""
     tbl_name = 'flights'
-    with gzip.open(os.path.join(DATA_FOLDER, 'flight_data.csv.gz')) as f:
-        pdf = pd.read_csv(f, encoding='latin-1')
+    data = get_example_data('flight_data.csv.gz', make_bytes=True)
+    pdf = pd.read_csv(data, encoding='latin-1')
 
     # Loading airports info to join and get lat/long
-    with gzip.open(os.path.join(DATA_FOLDER, 'airports.csv.gz')) as f:
-        airports = pd.read_csv(f, encoding='latin-1')
+    airports_bytes = get_example_data('airports.csv.gz', make_bytes=True)
+    airports = pd.read_csv(airports_bytes, encoding='latin-1')
     airports = airports.set_index('IATA_CODE')
 
     pdf['ds'] = pdf.YEAR.map(str) + '-0' + pdf.MONTH.map(str) + '-0' + pdf.DAY.map(str)
diff --git a/superset/data/helpers.py b/superset/data/helpers.py
index d6192b6..f876dc9 100644
--- a/superset/data/helpers.py
+++ b/superset/data/helpers.py
@@ -16,13 +16,19 @@
 # under the License.
 """Loads datasets, dashboards and slices in a new superset instance"""
 # pylint: disable=C,R,W
+from io import BytesIO
 import json
 import os
+import zlib
+
+import requests
 
 from superset import app, db
 from superset.connectors.connector_registry import ConnectorRegistry
 from superset.models import core as models
 
+BASE_URL = 'https://github.com/apache-superset/examples-data/blob/master/'
+
 # Shortcuts
 DB = models.Database
 Slice = models.Slice
@@ -60,3 +66,12 @@ def get_slice_json(defaults, **kwargs):
     d = defaults.copy()
     d.update(kwargs)
     return json.dumps(d, indent=4, sort_keys=True)
+
+
+def get_example_data(filepath, is_gzip=True, make_bytes=False):
+    content = requests.get(f'{BASE_URL}{filepath}?raw=true').content
+    if is_gzip:
+        content = zlib.decompress(content, zlib.MAX_WBITS|16)
+    if make_bytes:
+        content = BytesIO(content)
+    return content
diff --git a/superset/data/long_lat.py b/superset/data/long_lat.py
index 40895d5..18f477c 100644
--- a/superset/data/long_lat.py
+++ b/superset/data/long_lat.py
@@ -15,8 +15,6 @@
 # specific language governing permissions and limitations
 # under the License.
 import datetime
-import gzip
-import os
 import random
 
 import geohash
@@ -26,7 +24,7 @@ from sqlalchemy import DateTime, Float, String
 from superset import db
 from superset.utils import core as utils
 from .helpers import (
-    DATA_FOLDER,
+    get_example_data,
     get_slice_json,
     merge_slice,
     misc_dash_slices,
@@ -37,8 +35,8 @@ from .helpers import (
 
 def load_long_lat_data():
     """Loading lat/long data from a csv file in the repo"""
-    with gzip.open(os.path.join(DATA_FOLDER, 'san_francisco.csv.gz')) as f:
-        pdf = pd.read_csv(f, encoding='utf-8')
+    data = get_example_data('san_francisco.csv.gz', make_bytes=True)
+    pdf = pd.read_csv(data, encoding='utf-8')
     start = datetime.datetime.now().replace(
         hour=0, minute=0, second=0, microsecond=0)
     pdf['datetime'] = [
diff --git a/superset/data/multiformat_time_series.json.gz b/superset/data/multiformat_time_series.json.gz
deleted file mode 100644
index e0877b7..0000000
Binary files a/superset/data/multiformat_time_series.json.gz and /dev/null differ
diff --git a/superset/data/multiformat_time_series.py b/superset/data/multiformat_time_series.py
index 5dec85a..58ff7fb 100644
--- a/superset/data/multiformat_time_series.py
+++ b/superset/data/multiformat_time_series.py
@@ -14,8 +14,6 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
-import gzip
-import os
 
 import pandas as pd
 from sqlalchemy import BigInteger, Date, DateTime, String
@@ -24,7 +22,7 @@ from superset import db
 from superset.utils import core as utils
 from .helpers import (
     config,
-    DATA_FOLDER,
+    get_example_data,
     get_slice_json,
     merge_slice,
     misc_dash_slices,
@@ -35,8 +33,9 @@ from .helpers import (
 
 def load_multiformat_time_series():
     """Loading time series data from a zip file in the repo"""
-    with gzip.open(os.path.join(DATA_FOLDER, 'multiformat_time_series.json.gz')) as f:
-        pdf = pd.read_json(f)
+    data = get_example_data('multiformat_time_series.json.gz')
+    pdf = pd.read_json(data)
+
     pdf.ds = pd.to_datetime(pdf.ds, unit='s')
     pdf.ds2 = pd.to_datetime(pdf.ds2, unit='s')
     pdf.to_sql(
diff --git a/superset/data/paris.py b/superset/data/paris.py
index e32588d..2ed3f8e 100644
--- a/superset/data/paris.py
+++ b/superset/data/paris.py
@@ -14,24 +14,22 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
-import gzip
 import json
-import os
 
 import pandas as pd
 from sqlalchemy import String, Text
 
 from superset import db
 from superset.utils import core as utils
-from .helpers import DATA_FOLDER, TBL
+from .helpers import TBL, get_example_data
 
 
 def load_paris_iris_geojson():
     tbl_name = 'paris_iris_mapping'
 
-    with gzip.open(os.path.join(DATA_FOLDER, 'paris_iris.json.gz')) as f:
-        df = pd.read_json(f)
-        df['features'] = df.features.map(json.dumps)
+    data = get_example_data('paris_iris.json.gz')
+    df = pd.read_json(data)
+    df['features'] = df.features.map(json.dumps)
 
     df.to_sql(
         tbl_name,
diff --git a/superset/data/paris_iris.json.gz b/superset/data/paris_iris.json.gz
deleted file mode 100644
index 4a964c9..0000000
Binary files a/superset/data/paris_iris.json.gz and /dev/null differ
diff --git a/superset/data/random_time_series.json.gz b/superset/data/random_time_series.json.gz
deleted file mode 100644
index 5275d55..0000000
Binary files a/superset/data/random_time_series.json.gz and /dev/null differ
diff --git a/superset/data/random_time_series.py b/superset/data/random_time_series.py
index cfc13e1..ee7450a 100644
--- a/superset/data/random_time_series.py
+++ b/superset/data/random_time_series.py
@@ -14,8 +14,6 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
-import gzip
-import os
 
 import pandas as pd
 from sqlalchemy import DateTime
@@ -24,7 +22,7 @@ from superset import db
 from superset.utils import core as utils
 from .helpers import (
     config,
-    DATA_FOLDER,
+    get_example_data,
     get_slice_json,
     merge_slice,
     Slice,
@@ -34,8 +32,8 @@ from .helpers import (
 
 def load_random_time_series_data():
     """Loading random time series data from a zip file in the repo"""
-    with gzip.open(os.path.join(DATA_FOLDER, 'random_time_series.json.gz')) as f:
-        pdf = pd.read_json(f)
+    data = get_example_data('random_time_series.json.gz')
+    pdf = pd.read_json(data)
     pdf.ds = pd.to_datetime(pdf.ds, unit='s')
     pdf.to_sql(
         'random_time_series',
diff --git a/superset/data/san_francisco.csv.gz b/superset/data/san_francisco.csv.gz
deleted file mode 100644
index 1d977a4..0000000
Binary files a/superset/data/san_francisco.csv.gz and /dev/null differ
diff --git a/superset/data/sf_population.json.gz b/superset/data/sf_population.json.gz
deleted file mode 100644
index 53ba13a..0000000
Binary files a/superset/data/sf_population.json.gz and /dev/null differ
diff --git a/superset/data/sf_population_polygons.py b/superset/data/sf_population_polygons.py
index 6da85f7..2248a48 100644
--- a/superset/data/sf_population_polygons.py
+++ b/superset/data/sf_population_polygons.py
@@ -14,24 +14,22 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
-import gzip
 import json
-import os
 
 import pandas as pd
 from sqlalchemy import BigInteger, Text
 
 from superset import db
 from superset.utils import core as utils
-from .helpers import DATA_FOLDER, TBL
+from .helpers import TBL, get_example_data
 
 
 def load_sf_population_polygons():
     tbl_name = 'sf_population_polygons'
 
-    with gzip.open(os.path.join(DATA_FOLDER, 'sf_population.json.gz')) as f:
-        df = pd.read_json(f)
-        df['contour'] = df.contour.map(json.dumps)
+    data = get_example_data('sf_population.json.gz')
+    df = pd.read_json(data)
+    df['contour'] = df.contour.map(json.dumps)
 
     df.to_sql(
         tbl_name,
diff --git a/superset/data/unicode_test_data.py b/superset/data/unicode_test_data.py
index 42e6bdb..03c00a7 100644
--- a/superset/data/unicode_test_data.py
+++ b/superset/data/unicode_test_data.py
@@ -16,7 +16,6 @@
 # under the License.
 import datetime
 import json
-import os
 import random
 
 import pandas as pd
@@ -27,7 +26,7 @@ from superset.utils import core as utils
 from .helpers import (
     config,
     Dash,
-    DATA_FOLDER,
+    get_example_data,
     get_slice_json,
     merge_slice,
     Slice,
@@ -38,8 +37,9 @@ from .helpers import (
 
 def load_unicode_test_data():
     """Loading unicode test dataset from a csv file in the repo"""
-    df = pd.read_csv(os.path.join(DATA_FOLDER, 'unicode_utf8_unixnl_test.csv'),
-                     encoding='utf-8')
+    data = get_example_data(
+        'unicode_utf8_unixnl_test.csv', is_gzip=False, make_bytes=True)
+    df = pd.read_csv(data, encoding='utf-8')
     # generate date/numeric data
     df['dttm'] = datetime.datetime.now().date()
     df['value'] = [random.randint(1, 100) for _ in range(len(df))]
diff --git a/superset/data/unicode_utf8_unixnl_test.csv b/superset/data/unicode_utf8_unixnl_test.csv
deleted file mode 100644
index 9b0235b..0000000
--- a/superset/data/unicode_utf8_unixnl_test.csv
+++ /dev/null
@@ -1,42 +0,0 @@
-phrase,short_phrase,with_missing
-"Под южно дърво, цъфтящо в синьо, бягаше малко пухкаво зайче.",Под южно д,Fam hx-cardiovas dis NEC
-Příliš žluťoučký kůň úpěl ďábelské ódy.,Příliš žlu,
-視野無限廣,窗外有藍天,視野無限廣,窗外有藍,Sparganosis
-微風迎客,軟語伴茶,微風迎客,軟語伴茶,Var mgr NEC wo ntc mgr
-中国智造,慧及全球,中国智造,慧及全球,Mech prob w internal org
-"Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen Walther spillede på xylofon.",Quizdeltag,Corneal dystrophy NOS
-Pa’s wijze lynx bezag vroom het fikse aquaduct.,Pa’s wijze,Edema in preg-unspec
-Eĥoŝanĝo ĉiuĵaŭde.,Eĥoŝanĝo ĉ,
-See väike mölder jõuab rongile hüpata,See väike ,Twin NOS-nonhosp
-Viekas kettu punaturkki laiskan koiran takaa kurkki.,Viekas ket,Postgastric surgery synd
-Voix ambiguë d’un cœur qui au zéphyr préfère les jattes de kiwis.,Voix ambig,Loose body-mult joints
-Portez ce vieux whisky au juge blond qui fume.,Portez ce ,Late eff acc poisoning
-Zwölf Boxkämpfer jagen Viktor quer über den großen Sylter Deich,Zwölf Boxk,Opn brain inj w/o coma
-Franz jagt im komplett verwahrlosten Taxi quer durch Bayern.,Franz jagt,TB of ear-unspec
-Θέλει αρετή και τόλμη η ελευθερία. (Ανδρέας Κάλβος),Θέλει αρετ,Chr peptic ulcer w perf
-Ο καλύμνιος σφουγγαράς ψιθύρισε πως θα βουτήξει χωρίς να διστάζει.,Ο καλύμνιο,Cns TB NEC-cult dx
-דג סקרן שט לו בים זך אך לפתע פגש חבורה נחמדה שצצה כך.,דג סקרן שט,Polyhydramnios-delivered
-Árvíztűrő tükörfúrógép,Árvíztűrő ,Malign neopl scrotum
-"Egy hűtlen vejét fülöncsípő, dühös mexikói úr Wesselényinél mázol Quitóban.",Egy hűtlen,Tubal/broad lig anom NOS
-Saya lihat foto Hamengkubuwono XV bersama enam zebra purba cantik yang jatuh dari Al Quranmu.,Saya lihat,Ben carcinoid duodenum
-"Ma la volpe, col suo balzo, ha raggiunto il quieto Fido.",Ma la volp,Ch leu un cl wo ach rmsn
-いろはにほへと ちりぬるを わかよたれそ つねならむ うゐのおくやま けふこえて あさきゆめみし ゑひもせす,いろはにほへと ちり,Mycotic arthritis-pelvis
-다람쥐 헌 쳇바퀴에 타고파,다람쥐 헌 쳇바퀴에,Paral polio NEC-type 1
-Sarkanās jūrascūciņas peld pa jūru.,Sarkanās j,Fx larynx/trachea-open
-En god stil må først og fremst være klar. Den må være passende. Aristoteles.,En god sti,Dermatophytosis site NOS
-Pchnąć w tę łódź jeża lub ośm skrzyń fig,Pchnąć w t,Anxiety disorder oth dis
-A rápida raposa castanha salta por cima do cão lento.,A rápida r,Adenoid vegetations
-A ligeira raposa marrom ataca o cão preguiçoso.,A ligeira ,Consanguinity
-Zebras caolhas de Java querem passar fax para moças gigantes de New York,Zebras cao,"Hypotony NOS, eye"
-Agera vulpe maronie sare peste câinele cel leneş.,Agera vulp,Urethral syndrome NOS
-Съешь ещё этих мягких французских булок да выпей же чаю,Съешь ещё ,Coccidioidomycosis NOS
-Чешће цeђење мрeжастим џаком побољшава фертилизацију генских хибрида.,Чешће цeђе,
-Češće ceđenje mrežastim džakom poboljšava fertilizaciju genskih hibrida.,Češće ceđe,Scrn-hemoglobinopath NEC
-Kŕdeľ šťastných ďatľov učí pri ústí Váhu mĺkveho koňa obhrýzať kôru a žrať čerstvé mäso.,Kŕdeľ šťas,
-V kožuščku hudobnega fanta stopiclja mizar in kliče 0619872345.,V kožuščku,
-El veloz murciélago hindú comía feliz cardillo y kiwi. La cigüeña tocaba el saxofón detrás del palenque de paja.,El veloz m,Cervical syndrome NEC
-Flygande bäckasiner söka hwila på mjuka tuvor,Flygande b,Letterer-siwe dis abdom
-เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะ ๆ จ๋า ๆ น่าฟังเอยฯ,เป็นมนุษย์,Balantidiasis
-"Pijamalı hasta, yağız şoföre çabucak güvendi",Pijamalı h,Epilepsy-delivered w p/p
-زۆھرەگۈل ئابدۇۋاجىت فرانسىيەنىڭ پارىژدىكى خېلى بىشەم ئوقۇغۇچى.,زۆھرەگۈل ئ,Fit/adj non-vsc cath NEC
-ئاۋۇ بىر جۈپ خوراز فرانسىيەنىڭ پارىژ شەھرىگە يېقىن تاغقا كۆچەلمىدى.,ئاۋۇ بىر ج,Sat cerv smr-no trnsfrm
diff --git a/superset/data/world_bank.py b/superset/data/world_bank.py
index 910b338..16aa0cb 100644
--- a/superset/data/world_bank.py
+++ b/superset/data/world_bank.py
@@ -16,7 +16,6 @@
 # under the License.
 """Loads datasets, dashboards and slices in a new superset instance"""
 # pylint: disable=C,R,W
-import gzip
 import json
 import os
 import textwrap
@@ -31,6 +30,7 @@ from .helpers import (
     config,
     Dash,
     DATA_FOLDER,
+    get_example_data,
     get_slice_json,
     merge_slice,
     misc_dash_slices,
@@ -43,8 +43,8 @@ from .helpers import (
 def load_world_bank_health_n_pop():
     """Loads the world bank health dataset, slices and a dashboard"""
     tbl_name = 'wb_health_population'
-    with gzip.open(os.path.join(DATA_FOLDER, 'countries.json.gz')) as f:
-        pdf = pd.read_json(f)
+    data = get_example_data('countries.json.gz')
+    pdf = pd.read_json(data)
     pdf.columns = [col.replace('.', '_') for col in pdf.columns]
     pdf.year = pd.to_datetime(pdf.year)
     pdf.to_sql(


Mime
View raw message