superset-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From grace...@apache.org
Subject [incubator-superset] branch master updated: fixes to csv - hive upload (#4488)
Date Wed, 28 Feb 2018 06:13:08 GMT
This is an automated email from the ASF dual-hosted git repository.

graceguo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-superset.git


The following commit(s) were added to refs/heads/master by this push:
     new 404e2d5  fixes to csv - hive upload (#4488)
404e2d5 is described below

commit 404e2d552ae430d549226df54e74fe760f7cb8d1
Author: timifasubaa <30888507+timifasubaa@users.noreply.github.com>
AuthorDate: Tue Feb 27 22:13:06 2018 -0800

    fixes to csv - hive upload (#4488)
---
 setup.py                    | 1 +
 superset/db_engine_specs.py | 9 +++++----
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/setup.py b/setup.py
index 4bc9412..c32696e 100644
--- a/setup.py
+++ b/setup.py
@@ -87,6 +87,7 @@ setup(
         'thrift>=0.9.3',
         'thrift-sasl>=0.2.1',
         'unidecode>=0.04.21',
+        'unicodecsv==0.14.1',
         'bleach==2.1.2',
     ],
     extras_require={
diff --git a/superset/db_engine_specs.py b/superset/db_engine_specs.py
index 0c5d5ec..4d373df 100644
--- a/superset/db_engine_specs.py
+++ b/superset/db_engine_specs.py
@@ -18,7 +18,6 @@ from __future__ import print_function
 from __future__ import unicode_literals
 
 from collections import defaultdict, namedtuple
-import csv
 import inspect
 import logging
 import os
@@ -35,6 +34,7 @@ from sqlalchemy.engine import create_engine
 from sqlalchemy.engine.url import make_url
 from sqlalchemy.sql import text
 import sqlparse
+import unicodecsv
 from werkzeug.utils import secure_filename
 
 from superset import app, cache_util, conf, db, utils
@@ -850,7 +850,7 @@ class HiveEngineSpec(PrestoEngineSpec):
         """Uploads a csv file and creates a superset datasource in Hive."""
         def get_column_names(filepath):
             with open(filepath, 'rb') as f:
-                return csv.reader(f).next()
+                return unicodecsv.reader(f, encoding='utf-8-sig').next()
 
         table_name = form.name.data
         filename = form.csv_file.data.filename
@@ -874,11 +874,12 @@ class HiveEngineSpec(PrestoEngineSpec):
         s3 = boto3.client('s3')
         location = os.path.join('s3a://', bucket_path, upload_prefix, table_name)
         s3.upload_file(
-            upload_path, 'airbnb-superset',
+            upload_path, bucket_path,
             os.path.join(upload_prefix, table_name, filename))
         sql = """CREATE EXTERNAL TABLE {table_name} ( {schema_definition} )
             ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS
-            TEXTFILE LOCATION '{location}'""".format(**locals())
+            TEXTFILE LOCATION '{location}'
+            tblproperties ('skip.header.line.count'='1')""".format(**locals())
         logging.info(form.con.data)
         engine = create_engine(form.con.data.sqlalchemy_uri)
         engine.execute(sql)

-- 
To stop receiving notification emails like this one, please contact
graceguo@apache.org.

Mime
View raw message