airflow-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From fo...@apache.org
Subject incubator-airflow git commit: [AIRFLOW-2139] Remove unncecessary boilerplate to get DataFrame using pandas_gbq
Date Sat, 24 Feb 2018 19:48:24 GMT
Repository: incubator-airflow
Updated Branches:
  refs/heads/master beadcd327 -> ad308ea44


[AIRFLOW-2139] Remove unncecessary boilerplate to get DataFrame using pandas_gbq

Closes #3066 from mremes/improvement/cleanup-
bigquery-hook


Project: http://git-wip-us.apache.org/repos/asf/incubator-airflow/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-airflow/commit/ad308ea4
Tree: http://git-wip-us.apache.org/repos/asf/incubator-airflow/tree/ad308ea4
Diff: http://git-wip-us.apache.org/repos/asf/incubator-airflow/diff/ad308ea4

Branch: refs/heads/master
Commit: ad308ea441372f2b44b4292c3779eb745f2ed48c
Parents: beadcd3
Author: Matti Remes <matti.remes@nextgames.com>
Authored: Sat Feb 24 20:48:17 2018 +0100
Committer: Fokko Driesprong <fokkodriesprong@godatadriven.com>
Committed: Sat Feb 24 20:48:17 2018 +0100

----------------------------------------------------------------------
 airflow/contrib/hooks/bigquery_hook.py | 22 +++++-----------------
 1 file changed, 5 insertions(+), 17 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/ad308ea4/airflow/contrib/hooks/bigquery_hook.py
----------------------------------------------------------------------
diff --git a/airflow/contrib/hooks/bigquery_hook.py b/airflow/contrib/hooks/bigquery_hook.py
index cd0318f..d937f1e 100644
--- a/airflow/contrib/hooks/bigquery_hook.py
+++ b/airflow/contrib/hooks/bigquery_hook.py
@@ -28,10 +28,9 @@ from airflow.hooks.dbapi_hook import DbApiHook
 from airflow.utils.log.logging_mixin import LoggingMixin
 from apiclient.discovery import HttpError, build
 from googleapiclient import errors
-from pandas.tools.merge import concat
 from pandas_gbq.gbq import \
     _check_google_client_version as gbq_check_google_client_version
-from pandas_gbq.gbq import _parse_data as gbq_parse_data
+from pandas_gbq import read_gbq
 from pandas_gbq.gbq import \
     _test_google_api_imports as gbq_test_google_api_imports
 from pandas_gbq.gbq import GbqConnector
@@ -96,24 +95,13 @@ class BigQueryHook(GoogleCloudBaseHook, DbApiHook, LoggingMixin):
             defaults to use `self.use_legacy_sql` if not specified
         :type dialect: string in {'legacy', 'standard'}
         """
-        service = self.get_service()
-        project = self._get_field('project')
-
         if dialect is None:
             dialect = 'legacy' if self.use_legacy_sql else 'standard'
 
-        connector = BigQueryPandasConnector(project, service, dialect=dialect)
-        schema, pages = connector.run_query(bql)
-        dataframe_list = []
-
-        while len(pages) > 0:
-            page = pages.pop()
-            dataframe_list.append(gbq_parse_data(schema, page))
-
-        if len(dataframe_list) > 0:
-            return concat(dataframe_list, ignore_index=True)
-        else:
-            return gbq_parse_data(schema, [])
+        return read_gbq(bql,
+                        project_id=self._get_field('project'),
+                        dialect=dialect,
+                        verbose=False)
 
     def table_exists(self, project_id, dataset_id, table_id):
         """


Mime
View raw message