beam-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From chamik...@apache.org
Subject [1/2] beam git commit: Updates BigQuery read transform to correctly process empty repeated fields.
Date Thu, 23 Mar 2017 00:16:58 GMT
Repository: beam
Updated Branches:
  refs/heads/master eb347bf0d -> 7bc2938a2


Updates BigQuery read transform to correctly process empty repeated fields.


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/08de9e98
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/08de9e98
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/08de9e98

Branch: refs/heads/master
Commit: 08de9e98d16b762f650631d8553276c28d044f1b
Parents: eb347bf
Author: chamikara@google.com <chamikara@google.com>
Authored: Wed Mar 22 13:17:26 2017 -0700
Committer: Chamikara Jayalath <chamikara@google.com>
Committed: Wed Mar 22 17:11:44 2017 -0700

----------------------------------------------------------------------
 sdks/python/apache_beam/io/gcp/bigquery.py      | 13 +++++++++++--
 sdks/python/apache_beam/io/gcp/bigquery_test.py |  3 ++-
 2 files changed, 13 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/08de9e98/sdks/python/apache_beam/io/gcp/bigquery.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/io/gcp/bigquery.py b/sdks/python/apache_beam/io/gcp/bigquery.py
index 3186a55..a917d51 100644
--- a/sdks/python/apache_beam/io/gcp/bigquery.py
+++ b/sdks/python/apache_beam/io/gcp/bigquery.py
@@ -1069,8 +1069,17 @@ class BigQueryWrapper(object):
         cell = row['f'][index]
         value = cell['v'] if 'v' in cell else None
       if field.mode == 'REPEATED':
-        result[field.name] = [self._convert_cell_value_to_dict(x['v'], field)
-                              for x in value]
+        if value is None:
+          # We receive 'None' for repeated fields without any values when
+          # 'flatten_results' is 'False'.
+          # When 'flatten_results' is 'True', we receive individual values
+          # instead of a list of values hence we do not hit this condition.
+          # We return an empty list here instead of 'None' to be consistent with
+          # other runners and to be backwards compatible to users.
+          result[field.name] = []
+        else:
+          result[field.name] = [self._convert_cell_value_to_dict(x['v'], field)
+                                for x in value]
       elif value is None:
         if not field.mode == 'NULLABLE':
           raise ValueError('Received \'None\' as the value for the field %s '

http://git-wip-us.apache.org/repos/asf/beam/blob/08de9e98/sdks/python/apache_beam/io/gcp/bigquery_test.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/io/gcp/bigquery_test.py b/sdks/python/apache_beam/io/gcp/bigquery_test.py
index fbf073c..2b83079 100644
--- a/sdks/python/apache_beam/io/gcp/bigquery_test.py
+++ b/sdks/python/apache_beam/io/gcp/bigquery_test.py
@@ -402,7 +402,8 @@ class TestBigQueryReader(unittest.TestCase):
             bigquery.TableCell(v=None),
             bigquery.TableCell(v=None),
             bigquery.TableCell(v=None),
-            bigquery.TableCell(v=to_json_value([]))])]
+            # REPEATED field without any values.
+            bigquery.TableCell(v=None)])]
     return table_rows, schema, expected_rows
 
   def test_read_from_table(self):


Mime
View raw message