airflow-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From san...@apache.org
Subject incubator-airflow git commit: [AIRFLOW-611] source_format in BigQueryBaseCursor
Date Fri, 11 Nov 2016 17:59:47 GMT
Repository: incubator-airflow
Updated Branches:
  refs/heads/master 868bc8313 -> 98f32184a


[AIRFLOW-611] source_format in BigQueryBaseCursor

Check source_format in BigQueryBaseCursor

The edits to `bigquery_hook.py` are made to
`BigQueryBaseCursor`.

Closes #1873 from Jalepeno112/bug/AIRFLOW-611


Project: http://git-wip-us.apache.org/repos/asf/incubator-airflow/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-airflow/commit/98f32184
Tree: http://git-wip-us.apache.org/repos/asf/incubator-airflow/tree/98f32184
Diff: http://git-wip-us.apache.org/repos/asf/incubator-airflow/diff/98f32184

Branch: refs/heads/master
Commit: 98f32184a64bdd478045064b1f8cc6d220aae60f
Parents: 868bc83
Author: Giovanni Briggs <gbriggs2012@gmail.com>
Authored: Fri Nov 11 09:57:47 2016 -0800
Committer: Siddharth Anand <siddharthanand@yahoo.com>
Committed: Fri Nov 11 09:57:54 2016 -0800

----------------------------------------------------------------------
 airflow/contrib/hooks/bigquery_hook.py | 13 +++++++++++++
 tests/contrib/hooks/bigquery_hook.py   |  7 +++++++
 2 files changed, 20 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/98f32184/airflow/contrib/hooks/bigquery_hook.py
----------------------------------------------------------------------
diff --git a/airflow/contrib/hooks/bigquery_hook.py b/airflow/contrib/hooks/bigquery_hook.py
index e8528ac..700a39e 100644
--- a/airflow/contrib/hooks/bigquery_hook.py
+++ b/airflow/contrib/hooks/bigquery_hook.py
@@ -373,6 +373,19 @@ class BigQueryBaseCursor(object):
         :param field_delimiter: The delimiter to use when loading from a CSV.
         :type field_delimiter: string
         """
+
+        # bigquery only allows certain source formats
+        # we check to make sure the passed source format is valid
+        # if it's not, we raise a ValueError
+        # Refer to this link for more details: 
+        #   https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query.tableDefinitions.(key).sourceFormat
+        source_format = source_format.upper()
+        allowed_formats = ["CSV", "NEWLINE_DELIMITED_JSON", "AVRO", "GOOGLE_SHEETS"]
+        if source_format not in allowed_formats:
+            raise ValueError("{0} is not a valid source format. " 
+                    "Please use one of the following types: {1}"
+                    .format(source_format, allowed_formats))
+
         destination_project, destination_dataset, destination_table = \
             _split_tablename(table_input=destination_project_dataset_table,
                              default_project_id=self.project_id,

http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/98f32184/tests/contrib/hooks/bigquery_hook.py
----------------------------------------------------------------------
diff --git a/tests/contrib/hooks/bigquery_hook.py b/tests/contrib/hooks/bigquery_hook.py
index d8695ed..3a58766 100644
--- a/tests/contrib/hooks/bigquery_hook.py
+++ b/tests/contrib/hooks/bigquery_hook.py
@@ -104,6 +104,13 @@ class TestBigQueryTableSplitter(unittest.TestCase):
         self.assertIn('Format exception for var_x:',
                       str(context.exception), "")
 
+class TestBigQueryHookSourceFormat(unittest.TestCase):
+    def test_invalid_source_format(self):
+        with self.assertRaises(Exception) as context:
+            hook.BigQueryBaseCursor("test", "test").run_load("test.test", "test_schema.json", ["test_data.json"], source_format="json")
+        
+        # since we passed 'json' in, and it's not valid, make sure it's present in the error string.
+        self.assertIn("json", str(context.exception))
 
 if __name__ == '__main__':
     unittest.main()


Mime
View raw message