airflow-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From bo...@apache.org
Subject incubator-airflow git commit: [AIRFLOW-1170] DbApiHook insert_rows inserts parameters separately
Date Sat, 13 May 2017 12:49:11 GMT
Repository: incubator-airflow
Updated Branches:
  refs/heads/master 03704ce02 -> 3b589a9f7


[AIRFLOW-1170] DbApiHook insert_rows inserts parameters separately

Instead of building a SQL statement with all values interpolated into it,
we pass the values to the cursor separately to prevent SQL injection.

Closes #2270 from NielsZeilemaker/AIRFLOW-1170


Project: http://git-wip-us.apache.org/repos/asf/incubator-airflow/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-airflow/commit/3b589a9f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-airflow/tree/3b589a9f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-airflow/diff/3b589a9f

Branch: refs/heads/master
Commit: 3b589a9f73bed018bf7e2c7b7265bfce5da91ca0
Parents: 03704ce
Author: Niels Zeilemaker <nielszeilemaker@godatadriven.com>
Authored: Sat May 13 14:49:05 2017 +0200
Committer: Bolke de Bruin <bolke@xs4all.nl>
Committed: Sat May 13 14:49:05 2017 +0200

----------------------------------------------------------------------
 airflow/hooks/dbapi_hook.py    | 22 +++++++++-------------
 airflow/hooks/mysql_hook.py    |  9 +++++----
 airflow/hooks/postgres_hook.py | 13 ++++++++-----
 3 files changed, 22 insertions(+), 22 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/3b589a9f/airflow/hooks/dbapi_hook.py
----------------------------------------------------------------------
diff --git a/airflow/hooks/dbapi_hook.py b/airflow/hooks/dbapi_hook.py
index 75ca409..fe85153 100644
--- a/airflow/hooks/dbapi_hook.py
+++ b/airflow/hooks/dbapi_hook.py
@@ -180,7 +180,7 @@ class DbApiHook(BaseHook):
     def insert_rows(self, table, rows, target_fields=None, commit_every=1000):
         """
         A generic way to insert a set of tuples into a table,
-        the whole set of inserts is treated as one transaction
+        a new transaction is created every commit_every rows
 
         :param table: Name of the target table
         :type table: str
@@ -210,11 +210,12 @@ class DbApiHook(BaseHook):
                     for cell in row:
                         l.append(self._serialize_cell(cell, conn))
                     values = tuple(l)
+                    placeholders = ["%s",]*len(values)
                     sql = "INSERT INTO {0} {1} VALUES ({2});".format(
                         table,
                         target_fields,
-                        ",".join(values))
-                    cur.execute(sql)
+                        ",".join(placeholders))
+                    cur.execute(sql, values)
                     if commit_every and i % commit_every == 0:
                         conn.commit()
                         logging.info(
@@ -237,16 +238,11 @@ class DbApiHook(BaseHook):
         :rtype: str
         """
 
-        if isinstance(cell, basestring):
-            return "'" + str(cell).replace("'", "''") + "'"
-        elif cell is None:
-            return 'NULL'
-        elif isinstance(cell, numpy.datetime64):
-            return "'" + str(cell) + "'"
-        elif isinstance(cell, datetime):
-            return "'" + cell.isoformat() + "'"
-        else:
-            return str(cell)
+        if cell is None:
+            return None
+        if isinstance(cell, datetime):
+            return cell.isoformat()
+        return str(cell)
 
     def bulk_dump(self, table, tmp_file):
         """

http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/3b589a9f/airflow/hooks/mysql_hook.py
----------------------------------------------------------------------
diff --git a/airflow/hooks/mysql_hook.py b/airflow/hooks/mysql_hook.py
index bf1a721..a0f62b5 100644
--- a/airflow/hooks/mysql_hook.py
+++ b/airflow/hooks/mysql_hook.py
@@ -87,14 +87,15 @@ class MySqlHook(DbApiHook):
     @staticmethod
     def _serialize_cell(cell, conn):
         """
-        Returns the MySQL literal of the cell as a string.
+        MySQLdb converts arguments to literals when they are passed separately to execute.
+        Hence, this method does nothing.
 
         :param cell: The cell to insert into the table
         :type cell: object
         :param conn: The database connection
         :type conn: connection object
-        :return: The serialized cell
-        :rtype: str
+        :return: The same cell
+        :rtype: object
         """
 
-        return conn.literal(cell)
+        return cell

http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/3b589a9f/airflow/hooks/postgres_hook.py
----------------------------------------------------------------------
diff --git a/airflow/hooks/postgres_hook.py b/airflow/hooks/postgres_hook.py
index 372e4e5..c8de85b 100644
--- a/airflow/hooks/postgres_hook.py
+++ b/airflow/hooks/postgres_hook.py
@@ -50,14 +50,17 @@ class PostgresHook(DbApiHook):
     @staticmethod
     def _serialize_cell(cell, conn):
         """
-        Returns the Postgres literal of the cell as a string.
+        psycopg2 will adapt all arguments to the execute() method internally,
+        hence we return cell without any conversion.
+        
+        See http://initd.org/psycopg/docs/advanced.html#adapting-new-types for 
+        more information.
 
         :param cell: The cell to insert into the table
         :type cell: object
         :param conn: The database connection
         :type conn: connection object
-        :return: The serialized cell
-        :rtype: str
+        :return: The cell
+        :rtype: object
         """
-
-        return psycopg2.extensions.adapt(cell).getquoted().decode('utf-8')
+        return cell


Mime
View raw message