madlib-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From GitBox <...@apache.org>
Subject [GitHub] [madlib] reductionista commented on a change in pull request #520: DL: Restrict access to the custom functions table
Date Thu, 08 Oct 2020 04:57:04 GMT

reductionista commented on a change in pull request #520:
URL: https://github.com/apache/madlib/pull/520#discussion_r501417095



##########
File path: src/ports/postgres/modules/deep_learning/madlib_keras_automl.py_in
##########
@@ -183,7 +183,11 @@ class KerasAutoML():
         self.compile_params_grid = compile_params_grid
         self.fit_params_grid = fit_params_grid
 
+        if object_table is not None:
+            object_table = "{0}.{1}".format(schema_madlib, object_table)

Review comment:
       quote_ident

##########
File path: src/ports/postgres/modules/deep_learning/madlib_keras_model_selection.py_in
##########
@@ -254,13 +261,17 @@ class MstSearch():
                  object_table=None,
                  **kwargs):
 
+        self.schema_madlib = schema_madlib
         self.model_arch_table = model_arch_table
         self.model_selection_table = model_selection_table
         self.model_selection_summary_table = add_postfix(
             model_selection_table, "_summary")
         self.model_id_list = sorted(list(set(model_id_list)))
 
+        if object_table is not None and schema_madlib not in object_table:
+            object_table = "{0}.{1}".format(schema_madlib, object_table)

Review comment:
       This also looks insecure.
   Let's add this test as well:
   
     `object_table="my_insecure_schema.madlib_vulnerability"`

##########
File path: src/ports/postgres/modules/deep_learning/madlib_keras.py_in
##########
@@ -103,6 +103,9 @@ def fit(schema_madlib, source_table, model, model_arch_table,
     else:
         accessible_gpus_for_seg = get_seg_number()*[0]
 
+    if object_table is not None:
+        object_table = "{0}.{1}".format(schema_madlib, object_table)

Review comment:
       Also:  above example would be a good one to add to the dev-check tests.  We should
make sure no future changes accidentally open up a hole where a query like this would work.

##########
File path: src/ports/postgres/modules/deep_learning/madlib_keras_custom_function.py_in
##########
@@ -61,49 +61,58 @@ def _validate_object(object, **kwargs):
         plpy.error("{0}: Invalid function object".format(module_name, e))
 
 @MinWarning("error")
-def load_custom_function(object_table, object, name, description=None, **kwargs):
-    object_table = quote_ident(object_table)
+def load_custom_function(schema_madlib, object_table, object, name, description=None, **kwargs):
+
+    if object_table is not None:
+        object_table = "{0}.{1}".format(schema_madlib, quote_ident(object_table))
     _validate_object(object)
     _assert(name is not None,
             "{0}: function name cannot be NULL!".format(module_name))
-    if not table_exists(object_table):
-        col_defs = get_col_name_type_sql_string(CustomFunctionSchema.col_names,
-                                                CustomFunctionSchema.col_types)
-
-        sql = "CREATE TABLE {0} ({1}, PRIMARY KEY({2}))" \
-            .format(object_table, col_defs, CustomFunctionSchema.FN_NAME)
-
-        plpy.execute(sql, 0)
-        plpy.info("{0}: Created new custom function table {1}." \
-                  .format(module_name, object_table))
-    else:
-        missing_cols = columns_missing_from_table(object_table,
-                                                  CustomFunctionSchema.col_names)
-        if len(missing_cols) > 0:
-            plpy.error("{0}: Invalid custom function table {1},"
-                       " missing columns: {2}".format(module_name,
-                                                      object_table,
-                                                      missing_cols))
-
-    insert_query = plpy.prepare("INSERT INTO {0} "
-                                "VALUES(DEFAULT, $1, $2, $3);".format(object_table),
-                                CustomFunctionSchema.col_types[1:])
     try:
+        if not table_exists(object_table):
+            col_defs = get_col_name_type_sql_string(CustomFunctionSchema.col_names,
+                                                    CustomFunctionSchema.col_types)
+
+            sql = """CREATE TABLE {object_table}
+                                  ({col_defs}, PRIMARY KEY({fn_name}))
+                """.format(fn_name=CustomFunctionSchema.FN_NAME,**locals())
+
+            plpy.execute(sql, 0)
+            plpy.info("{0}: Created new custom function table {1}." \
+                      .format(module_name, object_table))
+            plpy.execute("GRANT SELECT ON {0} TO PUBLIC".format(object_table))
+        else:
+            missing_cols = columns_missing_from_table(object_table,
+                                                      CustomFunctionSchema.col_names)
+            if len(missing_cols) > 0:
+                plpy.error("{0}: Invalid custom function table {1},"
+                           " missing columns: {2}".format(module_name,
+                                                          object_table,
+                                                          missing_cols))
+
+        insert_query = plpy.prepare("INSERT INTO {object_table} "
+                                    "VALUES(DEFAULT, $1, $2, $3);".format(**locals()),
+                                    CustomFunctionSchema.col_types[1:])
+
         plpy.execute(insert_query,[name, description, object], 0)
     # spiexceptions.UniqueViolation is only supported for PG>=9.2. For
     # GP5(based of PG8.4) it cannot be used. Therefore, checking exception
     # message for duplicate key error.
     except Exception as e:
         if 'duplicate key' in e.message:
             plpy.error("Function '{0}' already exists in {1}".format(name, object_table))
+        elif 'permission denied' in e.message:
+            plpy.error("This function can only be used by a superuser.".format(name, object_table))
         plpy.error(e)
 
     plpy.info("{0}: Added function {1} to {2} table".
               format(module_name, name, object_table))
 
 @MinWarning("error")
-def delete_custom_function(object_table, id=None, name=None, **kwargs):
-    object_table = quote_ident(object_table)
+def delete_custom_function(schema_madlib, object_table, id=None, name=None, **kwargs):
+
+    if object_table is not None and schema_madlib not in object_table:

Review comment:
       ^^

##########
File path: src/ports/postgres/modules/deep_learning/madlib_keras.py_in
##########
@@ -103,6 +103,9 @@ def fit(schema_madlib, source_table, model, model_arch_table,
     else:
         accessible_gpus_for_seg = get_seg_number()*[0]
 
+    if object_table is not None:
+        object_table = "{0}.{1}".format(schema_madlib, object_table)

Review comment:
       It's important we use `quote_ident(object_table)` here, for this to be fully secure.
   Without it, the user could pass this as the input:
   
       object_table="secure_table UNION insecure_schema.malicious_table"
   
   allowing them to load and call their own malicious functions from outside madlib schema,
without gpadmin approving it.

##########
File path: src/ports/postgres/modules/deep_learning/madlib_keras_custom_function.py_in
##########
@@ -61,49 +61,58 @@ def _validate_object(object, **kwargs):
         plpy.error("{0}: Invalid function object".format(module_name, e))
 
 @MinWarning("error")
-def load_custom_function(object_table, object, name, description=None, **kwargs):
-    object_table = quote_ident(object_table)
+def load_custom_function(schema_madlib, object_table, object, name, description=None, **kwargs):
+
+    if object_table is not None:
+        object_table = "{0}.{1}".format(schema_madlib, quote_ident(object_table))
     _validate_object(object)
     _assert(name is not None,
             "{0}: function name cannot be NULL!".format(module_name))
-    if not table_exists(object_table):
-        col_defs = get_col_name_type_sql_string(CustomFunctionSchema.col_names,
-                                                CustomFunctionSchema.col_types)
-
-        sql = "CREATE TABLE {0} ({1}, PRIMARY KEY({2}))" \
-            .format(object_table, col_defs, CustomFunctionSchema.FN_NAME)
-
-        plpy.execute(sql, 0)
-        plpy.info("{0}: Created new custom function table {1}." \
-                  .format(module_name, object_table))
-    else:
-        missing_cols = columns_missing_from_table(object_table,
-                                                  CustomFunctionSchema.col_names)
-        if len(missing_cols) > 0:
-            plpy.error("{0}: Invalid custom function table {1},"
-                       " missing columns: {2}".format(module_name,
-                                                      object_table,
-                                                      missing_cols))
-
-    insert_query = plpy.prepare("INSERT INTO {0} "
-                                "VALUES(DEFAULT, $1, $2, $3);".format(object_table),
-                                CustomFunctionSchema.col_types[1:])
     try:
+        if not table_exists(object_table):
+            col_defs = get_col_name_type_sql_string(CustomFunctionSchema.col_names,
+                                                    CustomFunctionSchema.col_types)
+
+            sql = """CREATE TABLE {object_table}
+                                  ({col_defs}, PRIMARY KEY({fn_name}))
+                """.format(fn_name=CustomFunctionSchema.FN_NAME,**locals())
+
+            plpy.execute(sql, 0)
+            plpy.info("{0}: Created new custom function table {1}." \
+                      .format(module_name, object_table))
+            plpy.execute("GRANT SELECT ON {0} TO PUBLIC".format(object_table))

Review comment:
       This makes me wonder if some db admins might want to restrict access to this table
to certain people.  I guess this works as default behavior, and if they know what they're
doing they can change it
   
   If there's some way of copying the permissions from other relations in the madlib schema,
that might be a better default behavior if there are only certain users who are supposed to
be able to access the madlib schema and use madlib.  I'm not sure if anyone actually uses
it that way though.

##########
File path: src/ports/postgres/modules/deep_learning/madlib_keras_custom_function.py_in
##########
@@ -61,49 +61,58 @@ def _validate_object(object, **kwargs):
         plpy.error("{0}: Invalid function object".format(module_name, e))
 
 @MinWarning("error")
-def load_custom_function(object_table, object, name, description=None, **kwargs):
-    object_table = quote_ident(object_table)
+def load_custom_function(schema_madlib, object_table, object, name, description=None, **kwargs):
+
+    if object_table is not None:
+        object_table = "{0}.{1}".format(schema_madlib, quote_ident(object_table))
     _validate_object(object)
     _assert(name is not None,
             "{0}: function name cannot be NULL!".format(module_name))
-    if not table_exists(object_table):
-        col_defs = get_col_name_type_sql_string(CustomFunctionSchema.col_names,
-                                                CustomFunctionSchema.col_types)
-
-        sql = "CREATE TABLE {0} ({1}, PRIMARY KEY({2}))" \
-            .format(object_table, col_defs, CustomFunctionSchema.FN_NAME)
-
-        plpy.execute(sql, 0)
-        plpy.info("{0}: Created new custom function table {1}." \
-                  .format(module_name, object_table))
-    else:
-        missing_cols = columns_missing_from_table(object_table,
-                                                  CustomFunctionSchema.col_names)
-        if len(missing_cols) > 0:
-            plpy.error("{0}: Invalid custom function table {1},"
-                       " missing columns: {2}".format(module_name,
-                                                      object_table,
-                                                      missing_cols))
-
-    insert_query = plpy.prepare("INSERT INTO {0} "
-                                "VALUES(DEFAULT, $1, $2, $3);".format(object_table),
-                                CustomFunctionSchema.col_types[1:])
     try:
+        if not table_exists(object_table):
+            col_defs = get_col_name_type_sql_string(CustomFunctionSchema.col_names,
+                                                    CustomFunctionSchema.col_types)
+
+            sql = """CREATE TABLE {object_table}
+                                  ({col_defs}, PRIMARY KEY({fn_name}))
+                """.format(fn_name=CustomFunctionSchema.FN_NAME,**locals())
+
+            plpy.execute(sql, 0)
+            plpy.info("{0}: Created new custom function table {1}." \
+                      .format(module_name, object_table))
+            plpy.execute("GRANT SELECT ON {0} TO PUBLIC".format(object_table))
+        else:
+            missing_cols = columns_missing_from_table(object_table,
+                                                      CustomFunctionSchema.col_names)
+            if len(missing_cols) > 0:
+                plpy.error("{0}: Invalid custom function table {1},"
+                           " missing columns: {2}".format(module_name,
+                                                          object_table,
+                                                          missing_cols))
+
+        insert_query = plpy.prepare("INSERT INTO {object_table} "
+                                    "VALUES(DEFAULT, $1, $2, $3);".format(**locals()),
+                                    CustomFunctionSchema.col_types[1:])
+
         plpy.execute(insert_query,[name, description, object], 0)
     # spiexceptions.UniqueViolation is only supported for PG>=9.2. For
     # GP5(based of PG8.4) it cannot be used. Therefore, checking exception
     # message for duplicate key error.
     except Exception as e:
         if 'duplicate key' in e.message:
             plpy.error("Function '{0}' already exists in {1}".format(name, object_table))
+        elif 'permission denied' in e.message:
+            plpy.error("This function can only be used by a superuser.".format(name, object_table))

Review comment:
       I guess technically they just need INSERT or CREATE permission on the madlib schema.
 But hopefully they will be equivalent, assuming the admin doesn't mess with the permissions
on it.
   
   I can't think of a reason why they would, but we should probably warn them in the docs
not to give CREATE or INSERT permission on madlib schema to anyone besides a superuser, since
it will give them equivalent privileges. 




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



Mime
View raw message