madlib-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From GitBox <...@apache.org>
Subject [GitHub] [madlib] orhankislal commented on a change in pull request #521: utilities/debug.py_in
Date Wed, 07 Oct 2020 14:32:18 GMT

orhankislal commented on a change in pull request #521:
URL: https://github.com/apache/madlib/pull/521#discussion_r501049285



##########
File path: src/ports/postgres/modules/utilities/debug.py_in
##########
@@ -0,0 +1,142 @@
+import plpy as plpy_orig
+import time
+from deep_learning.madlib_keras_model_selection import ModelSelectionSchema
+from deep_learning.madlib_keras_helper import DISTRIBUTION_KEY_COLNAME
+
+mst_key_col = ModelSelectionSchema.MST_KEY
+dist_key_col = DISTRIBUTION_KEY_COLNAME
+
+start_times = dict()
+timings_enabled = False
+
+def start_timing(msg, force=False):
+    if timings_enabled or force:
+        start_times[msg] = time.time()
+        plpy_orig.info("|_{}_time_HDR|Elapsed (s)|Current|Current (s)|Start|Start (s)|".format(msg))
+
+def print_timing(msg, force=False):
+    if timings_enabled or force:
+        try:
+            start_time = start_times[msg]
+        except:
+            raise Exception(
+                "print_timing({msg}) called with no start_timing({msg})!".format(msg=msg)
+            )
+        current_time = time.time() 
+        plpy_orig.info(
+            '|_{0}_time|{1}|{2}|{3}|{4}|{5}'.format(
+                msg,
+                current_time - start_time,
+                time.ctime(current_time),
+                current_time,
+                time.ctime(start_time),
+                start_time
+            )
+        )
+
+mst_keys_enabled = False
+def print_mst_keys(table, label, force=False):
+    if not (mst_keys_enabled or force):
+        return
+
+    res = plpy_orig.execute("""
+        SELECT gp_segment_id AS seg_id,
+               {mst_key_col},
+               {dist_key_col}
+        FROM {table} ORDER BY {dist_key_col}
+    """.format(dist_key_col=dist_key_col,
+               table=table,
+               mst_key_col=mst_key_col))
+
+    plpy_orig.info("|_MST_KEYS_{label}_HDR|mst_key|seg_id|dist_key|table".format(**locals()))
+    for r in res:
+        seg_id = r['seg_id']
+        mst_key = r['mst_key']
+        dist_key = r[dist_key_col]
+        plpy_orig.info("|_MST_KEYS_{label}|{mst_key}|{seg_id}|{dist_key}|{table}".format(**locals()))
+
+plpy_execute_enabled = False
+def plpy_execute(*args, **kwargs):
+    """ debug.plpy.execute(sql, ..., force=False)
+
+        Replace plpy.execute(sql, ...) with
+        debug.plpy.execute(sql, ...) to debug
+        a query.  Shows the query itself, the
+        EXPLAIN of it, and how long the query
+        takes to execute.
+    """
+
+    force = False
+    if 'force' in kwargs:
+        del kwargs['force']
+        force = force['force']

Review comment:
       Line 69 sets `force = False`, so `force['force']` here indexes a boolean and will raise
a TypeError. Maybe the intention was to do `force = kwargs['force']` first and then `del kwargs['force']`?

##########
File path: src/ports/postgres/modules/utilities/debug.py_in
##########
@@ -0,0 +1,142 @@
+import plpy as plpy_orig
+import time
+from deep_learning.madlib_keras_model_selection import ModelSelectionSchema
+from deep_learning.madlib_keras_helper import DISTRIBUTION_KEY_COLNAME
+
+mst_key_col = ModelSelectionSchema.MST_KEY
+dist_key_col = DISTRIBUTION_KEY_COLNAME
+
+start_times = dict()
+timings_enabled = False

Review comment:
       I am not clear on how this and the other global variables in this file, such as `mst_keys_enabled`,
are meant to be used. I don't see any statement that overrides them, and they are not part of a class, so
the caller cannot manipulate them either.
   
   I assume the developer just changes these by hand to enable debugging for every call of the
corresponding function, which would be an OK use case, I guess.

##########
File path: src/ports/postgres/modules/utilities/debug.py_in
##########
@@ -0,0 +1,142 @@
+import plpy as plpy_orig
+import time
+from deep_learning.madlib_keras_model_selection import ModelSelectionSchema
+from deep_learning.madlib_keras_helper import DISTRIBUTION_KEY_COLNAME
+
+mst_key_col = ModelSelectionSchema.MST_KEY
+dist_key_col = DISTRIBUTION_KEY_COLNAME
+
+start_times = dict()
+timings_enabled = False
+
+def start_timing(msg, force=False):
+    if timings_enabled or force:
+        start_times[msg] = time.time()
+        plpy_orig.info("|_{}_time_HDR|Elapsed (s)|Current|Current (s)|Start|Start (s)|".format(msg))
+
+def print_timing(msg, force=False):
+    if timings_enabled or force:
+        try:
+            start_time = start_times[msg]
+        except:
+            raise Exception(
+                "print_timing({msg}) called with no start_timing({msg})!".format(msg=msg)
+            )
+        current_time = time.time() 
+        plpy_orig.info(
+            '|_{0}_time|{1}|{2}|{3}|{4}|{5}'.format(
+                msg,
+                current_time - start_time,
+                time.ctime(current_time),
+                current_time,
+                time.ctime(start_time),
+                start_time
+            )
+        )
+
+mst_keys_enabled = False
+def print_mst_keys(table, label, force=False):
+    if not (mst_keys_enabled or force):
+        return
+
+    res = plpy_orig.execute("""
+        SELECT gp_segment_id AS seg_id,
+               {mst_key_col},
+               {dist_key_col}
+        FROM {table} ORDER BY {dist_key_col}
+    """.format(dist_key_col=dist_key_col,
+               table=table,
+               mst_key_col=mst_key_col))
+
+    plpy_orig.info("|_MST_KEYS_{label}_HDR|mst_key|seg_id|dist_key|table".format(**locals()))
+    for r in res:
+        seg_id = r['seg_id']
+        mst_key = r['mst_key']
+        dist_key = r[dist_key_col]
+        plpy_orig.info("|_MST_KEYS_{label}|{mst_key}|{seg_id}|{dist_key}|{table}".format(**locals()))
+
+plpy_execute_enabled = False
+def plpy_execute(*args, **kwargs):
+    """ debug.plpy.execute(sql, ..., force=False)
+
+        Replace plpy.execute(sql, ...) with
+        debug.plpy.execute(sql, ...) to debug
+        a query.  Shows the query itself, the
+        EXPLAIN of it, and how long the query
+        takes to execute.
+    """
+
+    force = False
+    if 'force' in kwargs:
+        del kwargs['force']
+        force = force['force']
+
+    plpy = plpy_orig # override global plpy,
+                     # to avoid infinite recursion
+
+    if not (plpy_execute_enabled or force):
+        return plpy.execute(*args, **kwargs)
+
+    if len(args) > 0:
+        sql = args[0]
+    else:
+        raise TypeError('debug.plpy.execute() takes at least 1 parameter, 0 passed')
+
+    if type(sql) == str: # can't print if a PLyPlan object
+        plpy.info(sql)
+
+        # Print EXPLAIN of sql command
+        res = plpy.execute("EXPLAIN " + sql, *args[1:], **kwargs)
+        for r in res:
+            plpy.info(r['QUERY PLAN'])
+
+    # Run actual sql command, with timing
+    start = time.time()
+    res = plpy.execute(*args, **kwargs)
+
+    # Print how long execution of query took
+    plpy.info("Query took {0}s".format(time.time() - start))
+    if res:
+        plpy.info("Query returned {} row(s)".format(len(res)))
+    else:
+        plpy.info("Query returned 0 rows")
+    return res
+
+plpy_info_enabled = False
+def plpy_info(*args, **kwargs):
+    """ plpy_info(..., force=False)
+
+      plpy.info() if enabled, otherwise do nothing   
+    """
+
+    force = False
+    if 'force' in kwargs:
+        del kwargs['force']
+        force = kwargs['force']

Review comment:
       The previous line already deleted `kwargs['force']`, so this lookup will raise a KeyError; the value needs to be read before the key is deleted.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



Mime
View raw message