madlib-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From iyerr3 <...@git.apache.org>
Subject [GitHub] madlib pull request #289: RF: Add impurity variable importance
Date Tue, 10 Jul 2018 21:23:47 GMT
Github user iyerr3 commented on a diff in the pull request:

    https://github.com/apache/madlib/pull/289#discussion_r201498820
  
    --- Diff: src/ports/postgres/modules/recursive_partitioning/random_forest.py_in ---
    @@ -1333,42 +1368,69 @@ def _create_group_table(
     # -------------------------------------------------------------------------
     
     
    -def _create_empty_result_table(schema_madlib, output_table_name):
    +def _create_empty_result_table(schema_madlib, output_table_name, importance):
         """Create the result table for all trees in the forest"""
    +    impurity_var_imp_str = """, impurity_var_importance double precision[]);""" if importance
else ");"
    +
         sql_create_empty_result_table = """
                 CREATE TABLE {output_table_name} (
                     gid         integer,
                     sample_id   integer,
    -                tree        {schema_madlib}.bytea8);
    +                tree        {schema_madlib}.bytea8
    +                {impurity_var_imp_str}
                 """.format(**locals())
         plpy.notice("sql_create_empty_result_table:\n" + sql_create_empty_result_table)
         plpy.execute(sql_create_empty_result_table)
     # ------------------------------------------------------------
     
     
     def _insert_into_result_table(schema_madlib, tree_states, output_table_name,
    -                              grp_key_to_grp_cols, sample_id):
    +                              grp_key_to_grp_cols, sample_id, importance, grouping_cols):
         """Insert one tree to result table"""
    +
    +    impurity_var_imp_str = ''
    +    importance_query = ''
    +    importance_results = ''
    --- End diff --
    
    IMO these variables in else clause is easier to read. Also `importance_results` is limited
to the `if` block and does not require to be defined outside that scope. 


---

Mime
View raw message