madlib-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From orhankislal <...@git.apache.org>
Subject [GitHub] incubator-madlib pull request #113: Graph: Add grouping support to SSSP
Date Tue, 11 Apr 2017 01:32:59 GMT
Github user orhankislal commented on a diff in the pull request:

    https://github.com/apache/incubator-madlib/pull/113#discussion_r110799252
  
    --- Diff: src/ports/postgres/modules/graph/sssp.py_in ---
    @@ -137,45 +231,131 @@ def graph_sssp(schema_madlib, vertex_table, vertex_id, edge_table,
     			<!''!>)
     		plpy.execute(sql_index)
     
    -		# The source can be reached with 0 cost and it has itself as the parent.
    -		plpy.execute(
    -			""" INSERT INTO {oldupdate}
    -				VALUES({source_vertex},0,{source_vertex})
    -			""".format(**locals()))
    +		# The initialization step is quite different when grouping is involved
    +		# since not every group (subgraph) will have the same set of vertices.
    +
    +		# Example:
    +		# Assume there are two grouping columns g1 and g2
    +		# g1 values are 0 or 1. g2 values are 5 or 6
    +		if grouping_cols is not None:
    +
    +			# Find every distinct group
    +			# Example: This will give us (0,5), (0,6), (1,5) and (1,6)
    +			groups = plpy.execute(
    +				"""SELECT array_agg(DISTINCT ({grouping_cols})) AS gsql
    +				FROM {edge_table} """.format(**locals()))[0]['gsql']
    +
    +			# For each group initialize the output table.
    +			for i in range(0,len(groups)):
    +				gslist = []
    +
    +				# If we have more than 1 grouping columns,
    +				# we have to prepare them induvidually
    +				if len(glist) > 1:
    +					gi = groups[i][1:-1]
    + 					gvals = _string_to_array(gi)
    + 					for j in range(0,len(glist)):
    +						gslist.append(glist[j] + " = " + gvals[j])
    +
    +					# Prepare the sql check for a particular group
    +					# Example: gcomp will be "g1 = 0 AND g2 = 5"
    +					gcomp = " AND ".join(gslist)
    + 				else:
    + 					gi = groups[i]
    + 					gcomp = glist[0] + " = " + str(gi)
    +
    +				# Find the vertices that are involved in this group
    +				plpy.execute(
    +					""" INSERT INTO {out_table}
    +					SELECT DISTINCT ON ({vertex_id})
    +						{grp_comma} {vertex_id} AS {vertex_id},
    +						{init_w} AS {weight},
    +					NULL::INT AS parent
    +					FROM (
    +						SELECT {src} AS {vertex_id} {comma_grp}
    +						FROM {edge_table} WHERE {gcomp}
    +						UNION
    +						SELECT {dest} AS {vertex_id} {comma_grp}
    +						FROM {edge_table} WHERE {gcomp}
    +						) x
    +					WHERE {vertex_id} IS NOT NULL
    --- End diff --
    
    Thanks for catching this. I was able to fix it using the same method from your pagerank
PR.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---

Mime
View raw message