tinkerpop-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ok...@apache.org
Subject [4/6] incubator-tinkerpop git commit: Allows SparkGraphComputer to Preserve Spark Contexts between Jobs
Date Wed, 07 Oct 2015 21:02:16 GMT
Allows SparkGraphComputer to Preserve Spark Contexts between Jobs

Previously every traversal on a SparkGraphComputer would require
creating a brand new Spark Context. This adds a lot of overhead for
doing a bunch of operations in a row. To alleviate this we allow the
option of passing in a spark.persistContext parameter as part of the
HadoopConf. When this is enabled Spark Contexts will not be closed at
the end of the job. The getOrCreate context then allows a new
SparkGraphComputer to get a handle on a previously running context.


Project: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/commit/9fb25391
Tree: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/tree/9fb25391
Diff: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/diff/9fb25391

Branch: refs/heads/group_step_2
Commit: 9fb253910bafb9e1c4d49b7525b49b4f0de63344
Parents: 15563be
Author: Russell Spitzer <Russell.Spitzer@gmail.com>
Authored: Tue Oct 6 11:27:29 2015 -0700
Committer: Marko A. Rodriguez <okrammarko@gmail.com>
Committed: Wed Oct 7 08:23:12 2015 -0600

----------------------------------------------------------------------
 .../spark/process/computer/SparkGraphComputer.java        | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/9fb25391/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java
----------------------------------------------------------------------
diff --git a/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java
b/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java
index d31631b..f5cb16a 100644
--- a/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java
+++ b/spark-gremlin/src/main/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkGraphComputer.java
@@ -27,6 +27,7 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.mapreduce.InputFormat;
 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
 import org.apache.spark.SparkConf;
+import org.apache.spark.SparkContext;
 import org.apache.spark.api.java.JavaPairRDD;
 import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.tinkerpop.gremlin.hadoop.Constants;
@@ -110,7 +111,9 @@ public final class SparkGraphComputer extends AbstractHadoopGraphComputer
{
             // create the spark configuration from the graph computer configuration
             hadoopConfiguration.forEach(entry -> sparkConfiguration.set(entry.getKey(),
entry.getValue()));
             // execute the vertex program and map reducers and if there is a failure, auto-close
the spark context
-            try (final JavaSparkContext sparkContext = new JavaSparkContext(sparkConfiguration))
{
+            JavaSparkContext sparkContext = null;
+            try  {
+                sparkContext = new JavaSparkContext(SparkContext.getOrCreate(sparkConfiguration));
                 // add the project jars to the cluster
                 this.loadJars(sparkContext, hadoopConfiguration);
                 // create a message-passing friendly rdd from the input rdd
@@ -187,6 +190,11 @@ public final class SparkGraphComputer extends AbstractHadoopGraphComputer
{
                 finalMemory.setRuntime(System.currentTimeMillis() - startTime);
                 return new DefaultComputerResult(HadoopHelper.getOutputGraph(this.hadoopGraph,
this.resultGraph, this.persist), finalMemory.asImmutable());
             }
+            finally
+            {
+                if (sparkContext != null && !hadoopGraph.configuration().getBoolean(Constants.GREMLIN_SPARK_PERSIST_CONTEXT,
false))
+                    sparkContext.stop();
+            }
         });
     }
 


Mime
View raw message