Realized a stupid bug in ToyGraphInputRDD around reduceByKey(). Fixed. Thanks @dalaro. CTR.
Project: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/commit/19beece8
Tree: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/tree/19beece8
Diff: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/diff/19beece8
Branch: refs/heads/TINKERPOP3-860
Commit: 19beece8f774f4b71a0895dd40e29a32c68762fd
Parents: 641a8cb
Author: Marko A. Rodriguez <okrammarko@gmail.com>
Authored: Thu Dec 3 16:20:42 2015 -0700
Committer: Marko A. Rodriguez <okrammarko@gmail.com>
Committed: Thu Dec 3 16:20:42 2015 -0700
----------------------------------------------------------------------
.../gremlin/spark/process/computer/SparkHadoopGraphProvider.java | 2 --
.../tinkerpop/gremlin/spark/structure/io/ToyGraphInputRDD.java | 2 +-
2 files changed, 1 insertion(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/19beece8/spark-gremlin/src/test/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkHadoopGraphProvider.java
----------------------------------------------------------------------
diff --git a/spark-gremlin/src/test/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkHadoopGraphProvider.java b/spark-gremlin/src/test/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkHadoopGraphProvider.java
index de5a60d..618904a 100644
--- a/spark-gremlin/src/test/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkHadoopGraphProvider.java
+++ b/spark-gremlin/src/test/java/org/apache/tinkerpop/gremlin/spark/process/computer/SparkHadoopGraphProvider.java
@@ -48,10 +48,8 @@ public final class SparkHadoopGraphProvider extends HadoopGraphProvider {
config.put(Constants.GREMLIN_SPARK_PERSIST_CONTEXT, true); // this makes the test suite go really fast
if (null != loadGraphWith &&
!test.equals(BulkLoaderVertexProgramTest.class) &&
- !test.equals(PageRankVertexProgramTest.class) &&
RANDOM.nextBoolean()) {
config.put(Constants.GREMLIN_SPARK_GRAPH_INPUT_RDD, ToyGraphInputRDD.class.getCanonicalName());
- config.put(Constants.GREMLIN_HADOOP_GRAPH_INPUT_FORMAT, InputRDDFormat.class.getCanonicalName());
}
/// spark configuration
config.put("spark.master", "local[4]");
http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/19beece8/spark-gremlin/src/test/java/org/apache/tinkerpop/gremlin/spark/structure/io/ToyGraphInputRDD.java
----------------------------------------------------------------------
diff --git a/spark-gremlin/src/test/java/org/apache/tinkerpop/gremlin/spark/structure/io/ToyGraphInputRDD.java b/spark-gremlin/src/test/java/org/apache/tinkerpop/gremlin/spark/structure/io/ToyGraphInputRDD.java
index ea3636f..ff253e3 100644
--- a/spark-gremlin/src/test/java/org/apache/tinkerpop/gremlin/spark/structure/io/ToyGraphInputRDD.java
+++ b/spark-gremlin/src/test/java/org/apache/tinkerpop/gremlin/spark/structure/io/ToyGraphInputRDD.java
@@ -68,6 +68,6 @@ public final class ToyGraphInputRDD implements InputRDD {
} else
throw new IllegalArgumentException("No legal toy graph was provided to load: " + configuration.getProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION));
- return sparkContext.parallelize(vertices).mapToPair(vertex -> new Tuple2<>(vertex.get().id(), vertex));
+ return sparkContext.parallelize(vertices).mapToPair(vertex -> new Tuple2<>(vertex.get().id(), vertex)).reduceByKey((a, b) -> a); // if this is not done, then the graph is partitioned and you can have duplicate vertices;
}
}
|