mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From s..@apache.org
Subject svn commit: r1213291 - in /mahout/trunk/core/src: main/java/org/apache/mahout/graph/AdjacencyMatrixJob.java test/java/org/apache/mahout/graph/AdjacencyMatrixJobTest.java test/java/org/apache/mahout/graph/preprocessing/
Date Mon, 12 Dec 2011 16:08:37 GMT
Author: ssc
Date: Mon Dec 12 16:08:36 2011
New Revision: 1213291

URL: http://svn.apache.org/viewvc?rev=1213291&view=rev
Log:
MAHOUT-924 Allow creation of symmetric adjacency matrices

Added:
    mahout/trunk/core/src/test/java/org/apache/mahout/graph/AdjacencyMatrixJobTest.java
      - copied, changed from r1212189, mahout/trunk/core/src/test/java/org/apache/mahout/graph/preprocessing/AdjacencyMatrixJobTest.java
Removed:
    mahout/trunk/core/src/test/java/org/apache/mahout/graph/preprocessing/
Modified:
    mahout/trunk/core/src/main/java/org/apache/mahout/graph/AdjacencyMatrixJob.java

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/graph/AdjacencyMatrixJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/graph/AdjacencyMatrixJob.java?rev=1213291&r1=1213290&r2=1213291&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/graph/AdjacencyMatrixJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/graph/AdjacencyMatrixJob.java Mon Dec
12 16:08:36 2011
@@ -31,15 +31,19 @@ import org.apache.hadoop.mapreduce.Job;
 import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
 import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
+import org.apache.hadoop.util.ToolRunner;
 import org.apache.mahout.common.AbstractJob;
 import org.apache.mahout.common.HadoopUtil;
 import org.apache.mahout.common.Pair;
 import org.apache.mahout.common.iterator.FileLineIterable;
 import org.apache.mahout.common.iterator.sequencefile.SequenceFileIterable;
 import org.apache.mahout.common.mapreduce.VectorSumReducer;
-import org.apache.mahout.math.RandomAccessSparseVector;
+import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.SequentialAccessSparseVector;
 import org.apache.mahout.math.VectorWritable;
 import org.apache.mahout.math.map.OpenIntIntHashMap;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
 import java.io.InputStream;
@@ -47,16 +51,17 @@ import java.util.Map;
 import java.util.regex.Pattern;
 
 /**
- * <p>Distributed computation of the adjacency matrix of a directed graph, see http://en.wikipedia.org/wiki/Adjacency_matrix
+ * <p>Distributed computation of the adjacency matrix of a graph, see http://en.wikipedia.org/wiki/Adjacency_matrix
  *
  * <p>This job outputs {@link org.apache.hadoop.io.SequenceFile}s an {@link IntWritable}
as key and a {@link VectorWritable}  as value</p>
  *
  * <p>Command line arguments specific to this class are:</p>
  *
  * <ol>
- * <li>--output=(path): output path where the resulting matrix should be written</li>
- * <li>--vertices=(path): file containing a list of all vertices</li>
- * <li>--edges=(path): Directory containing edges of the graph</li>
+ *   <li>--output=(path): output path where the resulting matrix and the number of
vertices should be written</li>
+ *   <li>--vertices=(path): file containing a list of all vertices</li>
+ *   <li>--edges=(path): Directory containing edges of the graph</li>
+ *   <li>--symmetric = (boolean) produce a symmetric adjacency matrix (corresponds
to an undirected graph)</li>
  * </ol>
  *
  * <p>General command line options are documented in {@link AbstractJob}.</p>
@@ -65,31 +70,46 @@ import java.util.regex.Pattern;
  */
 public class AdjacencyMatrixJob extends AbstractJob {
 
+  private static final Logger log = LoggerFactory.getLogger(AdjacencyMatrixJob.class);
+
   public static final String NUM_VERTICES = "numVertices.bin";
   public static final String ADJACENCY_MATRIX = "adjacencyMatrix";
   public static final String VERTEX_INDEX = "vertexIndex";
 
   static final String NUM_VERTICES_PARAM = AdjacencyMatrixJob.class.getName() + ".numVertices";
   static final String VERTEX_INDEX_PARAM = AdjacencyMatrixJob.class.getName() + ".vertexIndex";
+  static final String SYMMETRIC_PARAM = AdjacencyMatrixJob.class.getName() + ".symmetric";
+
+  public static void main(String[] args) throws Exception {
+    ToolRunner.run(new AdjacencyMatrixJob(), args);
+  }
 
   @Override
   public int run(String[] args) throws Exception {
 
     addOption("vertices", null, "a text file containing all vertices of the graph (one per
line)", true);
-    addOption("edges", null, "text files containing the edges of the graph (vertexA,vertextB
per line)", true);
+    addOption("edges", null, "text files containing the edges of the graph (vertexA,vertexB
per line)", true);
+    addOption("symmetric", null, "produce a symmetric adjacency matrix (corresponds to an
undirected graph)",
+        String.valueOf(false));
+
     addOutputOption();
 
     Map<String, String> parsedArgs = parseArguments(args);
+    if (parsedArgs == null) {
+      return -1;
+    }
 
     Path vertices = new Path(parsedArgs.get("--vertices"));
     Path edges = new Path(parsedArgs.get("--edges"));
+    boolean symmetric = Boolean.parseBoolean(parsedArgs.get("--symmetric"));
 
+    log.info("Indexing vertices sequentially, this might take a while...");
     int numVertices = indexVertices(vertices, getOutputPath(VERTEX_INDEX));
 
     HadoopUtil.writeInt(numVertices, getOutputPath(NUM_VERTICES), getConf());
-
     Preconditions.checkArgument(numVertices > 0);
 
+    log.info("Found " + numVertices + " vertices, creating adjacency matrix...");
     Job createAdjacencyMatrix = prepareJob(edges, getOutputPath(ADJACENCY_MATRIX), TextInputFormat.class,
         VectorizeEdgesMapper.class, IntWritable.class, VectorWritable.class, VectorSumReducer.class,
         IntWritable.class, VectorWritable.class, SequenceFileOutputFormat.class);
@@ -97,6 +117,7 @@ public class AdjacencyMatrixJob extends 
     Configuration createAdjacencyMatrixConf = createAdjacencyMatrix.getConfiguration();
     createAdjacencyMatrixConf.set(NUM_VERTICES_PARAM, String.valueOf(numVertices));
     createAdjacencyMatrixConf.set(VERTEX_INDEX_PARAM, getOutputPath(VERTEX_INDEX).toString());
+    createAdjacencyMatrixConf.setBoolean(SYMMETRIC_PARAM, symmetric);
     createAdjacencyMatrix.waitForCompletion(true);
 
     return 0;
@@ -133,6 +154,7 @@ public class AdjacencyMatrixJob extends 
 
     private int numVertices;
     private OpenIntIntHashMap vertexIDsToIndex;
+    private boolean symmetric;
 
     private final IntWritable row = new IntWritable();
 
@@ -142,6 +164,7 @@ public class AdjacencyMatrixJob extends 
     protected void setup(Context ctx) throws IOException, InterruptedException {
       Configuration conf = ctx.getConfiguration();
       numVertices = Integer.parseInt(conf.get(NUM_VERTICES_PARAM));
+      symmetric = conf.getBoolean(SYMMETRIC_PARAM, false);
       Path vertexIndexPath = new Path(conf.get(VERTEX_INDEX_PARAM));
       vertexIDsToIndex = new OpenIntIntHashMap(numVertices);
       for (Pair<IntWritable,IntWritable> indexAndVertexID :
@@ -157,13 +180,19 @@ public class AdjacencyMatrixJob extends 
       String[] tokens = SEPARATOR.split(line.toString());
       int rowIndex = vertexIDsToIndex.get(Integer.parseInt(tokens[0]));
       int columnIndex = vertexIDsToIndex.get(Integer.parseInt(tokens[1]));
-      RandomAccessSparseVector partialTransitionMatrixRow = new RandomAccessSparseVector(numVertices,
1);
 
+      Vector partialTransitionMatrixRow = new SequentialAccessSparseVector(numVertices, 1);
       row.set(rowIndex);
       partialTransitionMatrixRow.setQuick(columnIndex, 1);
-
       ctx.write(row, new VectorWritable(partialTransitionMatrixRow));
+
+      if (symmetric && rowIndex != columnIndex) {
+        partialTransitionMatrixRow = new SequentialAccessSparseVector(numVertices, 1);
+        row.set(columnIndex);
+        partialTransitionMatrixRow.setQuick(rowIndex, 1);
+        ctx.write(row, new VectorWritable(partialTransitionMatrixRow));
+      }
     }
   }
 
-}
\ No newline at end of file
+}

Copied: mahout/trunk/core/src/test/java/org/apache/mahout/graph/AdjacencyMatrixJobTest.java
(from r1212189, mahout/trunk/core/src/test/java/org/apache/mahout/graph/preprocessing/AdjacencyMatrixJobTest.java)
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/graph/AdjacencyMatrixJobTest.java?p2=mahout/trunk/core/src/test/java/org/apache/mahout/graph/AdjacencyMatrixJobTest.java&p1=mahout/trunk/core/src/test/java/org/apache/mahout/graph/preprocessing/AdjacencyMatrixJobTest.java&r1=1212189&r2=1213291&rev=1213291&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/graph/preprocessing/AdjacencyMatrixJobTest.java
(original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/graph/AdjacencyMatrixJobTest.java Mon
Dec 12 16:08:36 2011
@@ -15,13 +15,12 @@
  * limitations under the License.
  */
 
-package org.apache.mahout.graph.preprocessing;
+package org.apache.mahout.graph;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.mahout.common.HadoopUtil;
 import org.apache.mahout.common.MahoutTestCase;
-import org.apache.mahout.graph.AdjacencyMatrixJob;
 import org.apache.mahout.math.DenseMatrix;
 import org.apache.mahout.math.Matrix;
 import org.apache.mahout.math.hadoop.MathHelper;
@@ -82,4 +81,51 @@ public class AdjacencyMatrixJobTest exte
 
     MathHelper.assertMatrixEquals(expectedAdjacencyMatrix, actualAdjacencyMatrix);
   }
+
+  @Test
+  public void symmetricAdjacencyMatrix() throws Exception {
+    File verticesFile = getTestTempFile("vertices.txt");
+    File edgesFile = getTestTempFile("edges.txt");
+    File outputDir = getTestTempDir("output");
+    outputDir.delete();
+
+    Configuration conf = new Configuration();
+
+    writeLines(verticesFile, "12", "34", "56", "78");
+
+    writeLines(edgesFile,
+        "12,34",
+        "12,56",
+        "34,34",
+        "34,78",
+        "56,34",
+        "56,56",
+        "56,78");
+
+    Matrix expectedAdjacencyMatrix = new DenseMatrix(new double[][] {
+        { 0, 1, 1, 0 },
+        { 1, 1, 1, 1 },
+        { 1, 1, 1, 1 },
+        { 0, 1, 1, 0 } });
+
+    AdjacencyMatrixJob createAdjacencyMatrix = new AdjacencyMatrixJob();
+    createAdjacencyMatrix.setConf(conf);
+    createAdjacencyMatrix.run(new String[] { "--vertices", verticesFile.getAbsolutePath(),
+        "--edges", edgesFile.getAbsolutePath(), "--symmetric", String.valueOf(true),
+        "--output", outputDir.getAbsolutePath() });
+
+    int numVertices = HadoopUtil.readInt(new Path(outputDir.getAbsolutePath(), AdjacencyMatrixJob.NUM_VERTICES),
conf);
+    Matrix actualAdjacencyMatrix = MathHelper.readMatrix(conf, new Path(outputDir.getAbsolutePath(),
+        AdjacencyMatrixJob.ADJACENCY_MATRIX + "/part-r-00000"), numVertices, numVertices);
+
+    StringBuilder info = new StringBuilder();
+    info.append("expected adjacency matrix:\n");
+    info.append(MathHelper.nice(expectedAdjacencyMatrix));
+    info.append("actual adjacency matrix:\n");
+    info.append(MathHelper.nice(actualAdjacencyMatrix));
+
+    log.info(info.toString());
+
+    MathHelper.assertMatrixEquals(expectedAdjacencyMatrix, actualAdjacencyMatrix);
+  }
 }
\ No newline at end of file



Mime
View raw message