mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From s..@apache.org
Subject svn commit: r1071369 - in /mahout/trunk/core/src: main/java/org/apache/mahout/math/hadoop/similarity/RowSimilarityJob.java test/java/org/apache/mahout/math/hadoop/similarity/TestRowSimilarityJob.java
Date Wed, 16 Feb 2011 19:28:29 GMT
Author: ssc
Date: Wed Feb 16 19:28:29 2011
New Revision: 1071369

URL: http://svn.apache.org/viewvc?rev=1071369&view=rev
Log:
MAHOUT-610 Not all Cooccurrences provided to SimilarityReducer

Modified:
    mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/RowSimilarityJob.java
    mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/TestRowSimilarityJob.java

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/RowSimilarityJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/RowSimilarityJob.java?rev=1071369&r1=1071368&r2=1071369&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/RowSimilarityJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/RowSimilarityJob.java Wed Feb 16 19:28:29 2011
@@ -82,6 +82,10 @@ public class RowSimilarityJob extends Ab
 
   private static final int DEFAULT_MAX_SIMILARITIES_PER_ROW = 100;
 
+  public static enum Counter {
+    COOCCURRENCES, SIMILAR_ROWS
+  }
+
   public static void main(String[] args) throws Exception {
     ToolRunner.run(new RowSimilarityJob(), args);
   }
@@ -254,6 +258,7 @@ public class RowSimilarityJob extends Ab
       WeightedRowPair rowPair = new WeightedRowPair();
       Cooccurrence coocurrence = new Cooccurrence();
 
+      int numPairs = 0;
       for (int n = 0; n < weightedOccurrences.length; n++) {
         int rowA = weightedOccurrences[n].getRow();
         double weightA = weightedOccurrences[n].getWeight();
@@ -262,11 +267,17 @@ public class RowSimilarityJob extends Ab
           int rowB = weightedOccurrences[m].getRow();
           double weightB = weightedOccurrences[m].getWeight();
           double valueB = weightedOccurrences[m].getValue();
-          rowPair.set(rowA, rowB, weightA, weightB);
+          if(rowA <= rowB){
+        	  rowPair.set(rowA, rowB, weightA, weightB);
+          } else {
+        	  rowPair.set(rowB, rowA, weightB, weightA);
+          }
           coocurrence.set(column.get(), valueA, valueB);
           ctx.write(rowPair, coocurrence);
+          numPairs++;
         }
       }
+      ctx.getCounter(Counter.COOCCURRENCES).increment(numPairs);
     }
   }
 
@@ -299,6 +310,7 @@ public class RowSimilarityJob extends Ab
           rowPair.getWeightB(), numberOfColumns);
 
       if (!Double.isNaN(similarityValue)) {
+        ctx.getCounter(Counter.SIMILAR_ROWS).increment(1);
         SimilarityMatrixEntryKey key = new SimilarityMatrixEntryKey();
         MatrixEntryWritable entry = new MatrixEntryWritable();
         entry.setVal(similarityValue);

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/TestRowSimilarityJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/TestRowSimilarityJob.java?rev=1071369&r1=1071368&r2=1071369&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/TestRowSimilarityJob.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/similarity/TestRowSimilarityJob.java Wed Feb 16 19:28:29 2011
@@ -26,6 +26,7 @@ import org.apache.hadoop.conf.Configurat
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.mapreduce.Counter;
 import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.hadoop.mapreduce.Reducer;
 import org.apache.mahout.common.MahoutTestCase;
@@ -133,36 +134,63 @@ public final class TestRowSimilarityJob 
   public void testCooccurrencesMapper() throws Exception {
     Mapper<VarIntWritable,WeightedOccurrenceArray,WeightedRowPair,Cooccurrence>.Context context =
       EasyMock.createMock(Mapper.Context.class);
+    Counter counter = EasyMock.createMock(Counter.class);
 
     context.write(new WeightedRowPair(34, 34, 1.0, 1.0), new Cooccurrence(12, 0.5, 0.5));
     context.write(new WeightedRowPair(34, 56, 1.0, 3.0), new Cooccurrence(12, 0.5, 1.0));
     context.write(new WeightedRowPair(56, 56, 3.0, 3.0), new Cooccurrence(12, 1.0, 1.0));
+    EasyMock.expect(context.getCounter(RowSimilarityJob.Counter.COOCCURRENCES)).andReturn(counter);
+    counter.increment(3);
 
-    EasyMock.replay(context);
+    EasyMock.replay(context, counter);
 
     WeightedOccurrenceArray weightedOccurrences = new WeightedOccurrenceArray(new WeightedOccurrence[] {
         new WeightedOccurrence(34, 0.5, 1.0), new WeightedOccurrence(56, 1.0, 3.0) });
 
     new RowSimilarityJob.CooccurrencesMapper().map(new VarIntWritable(12), weightedOccurrences, context);
 
-    EasyMock.verify(context);
+    EasyMock.verify(context, counter);
   }
 
+  public void testCooccurrencesMapperOrdering() throws Exception {
+    Mapper<VarIntWritable,WeightedOccurrenceArray,WeightedRowPair,Cooccurrence>.Context context =
+      EasyMock.createMock(Mapper.Context.class);
+    Counter counter = EasyMock.createMock(Counter.class);
+
+    context.write(new WeightedRowPair(34, 34, 1.0, 1.0), new Cooccurrence(12, 0.5, 0.5));
+    context.write(new WeightedRowPair(34, 56, 1.0, 3.0), new Cooccurrence(12, 0.5, 1.0));
+    context.write(new WeightedRowPair(56, 56, 3.0, 3.0), new Cooccurrence(12, 1.0, 1.0));
+    EasyMock.expect(context.getCounter(RowSimilarityJob.Counter.COOCCURRENCES)).andReturn(counter);
+    counter.increment(3);
+
+    EasyMock.replay(context, counter);
+
+    WeightedOccurrenceArray weightedOccurrences = new WeightedOccurrenceArray(new WeightedOccurrence[] {
+        new WeightedOccurrence(56, 1.0, 3.0), new WeightedOccurrence(34, 0.5, 1.0) });
+
+    new RowSimilarityJob.CooccurrencesMapper().map(new VarIntWritable(12), weightedOccurrences, context);
+
+    EasyMock.verify(context, counter);
+  }
+
+
   /**
    * Tests {@link SimilarityReducer}
    */
   @Test
   public void testSimilarityReducer() throws Exception {
-
     Reducer<WeightedRowPair,Cooccurrence,SimilarityMatrixEntryKey,MatrixEntryWritable>.Context context =
       EasyMock.createMock(Reducer.Context.class);
+    Counter counter = EasyMock.createMock(Counter.class);
 
     context.write(EasyMock.eq(new SimilarityMatrixEntryKey(12, 0.5)),
         MathHelper.matrixEntryMatches(12, 34, 0.5));
     context.write(EasyMock.eq(new SimilarityMatrixEntryKey(34, 0.5)),
         MathHelper.matrixEntryMatches(34, 12, 0.5));
+    EasyMock.expect(context.getCounter(RowSimilarityJob.Counter.SIMILAR_ROWS)).andReturn(counter);
+    counter.increment(1);
 
-    EasyMock.replay(context);
+    EasyMock.replay(context, counter);
 
     SimilarityReducer reducer = new SimilarityReducer();
     setField(reducer, "similarity", new DistributedTanimotoCoefficientVectorSimilarity());
@@ -170,7 +198,7 @@ public final class TestRowSimilarityJob 
     reducer.reduce(new WeightedRowPair(12, 34, 3.0, 3.0), Arrays.asList(new Cooccurrence(56, 1.0, 2.0),
         new Cooccurrence(78, 3.0, 6.0)), context);
 
-    EasyMock.verify(context);
+    EasyMock.verify(context, counter);
   }
 
   /**
@@ -179,13 +207,15 @@ public final class TestRowSimilarityJob 
    */
   @Test
   public void testSimilarityReducerSelfSimilarity() throws Exception {
-
     Reducer<WeightedRowPair,Cooccurrence,SimilarityMatrixEntryKey,MatrixEntryWritable>.Context context =
       EasyMock.createMock(Reducer.Context.class);
+    Counter counter = EasyMock.createMock(Counter.class);
 
     context.write(EasyMock.eq(new SimilarityMatrixEntryKey(90, 1.0)), MathHelper.matrixEntryMatches(90, 90, 1.0));
+    EasyMock.expect(context.getCounter(RowSimilarityJob.Counter.SIMILAR_ROWS)).andReturn(counter);
+    counter.increment(1);
 
-    EasyMock.replay(context);
+    EasyMock.replay(context, counter);
 
     SimilarityReducer reducer = new SimilarityReducer();
     setField(reducer, "similarity", new DistributedTanimotoCoefficientVectorSimilarity());
@@ -193,7 +223,7 @@ public final class TestRowSimilarityJob 
     reducer.reduce(new WeightedRowPair(90, 90, 2.0, 2.0), Arrays.asList(new Cooccurrence(56, 1.0, 2.0),
         new Cooccurrence(78, 3.0, 6.0)), context);
 
-    EasyMock.verify(context);
+    EasyMock.verify(context, counter);
   }
 
   /**



Mime
View raw message