mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From s..@apache.org
Subject svn commit: r1177237 - in /mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence: ./ measures/
Date Thu, 29 Sep 2011 09:24:48 GMT
Author: ssc
Date: Thu Sep 29 09:24:47 2011
New Revision: 1177237

URL: http://svn.apache.org/viewvc?rev=1177237&view=rev
Log:
refined size constraints for pruning of similarity candidate pairs

Modified:
    mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java
    mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/CityBlockSimilarity.java
    mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/CooccurrenceCountSimilarity.java
    mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/CosineSimilarity.java
    mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/CountbasedMeasure.java
    mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/EuclideanDistanceSimilarity.java
    mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/LoglikelihoodSimilarity.java
    mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/TanimotoCoefficientSimilarity.java
    mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/VectorSimilarityMeasure.java

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java?rev=1177237&r1=1177236&r2=1177237&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java
(original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java
Thu Sep 29 09:24:47 2011
@@ -279,8 +279,9 @@ public class RowSimilarityJob extends Ab
       int numNonZeroEntriesB = numNonZeroEntries.get(occurrenceB.index());
 
       double maxValueA = maxValues.get(occurrenceA.index());
+      double maxValueB = maxValues.get(occurrenceB.index());
 
-      return similarity.consider(numNonZeroEntriesA, numNonZeroEntriesB, maxValueA, threshold);
+      return similarity.consider(numNonZeroEntriesA, numNonZeroEntriesB, maxValueA, maxValueB, threshold);
     }
 
     @Override

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/CityBlockSimilarity.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/CityBlockSimilarity.java?rev=1177237&r1=1177236&r2=1177237&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/CityBlockSimilarity.java
(original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/CityBlockSimilarity.java
Thu Sep 29 09:24:47 2011
@@ -23,9 +23,4 @@ public class CityBlockSimilarity extends
   public double similarity(double dots, double normA, double normB, int numberOfColumns) {
     return 1.0 / (1.0 + normA + normB - 2 * dots);
   }
-
-  @Override
-  public boolean consider(int numNonZeroEntriesA, int numNonZeroEntriesB, double maxValueA, double threshold) {
-    return true;
-  }
 }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/CooccurrenceCountSimilarity.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/CooccurrenceCountSimilarity.java?rev=1177237&r1=1177236&r2=1177237&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/CooccurrenceCountSimilarity.java
(original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/CooccurrenceCountSimilarity.java
Thu Sep 29 09:24:47 2011
@@ -25,7 +25,8 @@ public class CooccurrenceCountSimilarity
   }
 
   @Override
-  public boolean consider(int numNonZeroEntriesA, int numNonZeroEntriesB, double maxValueA, double threshold) {
+  public boolean consider(int numNonZeroEntriesA, int numNonZeroEntriesB, double maxValueA, double maxValueB,
+      double threshold) {
     return numNonZeroEntriesA >= threshold && numNonZeroEntriesB >= threshold;
   }
 }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/CosineSimilarity.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/CosineSimilarity.java?rev=1177237&r1=1177236&r2=1177237&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/CosineSimilarity.java
(original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/CosineSimilarity.java
Thu Sep 29 09:24:47 2011
@@ -42,7 +42,9 @@ public class CosineSimilarity implements
   }
 
   @Override
-  public boolean consider(int numNonZeroEntriesA, int numNonZeroEntriesB, double maxValueA, double threshold) {
-    return numNonZeroEntriesB >= threshold / maxValueA;
+  public boolean consider(int numNonZeroEntriesA, int numNonZeroEntriesB, double maxValueA, double maxValueB,
+      double threshold) {
+    return numNonZeroEntriesB >= threshold / maxValueA &&
+        numNonZeroEntriesA >= threshold / maxValueB;
   }
 }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/CountbasedMeasure.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/CountbasedMeasure.java?rev=1177237&r1=1177236&r2=1177237&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/CountbasedMeasure.java
(original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/CountbasedMeasure.java
Thu Sep 29 09:24:47 2011
@@ -36,4 +36,9 @@ public abstract class CountbasedMeasure 
     return 1;
   }
 
+  @Override
+  public boolean consider(int numNonZeroEntriesA, int numNonZeroEntriesB, double maxValueA, double maxValueB,
+      double threshold) {
+    return true;
+  }
 }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/EuclideanDistanceSimilarity.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/EuclideanDistanceSimilarity.java?rev=1177237&r1=1177236&r2=1177237&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/EuclideanDistanceSimilarity.java
(original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/EuclideanDistanceSimilarity.java
Thu Sep 29 09:24:47 2011
@@ -51,7 +51,8 @@ public class EuclideanDistanceSimilarity
   }
 
   @Override
-  public boolean consider(int numNonZeroEntriesA, int numNonZeroEntriesB, double maxValueA, double threshold) {
+  public boolean consider(int numNonZeroEntriesA, int numNonZeroEntriesB, double maxValueA, double maxValueB,
+      double threshold) {
     return true;
   }
 }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/LoglikelihoodSimilarity.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/LoglikelihoodSimilarity.java?rev=1177237&r1=1177236&r2=1177237&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/LoglikelihoodSimilarity.java
(original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/LoglikelihoodSimilarity.java
Thu Sep 29 09:24:47 2011
@@ -29,8 +29,4 @@ public class LoglikelihoodSimilarity ext
     return 1.0 - 1.0 / (1.0 + logLikelihood);
   }
 
-  @Override
-  public boolean consider(int numNonZeroEntriesA, int numNonZeroEntriesB, double maxValueA, double threshold) {
-    return true;
-  }
 }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/TanimotoCoefficientSimilarity.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/TanimotoCoefficientSimilarity.java?rev=1177237&r1=1177236&r2=1177237&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/TanimotoCoefficientSimilarity.java
(original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/TanimotoCoefficientSimilarity.java
Thu Sep 29 09:24:47 2011
@@ -25,7 +25,9 @@ public class TanimotoCoefficientSimilari
   }
 
   @Override
-  public boolean consider(int numNonZeroEntriesA, int numNonZeroEntriesB, double maxValueA, double threshold) {
-    return numNonZeroEntriesA >= numNonZeroEntriesB * threshold;
+  public boolean consider(int numNonZeroEntriesA, int numNonZeroEntriesB, double maxValueA, double maxValueB,
+      double threshold) {
+    return numNonZeroEntriesA >= numNonZeroEntriesB * threshold &&
+        numNonZeroEntriesB >= numNonZeroEntriesA * threshold;
   }
 }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/VectorSimilarityMeasure.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/VectorSimilarityMeasure.java?rev=1177237&r1=1177236&r2=1177237&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/VectorSimilarityMeasure.java
(original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/VectorSimilarityMeasure.java
Thu Sep 29 09:24:47 2011
@@ -27,5 +27,6 @@ public interface VectorSimilarityMeasure
   double norm(Vector vector);
   double aggregate(double nonZeroValueA, double nonZeroValueB);
   double similarity(double summedAggregations, double normA, double normB, int numberOfColumns);
-  boolean consider(int numNonZeroEntriesA, int numNonZeroEntriesB, double maxValueA, double threshold);
+  boolean consider(int numNonZeroEntriesA, int numNonZeroEntriesB, double maxValueA, double maxValueB,
+      double threshold);
 }



Mime
View raw message