mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dfili...@apache.org
Subject svn commit: r1484697 - in /mahout/trunk: CHANGELOG core/src/main/java/org/apache/mahout/math/neighborhood/FastProjectionSearch.java
Date Tue, 21 May 2013 08:38:48 GMT
Author: dfilimon
Date: Tue May 21 08:38:48 2013
New Revision: 1484697

URL: http://svn.apache.org/r1484697
Log:
MAHOUT-1222: Fix total weight in FastProjectionSearch

Sometimes when removing a Vector that's in pendingAdditions, the wrong Vector
gets removed.
This happens because the closest Vector is removed rather than the one that's
equal to it.


Modified:
    mahout/trunk/CHANGELOG
    mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/FastProjectionSearch.java

Modified: mahout/trunk/CHANGELOG
URL: http://svn.apache.org/viewvc/mahout/trunk/CHANGELOG?rev=1484697&r1=1484696&r2=1484697&view=diff
==============================================================================
--- mahout/trunk/CHANGELOG (original)
+++ mahout/trunk/CHANGELOG Tue May 21 08:38:48 2013
@@ -2,6 +2,8 @@ Mahout Change Log
 
 Release 0.8 - unreleased
 
+__MAHOUT-1222: Fix total weight in FastProjectionSearch (dfilimon)
+
 __MAHOUT-1219: Remove LSHSearcher from StreamingKMeansTest. It causes it to sometimes fail
(dfilimon)
 
   MAHOUT-1221: SparseMatrix.viewRow is sometimes readonly. (Maysam Yabandeh via smarthi)

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/FastProjectionSearch.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/FastProjectionSearch.java?rev=1484697&r1=1484696&r2=1484697&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/FastProjectionSearch.java
(original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/FastProjectionSearch.java
Tue May 21 08:38:48 2013
@@ -52,11 +52,6 @@ public class FastProjectionSearch extend
   // happened or not so we only do it once.
   private boolean initialized = false;
 
-  // Whether the iterator returned from the searcher was used to modify any of the vectors.
This
-  // flag must be set manually by calling setDirty after said modification so the internal
-  // structures can be updated.
-  private boolean dirty = false;
-
   // Removing vectors from the searcher is done lazily to avoid the linear time cost of removing
   // elements from an array. This member keeps track of the number of removed vectors (marked
as
   // "impossible" values in the array) so they can be removed when updating the structure.
@@ -197,26 +192,25 @@ public class FastProjectionSearch extend
   }
 
   @Override
-  public boolean remove(Vector v, double epsilon) {
-    WeightedThing<Vector> closestPair = searchFirst(v, false);
-    if (distanceMeasure.distance(closestPair.getValue(), v) > epsilon) {
+  public boolean remove(Vector vector, double epsilon) {
+    WeightedThing<Vector> closestPair = searchFirst(vector, false);
+    if (distanceMeasure.distance(closestPair.getValue(), vector) > epsilon) {
       return false;
     }
 
     boolean isProjected = true;
-    final Vector projection = basisMatrix.times(v);
+    final Vector projection = basisMatrix.times(vector);
     for (int i = 0; i < basisMatrix.numRows(); ++i) {
       List<WeightedThing<Vector>> currProjections = scalarProjections.get(i);
-      int middle = Collections.binarySearch(currProjections,
-          new WeightedThing<Vector>(projection.get(i)));
+      WeightedThing<Vector> searchedThing = new WeightedThing<Vector>(projection.get(i));
+      int middle = Collections.binarySearch(currProjections, searchedThing);
       if (middle < 0) {
         isProjected = false;
         break;
       }
-      double oldWeight = currProjections.get(middle).getWeight();
       // Elements to be removed are kept in the sorted array until the next reindex, but
their inner vector
       // is set to null.
-      scalarProjections.get(i).set(middle, new WeightedThing<Vector>(oldWeight));
+      scalarProjections.get(i).set(middle, searchedThing);
     }
     if (isProjected) {
       ++numPendingRemovals;
@@ -224,7 +218,7 @@ public class FastProjectionSearch extend
     }
 
     for (int i = 0; i < pendingAdditions.size(); ++i) {
-      if (distanceMeasure.distance(v, pendingAdditions.get(i)) < epsilon) {
+      if (pendingAdditions.get(i).equals(vector)) {
         pendingAdditions.remove(i);
         break;
       }
@@ -234,8 +228,8 @@ public class FastProjectionSearch extend
 
   private void reindex(boolean force) {
     int numProjected = scalarProjections.get(0).size();
-    if (force || dirty || pendingAdditions.size() > ADDITION_THRESHOLD * numProjected
||
-        numPendingRemovals > REMOVAL_THRESHOLD * numProjected) {
+    if (force || pendingAdditions.size() > ADDITION_THRESHOLD * numProjected
+        || numPendingRemovals > REMOVAL_THRESHOLD * numProjected) {
 
       // We only need to copy the first list because when iterating we use only that list
for the Vector
       // references.
@@ -285,7 +279,6 @@ public class FastProjectionSearch extend
       scalarProjections.get(i).clear();
     }
     numPendingRemovals = 0;
-    dirty = false;
   }
 
   /**
@@ -298,30 +291,20 @@ public class FastProjectionSearch extend
   public Iterator<Vector> iterator() {
     reindex(true);
     return new AbstractIterator<Vector>() {
-          Iterator<WeightedThing<Vector>> data = scalarProjections.get(0).iterator();
-          @Override
-          protected Vector computeNext() {
-            WeightedThing<Vector> next;
-            do {
-              if (!data.hasNext()) {
-                return endOfData();
-              }
-              next = data.next();
-              if (next.getValue() != null) {
-                return next.getValue();
-              }
-            } while (true);
+      Iterator<WeightedThing<Vector>> data = scalarProjections.get(0).iterator();
+      @Override
+      protected Vector computeNext() {
+        WeightedThing<Vector> next;
+        do {
+          if (!data.hasNext()) {
+            return endOfData();
           }
-        };
-  }
-
-  /**
-   * When modifying an element of the searcher through the iterator,
-   * the user MUST CALL setDirty() to update the internal data structures. Otherwise,
-   * the internal order of the vectors will change and future results might be wrong.
-   */
-  @SuppressWarnings("unused")
-  public void setDirty() {
-    dirty = true;
+          next = data.next();
+          if (next.getValue() != null) {
+            return next.getValue();
+          }
+        } while (true);
+      }
+    };
   }
 }



Mime
View raw message