mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jman...@apache.org
Subject svn commit: r1098041 - in /mahout/trunk: core/src/main/java/org/apache/mahout/math/hadoop/ math/src/main/java/org/apache/mahout/math/ math/src/test/java/org/apache/mahout/math/
Date Sat, 30 Apr 2011 04:35:19 GMT
Author: jmannix
Date: Sat Apr 30 04:35:18 2011
New Revision: 1098041

URL: http://svn.apache.org/viewvc?rev=1098041&view=rev
Log:
Fixes MAHOUT-639

Modified:
    mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/MatrixMultiplicationJob.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/AbstractVector.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/OrderedIntDoubleMapping.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/SequentialAccessSparseVector.java
    mahout/trunk/math/src/test/java/org/apache/mahout/math/TestSequentialAccessSparseVector.java

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/MatrixMultiplicationJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/MatrixMultiplicationJob.java?rev=1098041&r1=1098040&r2=1098041&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/MatrixMultiplicationJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/MatrixMultiplicationJob.java Sat Apr 30 04:35:18 2011
@@ -131,11 +131,12 @@ public class MatrixMultiplicationJob ext
       Vector outFrag = firstIsOutFrag ? ((VectorWritable)v.get(0)).get() : ((VectorWritable)v.get(1)).get();
       Vector multiplier = firstIsOutFrag ? ((VectorWritable)v.get(1)).get() : ((VectorWritable)v.get(0)).get();
 
+      VectorWritable outVector = new VectorWritable();
       Iterator<Vector.Element> it = multiplier.iterateNonZero();
       while (it.hasNext()) {
         Vector.Element e = it.next();
         row.set(e.index());
-        VectorWritable outVector = new VectorWritable(outFrag.times(e.get()));
+        outVector.set(outFrag.times(e.get()));
         out.collect(row, outVector);
       }
     }

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/AbstractVector.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/AbstractVector.java?rev=1098041&r1=1098040&r2=1098041&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/AbstractVector.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/AbstractVector.java Sat Apr 30 04:35:18 2011
@@ -17,12 +17,12 @@
 
 package org.apache.mahout.math;
 
-import java.util.Iterator;
-
 import org.apache.mahout.common.RandomUtils;
 import org.apache.mahout.math.function.DoubleDoubleFunction;
 import org.apache.mahout.math.function.DoubleFunction;
 
+import java.util.Iterator;
+
 /** Implementations of generic capabilities like sum of elements and dot products */
 public abstract class AbstractVector implements Vector {
 
@@ -417,13 +417,15 @@ public abstract class AbstractVector imp
 
   @Override
   public Vector times(double x) {
+    if (x == 0.0) {
+      return like();
+    }
+
     Vector result = like().assign(this);
     if (x == 1.0) {
       return result;
     }
-    if (x == 0.0) {
-      return like();
-    }
+    
     Iterator<Element> iter = result.iterateNonZero();
     while (iter.hasNext()) {
       Element element = iter.next();

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/OrderedIntDoubleMapping.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/OrderedIntDoubleMapping.java?rev=1098041&r1=1098040&r2=1098041&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/OrderedIntDoubleMapping.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/OrderedIntDoubleMapping.java Sat Apr 30 04:35:18 2011
@@ -38,7 +38,7 @@ final class OrderedIntDoubleMapping impl
     numMappings = 0;
   }
 
-  private OrderedIntDoubleMapping(int[] indices, double[] values, int numMappings) {
+  OrderedIntDoubleMapping(int[] indices, double[] values, int numMappings) {
     this.indices = indices;
     this.values = values;
     this.numMappings = numMappings;

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/SequentialAccessSparseVector.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/SequentialAccessSparseVector.java?rev=1098041&r1=1098040&r2=1098041&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/SequentialAccessSparseVector.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/SequentialAccessSparseVector.java Sat Apr 30 04:35:18 2011
@@ -20,6 +20,7 @@ package org.apache.mahout.math;
 import com.google.common.collect.AbstractIterator;
 import org.apache.mahout.math.function.Functions;
 
+import java.util.Arrays;
 import java.util.Iterator;
 
 /**
@@ -63,11 +64,40 @@ public class SequentialAccessSparseVecto
 
   public SequentialAccessSparseVector(Vector other) {
     this(other.size(), other.getNumNondefaultElements());
+    
+    if (!other.isSequentialAccess()) {
+      // If the incoming Vector to copy is random, then adding items
+      // from the Iterator can degrade performance dramatically if 
+      // the number of elements is large as this Vector tries to stay
+      // in order as items are added, so it's better to sort the other
+      // Vector's elements by index and then add them to this
+      copySortedRandomAccessSparseVector(other);
+    } else {
+      Iterator<Element> it = other.iterateNonZero();
+      Element e;
+      while (it.hasNext() && (e = it.next()) != null) {
+        set(e.index(), e.get());
+      }
+    }    
+  }
+
+  // Sorts a RandomAccessSparseVectors Elements before adding them to this
+  private int copySortedRandomAccessSparseVector(Vector other) {
+    int elementCount = other.getNumNondefaultElements();
+    OrderedElement[] sortableElements = new OrderedElement[elementCount];
     Iterator<Element> it = other.iterateNonZero();
     Element e;
+    int s=0;
     while (it.hasNext() && (e = it.next()) != null) {
-      set(e.index(), e.get());
+      sortableElements[s++] = new OrderedElement(e.index(), e.get());
+    }
+    Arrays.sort(sortableElements);
+    for (int i = 0; i < sortableElements.length; i++) {
+      values.getIndices()[i] = sortableElements[i].index;
+      values.getValues()[i] = sortableElements[i].value;
     }
+    values = new OrderedIntDoubleMapping(values.getIndices(), values.getValues(), elementCount);
+    return elementCount;
   }
 
   public SequentialAccessSparseVector(SequentialAccessSparseVector other, boolean shallowCopy) {
@@ -188,7 +218,7 @@ public class SequentialAccessSparseVecto
     if (this == x) {
       return dotSelf();
     }
-    
+
     if (x instanceof SequentialAccessSparseVector) {
       // For sparse SeqAccVectors. do dot product without lookup in a linear fashion
       Iterator<Element> myIter = iterateNonZero();
@@ -220,7 +250,7 @@ public class SequentialAccessSparseVecto
       }
       return result;
     } else { // seq.rand. seq.dense
-      double result = 0.0;      
+      double result = 0.0;
       Iterator<Element> iter = iterateNonZero();
       while (iter.hasNext()) {
         Element element = iter.next();
@@ -305,7 +335,7 @@ public class SequentialAccessSparseVecto
 
     @Override
     public void set(double value) {
-      lengthSquared = -1;      
+      lengthSquared = -1;
       values.getValues()[offset] = value;
     }
   }
@@ -341,7 +371,7 @@ public class SequentialAccessSparseVecto
 
     @Override
     public void set(double value) {
-      lengthSquared = -1;      
+      lengthSquared = -1;
       if (index == values.getIndices()[nextOffset]) {
         values.getValues()[nextOffset] = value;
       } else {
@@ -350,5 +380,23 @@ public class SequentialAccessSparseVecto
       }
     }
   }
+
+  // Comparable Element for sorting Elements by index
+  private static final class OrderedElement implements Comparable<OrderedElement> {
+    private final int index;
+    private final double value;
+    
+    OrderedElement(int index, double value) {
+      this.index = index;
+      this.value = value;
+    }
+    
+    @Override
+    public int compareTo(final OrderedElement that) {
+      // both indexes are positive, and neither can be Integer.MAX_VALUE (otherwise there would be
+      // an array somewhere with Integer.MAX_VALUE + 1 elements)
+      return this.index - that.index;
+    }
+  }
   
 }

Modified: mahout/trunk/math/src/test/java/org/apache/mahout/math/TestSequentialAccessSparseVector.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/test/java/org/apache/mahout/math/TestSequentialAccessSparseVector.java?rev=1098041&r1=1098040&r2=1098041&view=diff
==============================================================================
--- mahout/trunk/math/src/test/java/org/apache/mahout/math/TestSequentialAccessSparseVector.java (original)
+++ mahout/trunk/math/src/test/java/org/apache/mahout/math/TestSequentialAccessSparseVector.java Sat Apr 30 04:35:18 2011
@@ -39,6 +39,4 @@ public final class TestSequentialAccessS
 
     assertEquals("dot2", -0.666666667, v.dot(w), EPSILON);
   }
-
-
 }
\ No newline at end of file



Mime
View raw message