mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From gsing...@apache.org
Subject svn commit: r788504 - in /lucene/mahout/trunk/core/src/main/java/org/apache/mahout: clustering/canopy/ clustering/kmeans/ matrix/ utils/
Date Thu, 25 Jun 2009 20:46:43 GMT
Author: gsingers
Date: Thu Jun 25 20:46:42 2009
New Revision: 788504

URL: http://svn.apache.org/viewvc?rev=788504&view=rev
Log:
MAHOUT-121: Take advantage of sparseness, also intro Euclidean base class that doesn't do
the square root

Added:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/SquaredEuclideanDistanceMeasure.java
Modified:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/Cluster.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/DenseVector.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/SparseVector.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/Vector.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/VectorView.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/CosineDistanceMeasure.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/DistanceMeasure.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/EuclideanDistanceMeasure.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/ManhattanDistanceMeasure.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/TanimotoDistanceMeasure.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/WeightedEuclideanDistanceMeasure.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/WeightedManhattanDistanceMeasure.java

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java?rev=788504&r1=788503&r2=788504&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java
(original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/Canopy.java
Thu Jun 25 20:46:42 2009
@@ -143,7 +143,7 @@
   public static void addPointToCanopies(Vector point, List<Canopy> canopies) {
     boolean pointStronglyBound = false;
     for (Canopy canopy : canopies) {
-      double dist = measure.distance(canopy.getCenter(), point);
+      double dist = measure.distance(canopy.getCenter().getLengthSquared(), canopy.getCenter(),
point);
       if (dist < t1)
         canopy.addPoint(point);
       pointStronglyBound = pointStronglyBound || (dist < t2);
@@ -166,7 +166,7 @@
       throws IOException {
     boolean pointStronglyBound = false;
     for (Canopy canopy : canopies) {
-      double dist = measure.distance(canopy.getCenter(), point);
+      double dist = measure.distance(canopy.getCenter().getLengthSquared(), canopy.getCenter(),
point);
       if (dist < t1)
         canopy.emitPoint(point, collector);
       pointStronglyBound = pointStronglyBound || (dist < t2);
@@ -195,7 +195,7 @@
     Canopy closest = null;
     boolean isCovered = false;
     for (Canopy canopy : canopies) {
-      double dist = measure.distance(canopy.getCenter(), point);
+      double dist = measure.distance(canopy.getCenter().getLengthSquared(), canopy.getCenter(),
point);
       if (dist < t1) {
         isCovered = true;
         collector.collect(new Text(canopy.getIdentifier()), point);
@@ -308,6 +308,6 @@
    * @return if the point is covered
    */
   public boolean covers(Vector point) {
-    return measure.distance(center, point) < t1;
+    return measure.distance(center.getLengthSquared(), center, point) < t1;
   }
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/Cluster.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/Cluster.java?rev=788504&r1=788503&r2=788504&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/Cluster.java
(original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/Cluster.java
Thu Jun 25 20:46:42 2009
@@ -225,7 +225,8 @@
   /**
    * Construct a new cluster with the given point as its center
    * 
-   * @param center the center point
+   * @param center
+   *            the center point
    */
   public Cluster(Vector center) {
     super();
@@ -300,8 +301,8 @@
       pointTotal = delta.clone();
       pointSquaredTotal = delta.times(delta);
     } else {
-      pointTotal = pointTotal.plus(delta);
-      pointSquaredTotal = pointSquaredTotal.plus(delta.times(delta));
+      delta.addTo(pointTotal);
+      delta.times(delta).addTo(pointSquaredTotal);
     }
   }
 
@@ -324,7 +325,7 @@
    */
   public boolean computeConvergence() {
     Vector centroid = computeCentroid();
-    converged = measure.distance(centroid, center) <= convergenceDelta;
+    converged = measure.distance(centroid.getLengthSquared(), centroid, center) <= convergenceDelta;
     return converged;
   }
 

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/DenseVector.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/DenseVector.java?rev=788504&r1=788503&r2=788504&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/DenseVector.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/DenseVector.java Thu Jun
25 20:46:42 2009
@@ -306,4 +306,38 @@
 
     return result;
   }
+
+  private Double lengthSquared = null; 
+
+  @Override
+  public double getLengthSquared() {
+  	if (lengthSquared != null) {
+  		return lengthSquared;
+  	}	
+
+  	double result = 0.0;
+  	for (int i = 0; i < values.length; i++) {
+  		result += values[i] * values[i];
+  	
+  	}
+  	lengthSquared = new Double(result);
+  	return result;
+  }
+
+  @Override
+  public double getDistanceSquared(Vector v) {
+  	double result = 0.0;
+  	double delta = 0.0;
+  	for (int i = 0; i < values.length; i++) {
+  		delta = values[i] - v.getQuick(i);  
+  		result += delta * delta;	
+  	}	
+  	return result;
+  }
+
+  @Override
+  public void addTo(Vector v) {
+    for (int i = 0; i < size(); i++)
+      v.setQuick(i, get(i) + v.get(i));  	
+  }
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/SparseVector.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/SparseVector.java?rev=788504&r1=788503&r2=788504&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/SparseVector.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/SparseVector.java Thu
Jun 25 20:46:42 2009
@@ -20,15 +20,17 @@
 import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
-import java.util.NoSuchElementException;
 import java.util.Iterator;
+import java.util.NoSuchElementException;
 
 /**
  * Implements vector that only stores non-zero doubles
  */
 public class SparseVector extends AbstractVector {
 
-  /** For serialization purposes only. */
+  /**
+   * For serialization purposes only.
+   */
   public SparseVector() {
   }
 
@@ -36,6 +38,9 @@
 
   private int cardinality;
 
+  public SparseVector(int cardinality, int size) {
+    this(null, cardinality, size);
+  }
 
   public SparseVector(String name, int cardinality, int size) {
     super(name);
@@ -45,17 +50,17 @@
 
   public SparseVector(String name, int cardinality) {
     this(name, cardinality, cardinality / 8); // arbitrary estimate of
-                                              // 'sparseness'
+    // 'sparseness'
   }
 
   public SparseVector(int cardinality) {
     this(null, cardinality, cardinality / 8); // arbitrary estimate of
-                                              // 'sparseness'
+    // 'sparseness'
   }
 
   @Override
   protected Matrix matrixLike(int rows, int columns) {
-    int[] cardinality = { rows, columns };
+    int[] cardinality = {rows, columns};
     return new SparseRowMatrix(cardinality);
   }
 
@@ -105,19 +110,27 @@
 
   @Override
   public SparseVector like() {
-    return new SparseVector(cardinality);
+    int numValues = 256;
+    if (values != null) {
+      numValues = values.getNumMappings();
+    }
+    return new SparseVector(cardinality, numValues);
   }
 
   @Override
   public Vector like(int newCardinality) {
-    return new SparseVector(newCardinality);
+    int numValues = 256;
+    if (values != null) {
+      numValues = values.getNumMappings();
+    }
+    return new SparseVector(newCardinality, numValues);
   }
 
   /**
   * NOTE: this implementation reuses the Vector.Element instance for each call of next(). If you
    * need to preserve the instance, you need to make a copy of it
-   * @return an {@link org.apache.mahout.matrix.SparseVector.NonZeroIterator} over the Elements.
    *
+   * @return an {@link org.apache.mahout.matrix.SparseVector.NonZeroIterator} over the Elements.
    * @see #getElement(int)
    */
   public java.util.Iterator<Vector.Element> iterateNonZero() {
@@ -133,11 +146,11 @@
    * Indicate whether the two objects are the same or not. Two
    * {@link org.apache.mahout.matrix.Vector}s can be equal even if the
    * underlying implementation is not equal.
-   * 
+   *
    * @param o The object to compare
    * @return true if the objects have the same cell values and same name, false
    *         otherwise.
-   * 
+   *         <p/>
    *         * @see AbstractVector#strictEquivalence(Vector, Vector)
    * @see AbstractVector#equivalent(Vector, Vector)
    */
@@ -154,7 +167,7 @@
 
     if (that instanceof SparseVector) {
       return (values == null ? ((SparseVector) that).values == null : values
-          .equals(((SparseVector) that).values));
+              .equals(((SparseVector) that).values));
     } else {
       return equivalent(this, that);
     }
@@ -169,7 +182,7 @@
     return result;
   }
 
-  private class AllIterator implements java.util.Iterator<Vector.Element>{
+  private class AllIterator implements java.util.Iterator<Vector.Element> {
     private int offset = 0;
     private Element element = new Element(0);
 
@@ -194,6 +207,7 @@
   private class NonZeroIterator implements java.util.Iterator<Vector.Element> {
     private int offset = 0;
     private Element element = new Element(0);
+
     @Override
     public boolean hasNext() {
       return offset < values.getNumMappings();
@@ -243,7 +257,6 @@
   }
 
 
- 
   @Override
   public void write(DataOutput dataOutput) throws IOException {
     dataOutput.writeUTF(this.name == null ? "" : this.name);
@@ -258,7 +271,7 @@
       dataOutput.writeDouble(element.get());
       count++;
     }
-    assert(nde == count);
+    assert (nde == count);
   }
 
   @Override
@@ -271,10 +284,50 @@
     for (; i < size; i++) {
       values.set(dataInput.readInt(), dataInput.readDouble());
     }
-    assert(i == size);
+    assert (i == size);
     this.cardinality = cardinality;
     this.values = values;
   }
 
+  private Double lengthSquared = null;
+
+  @Override
+  public double getLengthSquared() {
+    if (lengthSquared != null) {
+      return lengthSquared;
+    }
+    double result = 0.0f;
+    for (double val : values.getValues()) {
+      result += val * val;
+    }
+    lengthSquared = new Double(result);
+    return result;
+  }
+
+  @Override
+  public double getDistanceSquared(Vector v) {
+    //TODO: Check sizes?
+
+    double result = 0.0f;
+    double delta = 0.0f;
+    double centroidValue = 0.0f;
+    Iterator<Vector.Element> iter = iterateNonZero();
+    while (iter.hasNext()) {
+      Vector.Element elt = iter.next();
+      centroidValue = v.getQuick(elt.index());
+      delta = elt.get() - centroidValue;
+      result += (delta * delta) - (centroidValue * centroidValue);
+    }
+    return result;
+  }
+
+  @Override
+  public void addTo(Vector v) {
+    Iterator<Vector.Element> iter = iterateNonZero();
+    while (iter.hasNext()) {
+      Vector.Element elt = iter.next();
+      v.setQuick(elt.index(), elt.get() + v.get(elt.index()));
+    }
+  }
 
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/Vector.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/Vector.java?rev=788504&r1=788503&r2=788504&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/Vector.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/Vector.java Thu Jun 25
20:46:42 2009
@@ -425,4 +425,23 @@
   // DoubleDoubleFunction map);
   // NewVector assign(Vector y, DoubleDoubleFunction function, IntArrayList
   // nonZeroIndexes);
+  
+
+  /**
+   * Return the sum of squares of all elements in the vector. Square root of this value is the length of the vector.
+   */
+  public abstract double getLengthSquared();
+    
+  /**
+   * Get the square of the distance between this vector and the other vector. 
+   * @param v
+   * @return
+   */
+  public abstract double getDistanceSquared(Vector v);
+  
+  /**
+   * Add the elements to the other vector and results are stored in that vector.
+   */
+  public abstract void addTo(Vector v);
+  
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/VectorView.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/VectorView.java?rev=788504&r1=788503&r2=788504&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/VectorView.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/VectorView.java Thu Jun
25 20:46:42 2009
@@ -21,6 +21,7 @@
 import java.io.DataOutput;
 import java.io.IOException;
 import java.util.NoSuchElementException;
+import java.util.Iterator;
 
 /**
  * Implements subset view of a Vector
@@ -313,4 +314,35 @@
     result = 31 * result + cardinality;
     return result;
   }
+
+  @Override
+  public double getLengthSquared() {
+  	double result = 0.0;
+  	for (int i = 0; i < cardinality; i++) {
+  		double value = getQuick(i); 
+  		result +=  value * value;
+  	}
+  	return result;
+  }
+
+  @Override
+  public double getDistanceSquared(Vector v) {  	
+  	double result = 0.0;
+  	double delta = 0.0;
+  	for (int i = 0; i < cardinality; i++) {
+  		delta = getQuick(i) - v.getQuick(i);
+  		result += delta * delta;
+  	}
+  	return result;
+  }
+
+  @Override
+  public void addTo(Vector v) {
+    Iterator<Vector.Element> iter = iterateNonZero();
+    while (iter.hasNext()) {
+      Vector.Element elt = iter.next();
+  	  v.set(elt.index(), elt.get() + v.get(elt.index()));
+    }
+  }
+
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/CosineDistanceMeasure.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/CosineDistanceMeasure.java?rev=788504&r1=788503&r2=788504&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/CosineDistanceMeasure.java
(original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/CosineDistanceMeasure.java
Thu Jun 25 20:46:42 2009
@@ -92,4 +92,9 @@
     return 1.0 - (dotProduct / denominator);
   }
 
+  @Override
+   public double distance(double centroidLengthSquare, Vector centroid, Vector v) {	 
+     return distance(centroid, v); // TODO
+   }
+
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/DistanceMeasure.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/DistanceMeasure.java?rev=788504&r1=788503&r2=788504&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/DistanceMeasure.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/DistanceMeasure.java Thu
Jun 25 20:46:42 2009
@@ -36,5 +36,16 @@
    * @throws CardinalityException
    */
   double distance(Vector v1, Vector v2);
+  
+  /**
+   * Optimized version of distance metric for sparse vectors. This distance computation requires
+   * operations proportional to the number of non-zero elements in the vector instead of the cardinality
+   * of the vector.
+   * @param centroidLengthSquare Square of the length of centroid
+   * @param centroid Centroid vector
+   * @param v 
+   * @return
+   */
+  double distance(double centroidLengthSquare, Vector centroid, Vector v);
 
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/EuclideanDistanceMeasure.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/EuclideanDistanceMeasure.java?rev=788504&r1=788503&r2=788504&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/EuclideanDistanceMeasure.java
(original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/EuclideanDistanceMeasure.java
Thu Jun 25 20:46:42 2009
@@ -17,60 +17,24 @@
 
 package org.apache.mahout.utils;
 
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.mahout.matrix.CardinalityException;
 import org.apache.mahout.matrix.Vector;
-import org.apache.mahout.utils.parameters.Parameter;
-
-import java.util.Collection;
-import java.util.Collections;
-import java.util.Iterator;
 
 /**
  * This class implements a Euclidian distance metric by summing the square root
- * of the squared differences between each coordinate
+ * of the squared differences between each coordinate.
+ * <p/>
+ * If you don't care about the true distance and only need the values for comparison, then
+ * the base class, {@link SquaredEuclideanDistanceMeasure}, will be faster since it doesn't do
+ * the actual square root of the squared differences.
  */
-public class EuclideanDistanceMeasure implements DistanceMeasure {
-
+public class EuclideanDistanceMeasure extends SquaredEuclideanDistanceMeasure {
   @Override
-  public void configure(JobConf job) {
-    // nothing to do
-  }
-
-  @Override
-  public Collection<Parameter<?>> getParameters() {
-    return Collections.emptyList();
-  }
-
-  @Override
-  public void createParameters(String prefix, JobConf jobConf) {
-    // nothing to do
-  }
-
-  public static double distance(double[] p1, double[] p2) {
-    double result = 0.0;
-    for (int i = 0; i < p1.length; i++) {
-      double delta = p2[i] - p1[i];
-      result += delta * delta;
-    }
-    //TODO: Do we really need to return the square root?
-    return Math.sqrt(result);
+  public double distance(Vector v1, Vector v2) {
+    return Math.sqrt(super.distance(v1, v2));
   }
 
   @Override
-  public double distance(Vector v1, Vector v2) {
-    if (v1.size() != v2.size())
-      throw new CardinalityException();
-    double result = 0;
-    Vector vector = v1.plus(v2);
-    Iterator<Vector.Element> iter = vector.iterateNonZero();//this contains all non
zero elements between the two
-    while (iter.hasNext()) {
-      Vector.Element e = iter.next();
-      double delta = v2.getQuick(e.index()) - v1.getQuick(e.index());
-      result += delta * delta;
-    }
-    //TODO: Do we really need to return the square root?
-    return Math.sqrt(result);
+  public double distance(double centroidLengthSquare, Vector centroid, Vector v) {
+    return Math.sqrt(super.distance(centroidLengthSquare, centroid, v));
   }
-
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/ManhattanDistanceMeasure.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/ManhattanDistanceMeasure.java?rev=788504&r1=788503&r2=788504&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/ManhattanDistanceMeasure.java
(original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/ManhattanDistanceMeasure.java
Thu Jun 25 20:46:42 2009
@@ -68,4 +68,9 @@
     return result;
   }
 
+ @Override
+ public double distance(double centroidLengthSquare, Vector centroid, Vector v) {	 
+  return distance(centroid, v); // TODO
+ }
+ 
 }

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/SquaredEuclideanDistanceMeasure.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/SquaredEuclideanDistanceMeasure.java?rev=788504&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/SquaredEuclideanDistanceMeasure.java
(added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/SquaredEuclideanDistanceMeasure.java
Thu Jun 25 20:46:42 2009
@@ -0,0 +1,88 @@
+package org.apache.mahout.utils;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.mahout.utils.parameters.Parameter;
+import org.apache.mahout.matrix.Vector;
+import org.apache.mahout.matrix.CardinalityException;
+
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Iterator;
+
+
+/**
+ * Like {@link org.apache.mahout.utils.EuclideanDistanceMeasure} but it does not take the square root.
+ * <p/>
+ * Thus, it is not actually the Euclidean Distance, but it saves on computation when you only need the distance for
+ * comparison and don't care about the actual value as a distance.
+ *
+ **/
+public class SquaredEuclideanDistanceMeasure implements DistanceMeasure {
+
+  @Override
+  public void configure(JobConf job) {
+    // nothing to do
+  }
+
+  @Override
+  public Collection<Parameter<?>> getParameters() {
+    return Collections.emptyList();
+  }
+
+  @Override
+  public void createParameters(String prefix, JobConf jobConf) {
+    // nothing to do
+  }
+
+  public static double distance(double[] p1, double[] p2) {
+    double result = 0.0;
+    for (int i = 0; i < p1.length; i++) {
+      double delta = p2[i] - p1[i];
+      result += delta * delta;
+    }
+
+    return result;
+  }
+
+  @Override
+  public double distance(Vector v1, Vector v2) {
+    if (v1.size() != v2.size())
+      throw new CardinalityException();
+    double result = 0;
+    Vector vector = v1.plus(v2);
+    Iterator<Vector.Element> iter = vector.iterateNonZero();//this contains all non
zero elements between the two
+    while (iter.hasNext()) {
+      Vector.Element e = iter.next();
+      double delta = v2.getQuick(e.index()) - v1.getQuick(e.index());
+      result += delta * delta;
+    }
+
+    return result;
+  }
+
+  @Override
+  public double distance(double centroidLengthSquare, Vector centroid, Vector v) {
+	    if (centroid.size() != centroid.size())
+	        throw new CardinalityException();
+
+	    double result = centroidLengthSquare;
+	    result += v.getDistanceSquared(centroid);
+	    return result;
+  }
+}

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/TanimotoDistanceMeasure.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/TanimotoDistanceMeasure.java?rev=788504&r1=788503&r2=788504&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/TanimotoDistanceMeasure.java
(original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/TanimotoDistanceMeasure.java
Thu Jun 25 20:46:42 2009
@@ -93,5 +93,10 @@
     return ((a2 + b2 - ab) / ab) - 1.0;
   }
 
+  @Override
+  public double distance(double centroidLengthSquare, Vector centroid, Vector v) {	 
+   return distance(centroid, v); // TODO
+  }
+
 }
 

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/WeightedEuclideanDistanceMeasure.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/WeightedEuclideanDistanceMeasure.java?rev=788504&r1=788503&r2=788504&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/WeightedEuclideanDistanceMeasure.java
(original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/WeightedEuclideanDistanceMeasure.java
Thu Jun 25 20:46:42 2009
@@ -47,4 +47,9 @@
     return Math.sqrt(result);
   }
 
+  @Override
+  public double distance(double centroidLengthSquare, Vector centroid, Vector v) {	 
+   return distance(centroid, v); // TODO
+  }
+
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/WeightedManhattanDistanceMeasure.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/WeightedManhattanDistanceMeasure.java?rev=788504&r1=788503&r2=788504&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/WeightedManhattanDistanceMeasure.java
(original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/WeightedManhattanDistanceMeasure.java
Thu Jun 25 20:46:42 2009
@@ -51,4 +51,9 @@
     return result;
   }
 
+  @Override
+  public double distance(double centroidLengthSquare, Vector centroid, Vector v) {	 
+   return distance(centroid, v); // TODO
+  }
+
 }



Mime
View raw message