lucene-solr-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From gsing...@apache.org
Subject svn commit: r895701 - in /lucene/solr/trunk/src/java/org/apache/solr/search/function/distance: DistanceUtils.java SquaredEuclideanFunction.java VectorDistanceFunction.java
Date Mon, 04 Jan 2010 16:59:57 GMT
Author: gsingers
Date: Mon Jan  4 16:59:56 2010
New Revision: 895701

URL: http://svn.apache.org/viewvc?rev=895701&view=rev
Log:
SOLR-1302: some slight refactoring for more reusable distance calculations

Modified:
    lucene/solr/trunk/src/java/org/apache/solr/search/function/distance/DistanceUtils.java
    lucene/solr/trunk/src/java/org/apache/solr/search/function/distance/SquaredEuclideanFunction.java
    lucene/solr/trunk/src/java/org/apache/solr/search/function/distance/VectorDistanceFunction.java

Modified: lucene/solr/trunk/src/java/org/apache/solr/search/function/distance/DistanceUtils.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/search/function/distance/DistanceUtils.java?rev=895701&r1=895700&r2=895701&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/search/function/distance/DistanceUtils.java
(original)
+++ lucene/solr/trunk/src/java/org/apache/solr/search/function/distance/DistanceUtils.java
Mon Jan  4 16:59:56 2010
@@ -28,6 +28,66 @@
   public static final double RADIANS_TO_DEGREES = 180.0 / Math.PI;
 
   /**
+   * Calculate the p-norm (i.e. length) between two vectors
+   *
+   * @param vec1  The first vector
+   * @param vec2  The second vector
+   * @param power The power (2 for Euclidean distance, 1 for manhattan, etc.)
+   * @return The length.
+   *         <p/>
+   *         See http://en.wikipedia.org/wiki/Lp_space
+   * @see #vectorDistance(double[], double[], double, double)
+   */
+  public static double vectorDistance(double[] vec1, double[] vec2, double power) {
+    return vectorDistance(vec1, vec2, power, 1.0 / power);
+  }
+
+  /**
+   * Calculate the p-norm (i.e. length) between two vectors
+   *
+   * @param vec1         The first vector
+   * @param vec2         The second vector
+   * @param power        The power (2 for Euclidean distance, 1 for manhattan, etc.)
+   * @param oneOverPower If you've precalculated oneOverPower and cached it, use this method
to save one division operation over {@link #vectorDistance(double[], double[], double)}.
+   * @return The length.
+   */
+  public static double vectorDistance(double[] vec1, double[] vec2, double power, double
oneOverPower) {
+    double result = 0;
+
+    if (power == 0) {
+      for (int i = 0; i < vec1.length; i++) {
+        result += vec1[i] - vec2[i] == 0 ? 0 : 1;
+      }
+
+    } else if (power == 1.0) {
+      for (int i = 0; i < vec1.length; i++) {
+        result += vec1[i] - vec2[i];
+      }
+    } else if (power == 2.0) {
+      result = Math.sqrt(squaredEuclideanDistance(vec1, vec2));
+    } else if (power == Integer.MAX_VALUE || Double.isInfinite(power)) {//infininte norm?
+      for (int i = 0; i < vec1.length; i++) {
+        result = Math.max(vec1[i], vec2[i]);
+      }
+    } else {
+      for (int i = 0; i < vec1.length; i++) {
+        result += Math.pow(vec1[i] - vec2[i], power);
+      }
+      result = Math.pow(result, oneOverPower);
+    }
+    return result;
+  }
+
+  public static double squaredEuclideanDistance(double[] vec1, double[] vec2) {
+    double result = 0;
+    for (int i = 0; i < vec1.length; i++) {
+      double v = vec1[i] - vec2[i];
+      result += v * v;
+    }
+    return result;
+  }
+
+  /**
    * @param x1     The x coordinate of the first point
    * @param y1     The y coordinate of the first point
    * @param x2     The x coordinate of the second point
@@ -92,6 +152,46 @@
   }
 
   /**
+   * Given a string containing <i>dimension</i> values encoded in it, separated
by commas, return a double array of length <i>dimension</i>
+   * containing the values.
+   *
+   * @param out         A preallocated array.  Must be size dimension.  If it is not it will
be resized.
+   * @param externalVal The value to parse
+   * @param dimension   The expected number of values for the point
+   * @return An array of the values that make up the point (aka vector)
+   * @throws {@link SolrException} if the dimension specified does not match the number of
values in the externalValue.
+   */
+  public static double[] parsePointDouble(double[] out, String externalVal, int dimension)
{
+    if (out == null || out.length != dimension) out = new double[dimension];
+    int idx = externalVal.indexOf(',');
+    int end = idx;
+    int start = 0;
+    int i = 0;
+    if (idx == -1 && dimension == 1 && externalVal.length() > 0) {//we
have a single point, dimension better be 1
+      out[0] = Double.parseDouble(externalVal.trim());
+      i = 1;
+    } else if (idx > 0) {//if it is zero, that is an error
+      //Parse out a comma separated list of point values, as in: 73.5,89.2,7773.4
+      for (; i < dimension; i++) {
+        //TODO: abstract common code with other parsePoint
+        while (start < end && externalVal.charAt(start) == ' ') start++;
+        while (end > start && externalVal.charAt(end - 1) == ' ') end--;
+        out[i] = Double.parseDouble(externalVal.substring(start, end));
+        start = idx + 1;
+        end = externalVal.indexOf(',', start);
+        if (end == -1) {
+          end = externalVal.length();
+        }
+      }
+    }
+    if (i != dimension) {
+      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "incompatible dimension
(" + dimension +
+              ") and values (" + externalVal + ").  Only " + i + " values specified");
+    }
+    return out;
+  }
+
+  /**
    * extract (by calling {@link #parsePoint(String[], String, int)} and validate the latitude
and longitude contained
    * in the String by making sure the latitude is between 90 & -90 and longitude is between
-180 and 180.
    * <p/>
@@ -105,7 +205,7 @@
     if (latLon == null) {
       latLon = new double[2];
     }
-    String[] toks = DistanceUtils.parsePoint(null, latLonStr, 2);
+    double[] toks = DistanceUtils.parsePointDouble(null, latLonStr, 2);
     latLon[0] = Double.valueOf(toks[0]);
 
     if (latLon[0] < -90.0 || latLon[0] > 90.0) {

Modified: lucene/solr/trunk/src/java/org/apache/solr/search/function/distance/SquaredEuclideanFunction.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/search/function/distance/SquaredEuclideanFunction.java?rev=895701&r1=895700&r2=895701&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/search/function/distance/SquaredEuclideanFunction.java
(original)
+++ lucene/solr/trunk/src/java/org/apache/solr/search/function/distance/SquaredEuclideanFunction.java
Mon Jan  4 16:59:56 2010
@@ -23,8 +23,7 @@
 /**
  * While not strictly a distance, the Sq. Euclidean Distance is often all that is needed
in many applications
  * that require a distance, thus saving a sq. rt. calculation
- *
- **/
+ */
 public class SquaredEuclideanFunction extends VectorDistanceFunction {
   protected String name = "sqedist";
 
@@ -42,16 +41,13 @@
    * @param doc The doc to score
    */
   protected double distance(int doc, DocValues dv1, DocValues dv2) {
-    double result = 0;
-    double [] vals1 = new double[source1.dimension()];
-    double [] vals2 = new double[source1.dimension()];
+
+    double[] vals1 = new double[source1.dimension()];
+    double[] vals2 = new double[source1.dimension()];
     dv1.doubleVal(doc, vals1);
     dv2.doubleVal(doc, vals2);
-    for (int i = 0; i < vals1.length; i++) {
-        double v = vals1[i] - vals2[i];
-        result += v * v;
-      }
-    return result;
+
+    return DistanceUtils.squaredEuclideanDistance(vals1, vals2);
   }
 
   @Override

Modified: lucene/solr/trunk/src/java/org/apache/solr/search/function/distance/VectorDistanceFunction.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/search/function/distance/VectorDistanceFunction.java?rev=895701&r1=895700&r2=895701&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/search/function/distance/VectorDistanceFunction.java
(original)
+++ lucene/solr/trunk/src/java/org/apache/solr/search/function/distance/VectorDistanceFunction.java
Mon Jan  4 16:59:56 2010
@@ -20,8 +20,8 @@
 import org.apache.lucene.search.Searcher;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.search.function.DocValues;
-import org.apache.solr.search.function.ValueSource;
 import org.apache.solr.search.function.MultiValueSource;
+import org.apache.solr.search.function.ValueSource;
 
 import java.io.IOException;
 import java.util.Map;
@@ -62,45 +62,18 @@
   /**
    * Calculate the distance
    *
-   * @param doc        The current doc
+   * @param doc The current doc
    * @param dv1 The values from the first MultiValueSource
    * @param dv2 The values from the second MultiValueSource
    * @return The distance
    */
   protected double distance(int doc, DocValues dv1, DocValues dv2) {
-    double result = 0;
     //Handle some special cases:
-    double [] vals1 = new double[source1.dimension()];
-    double [] vals2 = new double[source1.dimension()];
+    double[] vals1 = new double[source1.dimension()];
+    double[] vals2 = new double[source1.dimension()];
     dv1.doubleVal(doc, vals1);
     dv2.doubleVal(doc, vals2);
-    if (power == 0) {
-      for (int i = 0; i < vals1.length; i++) {
-        result += vals1[i] - vals2[i] == 0 ? 0 :1;
-      }
-
-    } else if (power == 1.0) {
-      for (int i = 0; i < vals1.length; i++) {
-        result += vals1[i] - vals2[i];
-      }
-    } else if (power == 2.0) {
-      for (int i = 0; i < vals1.length; i++) {
-        double v = vals1[i] - vals2[i];
-        result += v * v;
-      }
-      result = Math.sqrt(result);
-    } else if (power == Integer.MAX_VALUE || Double.isInfinite(power)) {//infininte norm?
-      for (int i = 0; i < vals1.length; i++) {
-        result = Math.max(vals1[i], vals2[i]);
-      }
-    } else {
-      for (int i = 0; i < vals1.length; i++) {
-        result += Math.pow(vals1[i] - vals2[i], power);
-      }
-      result = Math.pow(result, oneOverPower);
-    }
-
-    return result;
+    return DistanceUtils.vectorDistance(vals1, vals2, power, oneOverPower);
   }
 
   @Override
@@ -111,7 +84,6 @@
     final DocValues vals2 = source2.getValues(context, reader);
 
 
-
     return new DocValues() {
       @Override
       public byte byteVal(int doc) {
@@ -120,7 +92,7 @@
 
       @Override
       public short shortVal(int doc) {
-        return (short)doubleVal(doc);
+        return (short) doubleVal(doc);
       }
 
       public float floatVal(int doc) {



Mime
View raw message