Return-Path: Delivered-To: apmail-lucene-solr-commits-archive@minotaur.apache.org Received: (qmail 83778 invoked from network); 4 Jan 2010 17:00:29 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.3) by minotaur.apache.org with SMTP; 4 Jan 2010 17:00:29 -0000 Received: (qmail 74226 invoked by uid 500); 4 Jan 2010 17:00:29 -0000 Delivered-To: apmail-lucene-solr-commits-archive@lucene.apache.org Received: (qmail 74163 invoked by uid 500); 4 Jan 2010 17:00:28 -0000 Mailing-List: contact solr-commits-help@lucene.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: solr-dev@lucene.apache.org Delivered-To: mailing list solr-commits@lucene.apache.org Received: (qmail 74154 invoked by uid 99); 4 Jan 2010 17:00:28 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 04 Jan 2010 17:00:28 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=10.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 04 Jan 2010 17:00:18 +0000 Received: by eris.apache.org (Postfix, from userid 65534) id 3E28D2388906; Mon, 4 Jan 2010 16:59:57 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r895701 - in /lucene/solr/trunk/src/java/org/apache/solr/search/function/distance: DistanceUtils.java SquaredEuclideanFunction.java VectorDistanceFunction.java Date: Mon, 04 Jan 2010 16:59:57 -0000 To: solr-commits@lucene.apache.org From: gsingers@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20100104165957.3E28D2388906@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: gsingers Date: Mon Jan 4 16:59:56 2010 New Revision: 895701 URL: http://svn.apache.org/viewvc?rev=895701&view=rev Log: SOLR-1302: some slight refactoring for more reusable distance calculations Modified: lucene/solr/trunk/src/java/org/apache/solr/search/function/distance/DistanceUtils.java lucene/solr/trunk/src/java/org/apache/solr/search/function/distance/SquaredEuclideanFunction.java lucene/solr/trunk/src/java/org/apache/solr/search/function/distance/VectorDistanceFunction.java Modified: lucene/solr/trunk/src/java/org/apache/solr/search/function/distance/DistanceUtils.java URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/search/function/distance/DistanceUtils.java?rev=895701&r1=895700&r2=895701&view=diff ============================================================================== --- lucene/solr/trunk/src/java/org/apache/solr/search/function/distance/DistanceUtils.java (original) +++ lucene/solr/trunk/src/java/org/apache/solr/search/function/distance/DistanceUtils.java Mon Jan 4 16:59:56 2010 @@ -28,6 +28,66 @@ public static final double RADIANS_TO_DEGREES = 180.0 / Math.PI; /** + * Calculate the p-norm (i.e. length) between two vectors + * + * @param vec1 The first vector + * @param vec2 The second vector + * @param power The power (2 for Euclidean distance, 1 for manhattan, etc.) + * @return The length. + *

+ * See http://en.wikipedia.org/wiki/Lp_space + * @see #vectorDistance(double[], double[], double, double) + */ + public static double vectorDistance(double[] vec1, double[] vec2, double power) { + return vectorDistance(vec1, vec2, power, 1.0 / power); + } + + /** + * Calculate the p-norm (i.e. length) between two vectors + * + * @param vec1 The first vector + * @param vec2 The second vector + * @param power The power (2 for Euclidean distance, 1 for manhattan, etc.) + * @param oneOverPower If you've precalculated oneOverPower and cached it, use this method to save one division operation over {@link #vectorDistance(double[], double[], double)}. + * @return The length. + */ + public static double vectorDistance(double[] vec1, double[] vec2, double power, double oneOverPower) { + double result = 0; + + if (power == 0) { + for (int i = 0; i < vec1.length; i++) { + result += vec1[i] - vec2[i] == 0 ? 0 : 1; + } + + } else if (power == 1.0) { + for (int i = 0; i < vec1.length; i++) { + result += vec1[i] - vec2[i]; + } + } else if (power == 2.0) { + result = Math.sqrt(squaredEuclideanDistance(vec1, vec2)); + } else if (power == Integer.MAX_VALUE || Double.isInfinite(power)) {//infininte norm? + for (int i = 0; i < vec1.length; i++) { + result = Math.max(vec1[i], vec2[i]); + } + } else { + for (int i = 0; i < vec1.length; i++) { + result += Math.pow(vec1[i] - vec2[i], power); + } + result = Math.pow(result, oneOverPower); + } + return result; + } + + public static double squaredEuclideanDistance(double[] vec1, double[] vec2) { + double result = 0; + for (int i = 0; i < vec1.length; i++) { + double v = vec1[i] - vec2[i]; + result += v * v; + } + return result; + } + + /** * @param x1 The x coordinate of the first point * @param y1 The y coordinate of the first point * @param x2 The x coordinate of the second point @@ -92,6 +152,46 @@ } /** + * Given a string containing dimension values encoded in it, separated by commas, return a double array of length dimension + * containing the values. + * + * @param out A preallocated array. Must be size dimension. If it is not it will be resized. + * @param externalVal The value to parse + * @param dimension The expected number of values for the point + * @return An array of the values that make up the point (aka vector) + * @throws {@link SolrException} if the dimension specified does not match the number of values in the externalValue. + */ + public static double[] parsePointDouble(double[] out, String externalVal, int dimension) { + if (out == null || out.length != dimension) out = new double[dimension]; + int idx = externalVal.indexOf(','); + int end = idx; + int start = 0; + int i = 0; + if (idx == -1 && dimension == 1 && externalVal.length() > 0) {//we have a single point, dimension better be 1 + out[0] = Double.parseDouble(externalVal.trim()); + i = 1; + } else if (idx > 0) {//if it is zero, that is an error + //Parse out a comma separated list of point values, as in: 73.5,89.2,7773.4 + for (; i < dimension; i++) { + //TODO: abstract common code with other parsePoint + while (start < end && externalVal.charAt(start) == ' ') start++; + while (end > start && externalVal.charAt(end - 1) == ' ') end--; + out[i] = Double.parseDouble(externalVal.substring(start, end)); + start = idx + 1; + end = externalVal.indexOf(',', start); + if (end == -1) { + end = externalVal.length(); + } + } + } + if (i != dimension) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "incompatible dimension (" + dimension + + ") and values (" + externalVal + "). Only " + i + " values specified"); + } + return out; + } + + /** * extract (by calling {@link #parsePoint(String[], String, int)} and validate the latitude and longitude contained * in the String by making sure the latitude is between 90 & -90 and longitude is between -180 and 180. *

@@ -105,7 +205,7 @@ if (latLon == null) { latLon = new double[2]; } - String[] toks = DistanceUtils.parsePoint(null, latLonStr, 2); + double[] toks = DistanceUtils.parsePointDouble(null, latLonStr, 2); latLon[0] = Double.valueOf(toks[0]); if (latLon[0] < -90.0 || latLon[0] > 90.0) { Modified: lucene/solr/trunk/src/java/org/apache/solr/search/function/distance/SquaredEuclideanFunction.java URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/search/function/distance/SquaredEuclideanFunction.java?rev=895701&r1=895700&r2=895701&view=diff ============================================================================== --- lucene/solr/trunk/src/java/org/apache/solr/search/function/distance/SquaredEuclideanFunction.java (original) +++ lucene/solr/trunk/src/java/org/apache/solr/search/function/distance/SquaredEuclideanFunction.java Mon Jan 4 16:59:56 2010 @@ -23,8 +23,7 @@ /** * While not strictly a distance, the Sq. Euclidean Distance is often all that is needed in many applications * that require a distance, thus saving a sq. rt. calculation - * - **/ + */ public class SquaredEuclideanFunction extends VectorDistanceFunction { protected String name = "sqedist"; @@ -42,16 +41,13 @@ * @param doc The doc to score */ protected double distance(int doc, DocValues dv1, DocValues dv2) { - double result = 0; - double [] vals1 = new double[source1.dimension()]; - double [] vals2 = new double[source1.dimension()]; + + double[] vals1 = new double[source1.dimension()]; + double[] vals2 = new double[source1.dimension()]; dv1.doubleVal(doc, vals1); dv2.doubleVal(doc, vals2); - for (int i = 0; i < vals1.length; i++) { - double v = vals1[i] - vals2[i]; - result += v * v; - } - return result; + + return DistanceUtils.squaredEuclideanDistance(vals1, vals2); } @Override Modified: lucene/solr/trunk/src/java/org/apache/solr/search/function/distance/VectorDistanceFunction.java URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/search/function/distance/VectorDistanceFunction.java?rev=895701&r1=895700&r2=895701&view=diff ============================================================================== --- lucene/solr/trunk/src/java/org/apache/solr/search/function/distance/VectorDistanceFunction.java (original) +++ lucene/solr/trunk/src/java/org/apache/solr/search/function/distance/VectorDistanceFunction.java Mon Jan 4 16:59:56 2010 @@ -20,8 +20,8 @@ import org.apache.lucene.search.Searcher; import org.apache.solr.common.SolrException; import org.apache.solr.search.function.DocValues; -import org.apache.solr.search.function.ValueSource; import org.apache.solr.search.function.MultiValueSource; +import org.apache.solr.search.function.ValueSource; import java.io.IOException; import java.util.Map; @@ -62,45 +62,18 @@ /** * Calculate the distance * - * @param doc The current doc + * @param doc The current doc * @param dv1 The values from the first MultiValueSource * @param dv2 The values from the second MultiValueSource * @return The distance */ protected double distance(int doc, DocValues dv1, DocValues dv2) { - double result = 0; //Handle some special cases: - double [] vals1 = new double[source1.dimension()]; - double [] vals2 = new double[source1.dimension()]; + double[] vals1 = new double[source1.dimension()]; + double[] vals2 = new double[source1.dimension()]; dv1.doubleVal(doc, vals1); dv2.doubleVal(doc, vals2); - if (power == 0) { - for (int i = 0; i < vals1.length; i++) { - result += vals1[i] - vals2[i] == 0 ? 0 :1; - } - - } else if (power == 1.0) { - for (int i = 0; i < vals1.length; i++) { - result += vals1[i] - vals2[i]; - } - } else if (power == 2.0) { - for (int i = 0; i < vals1.length; i++) { - double v = vals1[i] - vals2[i]; - result += v * v; - } - result = Math.sqrt(result); - } else if (power == Integer.MAX_VALUE || Double.isInfinite(power)) {//infininte norm? - for (int i = 0; i < vals1.length; i++) { - result = Math.max(vals1[i], vals2[i]); - } - } else { - for (int i = 0; i < vals1.length; i++) { - result += Math.pow(vals1[i] - vals2[i], power); - } - result = Math.pow(result, oneOverPower); - } - - return result; + return DistanceUtils.vectorDistance(vals1, vals2, power, oneOverPower); } @Override @@ -111,7 +84,6 @@ final DocValues vals2 = source2.getValues(context, reader); - return new DocValues() { @Override public byte byteVal(int doc) { @@ -120,7 +92,7 @@ @Override public short shortVal(int doc) { - return (short)doubleVal(doc); + return (short) doubleVal(doc); } public float floatVal(int doc) {