Return-Path:
It utilizes the CosineSimilarity to compute the distance. Character sequences
* are converted into vectors through a simple tokenizer that works with
+ * An edit distance measures the similarity between two character sequences. Closer strings + * have shorter distances, and vice-versa. + *
+ * + *
+ * This is a BiFunction<CharSequence, CharSequence, R>.
+ * The apply
method
+ * accepts a pair of {@link CharSequence} parameters
+ * and returns an R
type similarity score.
+ *
+ * This stores an {@link EditDistance} implementation and a {@link CharSequence} "left" string. + * The {@link #apply(CharSequence right)} method accepts the "right" string and invokes the + * comparison function for the pair of strings. + *
+ * + *+ * The following is an example which finds the most similar string: + *
+ *+ * EditDistance<Integer> editDistance = new LevenshteinDistance(); + * String target = "Apache"; + * EditDistanceFrom<Integer> editDistanceFrom = + * new EditDistanceFrom<Integer>(editDistance, target); + * String mostSimilar = null; + * Integer shortestDistance = null; + * + * for (String test : new String[] { "Appaloosa", "a patchy", "apple" }) { + * Integer distance = editDistanceFrom.apply(test); + * if (shortestDistance == null || distance < shortestDistance) { + * shortestDistance = distance; + * mostSimilar = test; + * } + * } + * + * System.out.println("The string most similar to \"" + target + "\" " + * + "is \"" + mostSimilar + "\" because " + * + "its distance is only " + shortestDistance + "."); + *+ * + * @param
This accepts the edit distance implementation and the "left" string.
+ * + * @param editDistance This may not be null. + * @param left This may be null here, + * but the EditDistance#compare(CharSequence left, CharSequence right) + * implementation may not accept nulls. + */ + public EditDistanceFrom(final EditDistance+ * This compares "left" field against the "right" parameter + * using the "edit distance" implementation. + *
+ * + * @param right the second CharSequence + * @return the similarity score between two CharSequences + */ + public R apply(CharSequence right) { + return editDistance.apply(left, right); + } + + /** + * Gets the left parameter. + * + * @return the left parameter + */ + public CharSequence getLeft() { + return left; + } + + /** + * Gets the edit distance. + * + * @return the edit distance + */ + public EditDistance* This code has been adapted from Apache Commons Lang 3.3. *
+ * + * @since 1.0 */ -public class FuzzyScore implements StringMetric- * score.apply(null, null, null) = IllegalArgumentException - * score.apply("", "", Locale.ENGLISH) = 0 - * score.apply("Workshop", "b", Locale.ENGLISH) = 0 - * score.apply("Room", "o", Locale.ENGLISH) = 1 - * score.apply("Workshop", "w", Locale.ENGLISH) = 1 - * score.apply("Workshop", "ws", Locale.ENGLISH) = 2 - * score.apply("Workshop", "wo", Locale.ENGLISH) = 4 - * score.apply("Apache Software Foundation", "asf", Locale.ENGLISH) = 3 + * score.fuzzyScore(null, null, null) = IllegalArgumentException + * score.fuzzyScore("", "", Locale.ENGLISH) = 0 + * score.fuzzyScore("Workshop", "b", Locale.ENGLISH) = 0 + * score.fuzzyScore("Room", "o", Locale.ENGLISH) = 1 + * score.fuzzyScore("Workshop", "w", Locale.ENGLISH) = 1 + * score.fuzzyScore("Workshop", "ws", Locale.ENGLISH) = 2 + * score.fuzzyScore("Workshop", "wo", Locale.ENGLISH) = 4 + * score.fuzzyScore("Apache Software Foundation", "asf", Locale.ENGLISH) = 3 ** * @param term a full term that should be matched against, must not be null @@ -78,8 +80,7 @@ public class FuzzyScore implements StringMetric
* This code has been adapted from Apache Commons Lang 3.3. *
+ * + * @since 1.0 */ -public class JaroWrinklerDistance implements StringMetric* This code has been adapted from Apache Commons Lang 3.3. *
+ * + * @since 1.0 */ -public class LevenshteinDistance implements StringMetric- * A string metric measures the similarity between two character sequences. Depending on - * the algorithm, higher values can mean closer strings, or more distant strings. - *
- * - *
- * This is a BiFunction<CharSequence, CharSequence, R>.
- * The apply
method
- * accepts a pair of {@link CharSequence} parameters
- * and returns an R
type similarity score.
- *
- * This stores a {@link StringMetric} implementation and a {@link CharSequence} "left" string. - * The {@link #apply(CharSequence right)} method accepts the "right" string and invokes the - * comparison function for the pair of strings. - *
- * - *- * The following is an example which finds the most similar string: - *
- *- * StringMetric<Integer> metric = new LevenshteinDistance(); - * String target = "Apache"; - * StringMetricFrom<Integer> metricFrom = - * new StringMetricFrom<Integer>(metric, target); - * String mostSimilar = null; - * Integer shortestDistance = null; - * - * for (String test : new String[] { "Appaloosa", "a patchy", "apple" }) { - * Integer distance = metricFrom.apply(test); - * if (shortestDistance == null || distance < shortestDistance) { - * shortestDistance = distance; - * mostSimilar = test; - * } - * } - * - * System.out.println("The string most similar to \"" + target + "\" " - * + "is \"" + mostSimilar + "\" because " - * + "its distance is only " + shortestDistance + "."); - *- * - * @param
This accepts the metric implementation and the "left" string.
- * - * @param metric This may not be null. - * @param left This may be null here, - * but the StringMetric#compare(CharSequence left, CharSequence right) - * implementation may not accept nulls. - */ - public StringMetricFrom(final StringMetric- * This compares "left" field against the "right" parameter - * using the "metric" implementation. - *
- * - * @param right the second CharSequence - * @return the similarity score between two CharSequences - */ - public R apply(CharSequence right) { - return metric.apply(left, right); - } - - /** - * Gets the left parameter. - * - * @return the left parameter - */ - public CharSequence getLeft() { - return left; - } - - /** - * Gets the right parameter. - * - * @return the right parameter - */ - public StringMetric