Return-Path: X-Original-To: apmail-commons-commits-archive@minotaur.apache.org Delivered-To: apmail-commons-commits-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 73D2A6E2E for ; Sat, 6 Aug 2011 02:36:57 +0000 (UTC) Received: (qmail 15679 invoked by uid 500); 6 Aug 2011 02:36:56 -0000 Delivered-To: apmail-commons-commits-archive@commons.apache.org Received: (qmail 15417 invoked by uid 500); 6 Aug 2011 02:36:54 -0000 Mailing-List: contact commits-help@commons.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@commons.apache.org Delivered-To: mailing list commits@commons.apache.org Received: (qmail 15410 invoked by uid 99); 6 Aug 2011 02:36:53 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Sat, 06 Aug 2011 02:36:53 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Sat, 06 Aug 2011 02:36:51 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id 45AE723889E5 for ; Sat, 6 Aug 2011 02:36:32 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1154434 - in /commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm: PhoneticEngine.java Rule.java Date: Sat, 06 Aug 2011 02:36:32 -0000 To: commits@commons.apache.org From: ggregory@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20110806023632.45AE723889E5@eris.apache.org> Author: ggregory Date: Sat Aug 6 02:36:31 2011 New Revision: 1154434 URL: http://svn.apache.org/viewvc?rev=1154434&view=rev Log: Sort methods AB. Modified: commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/PhoneticEngine.java commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Rule.java Modified: commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/PhoneticEngine.java URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/PhoneticEngine.java?rev=1154434&r1=1154433&r2=1154434&view=diff ============================================================================== --- commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/PhoneticEngine.java (original) +++ commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/PhoneticEngine.java Sat Aug 6 02:36:31 2011 @@ -268,42 +268,6 @@ public class PhoneticEngine { } /** - * Gets the Lang language guessing rules being used. - * - * @return the Lang in use - */ - public Lang getLang() { - return this.lang; - } - - /** - * Gets the NameType being used. - * - * @return the NameType in use - */ - public NameType getNameType() { - return this.nameType; - } - - /** - * Gets the RuleType being used. - * - * @return the RuleType in use - */ - public RuleType getRuleType() { - return this.ruleType; - } - - /** - * Gets if multiple phonetic encodings are concatenated or if just the first one is kept. - * - * @return true if multiple phonetic encodings are returned, false if just the first is. - */ - public boolean isConcat() { - return this.concat; - } - - /** * Encodes an input string into an output phonetic representation, given a set of possible origin languages. * * @param input @@ -399,4 +363,40 @@ public class PhoneticEngine { return phonemeBuilder.makeString(); } + + /** + * Gets the Lang language guessing rules being used. + * + * @return the Lang in use + */ + public Lang getLang() { + return this.lang; + } + + /** + * Gets the NameType being used. + * + * @return the NameType in use + */ + public NameType getNameType() { + return this.nameType; + } + + /** + * Gets the RuleType being used. + * + * @return the RuleType in use + */ + public RuleType getRuleType() { + return this.ruleType; + } + + /** + * Gets if multiple phonetic encodings are concatenated or if just the first one is kept. + * + * @return true if multiple phonetic encodings are returned, false if just the first is. + */ + public boolean isConcat() { + return this.concat; + } } Modified: commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Rule.java URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Rule.java?rev=1154434&r1=1154433&r2=1154434&view=diff ============================================================================== --- commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Rule.java (original) +++ commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Rule.java Sat Aug 6 02:36:31 2011 @@ -80,34 +80,79 @@ import java.util.regex.Pattern; */ public class Rule { - public static class Phoneme implements PhonemeExpr, Comparable { + private static class AppendableCharSeqeuence implements CharSequence { + + private final CharSequence left; + private final CharSequence right; + private final int length; + private String contentCache = null; - private final CharSequence phonemeText; - private final Languages.LanguageSet languages; + private AppendableCharSeqeuence(CharSequence left, CharSequence right) { + this.left = left; + this.right = right; + this.length = left.length() + right.length(); + } - public Phoneme(CharSequence phonemeText, Languages.LanguageSet languages) { - this.phonemeText = phonemeText; - this.languages = languages; + public void buildString(StringBuilder sb) { + if (left instanceof AppendableCharSeqeuence) { + ((AppendableCharSeqeuence) left).buildString(sb); + } else { + sb.append(left); + } + if (right instanceof AppendableCharSeqeuence) { + ((AppendableCharSeqeuence) right).buildString(sb); + } else { + sb.append(right); + } } - public Phoneme append(CharSequence str) { - return new Phoneme(new AppendableCharSeqeuence(this.phonemeText, str), this.languages); + public char charAt(int index) { + // int lLength = left.length(); + // if(index < lLength) return left.charAt(index); + // else return right.charAt(index - lLength); + return toString().charAt(index); } - public Languages.LanguageSet getLanguages() { - return this.languages; + public int length() { + return length; } - public Iterable getPhonemes() { - return Collections.singleton(this); + public CharSequence subSequence(int start, int end) { + // int lLength = left.length(); + // if(start > lLength) return right.subSequence(start - lLength, end - lLength); + // else if(end <= lLength) return left.subSequence(start, end); + // else { + // CharSequence newLeft = left.subSequence(start, lLength); + // CharSequence newRight = right.subSequence(0, end - lLength); + // return new AppendableCharSeqeuence(newLeft, newRight); + // } + return toString().subSequence(start, end); } - public CharSequence getPhonemeText() { - return this.phonemeText; + @Override + public String toString() { + if (contentCache == null) { + StringBuilder sb = new StringBuilder(); + buildString(sb); + contentCache = sb.toString(); + // System.err.println("Materialized string: " + contentCache); + } + return contentCache; } + } - public Phoneme join(Phoneme right) { - return new Phoneme(new AppendableCharSeqeuence(this.phonemeText, right.phonemeText), this.languages.restrictTo(right.languages)); + public static class Phoneme implements PhonemeExpr, Comparable { + + private final CharSequence phonemeText; + private final Languages.LanguageSet languages; + + public Phoneme(CharSequence phonemeText, Languages.LanguageSet languages) { + this.phonemeText = phonemeText; + this.languages = languages; + } + + public Phoneme append(CharSequence str) { + return new Phoneme(new AppendableCharSeqeuence(this.phonemeText, str), this.languages); } public int compareTo(Phoneme o) { @@ -127,6 +172,22 @@ public class Rule { return 0; } + + public Languages.LanguageSet getLanguages() { + return this.languages; + } + + public Iterable getPhonemes() { + return Collections.singleton(this); + } + + public CharSequence getPhonemeText() { + return this.phonemeText; + } + + public Phoneme join(Phoneme right) { + return new Phoneme(new AppendableCharSeqeuence(this.phonemeText, right.phonemeText), this.languages.restrictTo(right.languages)); + } } public interface PhonemeExpr { @@ -145,6 +206,20 @@ public class Rule { } } + /** + * A minimal wrapper around the functionality of Matcher that we use, to allow for alternate implementations. + */ + public static interface RMatcher { + public boolean find(); + } + + /** + * A minimal wrapper around the functionality of Pattern that we use, to allow for alternate implementations. + */ + public static interface RPattern { + public RMatcher matcher(CharSequence input); + } + public static final String ALL = "ALL"; private static final String DOUBLE_QUOTE = "\""; @@ -180,6 +255,15 @@ public class Rule { } } + private static boolean contains(CharSequence chars, char input) { + for (int i = 0; i < chars.length(); i++) { + if (chars.charAt(i) == input) { + return true; + } + } + return false; + } + private static String createResourceName(NameType nameType, RuleType rt, String lang) { return String.format("org/apache/commons/codec/language/bm/%s_%s_%s.txt", nameType.getName(), rt.getName(), lang); } @@ -206,6 +290,18 @@ public class Rule { return new Scanner(rulesIS, ResourceConstants.ENCODING); } + private static boolean endsWith(CharSequence input, CharSequence suffix) { + if (suffix.length() > input.length()) { + return false; + } + for (int i = input.length() - 1, j = suffix.length() - 1; j >= 0; i--, j--) { + if (input.charAt(i) != suffix.charAt(j)) { + return false; + } + } + return true; + } + /** * Gets rules for a combination of name type, rule type and languages. * @@ -359,123 +455,6 @@ public class Rule { return lines; } - private static String stripQuotes(String str) { - if (str.startsWith(DOUBLE_QUOTE)) { - str = str.substring(1); - } - - if (str.endsWith(DOUBLE_QUOTE)) { - str = str.substring(0, str.length() - 1); - } - - return str; - } - - private final RPattern lContext; - - private final String pattern; - - private final PhonemeExpr phoneme; - - private final RPattern rContext; - - /** - * Creates a new rule. - * - * @param pattern - * the pattern - * @param lContext - * the left context - * @param rContext - * the right context - * @param phoneme - * the resulting phoneme - */ - public Rule(String pattern, String lContext, String rContext, PhonemeExpr phoneme) { - this.pattern = pattern; - this.lContext = pattern(lContext + "$"); - this.rContext = pattern("^" + rContext); - this.phoneme = phoneme; - } - - /** - * Gets the left context. This is a regular expression that must match to the left of the pattern. - * - * @return the left context Pattern - */ - public RPattern getLContext() { - return this.lContext; - } - - /** - * Gets the pattern. This is a string-literal that must exactly match. - * - * @return the pattern - */ - public String getPattern() { - return this.pattern; - } - - /** - * Gets the phoneme. If the rule matches, this is the phoneme associated with the pattern match. - * - * @return the phoneme - */ - public PhonemeExpr getPhoneme() { - return this.phoneme; - } - - /** - * Gets the right context. This is a regular expression that must match to the right of the pattern. - * - * @return the right context Pattern - */ - public RPattern getRContext() { - return this.rContext; - } - - /** - * Decides if the pattern and context match the input starting at a position. - * - * @param input - * the input String - * @param i - * the int position within the input - * @return true if the pattern and left/right context match, false otherwise - */ - public boolean patternAndContextMatches(CharSequence input, int i) { - if (i < 0) - throw new IndexOutOfBoundsException("Can not match pattern at negative indexes"); - - int patternLength = this.pattern.length(); - int ipl = i + patternLength; - - if (ipl > input.length()) { - // not enough room for the pattern to match - return false; - } - - boolean patternMatches = input.subSequence(i, ipl).equals(this.pattern); - boolean rContextMatches = this.rContext.matcher(input.subSequence(ipl, input.length())).find(); - boolean lContextMatches = this.lContext.matcher(input.subSequence(0, i)).find(); - - return patternMatches && rContextMatches && lContextMatches; - } - - /** - * A minimal wrapper around the functionality of Pattern that we use, to allow for alternate implementations. - */ - public static interface RPattern { - public RMatcher matcher(CharSequence input); - } - - /** - * A minimal wrapper around the functionality of Matcher that we use, to allow for alternate implementations. - */ - public static interface RMatcher { - public boolean find(); - } - /** * Attempt to compile the regex into direct string ops, falling back to Pattern and Matcher in the worst case. * @@ -628,85 +607,106 @@ public class Rule { return true; } - private static boolean endsWith(CharSequence input, CharSequence suffix) { - if (suffix.length() > input.length()) { - return false; + private static String stripQuotes(String str) { + if (str.startsWith(DOUBLE_QUOTE)) { + str = str.substring(1); } - for (int i = input.length() - 1, j = suffix.length() - 1; j >= 0; i--, j--) { - if (input.charAt(i) != suffix.charAt(j)) { - return false; - } + + if (str.endsWith(DOUBLE_QUOTE)) { + str = str.substring(0, str.length() - 1); } - return true; + + return str; } - private static boolean contains(CharSequence chars, char input) { - for (int i = 0; i < chars.length(); i++) { - if (chars.charAt(i) == input) { - return true; - } - } - return false; + private final RPattern lContext; + + private final String pattern; + + private final PhonemeExpr phoneme; + + private final RPattern rContext; + + /** + * Creates a new rule. + * + * @param pattern + * the pattern + * @param lContext + * the left context + * @param rContext + * the right context + * @param phoneme + * the resulting phoneme + */ + public Rule(String pattern, String lContext, String rContext, PhonemeExpr phoneme) { + this.pattern = pattern; + this.lContext = pattern(lContext + "$"); + this.rContext = pattern("^" + rContext); + this.phoneme = phoneme; } - private static class AppendableCharSeqeuence implements CharSequence { - - private final CharSequence left; - private final CharSequence right; - private final int length; - private String contentCache = null; + /** + * Gets the left context. This is a regular expression that must match to the left of the pattern. + * + * @return the left context Pattern + */ + public RPattern getLContext() { + return this.lContext; + } - private AppendableCharSeqeuence(CharSequence left, CharSequence right) { - this.left = left; - this.right = right; - this.length = left.length() + right.length(); - } + /** + * Gets the pattern. This is a string-literal that must exactly match. + * + * @return the pattern + */ + public String getPattern() { + return this.pattern; + } - public int length() { - return length; - } + /** + * Gets the phoneme. If the rule matches, this is the phoneme associated with the pattern match. + * + * @return the phoneme + */ + public PhonemeExpr getPhoneme() { + return this.phoneme; + } - public char charAt(int index) { - // int lLength = left.length(); - // if(index < lLength) return left.charAt(index); - // else return right.charAt(index - lLength); - return toString().charAt(index); - } + /** + * Gets the right context. This is a regular expression that must match to the right of the pattern. + * + * @return the right context Pattern + */ + public RPattern getRContext() { + return this.rContext; + } - public CharSequence subSequence(int start, int end) { - // int lLength = left.length(); - // if(start > lLength) return right.subSequence(start - lLength, end - lLength); - // else if(end <= lLength) return left.subSequence(start, end); - // else { - // CharSequence newLeft = left.subSequence(start, lLength); - // CharSequence newRight = right.subSequence(0, end - lLength); - // return new AppendableCharSeqeuence(newLeft, newRight); - // } - return toString().subSequence(start, end); - } + /** + * Decides if the pattern and context match the input starting at a position. + * + * @param input + * the input String + * @param i + * the int position within the input + * @return true if the pattern and left/right context match, false otherwise + */ + public boolean patternAndContextMatches(CharSequence input, int i) { + if (i < 0) + throw new IndexOutOfBoundsException("Can not match pattern at negative indexes"); - @Override - public String toString() { - if (contentCache == null) { - StringBuilder sb = new StringBuilder(); - buildString(sb); - contentCache = sb.toString(); - // System.err.println("Materialized string: " + contentCache); - } - return contentCache; - } + int patternLength = this.pattern.length(); + int ipl = i + patternLength; - public void buildString(StringBuilder sb) { - if (left instanceof AppendableCharSeqeuence) { - ((AppendableCharSeqeuence) left).buildString(sb); - } else { - sb.append(left); - } - if (right instanceof AppendableCharSeqeuence) { - ((AppendableCharSeqeuence) right).buildString(sb); - } else { - sb.append(right); - } + if (ipl > input.length()) { + // not enough room for the pattern to match + return false; } + + boolean patternMatches = input.subSequence(i, ipl).equals(this.pattern); + boolean rContextMatches = this.rContext.matcher(input.subSequence(ipl, input.length())).find(); + boolean lContextMatches = this.lContext.matcher(input.subSequence(0, i)).find(); + + return patternMatches && rContextMatches && lContextMatches; } }