commons-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ggreg...@apache.org
Subject svn commit: r1151603 - in /commons/proper/codec/trunk/src: java/org/apache/commons/codec/language/bm/PhoneticEngine.java java/org/apache/commons/codec/language/bm/Rule.java test/org/apache/commons/codec/language/bm/BeiderMorseEncoderTest.java
Date Wed, 27 Jul 2011 19:47:48 GMT
Author: ggregory
Date: Wed Jul 27 19:47:48 2011
New Revision: 1151603

URL: http://svn.apache.org/viewvc?rev=1151603&view=rev
Log:
Fix failing test "gna": org.apache.commons.codec.language.bm.BeiderMorseEncoderTest.testEncodeGna()

Modified:
    commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/PhoneticEngine.java
    commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Rule.java
    commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/BeiderMorseEncoderTest.java

Modified: commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/PhoneticEngine.java
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/PhoneticEngine.java?rev=1151603&r1=1151602&r2=1151603&view=diff
==============================================================================
--- commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/PhoneticEngine.java (original)
+++ commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/PhoneticEngine.java Wed Jul 27 19:47:48 2011
@@ -145,9 +145,11 @@ public class PhoneticEngine {
      * @return a phonetic representation of the input; a String containing '-'-separated phonetic representations of the input
      */
     public String phoneticUtf8(String input, final Set<String> languageSet) {
-        List<Rule> rules = Rule.instance(this.nameType, RuleType.RULES, languageSet);
-        List<Rule> finalRules1 = Rule.instance(this.nameType, this.ruleType, "common");
-        List<Rule> finalRules2 = Rule.instance(this.nameType, this.ruleType, languageSet);
+        final List<Rule> rules = Rule.instance(this.nameType, RuleType.RULES, languageSet);
+        final List<Rule> finalRules1 = Rule.instance(this.nameType, this.ruleType, "common");
+        final List<Rule> finalRules2 = Rule.instance(this.nameType, this.ruleType, languageSet);
+        // System.err.println("Languages: " + languageSet);
+        // System.err.println("Rules: " + rules);
 
         // tidy the input
         // lower case is a locale-dependent operation
@@ -345,6 +347,11 @@ public class PhoneticEngine {
         String prefix = phonetic.substring(0, altStart);
         altStart++;
         int altEnd = phonetic.indexOf(')');
+
+        if (altEnd < altStart) {
+            throw new IllegalArgumentException("Phonetic string has a close-bracket before the first open-bracket");
+        }
+        
         String altString = phonetic.substring(altStart, altEnd);
         altEnd++;
         String suffix = phonetic.substring(altEnd);

Modified: commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Rule.java
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Rule.java?rev=1151603&r1=1151602&r2=1151603&view=diff
==============================================================================
--- commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Rule.java (original)
+++ commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Rule.java Wed Jul 27 19:47:48 2011
@@ -27,6 +27,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.Scanner;
 import java.util.Set;
+import java.util.Stack;
 import java.util.regex.Pattern;
 
 /**
@@ -77,10 +78,10 @@ import java.util.regex.Pattern;
  * @since 2.0
  */
 public class Rule {
-    private static final String DOUBLE_QUOTE = "\"";
-
     public static final String ALL = "ALL";
 
+    private static final String DOUBLE_QUOTE = "\"";
+
     private static final String HASH_INCLUDE = "#include";
 
     private static final Map<NameType, Map<RuleType, Map<String, List<Rule>>>> RULES = new EnumMap<NameType, Map<RuleType, Map<String, List<Rule>>>>(
@@ -95,10 +96,14 @@ public class Rule {
 
                 Languages ls = Languages.instance(s);
                 for (String l : ls.getLanguages()) {
-                    rs.put(l, parseRules(mkScanner(s, rt, l)));
+                    try {
+                        rs.put(l, parseRules(createScanner(s, rt, l)));
+                    } catch (IllegalStateException e) {
+                        throw new IllegalStateException("Problem processing " + createResourceName(s, rt, l), e);
+                    }
                 }
                 if (!rt.equals(RuleType.RULES)) {
-                    rs.put("common", parseRules(mkScanner(s, rt, "common")));
+                    rs.put("common", parseRules(createScanner(s, rt, "common")));
                 }
 
                 rts.put(rt, Collections.unmodifiableMap(rs));
@@ -108,6 +113,32 @@ public class Rule {
         }
     }
 
+    private static String createResourceName(NameType nameType, RuleType rt, String lang) {
+        return String.format("org/apache/commons/codec/language/bm/%s_%s_%s.txt", nameType.getName(), rt.getName(), lang);
+    }
+
+    private static Scanner createScanner(NameType nameType, RuleType rt, String lang) {
+        String resName = createResourceName(nameType, rt, lang);
+        InputStream rulesIS = Languages.class.getClassLoader().getResourceAsStream(resName);
+
+        if (rulesIS == null) {
+            throw new IllegalArgumentException("Unable to load resource: " + resName);
+        }
+
+        return new Scanner(rulesIS, ResourceConstants.ENCODING);
+    }
+
+    private static Scanner createScanner(String lang) {
+        String resName = String.format("org/apache/commons/codec/language/bm/%s.txt", lang);
+        InputStream rulesIS = Languages.class.getClassLoader().getResourceAsStream(resName);
+
+        if (rulesIS == null) {
+            throw new IllegalArgumentException("Unable to load resource: " + resName);
+        }
+
+        return new Scanner(rulesIS, ResourceConstants.ENCODING);
+    }
+
     /**
      * Gets rules for a combination of name type, rule type and languages.
      * 
@@ -148,33 +179,13 @@ public class Rule {
         return rules;
     }
 
-    private static Scanner mkScanner(NameType nameType, RuleType rt, String lang) {
-        String resName = String.format("org/apache/commons/codec/language/bm/%s_%s_%s.txt", nameType.getName(), rt.getName(), lang);
-        InputStream rulesIS = Languages.class.getClassLoader().getResourceAsStream(resName);
-
-        if (rulesIS == null) {
-            throw new IllegalArgumentException("Unable to load resource: " + resName);
-        }
-
-        return new Scanner(rulesIS, ResourceConstants.ENCODING);
-    }
-
-    private static Scanner mkScanner(String lang) {
-        String resName = String.format("org/apache/commons/codec/language/bm/%s.txt", lang);
-        InputStream rulesIS = Languages.class.getClassLoader().getResourceAsStream(resName);
-
-        if (rulesIS == null) {
-            throw new IllegalArgumentException("Unable to load resource: " + resName);
-        }
-
-        return new Scanner(rulesIS, ResourceConstants.ENCODING);
-    }
-
     private static List<Rule> parseRules(Scanner scanner) {
         List<Rule> lines = new ArrayList<Rule>();
+        int currentLine = 0;
 
         boolean inMultilineComment = false;
         while (scanner.hasNextLine()) {
+            currentLine++;
             String rawLine = scanner.nextLine();
             String line = rawLine;
 
@@ -206,7 +217,7 @@ public class Rule {
                         if (incl.contains(" ")) {
                             System.err.println("Warining: malformed import statement: " + rawLine);
                         } else {
-                            lines.addAll(parseRules(mkScanner(incl)));
+                            lines.addAll(parseRules(createScanner(incl)));
                         }
                     } else {
                         // rule
@@ -218,6 +229,11 @@ public class Rule {
                             String lCon = stripQuotes(parts[1]);
                             String rCon = stripQuotes(parts[2]);
                             String ph = stripQuotes(parts[3]);
+                            try {
+                                validatePhenome(ph);
+                            } catch (IllegalArgumentException e) {
+                                throw new IllegalStateException("Problem parsing line " + currentLine, e);
+                            }
                             Rule r = new Rule(pat, lCon, rCon, ph, Collections.<String> emptySet(), ""); // guessing last 2 parameters
                             lines.add(r);
                         }
@@ -241,6 +257,40 @@ public class Rule {
         return str;
     }
 
+    private static void validatePhenome(CharSequence ph) {
+        Stack<Character> stack = new Stack<Character>();
+        for (int i = 0; i < ph.length(); i++) {
+            switch (ph.charAt(i)) {
+            case '(':
+                stack.push('(');
+                break;
+            case '[':
+                stack.push('[');
+                break;
+            case ')': {
+                if (stack.isEmpty())
+                    throw new IllegalArgumentException("Closing ')' at " + i + " without an opening '('" + " in " + ph);
+                char c = stack.pop();
+                if (c != '(')
+                    throw new IllegalArgumentException("Closing ')' does not pair with opening '" + c + "' at " + i + " in " + ph);
+                break;
+            }
+            case ']': {
+                if (stack.isEmpty())
+                    throw new IllegalArgumentException("Closing ']' at " + i + " without an opening '['" + " in " + ph);
+                char c = stack.pop();
+                if (c != '[')
+                    throw new IllegalArgumentException("Closing ']' does not pair with opening '" + c + "' at " + i + " in " + ph);
+                break;
+            }
+            default:
+                break;
+            }
+        }
+        if (!stack.isEmpty())
+            throw new IllegalArgumentException("Bracket(s) opened without corresponding closes: " + stack + " in " + ph);
+    }
+
     private final Set<String> languages;
 
     private final Pattern lContext;

Modified: commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/BeiderMorseEncoderTest.java
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/BeiderMorseEncoderTest.java?rev=1151603&r1=1151602&r2=1151603&view=diff
==============================================================================
--- commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/BeiderMorseEncoderTest.java (original)
+++ commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/BeiderMorseEncoderTest.java Wed Jul 27 19:47:48 2011
@@ -46,7 +46,7 @@ public class BeiderMorseEncoderTest exte
      * 
      * @throws EncoderException
      */
-    @Ignore
+    // @Ignore
     @Test
     public void testEncodeGna() throws EncoderException {
         BeiderMorseEncoder bmpm = new BeiderMorseEncoder();
@@ -71,7 +71,7 @@ public class BeiderMorseEncoderTest exte
     }
 
     @Ignore
-    @Test
+    @Test(timeout = 10000L)
     public void testLongestEnglishSurname() throws EncoderException {
         BeiderMorseEncoder bmpm = new BeiderMorseEncoder();
         bmpm.setNameType(NameType.GENERIC);
@@ -113,7 +113,7 @@ public class BeiderMorseEncoderTest exte
     }
 
     @Ignore
-    @Test
+    @Test(timeout = 10000L)
     public void testSpeedCheck() throws EncoderException {
         char[] chars = new char[] { 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'o', 'u' };
         BeiderMorseEncoder bmpm = new BeiderMorseEncoder();



Mime
View raw message