commons-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ggreg...@apache.org
Subject svn commit: r1155135 - in /commons/proper/codec/trunk/src: java/org/apache/commons/codec/language/bm/ test/org/apache/commons/codec/language/bm/
Date Mon, 08 Aug 2011 22:47:26 GMT
Author: ggregory
Date: Mon Aug  8 22:47:24 2011
New Revision: 1155135

URL: http://svn.apache.org/viewvc?rev=1155135&view=rev
Log:
[CODEC-125] Implement a Beider-Morse phonetic matching codec. Applied patch https://issues.apache.org/jira/secure/attachment/12489755/fightingMemoryChurn.patch

Modified:
    commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Lang.java
    commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Languages.java
    commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/NameType.java
    commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/PhoneticEngine.java
    commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Rule.java
    commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/BeiderMorseEncoderTest.java
    commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/RuleTest.java

Modified: commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Lang.java
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Lang.java?rev=1155135&r1=1155134&r2=1155135&view=diff
==============================================================================
--- commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Lang.java (original)
+++ commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Lang.java Mon
Aug  8 22:47:24 2011
@@ -24,6 +24,7 @@ import java.util.Collections;
 import java.util.EnumMap;
 import java.util.HashSet;
 import java.util.List;
+import java.util.Locale;
 import java.util.Map;
 import java.util.Scanner;
 import java.util.Set;
@@ -71,7 +72,7 @@ import java.util.regex.Pattern;
  */
 public class Lang {
 
-    private static class LangRule {
+    private static final class LangRule {
         private final boolean acceptOnMatch;
         private final Set<String> languages;
         private final Pattern pattern;
@@ -199,7 +200,7 @@ public class Lang {
      */
     public String guessLanguage(String text) {
         Languages.LanguageSet ls = guessLanguages(text);
-        return ls.isSingleton() ? ls.getAny() : Languages.ANY; 
+        return ls.isSingleton() ? ls.getAny() : Languages.ANY;
     }
 
     /**
@@ -210,7 +211,7 @@ public class Lang {
      * @return a Set of Strings of language names that are potential matches for the input
word
      */
     public Languages.LanguageSet guessLanguages(String input) {
-        String text = input.toLowerCase(); // todo: locale?
+        String text = input.toLowerCase(Locale.ENGLISH);
         // System.out.println("Testing text: '" + text + "'");
 
         Set<String> langs = new HashSet<String>(this.languages.getLanguages());

Modified: commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Languages.java
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Languages.java?rev=1155135&r1=1155134&r2=1155135&view=diff
==============================================================================
--- commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Languages.java
(original)
+++ commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Languages.java
Mon Aug  8 22:47:24 2011
@@ -58,9 +58,9 @@ public class Languages {
      * A set of languages.
      */
     public static abstract class LanguageSet {
-        
+
         public static LanguageSet from(Set<String> langs) {
-            return langs.isEmpty() ? NO_LANGUAGES : new SomeLanguages(langs);  
+            return langs.isEmpty() ? NO_LANGUAGES : new SomeLanguages(langs);
         }
 
         public abstract boolean contains(String language);
@@ -77,7 +77,7 @@ public class Languages {
     /**
      * Some languages, explicitly enumerated.
      */
-    public static class SomeLanguages extends LanguageSet {
+    public static final class SomeLanguages extends LanguageSet {
         private final Set<String> languages;
 
         private SomeLanguages(Set<String> languages) {
@@ -116,9 +116,13 @@ public class Languages {
                 return this;
             } else {
                 SomeLanguages sl = (SomeLanguages) other;
-                Set<String> ls = new HashSet<String>(this.languages);
-                ls.retainAll(sl.languages);
-                return from(ls);
+                if (sl.languages.containsAll(languages)) {
+                    return this;
+                } else {
+                    Set<String> ls = new HashSet<String>(this.languages);
+                    ls.retainAll(sl.languages);
+                    return from(ls);
+                }
             }
         }
 

Modified: commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/NameType.java
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/NameType.java?rev=1155135&r1=1155134&r2=1155135&view=diff
==============================================================================
--- commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/NameType.java
(original)
+++ commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/NameType.java
Mon Aug  8 22:47:24 2011
@@ -24,13 +24,13 @@ package org.apache.commons.codec.languag
  * @since 2.0
  */
 public enum NameType {
-    
+
     /** Ashkenazi family names */
     ASHKENAZI("ash"),
-    
+
     /** Generic names and words */
     GENERIC("gen"),
-    
+
     /** Sephardic family names */
     SEPHARDIC("sep");
 

Modified: commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/PhoneticEngine.java
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/PhoneticEngine.java?rev=1155135&r1=1155134&r2=1155135&view=diff
==============================================================================
--- commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/PhoneticEngine.java
(original)
+++ commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/PhoneticEngine.java
Mon Aug  8 22:47:24 2011
@@ -51,7 +51,7 @@ import java.util.TreeSet;
  */
 public class PhoneticEngine {
 
-    static class PhonemeBuilder {
+    static final class PhonemeBuilder {
 
         public static PhonemeBuilder empty(Languages.LanguageSet languages) {
             return new PhonemeBuilder(Collections.singleton(new Rule.Phoneme("", languages)));
@@ -108,7 +108,7 @@ public class PhoneticEngine {
         }
     }
 
-    private static class RulesApplication {
+    private static final class RulesApplication {
         private final List<Rule> finalRules;
         private final CharSequence input;
 
@@ -176,6 +176,32 @@ public class PhoneticEngine {
                 "de la", "della", "des", "di", "do", "dos", "du", "van", "von"))));
     }
 
+    private static CharSequence cacheSubSequence(final CharSequence cached) {
+        // return cached;
+        final CharSequence[][] cache = new CharSequence[cached.length()][cached.length()];
+        return new CharSequence() {
+            public char charAt(int index) {
+                return cached.charAt(index);
+            }
+
+            public int length() {
+                return cached.length();
+            }
+
+            public CharSequence subSequence(int start, int end) {
+                if (start == end)
+                    return "";
+
+                CharSequence res = cache[start][end - 1];
+                if (res == null) {
+                    res = cached.subSequence(start, end);
+                    cache[start][end - 1] = res;
+                }
+                return res;
+            }
+        };
+    }
+
     private static String join(Iterable<String> strings, String sep) {
         StringBuilder sb = new StringBuilder();
         Iterator<String> si = strings.iterator();
@@ -229,7 +255,7 @@ public class PhoneticEngine {
 
         for (Rule.Phoneme phoneme : phonemeBuilder.getPhonemes()) {
             PhonemeBuilder subBuilder = PhonemeBuilder.empty(phoneme.getLanguages());
-            CharSequence phonemeText = phoneme.getPhonemeText();
+            CharSequence phonemeText = cacheSubSequence(phoneme.getPhonemeText());
             // System.err.println("Expanding: " + phonemeText);
 
             for (int i = 0; i < phonemeText.length();) {
@@ -248,7 +274,7 @@ public class PhoneticEngine {
             }
 
             // System.err.println("Expanded to: " + subBuilder.makeString());
-
+            // System.err.println("phenomes in collection of type: " + subBuilder.getPhonemes().getClass());
             phonemes.addAll(subBuilder.getPhonemes());
         }
 
@@ -345,8 +371,9 @@ public class PhoneticEngine {
         PhonemeBuilder phonemeBuilder = PhonemeBuilder.empty(languageSet);
 
         // loop over each char in the input - we will handle the increment manually
-        for (int i = 0; i < input.length();) {
-            RulesApplication rulesApplication = new RulesApplication(rules, input, phonemeBuilder,
i).invoke();
+        CharSequence inputCache = cacheSubSequence(input);
+        for (int i = 0; i < inputCache.length();) {
+            RulesApplication rulesApplication = new RulesApplication(rules, inputCache, phonemeBuilder,
i).invoke();
             i = rulesApplication.getI();
             phonemeBuilder = rulesApplication.getPhonemeBuilder();
             // System.err.println(input + " " + i + ": " + phonemeBuilder.makeString());

Modified: commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Rule.java
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Rule.java?rev=1155135&r1=1155134&r2=1155135&view=diff
==============================================================================
--- commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Rule.java (original)
+++ commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Rule.java Mon
Aug  8 22:47:24 2011
@@ -80,68 +80,7 @@ import java.util.regex.Pattern;
  */
 public class Rule {
 
-    private static class AppendableCharSeqeuence implements CharSequence {
-
-        private final CharSequence left;
-        private final CharSequence right;
-        private final int length;
-        private String contentCache = null;
-
-        private AppendableCharSeqeuence(CharSequence left, CharSequence right) {
-            this.left = left;
-            this.right = right;
-            this.length = left.length() + right.length();
-        }
-
-        public void buildString(StringBuilder sb) {
-            if (left instanceof AppendableCharSeqeuence) {
-                ((AppendableCharSeqeuence) left).buildString(sb);
-            } else {
-                sb.append(left);
-            }
-            if (right instanceof AppendableCharSeqeuence) {
-                ((AppendableCharSeqeuence) right).buildString(sb);
-            } else {
-                sb.append(right);
-            }
-        }
-
-        public char charAt(int index) {
-            // int lLength = left.length();
-            // if(index < lLength) return left.charAt(index);
-            // else return right.charAt(index - lLength);
-            return toString().charAt(index);
-        }
-
-        public int length() {
-            return length;
-        }
-
-        public CharSequence subSequence(int start, int end) {
-            // int lLength = left.length();
-            // if(start > lLength) return right.subSequence(start - lLength, end - lLength);
-            // else if(end <= lLength) return left.subSequence(start, end);
-            // else {
-            // CharSequence newLeft = left.subSequence(start, lLength);
-            // CharSequence newRight = right.subSequence(0, end - lLength);
-            // return new AppendableCharSeqeuence(newLeft, newRight);
-            // }
-            return toString().subSequence(start, end);
-        }
-
-        @Override
-        public String toString() {
-            if (contentCache == null) {
-                StringBuilder sb = new StringBuilder();
-                buildString(sb);
-                contentCache = sb.toString();
-                // System.err.println("Materialized string: " + contentCache);
-            }
-            return contentCache;
-        }
-    }
-
-    public static class Phoneme implements PhonemeExpr, Comparable<Phoneme> {
+    public static final class Phoneme implements PhonemeExpr, Comparable<Phoneme> {
 
         private final CharSequence phonemeText;
         private final Languages.LanguageSet languages;
@@ -152,7 +91,7 @@ public class Rule {
         }
 
         public Phoneme append(CharSequence str) {
-            return new Phoneme(new AppendableCharSeqeuence(this.phonemeText, str), this.languages);
+            return new Phoneme(this.phonemeText.toString() + str.toString(), this.languages);
         }
 
         public int compareTo(Phoneme o) {
@@ -186,7 +125,7 @@ public class Rule {
         }
 
         public Phoneme join(Phoneme right) {
-            return new Phoneme(new AppendableCharSeqeuence(this.phonemeText, right.phonemeText),
this.languages.restrictTo(right.languages));
+            return new Phoneme(this.phonemeText.toString() + right.phonemeText.toString(),
this.languages.restrictTo(right.languages));
         }
     }
 
@@ -194,7 +133,7 @@ public class Rule {
         Iterable<Phoneme> getPhonemes();
     }
 
-    public static class PhonemeList implements PhonemeExpr {
+    public static final class PhonemeList implements PhonemeExpr {
         private final List<Phoneme> phonemes;
 
         public PhonemeList(List<Phoneme> phonemes) {
@@ -207,44 +146,17 @@ public class Rule {
     }
 
     /**
-     * A minimal wrapper around the functionality of Matcher that we use, to allow for alternate
implementations.
+     * A minimal wrapper around the functionality of Pattern that we use, to allow for alternate
implementations.
      */
-    public static interface RMatcher {
-        boolean find();
+    public static interface RPattern {
+        boolean isMatch(CharSequence input);
     }
 
-    /**
-     * Always returns true.
-     */
-    private static class TrueRMatcher implements RMatcher {
-
-        private static TrueRMatcher INSTANCE = new TrueRMatcher();
-
-        /**
-         * Always returns true.
-         */
-        public boolean find() {
+    public static final RPattern ALL_STRINGS_RMATCHER = new RPattern() {
+        public boolean isMatch(CharSequence input) {
             return true;
         }
-
-    }
-
-    private static class AllStringsRMatcher implements RPattern {
-
-        private static AllStringsRMatcher INSTANCE = new AllStringsRMatcher();
-
-        public RMatcher matcher(CharSequence input) {
-            return TrueRMatcher.INSTANCE;
-        }
-
-    }
-
-    /**
-     * A minimal wrapper around the functionality of Pattern that we use, to allow for alternate
implementations.
-     */
-    public static interface RPattern {
-        RMatcher matcher(CharSequence input);
-    }
+    };
 
     public static final String ALL = "ALL";
 
@@ -500,48 +412,32 @@ public class Rule {
                 if (content.length() == 0) {
                     // empty
                     return new RPattern() {
-                        public RMatcher matcher(final CharSequence input) {
-                            return new RMatcher() {
-                                public boolean find() {
-                                    return input.length() == 0;
-                                }
-                            };
+                        public boolean isMatch(CharSequence input) {
+                            return input.length() == 0;
                         }
                     };
                 } else {
                     return new RPattern() {
-                        public RMatcher matcher(final CharSequence input) {
-                            return new RMatcher() {
-                                public boolean find() {
-                                    return input.equals(content);
-                                }
-                            };
+                        public boolean isMatch(CharSequence input) {
+                            return input.equals(content);
                         }
                     };
                 }
             } else if ((startsWith || endsWith) && content.length() == 0) {
                 // matches every string
-                return AllStringsRMatcher.INSTANCE;
+                return ALL_STRINGS_RMATCHER;
             } else if (startsWith) {
                 // matches from start
                 return new RPattern() {
-                    public RMatcher matcher(final CharSequence input) {
-                        return new RMatcher() {
-                            public boolean find() {
-                                return startsWith(input, content);
-                            }
-                        };
+                    public boolean isMatch(CharSequence input) {
+                        return startsWith(input, content);
                     }
                 };
             } else if (endsWith) {
                 // matches from start
                 return new RPattern() {
-                    public RMatcher matcher(final CharSequence input) {
-                        return new RMatcher() {
-                            public boolean find() {
-                                return endsWith(input, content);
-                            }
-                        };
+                    public boolean isMatch(CharSequence input) {
+                        return endsWith(input, content);
                     }
                 };
             }
@@ -563,34 +459,22 @@ public class Rule {
                     if (startsWith && endsWith) {
                         // exact match
                         return new RPattern() {
-                            public RMatcher matcher(final CharSequence input) {
-                                return new RMatcher() {
-                                    public boolean find() {
-                                        return input.length() == 1 && (contains(bContent,
input.charAt(0)) == shouldMatch);
-                                    }
-                                };
+                            public boolean isMatch(CharSequence input) {
+                                return input.length() == 1 && (contains(bContent,
input.charAt(0)) == shouldMatch);
                             }
                         };
                     } else if (startsWith) {
                         // first char
                         return new RPattern() {
-                            public RMatcher matcher(final CharSequence input) {
-                                return new RMatcher() {
-                                    public boolean find() {
-                                        return input.length() > 0 && (contains(bContent,
input.charAt(0)) == shouldMatch);
-                                    }
-                                };
+                            public boolean isMatch(CharSequence input) {
+                                return input.length() > 0 && (contains(bContent,
input.charAt(0)) == shouldMatch);
                             }
                         };
                     } else if (endsWith) {
                         // last char
                         return new RPattern() {
-                            public RMatcher matcher(final CharSequence input) {
-                                return new RMatcher() {
-                                    public boolean find() {
-                                        return input.length() > 0 && (contains(bContent,
input.charAt(input.length() - 1)) == shouldMatch);
-                                    }
-                                };
+                            public boolean isMatch(CharSequence input) {
+                                return input.length() > 0 && (contains(bContent,
input.charAt(input.length() - 1)) == shouldMatch);
                             }
                         };
                     }
@@ -602,13 +486,9 @@ public class Rule {
         return new RPattern() {
             Pattern pattern = Pattern.compile(regex);
 
-            public RMatcher matcher(CharSequence input) {
-                final Matcher matcher = pattern.matcher(input);
-                return new RMatcher() {
-                    public boolean find() {
-                        return matcher.find();
-                    }
-                };
+            public boolean isMatch(CharSequence input) {
+                Matcher matcher = pattern.matcher(input);
+                return matcher.find();
             }
         };
     }
@@ -723,8 +603,8 @@ public class Rule {
         }
 
         boolean patternMatches = input.subSequence(i, ipl).equals(this.pattern);
-        boolean rContextMatches = this.rContext.matcher(input.subSequence(ipl, input.length())).find();
-        boolean lContextMatches = this.lContext.matcher(input.subSequence(0, i)).find();
+        boolean rContextMatches = this.rContext.isMatch(input.subSequence(ipl, input.length()));
+        boolean lContextMatches = this.lContext.isMatch(input.subSequence(0, i));
 
         return patternMatches && rContextMatches && lContextMatches;
     }

Modified: commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/BeiderMorseEncoderTest.java
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/BeiderMorseEncoderTest.java?rev=1155135&r1=1155134&r2=1155135&view=diff
==============================================================================
--- commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/BeiderMorseEncoderTest.java
(original)
+++ commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/BeiderMorseEncoderTest.java
Mon Aug  8 22:47:24 2011
@@ -19,8 +19,6 @@ package org.apache.commons.codec.languag
 
 import static org.junit.Assert.assertEquals;
 
-import java.util.Random;
-
 import org.apache.commons.codec.EncoderException;
 import org.apache.commons.codec.StringEncoder;
 import org.apache.commons.codec.StringEncoderAbstractTest;
@@ -165,7 +163,7 @@ public class BeiderMorseEncoderTest exte
     }
 
     /**
-     * (Un)luckily, the worse performing test because of the data in {@link TEST_CHARS}
+     * (Un)luckily, the worse performing test because of the data in {@link #TEST_CHARS}
      * 
      * @throws EncoderException
      */
@@ -183,34 +181,23 @@ public class BeiderMorseEncoderTest exte
         }
     }
 
-    /**
-     * Another odd performance edge case.
-     * 
-     * @throws EncoderException
-     */
-    @Test(/* timeout = 20000L */)
-    public void testSpeedCheckAZ() throws EncoderException {
+    @Test
+    public void testSpeedCheck2() throws EncoderException {
         BeiderMorseEncoder bmpm = this.createGenericApproxEncoder();
-        String phrase = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz";
+        String phrase = "ItstheendoftheworldasweknowitandIfeelfine";
+
         for (int i = 1; i <= phrase.length(); i++) {
             bmpm.encode(phrase.subSequence(0, i));
         }
     }
 
-    /**
-     * Runs between 1.6 and 13 seconds at length 40 for me (Gary Gregory, 2011/08/06)
-     * 
-     * @throws EncoderException
-     */
-    @Test(/* timeout = 20000L */)
-    public void testSpeedCheckRandom() throws EncoderException {
+    @Test
+    public void testSpeedCheck3() throws EncoderException {
         BeiderMorseEncoder bmpm = this.createGenericApproxEncoder();
-        StringBuffer stringBuffer = new StringBuffer();
-        Random rand = new Random();
-        stringBuffer.append(TEST_CHARS[rand.nextInt(TEST_CHARS.length)]);
-        for (int i = 0; i < 40; i++) {
-            bmpm.encode(stringBuffer.toString());
-            stringBuffer.append(TEST_CHARS[rand.nextInt(TEST_CHARS.length)]);
+        String phrase = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz";
+
+        for (int i = 1; i <= phrase.length(); i++) {
+            bmpm.encode(phrase.subSequence(0, i));
         }
     }
 }

Modified: commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/RuleTest.java
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/RuleTest.java?rev=1155135&r1=1155134&r2=1155135&view=diff
==============================================================================
--- commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/RuleTest.java
(original)
+++ commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/RuleTest.java
Mon Aug  8 22:47:24 2011
@@ -80,4 +80,68 @@ public class RuleTest {
             }
         }
     }
+
+    @Test
+    public void subSequenceWorks() {
+        // AppendableCharSequence is private to Rule. We can only make it through a Phoneme.
+
+        Rule.Phoneme a = new Rule.Phoneme("a", null);
+        Rule.Phoneme b = new Rule.Phoneme("b", null);
+        Rule.Phoneme cd = new Rule.Phoneme("cd", null);
+        Rule.Phoneme ef = new Rule.Phoneme("ef", null);
+        Rule.Phoneme ghi = new Rule.Phoneme("ghi", null);
+        Rule.Phoneme jkl = new Rule.Phoneme("jkl", null);
+
+        assertEquals('a', a.getPhonemeText().charAt(0));
+        assertEquals('b', b.getPhonemeText().charAt(0));
+        assertEquals('c', cd.getPhonemeText().charAt(0));
+        assertEquals('d', cd.getPhonemeText().charAt(1));
+        assertEquals('e', ef.getPhonemeText().charAt(0));
+        assertEquals('f', ef.getPhonemeText().charAt(1));
+        assertEquals('g', ghi.getPhonemeText().charAt(0));
+        assertEquals('h', ghi.getPhonemeText().charAt(1));
+        assertEquals('i', ghi.getPhonemeText().charAt(2));
+        assertEquals('j', jkl.getPhonemeText().charAt(0));
+        assertEquals('k', jkl.getPhonemeText().charAt(1));
+        assertEquals('l', jkl.getPhonemeText().charAt(2));
+
+        Rule.Phoneme a_b = a.append(b.getPhonemeText());
+        assertEquals('a', a_b.getPhonemeText().charAt(0));
+        assertEquals('b', a_b.getPhonemeText().charAt(1));
+        assertEquals("ab", a_b.getPhonemeText().subSequence(0, 2).toString());
+        assertEquals("a", a_b.getPhonemeText().subSequence(0, 1).toString());
+        assertEquals("b", a_b.getPhonemeText().subSequence(1, 2).toString());
+
+        Rule.Phoneme cd_ef = cd.append(ef.getPhonemeText());
+        assertEquals('c', cd_ef.getPhonemeText().charAt(0));
+        assertEquals('d', cd_ef.getPhonemeText().charAt(1));
+        assertEquals('e', cd_ef.getPhonemeText().charAt(2));
+        assertEquals('f', cd_ef.getPhonemeText().charAt(3));
+        assertEquals("c", cd_ef.getPhonemeText().subSequence(0, 1).toString());
+        assertEquals("d", cd_ef.getPhonemeText().subSequence(1, 2).toString());
+        assertEquals("e", cd_ef.getPhonemeText().subSequence(2, 3).toString());
+        assertEquals("f", cd_ef.getPhonemeText().subSequence(3, 4).toString());
+        assertEquals("cd", cd_ef.getPhonemeText().subSequence(0, 2).toString());
+        assertEquals("de", cd_ef.getPhonemeText().subSequence(1, 3).toString());
+        assertEquals("ef", cd_ef.getPhonemeText().subSequence(2, 4).toString());
+        assertEquals("cde", cd_ef.getPhonemeText().subSequence(0, 3).toString());
+        assertEquals("def", cd_ef.getPhonemeText().subSequence(1, 4).toString());
+        assertEquals("cdef", cd_ef.getPhonemeText().subSequence(0, 4).toString());
+
+        Rule.Phoneme a_b_cd = a.append(b.getPhonemeText()).append(cd.getPhonemeText());
+        assertEquals('a', a_b_cd.getPhonemeText().charAt(0));
+        assertEquals('b', a_b_cd.getPhonemeText().charAt(1));
+        assertEquals('c', a_b_cd.getPhonemeText().charAt(2));
+        assertEquals('d', a_b_cd.getPhonemeText().charAt(3));
+        assertEquals("a", a_b_cd.getPhonemeText().subSequence(0, 1).toString());
+        assertEquals("b", a_b_cd.getPhonemeText().subSequence(1, 2).toString());
+        assertEquals("c", a_b_cd.getPhonemeText().subSequence(2, 3).toString());
+        assertEquals("d", a_b_cd.getPhonemeText().subSequence(3, 4).toString());
+        assertEquals("ab", a_b_cd.getPhonemeText().subSequence(0, 2).toString());
+        assertEquals("bc", a_b_cd.getPhonemeText().subSequence(1, 3).toString());
+        assertEquals("cd", a_b_cd.getPhonemeText().subSequence(2, 4).toString());
+        assertEquals("abc", a_b_cd.getPhonemeText().subSequence(0, 3).toString());
+        assertEquals("bcd", a_b_cd.getPhonemeText().subSequence(1, 4).toString());
+        assertEquals("abcd", a_b_cd.getPhonemeText().subSequence(0, 4).toString());
+    }
 }



Mime
View raw message