commons-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From scolebou...@apache.org
Subject cvs commit: jakarta-commons/lang/src/test/org/apache/commons/lang CharSetTest.java
Date Mon, 04 Aug 2003 00:50:14 GMT
scolebourne    2003/08/03 17:50:14

  Modified:    lang/src/java/org/apache/commons/lang CharSetUtils.java
                        CharSet.java
               lang/src/test/org/apache/commons/lang CharSetTest.java
  Log:
  Improve CharSet testing
  bug 22095, from Phil Steitz
  Rewrite CharSet parsing, much neater and simpler now
  
  Revision  Changes    Path
  1.21      +7 -7      jakarta-commons/lang/src/java/org/apache/commons/lang/CharSetUtils.java
  
  Index: CharSetUtils.java
  ===================================================================
  RCS file: /home/cvs/jakarta-commons/lang/src/java/org/apache/commons/lang/CharSetUtils.java,v
  retrieving revision 1.20
  retrieving revision 1.21
  diff -u -r1.20 -r1.21
  --- CharSetUtils.java	2 Aug 2003 18:18:33 -0000	1.20
  +++ CharSetUtils.java	4 Aug 2003 00:50:14 -0000	1.21
  @@ -62,6 +62,7 @@
    * 
    * @author <a href="bayard@generationjava.com">Henri Yandell</a>
    * @author Stephen Colebourne
  + * @author Phil Steitz
    * @since 1.0
    * @version $Id$
    */
  @@ -80,13 +81,12 @@
       // Factory
       //-----------------------------------------------------------------------
       /**
  -     * <p>Creates a <code>CharSetUtils</code> object which allows a certain amount of
  +     * <p>Creates a <code>CharSet</code> instance which allows a certain amount of
        * set logic to be performed.</p>
        * <p>The syntax is:</p>
        * <ul>
        *  <li>&quot;aeio&quot; which implies 'a','e',..</li>
  -     *  <li>&quot;^e&quot; implies not e. However it only negates, it's not
  -     *   a set in itself due to the size of that set in unicode.</li>
  +     *  <li>&quot;^e&quot; implies not e.</li>
        *  <li>&quot;ej-m&quot; implies e,j->m. e,j,k,l,m.</li>
        * </ul>
        * 
  @@ -94,6 +94,7 @@
        * CharSetUtils.evaluateSet(null)  = null
        * CharSetUtils.evaluateSet("")    = CharSet matching nothing
        * CharSetUtils.evaluateSet("a-e") = CharSet matching a,b,c,d,e
  +     * CharSetUtils.evaluateSet("abe-g") = CharSet matching a,b,e,f,g
        * </pre>
        *
        * @param set  the set, may be null
  @@ -109,13 +110,12 @@
       }
   
       /**
  -     * <p>Creates a <code>CharSetUtils</code> object which allows a certain amount of
  +     * <p>Creates a <code>CharSet</code> instance which allows a certain amount of
        * set logic to be performed.</p>
        * <p>The syntax is:</p>
        * <ul>
        *  <li>&quot;aeio&quot; which implies 'a','e',..</li>
  -     *  <li>&quot;^e&quot; implies not e. However it only negates, it's not
  -     *   a set in itself due to the size of that set in unicode.</li>
  +     *  <li>&quot;^e&quot; implies not e.</li>
        *  <li>&quot;ej-m&quot; implies e,j->m. e,j,k,l,m.</li>
        * </ul>
        * 
  
  
  
  1.11      +39 -67    jakarta-commons/lang/src/java/org/apache/commons/lang/CharSet.java
  
  Index: CharSet.java
  ===================================================================
  RCS file: /home/cvs/jakarta-commons/lang/src/java/org/apache/commons/lang/CharSet.java,v
  retrieving revision 1.10
  retrieving revision 1.11
  diff -u -r1.10 -r1.11
  --- CharSet.java	2 Aug 2003 18:18:33 -0000	1.10
  +++ CharSet.java	4 Aug 2003 00:50:14 -0000	1.11
  @@ -67,6 +67,7 @@
    *
    * @author Henri Yandell
    * @author Stephen Colebourne
  + * @author Phil Steitz
    * @since 1.0
    * @version $Id$
    */
  @@ -126,10 +127,26 @@
        *  - set containing all the characters from the individual sets</li>
        * </ul>
        * 
  +     * <p>The matching order is:</p>
  +     * <ol>
  +     *  <li>Negated multi character range, such as "^a-e"
  +     *  <li>Ordinary multi character range, such as "a-e"
  +     *  <li>Negated single character, such as "^a"
  +     *  <li>Ordinary single character, such as "a"
  +     * </ol>
  +     * <p>Matching works left to right. Once a match is found the
  +     * search starts again from the next character.</p>
  +     * 
        * <p>If the same range is defined twice using the same syntax, only
        * one range will be kept.
  -     * Thus, "a-ca-c" creates only one range of "a-c".
  -     * However, "a-cabc" creates two ranges as they are defined differently.</p>
  +     * Thus, "a-ca-c" creates only one range of "a-c".</p>
  +     *
  +     * <p>If the start and end of a range are in the wrong order,
  +     * they are reversed. Thus "a-e" is the same as "e-a".
  +     * As a result, "a-ee-a" would create only one range,
  +     * as the "a-e" and "e-a" are the same.</p>
  +     *
  +     * <p>The set of characters represented is the union of the specified ranges.</p>
        *
        * <p>All CharSet objects returned by this method will be immutable.</p>
        * 
  @@ -180,71 +197,26 @@
           }
   
           int len = str.length();
  -        switch (len) {
  -            case 0:
  -            // do nothing
  -            break;
  -            
  -            case 1:
  -            set.add(new CharRange(str.charAt(0)));
  -            break;
  -            
  -            default:
  -            int start = -1;
  -            boolean negated = false;
  -            for (int i = 0; i < len; i++) {
  -                char ch = str.charAt(i);
  -                if (ch == '-') {
  -                    if (start == -1) {
  -                        // dash found not as range separator
  -                        // treat as ordinary start block char
  -                        start = ch; 
  -                    } else if (i == len - 1) {
  -                        // dash is last character, store two single characters
  -                        set.add(new CharRange((char) start, (char) start, negated));
  -                        set.add(DASH);
  -                        start = -1;
  -                        negated = false;
  -                    } else {
  -                        // range block found, store it
  -                        set.add(new CharRange((char) start, str.charAt(++i), negated));
  -                        start = -1;
  -                        negated = false;
  -                    }
  -                } else if (ch == '^') {
  -                    if (start == -1) {
  -                        if (negated) {
  -                            // double negate, treat second as ordinary start block char
  -                            start = ch;
  -                        } else {
  -                            // negate next block
  -                            negated = true;
  -                        }
  -                    } else {
  -                        // previous block has ended, store it
  -                        set.add(new CharRange((char) start, (char) start, negated));
  -                        start = -1;
  -                        negated = true;
  -                    }
  -                } else {
  -                    if (start == -1) {
  -                        // start of block
  -                        start = ch;
  -                    } else {
  -                        // previous block has ended, store it, and start next block
  -                        set.add(new CharRange((char) start, (char) start, negated));
  -                        start = ch;
  -                        negated = false;
  -                    }
  -                }
  -            }
  -            // handle leftovers
  -            if (start != -1) {
  -                set.add(new CharRange((char) start, (char) start, negated));
  -            } else if (negated) {
  -                set.add(NEGATE);
  +        int pos = 0;
  +        while (pos < len) {
  +            int remainder = (len - pos);
  +            if (remainder >= 4 && str.charAt(pos) == '^' && str.charAt(pos + 2) == '-') {
  +                // negated range
  +                set.add(new CharRange(str.charAt(pos + 1), str.charAt(pos + 3), true));
  +                pos += 4;
  +            } else if (remainder >= 3 && str.charAt(pos + 1) == '-') {
  +                // range
  +                set.add(new CharRange(str.charAt(pos), str.charAt(pos + 2)));
  +                pos += 3;
  +            } else if (remainder >= 2 && str.charAt(pos) == '^') {
  +                // negated char
  +                set.add(new CharRange(str.charAt(pos + 1), true));
  +                pos += 2;
  +            } else {
  +                // char
  +                set.add(new CharRange(str.charAt(pos)));
  +                pos += 1;
               }
  -            break;
           }
       }
   
  
  
  
  1.2       +76 -16    jakarta-commons/lang/src/test/org/apache/commons/lang/CharSetTest.java
  
  Index: CharSetTest.java
  ===================================================================
  RCS file: /home/cvs/jakarta-commons/lang/src/test/org/apache/commons/lang/CharSetTest.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- CharSetTest.java	2 Aug 2003 18:18:33 -0000	1.1
  +++ CharSetTest.java	4 Aug 2003 00:50:14 -0000	1.2
  @@ -64,6 +64,7 @@
    * Unit tests {@link org.apache.commons.lang.CharSet}.
    *
    * @author Stephen Colebourne
  + * @author Phil Steitz
    * @version $Id$
    */
   public class CharSetTest extends TestCase {
  @@ -278,59 +279,107 @@
           set = CharSet.getInstance("^");
           array = set.getCharRanges();
           assertEquals(1, array.length);
  -        assertEquals(true, ArrayUtils.contains(array, new CharRange('^')));
  +        assertEquals(true, ArrayUtils.contains(array, new CharRange('^'))); // "^"
           
           set = CharSet.getInstance("^^");
           array = set.getCharRanges();
           assertEquals(1, array.length);
  -        assertEquals(true, ArrayUtils.contains(array, new CharRange('^', '^', true)));
  +        assertEquals(true, ArrayUtils.contains(array, new CharRange('^', '^', true))); // "^^"
           
           set = CharSet.getInstance("^^^");
           array = set.getCharRanges();
           assertEquals(2, array.length);
  -        assertEquals(true, ArrayUtils.contains(array, new CharRange('^', '^', true)));
  -        assertEquals(true, ArrayUtils.contains(array, new CharRange('^', '^')));
  +        assertEquals(true, ArrayUtils.contains(array, new CharRange('^', '^', true))); // "^^"
  +        assertEquals(true, ArrayUtils.contains(array, new CharRange('^', '^'))); // "^"
           
           set = CharSet.getInstance("^^^^");
           array = set.getCharRanges();
           assertEquals(1, array.length);
  -        assertEquals(true, ArrayUtils.contains(array, new CharRange('^', '^', true)));
  +        assertEquals(true, ArrayUtils.contains(array, new CharRange('^', '^', true))); // "^^" x2
           
           set = CharSet.getInstance("a^");
           array = set.getCharRanges();
           assertEquals(2, array.length);
  -        assertEquals(true, ArrayUtils.contains(array, new CharRange('a')));
  -        assertEquals(true, ArrayUtils.contains(array, new CharRange('^')));
  +        assertEquals(true, ArrayUtils.contains(array, new CharRange('a'))); // "a"
  +        assertEquals(true, ArrayUtils.contains(array, new CharRange('^'))); // "^"
           
           set = CharSet.getInstance("^a-");
           array = set.getCharRanges();
           assertEquals(2, array.length);
  -        assertEquals(true, ArrayUtils.contains(array, new CharRange('a', 'a', true)));
  -        assertEquals(true, ArrayUtils.contains(array, new CharRange('-')));
  +        assertEquals(true, ArrayUtils.contains(array, new CharRange('a', 'a', true))); // "^a"
  +        assertEquals(true, ArrayUtils.contains(array, new CharRange('-'))); // "-"
           
           set = CharSet.getInstance("^^-c");
           array = set.getCharRanges();
           assertEquals(1, array.length);
  -        assertEquals(true, ArrayUtils.contains(array, new CharRange('^', 'c', true)));
  +        assertEquals(true, ArrayUtils.contains(array, new CharRange('^', 'c', true))); // "^^-c"
           
           set = CharSet.getInstance("^c-^");
           array = set.getCharRanges();
           assertEquals(1, array.length);
  -        assertEquals(true, ArrayUtils.contains(array, new CharRange('c', '^', true)));
  +        assertEquals(true, ArrayUtils.contains(array, new CharRange('c', '^', true))); // "^c-^"
           
           set = CharSet.getInstance("^c-^d");
           array = set.getCharRanges();
           assertEquals(2, array.length);
  -        assertEquals(true, ArrayUtils.contains(array, new CharRange('c', '^', true)));
  -        assertEquals(true, ArrayUtils.contains(array, new CharRange('d')));
  +        assertEquals(true, ArrayUtils.contains(array, new CharRange('c', '^', true))); // "^c-^"
  +        assertEquals(true, ArrayUtils.contains(array, new CharRange('d'))); // "d"
           
           set = CharSet.getInstance("^^-");
           array = set.getCharRanges();
           assertEquals(2, array.length);
  -        assertEquals(true, ArrayUtils.contains(array, new CharRange('^', '^', true)));
  -        assertEquals(true, ArrayUtils.contains(array, new CharRange('-')));
  +        assertEquals(true, ArrayUtils.contains(array, new CharRange('^', '^', true))); // "^^"
  +        assertEquals(true, ArrayUtils.contains(array, new CharRange('-'))); // "-"
       }
       
  +    public void testConstructor_String_oddCombinations() {
  +        CharSet set;
  +        CharRange[] array = null;
  +        
  +        set = CharSet.getInstance("a-^c");
  +        array = set.getCharRanges();
  +        assertEquals(true, ArrayUtils.contains(array, new CharRange('a', '^'))); // "a-^"
  +        assertEquals(true, ArrayUtils.contains(array, new CharRange('c'))); // "c"
  +        assertEquals(false, set.contains('b'));
  +        assertEquals(true, set.contains('^'));  
  +        assertEquals(true, set.contains('_')); // between ^ and a
  +        assertEquals(true, set.contains('c'));  
  +        
  +        set = CharSet.getInstance("^a-^c");
  +        array = set.getCharRanges();
  +        assertEquals(true, ArrayUtils.contains(array, new CharRange('a', '^', true))); // "^a-^"
  +        assertEquals(true, ArrayUtils.contains(array, new CharRange('c'))); // "c"
  +        assertEquals(true, set.contains('b'));
  +        assertEquals(false, set.contains('^'));  
  +        assertEquals(false, set.contains('_')); // between ^ and a
  +        
  +        set = CharSet.getInstance("a- ^-- "); //contains everything
  +        array = set.getCharRanges();
  +        assertEquals(true, ArrayUtils.contains(array, new CharRange('a', ' '))); // "a- "
  +        assertEquals(true, ArrayUtils.contains(array, new CharRange('-', ' ', true))); // "^-- "
  +        assertEquals(true, set.contains('#'));
  +        assertEquals(true, set.contains('^'));
  +        assertEquals(true, set.contains('a'));
  +        assertEquals(true, set.contains('*'));
  +        assertEquals(true, set.contains('A'));
  +        
  +        set = CharSet.getInstance("^-b");
  +        array = set.getCharRanges();
  +        assertEquals(true, ArrayUtils.contains(array, new CharRange('^','b'))); // "^-b"
  +        assertEquals(true, set.contains('b'));
  +        assertEquals(true, set.contains('_')); // between ^ and a
  +        assertEquals(false, set.contains('A'));
  +        assertEquals(true, set.contains('^')); 
  +        
  +        set = CharSet.getInstance("b-^");
  +        array = set.getCharRanges();
  +        assertEquals(true, ArrayUtils.contains(array, new CharRange('^','b'))); // "b-^"
  +        assertEquals(true, set.contains('b'));
  +        assertEquals(true, set.contains('^'));
  +        assertEquals(true, set.contains('a')); // between ^ and b
  +        assertEquals(false, set.contains('c')); 
  +    }
  +        
       //-----------------------------------------------------------------------    
       public void testEquals_Object() {
           CharSet abc = CharSet.getInstance("abc");
  @@ -377,6 +426,7 @@
       //-----------------------------------------------------------------------    
       public void testContains_Char() {
           CharSet btod = CharSet.getInstance("b-d");
  +        CharSet dtob = CharSet.getInstance("d-b");
           CharSet bcd = CharSet.getInstance("bcd");
           CharSet bd = CharSet.getInstance("bd");
           CharSet notbtod = CharSet.getInstance("^b-d");
  @@ -404,6 +454,16 @@
           assertEquals(false, notbtod.contains('c'));
           assertEquals(false, notbtod.contains('d'));
           assertEquals(true, notbtod.contains('e'));
  +        
  +        assertEquals(false, dtob.contains('a'));
  +        assertEquals(true, dtob.contains('b'));
  +        assertEquals(true, dtob.contains('c'));
  +        assertEquals(true, dtob.contains('d'));
  +        assertEquals(false, dtob.contains('e'));
  +      
  +        CharRange[] array = dtob.getCharRanges();
  +        assertEquals("[b-d]", dtob.toString());
  +        assertEquals(1, array.length);
       }
       
       //-----------------------------------------------------------------------    
  
  
  

Mime
View raw message