Return-Path: Mailing-List: contact commons-dev-help@jakarta.apache.org; run by ezmlm Delivered-To: mailing list commons-dev@jakarta.apache.org Received: (qmail 9680 invoked by uid 500); 4 Aug 2003 00:49:58 -0000 Received: (qmail 9677 invoked from network); 4 Aug 2003 00:49:58 -0000 Received: from unknown (HELO minotaur.apache.org) (209.237.227.194) by daedalus.apache.org with SMTP; 4 Aug 2003 00:49:58 -0000 Received: (qmail 65542 invoked by uid 1529); 4 Aug 2003 00:50:14 -0000 Date: 4 Aug 2003 00:50:14 -0000 Message-ID: <20030804005014.65541.qmail@minotaur.apache.org> From: scolebourne@apache.org To: jakarta-commons-cvs@apache.org Subject: cvs commit: jakarta-commons/lang/src/test/org/apache/commons/lang CharSetTest.java X-Spam-Rating: daedalus.apache.org 1.6.2 0/1000/N scolebourne 2003/08/03 17:50:14 Modified: lang/src/java/org/apache/commons/lang CharSetUtils.java CharSet.java lang/src/test/org/apache/commons/lang CharSetTest.java Log: Improve CharSet testing bug 22095, from Phil Steitz Rewrite CharSet parsing, much neater and simpler now Revision Changes Path 1.21 +7 -7 jakarta-commons/lang/src/java/org/apache/commons/lang/CharSetUtils.java Index: CharSetUtils.java =================================================================== RCS file: /home/cvs/jakarta-commons/lang/src/java/org/apache/commons/lang/CharSetUtils.java,v retrieving revision 1.20 retrieving revision 1.21 diff -u -r1.20 -r1.21 --- CharSetUtils.java 2 Aug 2003 18:18:33 -0000 1.20 +++ CharSetUtils.java 4 Aug 2003 00:50:14 -0000 1.21 @@ -62,6 +62,7 @@ * * @author Henri Yandell * @author Stephen Colebourne + * @author Phil Steitz * @since 1.0 * @version $Id$ */ @@ -80,13 +81,12 @@ // Factory //----------------------------------------------------------------------- /** - *

Creates a CharSetUtils object which allows a certain amount of + *

Creates a CharSet instance which allows a certain amount of * set logic to be performed.

*

The syntax is:

* * @@ -94,6 +94,7 @@ * CharSetUtils.evaluateSet(null) = null * CharSetUtils.evaluateSet("") = CharSet matching nothing * CharSetUtils.evaluateSet("a-e") = CharSet matching a,b,c,d,e + * CharSetUtils.evaluateSet("abe-g") = CharSet matching a,b,e,f,g * * * @param set the set, may be null @@ -109,13 +110,12 @@ } /** - *

Creates a CharSetUtils object which allows a certain amount of + *

Creates a CharSet instance which allows a certain amount of * set logic to be performed.

*

The syntax is:

* * 1.11 +39 -67 jakarta-commons/lang/src/java/org/apache/commons/lang/CharSet.java Index: CharSet.java =================================================================== RCS file: /home/cvs/jakarta-commons/lang/src/java/org/apache/commons/lang/CharSet.java,v retrieving revision 1.10 retrieving revision 1.11 diff -u -r1.10 -r1.11 --- CharSet.java 2 Aug 2003 18:18:33 -0000 1.10 +++ CharSet.java 4 Aug 2003 00:50:14 -0000 1.11 @@ -67,6 +67,7 @@ * * @author Henri Yandell * @author Stephen Colebourne + * @author Phil Steitz * @since 1.0 * @version $Id$ */ @@ -126,10 +127,26 @@ * - set containing all the characters from the individual sets * * + *

The matching order is:

+ *
  1. Negated multi character range, such as "^a-e" + *
  2. Ordinary multi character range, such as "a-e" + *
  3. Negated single character, such as "^a" + *
  4. Ordinary single character, such as "a" + *
+ *

Matching works left to right. Once a match is found the + * search starts again from the next character.

+ * *

If the same range is defined twice using the same syntax, only * one range will be kept. - * Thus, "a-ca-c" creates only one range of "a-c". - * However, "a-cabc" creates two ranges as they are defined differently.

+ * Thus, "a-ca-c" creates only one range of "a-c".

+ * + *

If the start and end of a range are in the wrong order, + * they are reversed. Thus "a-e" is the same as "e-a". + * As a result, "a-ee-a" would create only one range, + * as the "a-e" and "e-a" are the same.

+ * + *

The set of characters represented is the union of the specified ranges.

* *

All CharSet objects returned by this method will be immutable.

* @@ -180,71 +197,26 @@ } int len = str.length(); - switch (len) { - case 0: - // do nothing - break; - - case 1: - set.add(new CharRange(str.charAt(0))); - break; - - default: - int start = -1; - boolean negated = false; - for (int i = 0; i < len; i++) { - char ch = str.charAt(i); - if (ch == '-') { - if (start == -1) { - // dash found not as range separator - // treat as ordinary start block char - start = ch; - } else if (i == len - 1) { - // dash is last character, store two single characters - set.add(new CharRange((char) start, (char) start, negated)); - set.add(DASH); - start = -1; - negated = false; - } else { - // range block found, store it - set.add(new CharRange((char) start, str.charAt(++i), negated)); - start = -1; - negated = false; - } - } else if (ch == '^') { - if (start == -1) { - if (negated) { - // double negate, treat second as ordinary start block char - start = ch; - } else { - // negate next block - negated = true; - } - } else { - // previous block has ended, store it - set.add(new CharRange((char) start, (char) start, negated)); - start = -1; - negated = true; - } - } else { - if (start == -1) { - // start of block - start = ch; - } else { - // previous block has ended, store it, and start next block - set.add(new CharRange((char) start, (char) start, negated)); - start = ch; - negated = false; - } - } - } - // handle leftovers - if (start != -1) { - set.add(new CharRange((char) start, (char) start, negated)); - } else if (negated) { - set.add(NEGATE); + int pos = 0; + while (pos < len) { + int remainder = (len - pos); + if (remainder >= 4 && str.charAt(pos) == '^' && str.charAt(pos + 2) == '-') { + // negated range + set.add(new CharRange(str.charAt(pos + 1), str.charAt(pos + 3), true)); + pos += 4; + } else if (remainder >= 3 && str.charAt(pos + 1) == '-') { + // range + set.add(new CharRange(str.charAt(pos), str.charAt(pos + 2))); + pos += 3; + } else if (remainder >= 2 && str.charAt(pos) == '^') { + // negated char + set.add(new 
CharRange(str.charAt(pos + 1), true)); + pos += 2; + } else { + // char + set.add(new CharRange(str.charAt(pos))); + pos += 1; } - break; } } 1.2 +76 -16 jakarta-commons/lang/src/test/org/apache/commons/lang/CharSetTest.java Index: CharSetTest.java =================================================================== RCS file: /home/cvs/jakarta-commons/lang/src/test/org/apache/commons/lang/CharSetTest.java,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- CharSetTest.java 2 Aug 2003 18:18:33 -0000 1.1 +++ CharSetTest.java 4 Aug 2003 00:50:14 -0000 1.2 @@ -64,6 +64,7 @@ * Unit tests {@link org.apache.commons.lang.CharSet}. * * @author Stephen Colebourne + * @author Phil Steitz * @version $Id$ */ public class CharSetTest extends TestCase { @@ -278,59 +279,107 @@ set = CharSet.getInstance("^"); array = set.getCharRanges(); assertEquals(1, array.length); - assertEquals(true, ArrayUtils.contains(array, new CharRange('^'))); + assertEquals(true, ArrayUtils.contains(array, new CharRange('^'))); // "^" set = CharSet.getInstance("^^"); array = set.getCharRanges(); assertEquals(1, array.length); - assertEquals(true, ArrayUtils.contains(array, new CharRange('^', '^', true))); + assertEquals(true, ArrayUtils.contains(array, new CharRange('^', '^', true))); // "^^" set = CharSet.getInstance("^^^"); array = set.getCharRanges(); assertEquals(2, array.length); - assertEquals(true, ArrayUtils.contains(array, new CharRange('^', '^', true))); - assertEquals(true, ArrayUtils.contains(array, new CharRange('^', '^'))); + assertEquals(true, ArrayUtils.contains(array, new CharRange('^', '^', true))); // "^^" + assertEquals(true, ArrayUtils.contains(array, new CharRange('^', '^'))); // "^" set = CharSet.getInstance("^^^^"); array = set.getCharRanges(); assertEquals(1, array.length); - assertEquals(true, ArrayUtils.contains(array, new CharRange('^', '^', true))); + assertEquals(true, ArrayUtils.contains(array, new CharRange('^', '^', true))); // "^^" x2 set = 
CharSet.getInstance("a^"); array = set.getCharRanges(); assertEquals(2, array.length); - assertEquals(true, ArrayUtils.contains(array, new CharRange('a'))); - assertEquals(true, ArrayUtils.contains(array, new CharRange('^'))); + assertEquals(true, ArrayUtils.contains(array, new CharRange('a'))); // "a" + assertEquals(true, ArrayUtils.contains(array, new CharRange('^'))); // "^" set = CharSet.getInstance("^a-"); array = set.getCharRanges(); assertEquals(2, array.length); - assertEquals(true, ArrayUtils.contains(array, new CharRange('a', 'a', true))); - assertEquals(true, ArrayUtils.contains(array, new CharRange('-'))); + assertEquals(true, ArrayUtils.contains(array, new CharRange('a', 'a', true))); // "^a" + assertEquals(true, ArrayUtils.contains(array, new CharRange('-'))); // "-" set = CharSet.getInstance("^^-c"); array = set.getCharRanges(); assertEquals(1, array.length); - assertEquals(true, ArrayUtils.contains(array, new CharRange('^', 'c', true))); + assertEquals(true, ArrayUtils.contains(array, new CharRange('^', 'c', true))); // "^^-c" set = CharSet.getInstance("^c-^"); array = set.getCharRanges(); assertEquals(1, array.length); - assertEquals(true, ArrayUtils.contains(array, new CharRange('c', '^', true))); + assertEquals(true, ArrayUtils.contains(array, new CharRange('c', '^', true))); // "^c-^" set = CharSet.getInstance("^c-^d"); array = set.getCharRanges(); assertEquals(2, array.length); - assertEquals(true, ArrayUtils.contains(array, new CharRange('c', '^', true))); - assertEquals(true, ArrayUtils.contains(array, new CharRange('d'))); + assertEquals(true, ArrayUtils.contains(array, new CharRange('c', '^', true))); // "^c-^" + assertEquals(true, ArrayUtils.contains(array, new CharRange('d'))); // "d" set = CharSet.getInstance("^^-"); array = set.getCharRanges(); assertEquals(2, array.length); - assertEquals(true, ArrayUtils.contains(array, new CharRange('^', '^', true))); - assertEquals(true, ArrayUtils.contains(array, new CharRange('-'))); + 
assertEquals(true, ArrayUtils.contains(array, new CharRange('^', '^', true))); // "^^" + assertEquals(true, ArrayUtils.contains(array, new CharRange('-'))); // "-" } + public void testConstructor_String_oddCombinations() { + CharSet set; + CharRange[] array = null; + + set = CharSet.getInstance("a-^c"); + array = set.getCharRanges(); + assertEquals(true, ArrayUtils.contains(array, new CharRange('a', '^'))); // "a-^" + assertEquals(true, ArrayUtils.contains(array, new CharRange('c'))); // "c" + assertEquals(false, set.contains('b')); + assertEquals(true, set.contains('^')); + assertEquals(true, set.contains('_')); // between ^ and a + assertEquals(true, set.contains('c')); + + set = CharSet.getInstance("^a-^c"); + array = set.getCharRanges(); + assertEquals(true, ArrayUtils.contains(array, new CharRange('a', '^', true))); // "^a-^" + assertEquals(true, ArrayUtils.contains(array, new CharRange('c'))); // "c" + assertEquals(true, set.contains('b')); + assertEquals(false, set.contains('^')); + assertEquals(false, set.contains('_')); // between ^ and a + + set = CharSet.getInstance("a- ^-- "); //contains everything + array = set.getCharRanges(); + assertEquals(true, ArrayUtils.contains(array, new CharRange('a', ' '))); // "a- " + assertEquals(true, ArrayUtils.contains(array, new CharRange('-', ' ', true))); // "^-- " + assertEquals(true, set.contains('#')); + assertEquals(true, set.contains('^')); + assertEquals(true, set.contains('a')); + assertEquals(true, set.contains('*')); + assertEquals(true, set.contains('A')); + + set = CharSet.getInstance("^-b"); + array = set.getCharRanges(); + assertEquals(true, ArrayUtils.contains(array, new CharRange('^','b'))); // "^-b" + assertEquals(true, set.contains('b')); + assertEquals(true, set.contains('_')); // between ^ and a + assertEquals(false, set.contains('A')); + assertEquals(true, set.contains('^')); + + set = CharSet.getInstance("b-^"); + array = set.getCharRanges(); + assertEquals(true, ArrayUtils.contains(array, new 
CharRange('^','b'))); // "b-^" + assertEquals(true, set.contains('b')); + assertEquals(true, set.contains('^')); + assertEquals(true, set.contains('a')); // between ^ and b + assertEquals(false, set.contains('c')); + } + //----------------------------------------------------------------------- public void testEquals_Object() { CharSet abc = CharSet.getInstance("abc"); @@ -377,6 +426,7 @@ //----------------------------------------------------------------------- public void testContains_Char() { CharSet btod = CharSet.getInstance("b-d"); + CharSet dtob = CharSet.getInstance("d-b"); CharSet bcd = CharSet.getInstance("bcd"); CharSet bd = CharSet.getInstance("bd"); CharSet notbtod = CharSet.getInstance("^b-d"); @@ -404,6 +454,16 @@ assertEquals(false, notbtod.contains('c')); assertEquals(false, notbtod.contains('d')); assertEquals(true, notbtod.contains('e')); + + assertEquals(false, dtob.contains('a')); + assertEquals(true, dtob.contains('b')); + assertEquals(true, dtob.contains('c')); + assertEquals(true, dtob.contains('d')); + assertEquals(false, dtob.contains('e')); + + CharRange[] array = dtob.getCharRanges(); + assertEquals("[b-d]", dtob.toString()); + assertEquals(1, array.length); } //-----------------------------------------------------------------------