Return-Path: Creates a Creates a The syntax is: Creates a Creates a The syntax is: The matching order is: Matching works left to right. Once a match is found the
+ * search starts again from the next character. If the same range is defined twice using the same syntax, only
* one range will be kept.
- * Thus, "a-ca-c" creates only one range of "a-c".
- * However, "a-cabc" creates two ranges as they are defined differently.
- * <p>Creates a <code>CharSetUtils</code> object which allows a certain amount of
+ * <p>Creates a <code>CharSet</code> instance which allows a certain amount of
* set logic to be performed.
*
*
@@ -94,6 +94,7 @@
* CharSetUtils.evaluateSet(null) = null
* CharSetUtils.evaluateSet("") = CharSet matching nothing
* CharSetUtils.evaluateSet("a-e") = CharSet matching a,b,c,d,e
+ * CharSetUtils.evaluateSet("abe-g") = CharSet matching a,b,e,f,g
*
*
* @param set the set, may be null
@@ -109,13 +110,12 @@
}
/**
- * <p>Creates a <code>CharSetUtils</code> object which allows a certain amount of
+ * <p>Creates a <code>CharSet</code> instance which allows a certain amount of
* set logic to be performed.
*
*
1.11 +39 -67 jakarta-commons/lang/src/java/org/apache/commons/lang/CharSet.java
Index: CharSet.java
===================================================================
RCS file: /home/cvs/jakarta-commons/lang/src/java/org/apache/commons/lang/CharSet.java,v
retrieving revision 1.10
retrieving revision 1.11
diff -u -r1.10 -r1.11
--- CharSet.java 2 Aug 2003 18:18:33 -0000 1.10
+++ CharSet.java 4 Aug 2003 00:50:14 -0000 1.11
@@ -67,6 +67,7 @@
*
* @author Henri Yandell
* @author Stephen Colebourne
+ * @author Phil Steitz
* @since 1.0
* @version $Id$
*/
@@ -126,10 +127,26 @@
* - set containing all the characters from the individual sets
*
*
+ * Negated multi character range, such as "^a-e"
+ *
+ *
+ * <p>If the start and end of a range are in the wrong order,
+ * they are reversed. Thus "a-e" is the same as "e-a".
+ * As a result, "a-ee-a" would create only one range,
+ * as the "a-e" and "e-a" are the same.</p>
+ *
+ * <p>The set of characters represented is the union of the specified ranges.</p>
*
* <p>All CharSet objects returned by this method will be immutable.</p>
* @@ -180,71 +197,26 @@ } int len = str.length(); - switch (len) { - case 0: - // do nothing - break; - - case 1: - set.add(new CharRange(str.charAt(0))); - break; - - default: - int start = -1; - boolean negated = false; - for (int i = 0; i < len; i++) { - char ch = str.charAt(i); - if (ch == '-') { - if (start == -1) { - // dash found not as range separator - // treat as ordinary start block char - start = ch; - } else if (i == len - 1) { - // dash is last character, store two single characters - set.add(new CharRange((char) start, (char) start, negated)); - set.add(DASH); - start = -1; - negated = false; - } else { - // range block found, store it - set.add(new CharRange((char) start, str.charAt(++i), negated)); - start = -1; - negated = false; - } - } else if (ch == '^') { - if (start == -1) { - if (negated) { - // double negate, treat second as ordinary start block char - start = ch; - } else { - // negate next block - negated = true; - } - } else { - // previous block has ended, store it - set.add(new CharRange((char) start, (char) start, negated)); - start = -1; - negated = true; - } - } else { - if (start == -1) { - // start of block - start = ch; - } else { - // previous block has ended, store it, and start next block - set.add(new CharRange((char) start, (char) start, negated)); - start = ch; - negated = false; - } - } - } - // handle leftovers - if (start != -1) { - set.add(new CharRange((char) start, (char) start, negated)); - } else if (negated) { - set.add(NEGATE); + int pos = 0; + while (pos < len) { + int remainder = (len - pos); + if (remainder >= 4 && str.charAt(pos) == '^' && str.charAt(pos + 2) == '-') { + // negated range + set.add(new CharRange(str.charAt(pos + 1), str.charAt(pos + 3), true)); + pos += 4; + } else if (remainder >= 3 && str.charAt(pos + 1) == '-') { + // range + set.add(new CharRange(str.charAt(pos), str.charAt(pos + 2))); + pos += 3; + } else if (remainder >= 2 && str.charAt(pos) == '^') { + // negated char + set.add(new 
CharRange(str.charAt(pos + 1), true)); + pos += 2; + } else { + // char + set.add(new CharRange(str.charAt(pos))); + pos += 1; } - break; } } 1.2 +76 -16 jakarta-commons/lang/src/test/org/apache/commons/lang/CharSetTest.java Index: CharSetTest.java =================================================================== RCS file: /home/cvs/jakarta-commons/lang/src/test/org/apache/commons/lang/CharSetTest.java,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- CharSetTest.java 2 Aug 2003 18:18:33 -0000 1.1 +++ CharSetTest.java 4 Aug 2003 00:50:14 -0000 1.2 @@ -64,6 +64,7 @@ * Unit tests {@link org.apache.commons.lang.CharSet}. * * @author Stephen Colebourne + * @author Phil Steitz * @version $Id$ */ public class CharSetTest extends TestCase { @@ -278,59 +279,107 @@ set = CharSet.getInstance("^"); array = set.getCharRanges(); assertEquals(1, array.length); - assertEquals(true, ArrayUtils.contains(array, new CharRange('^'))); + assertEquals(true, ArrayUtils.contains(array, new CharRange('^'))); // "^" set = CharSet.getInstance("^^"); array = set.getCharRanges(); assertEquals(1, array.length); - assertEquals(true, ArrayUtils.contains(array, new CharRange('^', '^', true))); + assertEquals(true, ArrayUtils.contains(array, new CharRange('^', '^', true))); // "^^" set = CharSet.getInstance("^^^"); array = set.getCharRanges(); assertEquals(2, array.length); - assertEquals(true, ArrayUtils.contains(array, new CharRange('^', '^', true))); - assertEquals(true, ArrayUtils.contains(array, new CharRange('^', '^'))); + assertEquals(true, ArrayUtils.contains(array, new CharRange('^', '^', true))); // "^^" + assertEquals(true, ArrayUtils.contains(array, new CharRange('^', '^'))); // "^" set = CharSet.getInstance("^^^^"); array = set.getCharRanges(); assertEquals(1, array.length); - assertEquals(true, ArrayUtils.contains(array, new CharRange('^', '^', true))); + assertEquals(true, ArrayUtils.contains(array, new CharRange('^', '^', true))); // "^^" x2 set = 
CharSet.getInstance("a^"); array = set.getCharRanges(); assertEquals(2, array.length); - assertEquals(true, ArrayUtils.contains(array, new CharRange('a'))); - assertEquals(true, ArrayUtils.contains(array, new CharRange('^'))); + assertEquals(true, ArrayUtils.contains(array, new CharRange('a'))); // "a" + assertEquals(true, ArrayUtils.contains(array, new CharRange('^'))); // "^" set = CharSet.getInstance("^a-"); array = set.getCharRanges(); assertEquals(2, array.length); - assertEquals(true, ArrayUtils.contains(array, new CharRange('a', 'a', true))); - assertEquals(true, ArrayUtils.contains(array, new CharRange('-'))); + assertEquals(true, ArrayUtils.contains(array, new CharRange('a', 'a', true))); // "^a" + assertEquals(true, ArrayUtils.contains(array, new CharRange('-'))); // "-" set = CharSet.getInstance("^^-c"); array = set.getCharRanges(); assertEquals(1, array.length); - assertEquals(true, ArrayUtils.contains(array, new CharRange('^', 'c', true))); + assertEquals(true, ArrayUtils.contains(array, new CharRange('^', 'c', true))); // "^^-c" set = CharSet.getInstance("^c-^"); array = set.getCharRanges(); assertEquals(1, array.length); - assertEquals(true, ArrayUtils.contains(array, new CharRange('c', '^', true))); + assertEquals(true, ArrayUtils.contains(array, new CharRange('c', '^', true))); // "^c-^" set = CharSet.getInstance("^c-^d"); array = set.getCharRanges(); assertEquals(2, array.length); - assertEquals(true, ArrayUtils.contains(array, new CharRange('c', '^', true))); - assertEquals(true, ArrayUtils.contains(array, new CharRange('d'))); + assertEquals(true, ArrayUtils.contains(array, new CharRange('c', '^', true))); // "^c-^" + assertEquals(true, ArrayUtils.contains(array, new CharRange('d'))); // "d" set = CharSet.getInstance("^^-"); array = set.getCharRanges(); assertEquals(2, array.length); - assertEquals(true, ArrayUtils.contains(array, new CharRange('^', '^', true))); - assertEquals(true, ArrayUtils.contains(array, new CharRange('-'))); + 
assertEquals(true, ArrayUtils.contains(array, new CharRange('^', '^', true))); // "^^" + assertEquals(true, ArrayUtils.contains(array, new CharRange('-'))); // "-" } + public void testConstructor_String_oddCombinations() { + CharSet set; + CharRange[] array = null; + + set = CharSet.getInstance("a-^c"); + array = set.getCharRanges(); + assertEquals(true, ArrayUtils.contains(array, new CharRange('a', '^'))); // "a-^" + assertEquals(true, ArrayUtils.contains(array, new CharRange('c'))); // "c" + assertEquals(false, set.contains('b')); + assertEquals(true, set.contains('^')); + assertEquals(true, set.contains('_')); // between ^ and a + assertEquals(true, set.contains('c')); + + set = CharSet.getInstance("^a-^c"); + array = set.getCharRanges(); + assertEquals(true, ArrayUtils.contains(array, new CharRange('a', '^', true))); // "^a-^" + assertEquals(true, ArrayUtils.contains(array, new CharRange('c'))); // "c" + assertEquals(true, set.contains('b')); + assertEquals(false, set.contains('^')); + assertEquals(false, set.contains('_')); // between ^ and a + + set = CharSet.getInstance("a- ^-- "); //contains everything + array = set.getCharRanges(); + assertEquals(true, ArrayUtils.contains(array, new CharRange('a', ' '))); // "a- " + assertEquals(true, ArrayUtils.contains(array, new CharRange('-', ' ', true))); // "^-- " + assertEquals(true, set.contains('#')); + assertEquals(true, set.contains('^')); + assertEquals(true, set.contains('a')); + assertEquals(true, set.contains('*')); + assertEquals(true, set.contains('A')); + + set = CharSet.getInstance("^-b"); + array = set.getCharRanges(); + assertEquals(true, ArrayUtils.contains(array, new CharRange('^','b'))); // "^-b" + assertEquals(true, set.contains('b')); + assertEquals(true, set.contains('_')); // between ^ and a + assertEquals(false, set.contains('A')); + assertEquals(true, set.contains('^')); + + set = CharSet.getInstance("b-^"); + array = set.getCharRanges(); + assertEquals(true, ArrayUtils.contains(array, new 
CharRange('^','b'))); // "b-^" + assertEquals(true, set.contains('b')); + assertEquals(true, set.contains('^')); + assertEquals(true, set.contains('a')); // between ^ and b + assertEquals(false, set.contains('c')); + } + //----------------------------------------------------------------------- public void testEquals_Object() { CharSet abc = CharSet.getInstance("abc"); @@ -377,6 +426,7 @@ //----------------------------------------------------------------------- public void testContains_Char() { CharSet btod = CharSet.getInstance("b-d"); + CharSet dtob = CharSet.getInstance("d-b"); CharSet bcd = CharSet.getInstance("bcd"); CharSet bd = CharSet.getInstance("bd"); CharSet notbtod = CharSet.getInstance("^b-d"); @@ -404,6 +454,16 @@ assertEquals(false, notbtod.contains('c')); assertEquals(false, notbtod.contains('d')); assertEquals(true, notbtod.contains('e')); + + assertEquals(false, dtob.contains('a')); + assertEquals(true, dtob.contains('b')); + assertEquals(true, dtob.contains('c')); + assertEquals(true, dtob.contains('d')); + assertEquals(false, dtob.contains('e')); + + CharRange[] array = dtob.getCharRanges(); + assertEquals("[b-d]", dtob.toString()); + assertEquals(1, array.length); } //-----------------------------------------------------------------------