commons-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From djo...@apache.org
Subject [text] TEXT-34: Add class to generate random strings
Date Tue, 20 Dec 2016 23:08:55 GMT
Repository: commons-text
Updated Branches:
  refs/heads/master d7d2d9157 -> 120409051


TEXT-34: Add class to generate random strings

Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/12040905
Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/12040905
Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/12040905

Branch: refs/heads/master
Commit: 120409051714e218d260b15531a5363ccbef6618
Parents: d7d2d91
Author: duncan <duncan@wortharead.com>
Authored: Tue Dec 20 23:08:43 2016 +0000
Committer: duncan <duncan@wortharead.com>
Committed: Tue Dec 20 23:08:43 2016 +0000

----------------------------------------------------------------------
 src/changes/changes.xml                         |   1 +
 .../commons/text/RandomStringBuilder.java       | 353 +++++++++++++++++++
 .../commons/text/RandomStringBuilderTest.java   | 235 ++++++++++++
 3 files changed, 589 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-text/blob/12040905/src/changes/changes.xml
----------------------------------------------------------------------
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 4c2f03a..ec4d183 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -22,6 +22,7 @@
   <body>
 
   <release version="1.0" date="tba" description="tba">
+    <action issue="TEXT-34" type="add" dev="djones">Add class to generate random strings</action>
     <action issue="TEXT-35" type="fix" dev="kinow">Unfinished class Javadoc for CosineDistance</action>
     <action issue="TEXT-33" type="update" dev="chtompki">Consolidating since tags at
1.0, removing deprecated methods</action>
     <action issue="TEXT-29" type="add" dev="chtompki">Add a builder to StringEscapeUtils</action>

http://git-wip-us.apache.org/repos/asf/commons-text/blob/12040905/src/main/java/org/apache/commons/text/RandomStringBuilder.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/RandomStringBuilder.java b/src/main/java/org/apache/commons/text/RandomStringBuilder.java
new file mode 100644
index 0000000..8f4253d
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/RandomStringBuilder.java
@@ -0,0 +1,353 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text;
+
+import java.util.HashSet;
+import java.util.Random;
+import java.util.Set;
+
+/**
+ * <p>
+ * Generates a random Unicode string based on properties defined using a builder
+ * pattern.
+ * </p>
+ * <p>
+ * Overriding the default properties is optional, however callers will need to
+ * define the length of the output string using {@link #ofLength(int)} to avoid
+ * generating an empty string.
+ * </p>
+ * <p>
+ * All the property setting methods return the {@code RandomStringBuilder}
+ * instance to allow for method chaining:
+ * </p>
+ * 
+ * <pre>
+ * // Generates a 20 code point string, using only the letters a-z
+ * String random = new RandomStringBuilder().ofLength(20).withinRange('a','z').build();
+ * </pre>
+ * 
+ * <p>
+ * The type of code point returned can be filtered using
+ * {@link #filteredBy(CodePointPredicate...)}, which defines a collection of
+ * tests that are applied to the randomly generated code points. The code points
+ * will only be included in the result if they pass at least one of the tests.
+ * Some commonly used predicates are provided (e.g. {@link #LETTERS} or
+ * {@link #DIGITS}) and others can be created by implementing
+ * {@link CodePointPredicate}.
+ * </p>
+ * 
+ * <pre>
+ * // Generates a 10 code point string containing only letters
+ * 
+ * import static org.apache.commons.text.RandomStringBuilder.LETTERS;
+ * ...
+ * String random = new RandomStringBuilder().ofLength(10).filteredBy(LETTERS).build();
+ * </pre>
+ * 
+ * <p>
+ * A {@code RandomStringBuilder} instance can be used multiple times to generate
+ * different random strings, however it cannot safely be shared between threads.
+ * </p>
+ * 
+ * @since 1.0
+ */
+public class RandomStringBuilder implements Builder<String> {
+
+    /**
+     * Default source of randomness
+     */
+    private static final Random DEFAULT_RANDOM = new Random();
+
+    /**
+     * The default string length produced by this builder: {@value}
+     * 
+     * @since 1.0
+     */
+    public static final int DEFAULT_LENGTH = 0;
+
+    /**
+     * The default minimum code point allowed: {@value}
+     * 
+     * @since 1.0
+     */
+    public static final int DEFAULT_MINIMUM_CODE_POINT = 0;
+
+    /**
+     * The default maximum code point allowed: {@link Character#MAX_CODE_POINT}
+     * ({@value})
+     * 
+     * @since 1.0
+     */
+    public static final int DEFAULT_MAXIMUM_CODE_POINT = Character.MAX_CODE_POINT;
+
+    private int length = 0;
+    private int minimumCodePoint = 0;
+    private int maximumCodePoint = Character.MAX_CODE_POINT;
+    private Set<CodePointPredicate> inclusivePredicates = null;
+    private Random random = null;
+
+    /**
+     * <p>
+     * Constructs a builder with default properties:
+     * <ul>
+     * <li>Length: {@value #DEFAULT_LENGTH}</li>
+     * <li>Minimum code point: {@value #DEFAULT_MINIMUM_CODE_POINT}</li>
+     * <li>Maximum code point: {@link Character#MAX_CODE_POINT}</li>
+     * <li>Default source of randomness</li>
+     * <li>No character filters</li>
+     * </ul>
+     * </p>
+     * 
+     * @since 1.0
+     */
+    public RandomStringBuilder() {
+    }
+
+    /**
+     * <p>
+     * Specifies how many code points to generate in the random string.
+     * </p>
+     * <p>
+     * Note: the number of {@code char} code units generated will exceed
+     * {@code length} if the string contains supplementary characters. See the
+     * {@link Character} documentation to understand how Java stores Unicode
+     * values.
+     * </p>
+     * 
+     * @param length
+     *            the number of code points to generate
+     * @return {@code this}, to allow method chaining
+     * @throws IllegalArgumentException
+     *             if {@code length < 0}
+     * @since 1.0
+     */
+    public RandomStringBuilder ofLength(final int length) {
+        if (length < 0) {
+            throw new IllegalArgumentException(String.format("Length %d is smaller than zero.",
length));
+        }
+
+        this.length = length;
+        return this;
+    }
+
+    /**
+     * <p>
+     * Specifies the minimum and maximum code points allowed in the generated
+     * string.
+     * </p>
+     * 
+     * @param minimumCodePoint
+     *            the smallest code point allowed (inclusive)
+     * @param maximumCodePoint
+     *            the largest code point allowed (inclusive)
+     * @return {@code this}, to allow method chaining
+     * @throws IllegalArgumentException
+     *             if {@code maximumCodePoint >}
+     *             {@link Character#MAX_CODE_POINT}
+     * @throws IllegalArgumentException
+     *             if {@code minimumCodePoint < 0}
+     * @throws IllegalArgumentException
+     *             if {@code minimumCodePoint > maximumCodePoint}
+     * @since 1.0
+     */
+    public RandomStringBuilder withinRange(final int minimumCodePoint, final int maximumCodePoint)
{
+        if (minimumCodePoint > maximumCodePoint) {
+            throw new IllegalArgumentException(String.format(
+                    "Minimum code point %d is larger than maximum code point %d", minimumCodePoint,
maximumCodePoint));
+        }
+        if (minimumCodePoint < 0) {
+            throw new IllegalArgumentException(String.format("Minimum code point %d is negative",
minimumCodePoint));
+        }
+        if (maximumCodePoint > Character.MAX_CODE_POINT) {
+            throw new IllegalArgumentException(
+                    String.format("Value %d is larger than Character.MAX_CODE_POINT.", maximumCodePoint));
+        }
+
+        this.minimumCodePoint = minimumCodePoint;
+        this.maximumCodePoint = maximumCodePoint;
+        return this;
+    }
+
+    /**
+     * <p>
+     * Overrides the default source of randomness.
+     * </p>
+     * 
+     * <p>
+     * Passing {@code null} to this method will revert to the default source of
+     * randomness.
+     * </p>
+     * 
+     * @param random
+     *            the source of randomness, may be {@code null}
+     * @return {@code this}, to allow method chaining
+     * @since 1.0
+     */
+    public RandomStringBuilder usingRandom(final Random random) {
+        this.random = random;
+        return this;
+    }
+
+    /**
+     * <p>
+     * Limits the characters in the generated string to those that match at
+     * least one of the predicates supplied.
+     * </p>
+     * 
+     * <p>
+     * Passing {@code null} or an empty array to this method will revert to the
+     * default behaviour of allowing any character. Multiple calls to this
+     * method will replace the previously stored predicates.
+     * </p>
+     * 
+     * @param predicates
+     *            the predicates, may be {@code null} or empty
+     * @return {@code this}, to allow method chaining
+     * @since 1.0
+     */
+    public RandomStringBuilder filteredBy(final CodePointPredicate... predicates) {
+        if (predicates == null || predicates.length == 0) {
+            inclusivePredicates = null;
+            return this;
+        }
+
+        if (inclusivePredicates == null) {
+            inclusivePredicates = new HashSet<>();
+        } else {
+            inclusivePredicates.clear();
+        }
+
+        for (CodePointPredicate predicate : predicates) {
+            inclusivePredicates.add(predicate);
+        }
+
+        return this;
+    }
+
+    /**
+     * <p>
+     * Generates a random string using the settings defined in this builder.
+     * Code points are randomly selected between the minimum and maximum values.
+     * Surrogate and private use characters are not returned, although the
+     * resulting string may contain pairs of surrogates that together encode a
+     * supplementary character.
+     * </p>
+     * 
+     * <p>
+     * A static {@code Random} instance is used if an alternative wasn't
+     * provided via {@link #usingRandom(Random)}.
+     * </p>
+     * 
+     * @return the randomly generated string
+     * @since 1.0
+     */
+    @Override
+    public String build() {
+        if (length == 0) {
+            return "";
+        }
+
+        if (random == null) {
+            random = DEFAULT_RANDOM;
+        }
+
+        final StringBuilder builder = new StringBuilder(length);
+        long remaining = length;
+
+        do {
+            int codePoint = random.nextInt(maximumCodePoint - minimumCodePoint + 1) + minimumCodePoint;
+
+            switch (Character.getType(codePoint)) {
+            case Character.UNASSIGNED:
+            case Character.PRIVATE_USE:
+            case Character.SURROGATE:
+                continue;
+            }
+
+            if (inclusivePredicates != null) {
+                boolean matchedFilter = false;
+                for (CodePointPredicate predicate : inclusivePredicates) {
+                    if (predicate.test(codePoint)) {
+                        matchedFilter = true;
+                        break;
+                    }
+                }
+                if (!matchedFilter) {
+                    continue;
+                }
+            }
+
+            builder.appendCodePoint(codePoint);
+            remaining--;
+
+        } while (remaining != 0);
+
+        return builder.toString();
+    }
+
+    /**
+     * A predicate for selecting code points.
+     * 
+     * @since 1.0
+     */
+    public static interface CodePointPredicate {
+        /**
+         * Tests the code point with this predicate.
+         * 
+         * @param codePoint
+         *            the code point to test
+         * @return {@code true} if the code point matches the predicate,
+         *         {@code false} otherwise
+         * @since 1.0
+         */
+        boolean test(int codePoint);
+    }
+
+    /**
+     * Tests code points against {@link Character#isLetter(int)}.
+     * 
+     * @since 1.0
+     */
+    public static final CodePointPredicate LETTERS = new LetterPredicate();
+
+    /**
+     * Tests code points against {@link Character#isDigit(int)}.
+     * 
+     * @since 1.0
+     */
+    public static final CodePointPredicate DIGITS = new DigitPredicate();
+
+    /**
+     * Tests whether code points are letters.
+     */
+    private static final class LetterPredicate implements CodePointPredicate {
+        @Override
+        public boolean test(int codePoint) {
+            return Character.isLetter(codePoint);
+        }
+    }
+
+    /**
+     * Tests whether code points are digits.
+     */
+    private static final class DigitPredicate implements CodePointPredicate {
+        @Override
+        public boolean test(int codePoint) {
+            return Character.isDigit(codePoint);
+        }
+    };
+}

http://git-wip-us.apache.org/repos/asf/commons-text/blob/12040905/src/test/java/org/apache/commons/text/RandomStringBuilderTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/RandomStringBuilderTest.java b/src/test/java/org/apache/commons/text/RandomStringBuilderTest.java
new file mode 100644
index 0000000..e6f9f81
--- /dev/null
+++ b/src/test/java/org/apache/commons/text/RandomStringBuilderTest.java
@@ -0,0 +1,235 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text;
+
+import static org.apache.commons.text.RandomStringBuilder.LETTERS;
+import static org.junit.Assert.*;
+
+import java.util.Random;
+
+import org.apache.commons.text.RandomStringBuilder.CodePointPredicate;
+import org.junit.Test;
+
+/**
+ * Tests for {@link RandomStringBuilder}
+ */
+public class RandomStringBuilderTest {
+
+    private static int codePointLength(String s) {
+        return s.codePointCount(0, s.length());
+    }
+
+    private static final CodePointPredicate A_FILTER = new CodePointPredicate() {
+        @Override
+        public boolean test(int codePoint) {
+            return codePoint == 'a';
+        }
+    };
+
+    private static final CodePointPredicate B_FILTER = new CodePointPredicate() {
+        @Override
+        public boolean test(int codePoint) {
+            return codePoint == 'b';
+        }
+    };
+
+    @Test
+    public void testDefaultLength() throws Exception {
+        String str = new RandomStringBuilder().build();
+        assertEquals(RandomStringBuilder.DEFAULT_LENGTH, codePointLength(str));
+    }
+
+    @Test(expected = IllegalArgumentException.class)
+    public void testInvalidLength() throws Exception {
+        new RandomStringBuilder().ofLength(-1);
+    }
+
+    @Test
+    public void testSetLength() throws Exception {
+        final int length = 99;
+        String str = new RandomStringBuilder().ofLength(length).build();
+        assertEquals(length, codePointLength(str));
+    }
+
+    @Test(expected = IllegalArgumentException.class)
+    public void testBadMinimumCodePoint() throws Exception {
+        new RandomStringBuilder().withinRange(-1, 1);
+    }
+
+    @Test(expected = IllegalArgumentException.class)
+    public void testBadMaximumCodePoint() throws Exception {
+        new RandomStringBuilder().withinRange(0, Character.MAX_CODE_POINT + 1);
+    }
+
+    @Test
+        public void testWithinRange() throws Exception {
+            final int length = 5000;
+            final int minimumCodePoint = 'a';
+            final int maximumCodePoint = 'z';
+            String str = new RandomStringBuilder().ofLength(length).withinRange(minimumCodePoint,maximumCodePoint).build();
+    
+            int i = 0;
+            do {
+                int codePoint = str.codePointAt(i);
+                assertTrue(codePoint >= minimumCodePoint && codePoint <= maximumCodePoint);
+                i += Character.charCount(codePoint);
+            } while (i < str.length());
+    
+        }
+
+    @Test
+    public void testNoLoneSurrogates() throws Exception {
+        final int length = 5000;
+        String str = new RandomStringBuilder().ofLength(length).build();
+
+        char lastChar = str.charAt(0);
+        for (int i = 1; i < str.length(); i++) {
+            char c = str.charAt(i);
+
+            if (Character.isLowSurrogate(c)) {
+                assertTrue(Character.isHighSurrogate(lastChar));
+            }
+
+            if (Character.isHighSurrogate(lastChar)) {
+                assertTrue(Character.isLowSurrogate(c));
+            }
+
+            if (Character.isHighSurrogate(c)) {
+                // test this isn't the last character in the string
+                assertTrue(i + 1 < str.length());
+            }
+
+            lastChar = c;
+        }
+    }
+
+    @Test
+    public void testUsingRandom() throws Exception {
+        final char testChar = 'a';
+        final Random testRandom = new Random() {
+            private static final long serialVersionUID = 1L;
+
+            @Override
+            public int nextInt(int n) {
+                return testChar;
+            }
+        };
+
+        String str = new RandomStringBuilder().ofLength(100).usingRandom(testRandom).build();
+        for (char c : str.toCharArray()) {
+            assertEquals(testChar, c);
+        }
+    }
+
+    @Test
+    public void testLetterPredicate() throws Exception {
+        String str = new RandomStringBuilder().ofLength(5000).filteredBy(LETTERS).build();
+
+        int i = 0;
+        do {
+            int codePoint = str.codePointAt(i);
+            assertTrue(Character.isLetter(codePoint));
+            i += Character.charCount(codePoint);
+        } while (i < str.length());
+    }
+
+    @Test
+    public void testDigitPredicate() throws Exception {
+        String str = new RandomStringBuilder().ofLength(5000).filteredBy(RandomStringBuilder.DIGITS).build();
+
+        int i = 0;
+        do {
+            int codePoint = str.codePointAt(i);
+            assertTrue(Character.isDigit(codePoint));
+            i += Character.charCount(codePoint);
+        } while (i < str.length());
+    }
+
+    @Test
+    public void testMultipleFilters() throws Exception {
+        String str = new RandomStringBuilder().ofLength(5000).withinRange('a','d')
+                .filteredBy(A_FILTER, B_FILTER).build();
+
+        boolean aFound = false;
+        boolean bFound = false;
+
+        for (char c : str.toCharArray()) {
+            if (c == 'a') {
+                aFound = true;
+            } else if (c == 'b') {
+                bFound = true;
+            } else {
+                fail("Invalid character");
+            }
+        }
+
+        assertTrue(aFound && bFound);
+    }
+
+    @Test
+    public void testNoPrivateCharacters() throws Exception {
+        final int startOfPrivateBMPChars = 0xE000;
+
+        // Request a string in an area of the Basic Multilingual Plane that is
+        // largely
+        // occupied by private characters
+        String str = new RandomStringBuilder().ofLength(5000).withinRange(startOfPrivateBMPChars,

+                Character.MIN_SUPPLEMENTARY_CODE_POINT - 1).build();
+
+        int i = 0;
+        do {
+            int codePoint = str.codePointAt(i);
+            assertFalse(Character.getType(codePoint) == Character.PRIVATE_USE);
+            i += Character.charCount(codePoint);
+        } while (i < str.length());
+    }
+
+    @Test(expected = IllegalArgumentException.class)
+    public void testBadMinAndMax() throws Exception {
+        new RandomStringBuilder().withinRange(2, 1);
+    }
+
+    @Test
+    public void testRemoveFilters() throws Exception {
+
+        RandomStringBuilder builder = new RandomStringBuilder().ofLength(100).withinRange('a',
'z')
+                .filteredBy(A_FILTER);
+
+        builder.filteredBy();
+
+        String str = builder.build();
+        for (char c : str.toCharArray()) {
+            if (c != 'a') {
+                // filter was successfully removed
+                return;
+            }
+        }
+
+        fail("Filter appears to have remained in place");
+    }
+
+    @Test
+    public void testChangeOfFilter() throws Exception {
+        RandomStringBuilder builder = new RandomStringBuilder().ofLength(100).withinRange('a',
'z')
+                .filteredBy(A_FILTER);
+        String str = builder.filteredBy(B_FILTER).build();
+
+        for (char c : str.toCharArray()) {
+            assertTrue(c == 'b');
+        }
+    }
+}


Mime
View raw message