commons-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From chtom...@apache.org
Subject [22/25] [text] chore: update packages back to org.apache.commons.text.*
Date Fri, 10 Feb 2017 13:17:35 GMT
http://git-wip-us.apache.org/repos/asf/commons-text/blob/c7cf533d/src/main/java/org/apache/commons/text/StrTokenizer.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/StrTokenizer.java b/src/main/java/org/apache/commons/text/StrTokenizer.java
new file mode 100644
index 0000000..8186f37
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/StrTokenizer.java
@@ -0,0 +1,1118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.ListIterator;
+import java.util.NoSuchElementException;
+
+/**
+ * Tokenizes a string based based on delimiters (separators)
+ * and supporting quoting and ignored character concepts.
+ * <p>
+ * This class can split a String into many smaller strings. It aims
+ * to do a similar job to {@link java.util.StringTokenizer StringTokenizer},
+ * however it offers much more control and flexibility including implementing
+ * the <code>ListIterator</code> interface. By default, it is set up
+ * like <code>StringTokenizer</code>.
+ * <p>
+ * The input String is split into a number of <i>tokens</i>.
+ * Each token is separated from the next String by a <i>delimiter</i>.
+ * One or more delimiter characters must be specified.
+ * <p>
+ * Each token may be surrounded by quotes.
+ * The <i>quote</i> matcher specifies the quote character(s).
+ * A quote may be escaped within a quoted section by duplicating itself.
+ * <p>
+ * Between each token and the delimiter are potentially characters that need trimming.
+ * The <i>trimmer</i> matcher specifies these characters.
+ * One usage might be to trim whitespace characters.
+ * <p>
+ * At any point outside the quotes there might potentially be invalid characters.
+ * The <i>ignored</i> matcher specifies these characters to be removed.
+ * One usage might be to remove new line characters.
+ * <p>
+ * Empty tokens may be removed or returned as null.
+ * <pre>
+ * "a,b,c"         - Three tokens "a","b","c"   (comma delimiter)
+ * " a, b , c "    - Three tokens "a","b","c"   (default CSV processing trims whitespace)
+ * "a, ", b ,", c" - Three tokens "a, " , " b ", ", c" (quoted text untouched)
+ * </pre>
+ * <p>
+ *
+ * This tokenizer has the following properties and options:
+ *
+ * <table summary="Tokenizer Properties">
+ *  <tr>
+ *   <th>Property</th><th>Type</th><th>Default</th>
+ *  </tr>
+ *  <tr>
+ *   <td>delim</td><td>CharSetMatcher</td><td>{ \t\n\r\f}</td>
+ *  </tr>
+ *  <tr>
+ *   <td>quote</td><td>NoneMatcher</td><td>{}</td>
+ *  </tr>
+ *  <tr>
+ *   <td>ignore</td><td>NoneMatcher</td><td>{}</td>
+ *  </tr>
+ *  <tr>
+ *   <td>emptyTokenAsNull</td><td>boolean</td><td>false</td>
+ *  </tr>
+ *  <tr>
+ *   <td>ignoreEmptyTokens</td><td>boolean</td><td>true</td>
+ *  </tr>
+ * </table>
+ *
+ * @since 1.0
+ */
+public class StrTokenizer implements ListIterator<String>, Cloneable {
+
+    private static final StrTokenizer CSV_TOKENIZER_PROTOTYPE;
+    private static final StrTokenizer TSV_TOKENIZER_PROTOTYPE;
+    static {
+        CSV_TOKENIZER_PROTOTYPE = new StrTokenizer();
+        CSV_TOKENIZER_PROTOTYPE.setDelimiterMatcher(StrMatcher.commaMatcher());
+        CSV_TOKENIZER_PROTOTYPE.setQuoteMatcher(StrMatcher.doubleQuoteMatcher());
+        CSV_TOKENIZER_PROTOTYPE.setIgnoredMatcher(StrMatcher.noneMatcher());
+        CSV_TOKENIZER_PROTOTYPE.setTrimmerMatcher(StrMatcher.trimMatcher());
+        CSV_TOKENIZER_PROTOTYPE.setEmptyTokenAsNull(false);
+        CSV_TOKENIZER_PROTOTYPE.setIgnoreEmptyTokens(false);
+
+        TSV_TOKENIZER_PROTOTYPE = new StrTokenizer();
+        TSV_TOKENIZER_PROTOTYPE.setDelimiterMatcher(StrMatcher.tabMatcher());
+        TSV_TOKENIZER_PROTOTYPE.setQuoteMatcher(StrMatcher.doubleQuoteMatcher());
+        TSV_TOKENIZER_PROTOTYPE.setIgnoredMatcher(StrMatcher.noneMatcher());
+        TSV_TOKENIZER_PROTOTYPE.setTrimmerMatcher(StrMatcher.trimMatcher());
+        TSV_TOKENIZER_PROTOTYPE.setEmptyTokenAsNull(false);
+        TSV_TOKENIZER_PROTOTYPE.setIgnoreEmptyTokens(false);
+    }
+
+    /** The text to work on. */
+    private char chars[];
+    /** The parsed tokens */
+    private String tokens[];
+    /** The current iteration position */
+    private int tokenPos;
+
+    /** The delimiter matcher */
+    private StrMatcher delimMatcher = StrMatcher.splitMatcher();
+    /** The quote matcher */
+    private StrMatcher quoteMatcher = StrMatcher.noneMatcher();
+    /** The ignored matcher */
+    private StrMatcher ignoredMatcher = StrMatcher.noneMatcher();
+    /** The trimmer matcher */
+    private StrMatcher trimmerMatcher = StrMatcher.noneMatcher();
+
+    /** Whether to return empty tokens as null */
+    private boolean emptyAsNull = false;
+    /** Whether to ignore empty tokens */
+    private boolean ignoreEmptyTokens = true;
+
+    //-----------------------------------------------------------------------
+
+    /**
+     * Returns a clone of <code>CSV_TOKENIZER_PROTOTYPE</code>.
+     *
+     * @return a clone of <code>CSV_TOKENIZER_PROTOTYPE</code>.
+     */
+    private static StrTokenizer getCSVClone() {
+        return (StrTokenizer) CSV_TOKENIZER_PROTOTYPE.clone();
+    }
+
+    /**
+     * Gets a new tokenizer instance which parses Comma Separated Value strings
+     * initializing it with the given input.  The default for CSV processing
+     * will be trim whitespace from both ends (which can be overridden with
+     * the setTrimmer method).
+     * <p>
+     * You must call a "reset" method to set the string which you want to parse.
+     * @return a new tokenizer instance which parses Comma Separated Value strings
+     */
+    public static StrTokenizer getCSVInstance() {
+        return getCSVClone();
+    }
+
+    /**
+     * Gets a new tokenizer instance which parses Comma Separated Value strings
+     * initializing it with the given input.  The default for CSV processing
+     * will be trim whitespace from both ends (which can be overridden with
+     * the setTrimmer method).
+     *
+     * @param input  the text to parse
+     * @return a new tokenizer instance which parses Comma Separated Value strings
+     */
+    public static StrTokenizer getCSVInstance(final String input) {
+        final StrTokenizer tok = getCSVClone();
+        tok.reset(input);
+        return tok;
+    }
+
+    /**
+     * Gets a new tokenizer instance which parses Comma Separated Value strings
+     * initializing it with the given input.  The default for CSV processing
+     * will be trim whitespace from both ends (which can be overridden with
+     * the setTrimmer method).
+     *
+     * @param input  the text to parse
+     * @return a new tokenizer instance which parses Comma Separated Value strings
+     */
+    public static StrTokenizer getCSVInstance(final char[] input) {
+        final StrTokenizer tok = getCSVClone();
+        tok.reset(input);
+        return tok;
+    }
+
+    /**
+     * Returns a clone of <code>TSV_TOKENIZER_PROTOTYPE</code>.
+     *
+     * @return a clone of <code>TSV_TOKENIZER_PROTOTYPE</code>.
+     */
+    private static StrTokenizer getTSVClone() {
+        return (StrTokenizer) TSV_TOKENIZER_PROTOTYPE.clone();
+    }
+
+
+    /**
+     * Gets a new tokenizer instance which parses Tab Separated Value strings.
+     * The default for CSV processing will be trim whitespace from both ends
+     * (which can be overridden with the setTrimmer method).
+     * <p>
+     * You must call a "reset" method to set the string which you want to parse.
+     * @return a new tokenizer instance which parses Tab Separated Value strings.
+     */
+    public static StrTokenizer getTSVInstance() {
+        return getTSVClone();
+    }
+
+    /**
+     * Gets a new tokenizer instance which parses Tab Separated Value strings.
+     * The default for CSV processing will be trim whitespace from both ends
+     * (which can be overridden with the setTrimmer method).
+     * @param input  the string to parse
+     * @return a new tokenizer instance which parses Tab Separated Value strings.
+     */
+    public static StrTokenizer getTSVInstance(final String input) {
+        final StrTokenizer tok = getTSVClone();
+        tok.reset(input);
+        return tok;
+    }
+
+    /**
+     * Gets a new tokenizer instance which parses Tab Separated Value strings.
+     * The default for CSV processing will be trim whitespace from both ends
+     * (which can be overridden with the setTrimmer method).
+     * @param input  the string to parse
+     * @return a new tokenizer instance which parses Tab Separated Value strings.
+     */
+    public static StrTokenizer getTSVInstance(final char[] input) {
+        final StrTokenizer tok = getTSVClone();
+        tok.reset(input);
+        return tok;
+    }
+
+    //-----------------------------------------------------------------------
+    /**
+     * Constructs a tokenizer splitting on space, tab, newline and formfeed
+     * as per StringTokenizer, but with no text to tokenize.
+     * <p>
+     * This constructor is normally used with {@link #reset(String)}.
+     */
+    public StrTokenizer() {
+        super();
+        this.chars = null;
+    }
+
+    /**
+     * Constructs a tokenizer splitting on space, tab, newline and formfeed
+     * as per StringTokenizer.
+     *
+     * @param input  the string which is to be parsed
+     */
+    public StrTokenizer(final String input) {
+        super();
+        if (input != null) {
+            chars = input.toCharArray();
+        } else {
+            chars = null;
+        }
+    }
+
+    /**
+     * Constructs a tokenizer splitting on the specified delimiter character.
+     *
+     * @param input  the string which is to be parsed
+     * @param delim  the field delimiter character
+     */
+    public StrTokenizer(final String input, final char delim) {
+        this(input);
+        setDelimiterChar(delim);
+    }
+
+    /**
+     * Constructs a tokenizer splitting on the specified delimiter string.
+     *
+     * @param input  the string which is to be parsed
+     * @param delim  the field delimiter string
+     */
+    public StrTokenizer(final String input, final String delim) {
+        this(input);
+        setDelimiterString(delim);
+    }
+
+    /**
+     * Constructs a tokenizer splitting using the specified delimiter matcher.
+     *
+     * @param input  the string which is to be parsed
+     * @param delim  the field delimiter matcher
+     */
+    public StrTokenizer(final String input, final StrMatcher delim) {
+        this(input);
+        setDelimiterMatcher(delim);
+    }
+
+    /**
+     * Constructs a tokenizer splitting on the specified delimiter character
+     * and handling quotes using the specified quote character.
+     *
+     * @param input  the string which is to be parsed
+     * @param delim  the field delimiter character
+     * @param quote  the field quoted string character
+     */
+    public StrTokenizer(final String input, final char delim, final char quote) {
+        this(input, delim);
+        setQuoteChar(quote);
+    }
+
+    /**
+     * Constructs a tokenizer splitting using the specified delimiter matcher
+     * and handling quotes using the specified quote matcher.
+     *
+     * @param input  the string which is to be parsed
+     * @param delim  the field delimiter matcher
+     * @param quote  the field quoted string matcher
+     */
+    public StrTokenizer(final String input, final StrMatcher delim, final StrMatcher quote) {
+        this(input, delim);
+        setQuoteMatcher(quote);
+    }
+
+    /**
+     * Constructs a tokenizer splitting on space, tab, newline and formfeed
+     * as per StringTokenizer.
+     *
+     * @param input  the string which is to be parsed, not cloned
+     */
+    public StrTokenizer(final char[] input) {
+        super();
+        if (input == null) {
+            this.chars = null;
+        } else {
+            this.chars = input.clone();
+        }
+    }
+
+    /**
+     * Constructs a tokenizer splitting on the specified character.
+     *
+     * @param input  the string which is to be parsed, not cloned
+     * @param delim the field delimiter character
+     */
+    public StrTokenizer(final char[] input, final char delim) {
+        this(input);
+        setDelimiterChar(delim);
+    }
+
+    /**
+     * Constructs a tokenizer splitting on the specified string.
+     *
+     * @param input  the string which is to be parsed, not cloned
+     * @param delim the field delimiter string
+     */
+    public StrTokenizer(final char[] input, final String delim) {
+        this(input);
+        setDelimiterString(delim);
+    }
+
+    /**
+     * Constructs a tokenizer splitting using the specified delimiter matcher.
+     *
+     * @param input  the string which is to be parsed, not cloned
+     * @param delim  the field delimiter matcher
+     */
+    public StrTokenizer(final char[] input, final StrMatcher delim) {
+        this(input);
+        setDelimiterMatcher(delim);
+    }
+
+    /**
+     * Constructs a tokenizer splitting on the specified delimiter character
+     * and handling quotes using the specified quote character.
+     *
+     * @param input  the string which is to be parsed, not cloned
+     * @param delim  the field delimiter character
+     * @param quote  the field quoted string character
+     */
+    public StrTokenizer(final char[] input, final char delim, final char quote) {
+        this(input, delim);
+        setQuoteChar(quote);
+    }
+
+    /**
+     * Constructs a tokenizer splitting using the specified delimiter matcher
+     * and handling quotes using the specified quote matcher.
+     *
+     * @param input  the string which is to be parsed, not cloned
+     * @param delim  the field delimiter character
+     * @param quote  the field quoted string character
+     */
+    public StrTokenizer(final char[] input, final StrMatcher delim, final StrMatcher quote) {
+        this(input, delim);
+        setQuoteMatcher(quote);
+    }
+
+    // API
+    //-----------------------------------------------------------------------
+    /**
+     * Gets the number of tokens found in the String.
+     *
+     * @return the number of matched tokens
+     */
+    public int size() {
+        checkTokenized();
+        return tokens.length;
+    }
+
+    /**
+     * Gets the next token from the String.
+     * Equivalent to {@link #next()} except it returns null rather than
+     * throwing {@link NoSuchElementException} when no tokens remain.
+     *
+     * @return the next sequential token, or null when no more tokens are found
+     */
+    public String nextToken() {
+        if (hasNext()) {
+            return tokens[tokenPos++];
+        }
+        return null;
+    }
+
+    /**
+     * Gets the previous token from the String.
+     *
+     * @return the previous sequential token, or null when no more tokens are found
+     */
+    public String previousToken() {
+        if (hasPrevious()) {
+            return tokens[--tokenPos];
+        }
+        return null;
+    }
+
+    /**
+     * Gets a copy of the full token list as an independent modifiable array.
+     *
+     * @return the tokens as a String array
+     */
+    public String[] getTokenArray() {
+        checkTokenized();
+        return tokens.clone();
+    }
+
+    /**
+     * Gets a copy of the full token list as an independent modifiable list.
+     *
+     * @return the tokens as a String array
+     */
+    public List<String> getTokenList() {
+        checkTokenized();
+        final List<String> list = new ArrayList<>(tokens.length);
+        for (final String element : tokens) {
+            list.add(element);
+        }
+        return list;
+    }
+
+    /**
+     * Resets this tokenizer, forgetting all parsing and iteration already completed.
+     * <p>
+     * This method allows the same tokenizer to be reused for the same String.
+     *
+     * @return this, to enable chaining
+     */
+    public StrTokenizer reset() {
+        tokenPos = 0;
+        tokens = null;
+        return this;
+    }
+
+    /**
+     * Reset this tokenizer, giving it a new input string to parse.
+     * In this manner you can re-use a tokenizer with the same settings
+     * on multiple input lines.
+     *
+     * @param input  the new string to tokenize, null sets no text to parse
+     * @return this, to enable chaining
+     */
+    public StrTokenizer reset(final String input) {
+        reset();
+        if (input != null) {
+            this.chars = input.toCharArray();
+        } else {
+            this.chars = null;
+        }
+        return this;
+    }
+
+    /**
+     * Reset this tokenizer, giving it a new input string to parse.
+     * In this manner you can re-use a tokenizer with the same settings
+     * on multiple input lines.
+     *
+     * @param input  the new character array to tokenize, not cloned, null sets no text to parse
+     * @return this, to enable chaining
+     */
+    public StrTokenizer reset(final char[] input) {
+        reset();
+        if (input != null) {
+            this.chars = input.clone();
+        } else {
+            this.chars = null;
+        }
+        return this;
+    }
+
+    // ListIterator
+    //-----------------------------------------------------------------------
+    /**
+     * Checks whether there are any more tokens.
+     *
+     * @return true if there are more tokens
+     */
+    @Override
+    public boolean hasNext() {
+        checkTokenized();
+        return tokenPos < tokens.length;
+    }
+
+    /**
+     * Gets the next token.
+     *
+     * @return the next String token
+     * @throws NoSuchElementException if there are no more elements
+     */
+    @Override
+    public String next() {
+        if (hasNext()) {
+            return tokens[tokenPos++];
+        }
+        throw new NoSuchElementException();
+    }
+
+    /**
+     * Gets the index of the next token to return.
+     *
+     * @return the next token index
+     */
+    @Override
+    public int nextIndex() {
+        return tokenPos;
+    }
+
+    /**
+     * Checks whether there are any previous tokens that can be iterated to.
+     *
+     * @return true if there are previous tokens
+     */
+    @Override
+    public boolean hasPrevious() {
+        checkTokenized();
+        return tokenPos > 0;
+    }
+
+    /**
+     * Gets the token previous to the last returned token.
+     *
+     * @return the previous token
+     */
+    @Override
+    public String previous() {
+        if (hasPrevious()) {
+            return tokens[--tokenPos];
+        }
+        throw new NoSuchElementException();
+    }
+
+    /**
+     * Gets the index of the previous token.
+     *
+     * @return the previous token index
+     */
+    @Override
+    public int previousIndex() {
+        return tokenPos - 1;
+    }
+
+    /**
+     * Unsupported ListIterator operation.
+     *
+     * @throws UnsupportedOperationException always
+     */
+    @Override
+    public void remove() {
+        throw new UnsupportedOperationException("remove() is unsupported");
+    }
+
+    /**
+     * Unsupported ListIterator operation.
+     * @param obj this parameter ignored.
+     * @throws UnsupportedOperationException always
+     */
+    @Override
+    public void set(final String obj) {
+        throw new UnsupportedOperationException("set() is unsupported");
+    }
+
+    /**
+     * Unsupported ListIterator operation.
+     * @param obj this parameter ignored.
+     * @throws UnsupportedOperationException always
+     */
+    @Override
+    public void add(final String obj) {
+        throw new UnsupportedOperationException("add() is unsupported");
+    }
+
+    // Implementation
+    //-----------------------------------------------------------------------
+    /**
+     * Checks if tokenization has been done, and if not then do it.
+     */
+    private void checkTokenized() {
+        if (tokens == null) {
+            if (chars == null) {
+                // still call tokenize as subclass may do some work
+                final List<String> split = tokenize(null, 0, 0);
+                tokens = split.toArray(new String[split.size()]);
+            } else {
+                final List<String> split = tokenize(chars, 0, chars.length);
+                tokens = split.toArray(new String[split.size()]);
+            }
+        }
+    }
+
+    /**
+     * Internal method to performs the tokenization.
+     * <p>
+     * Most users of this class do not need to call this method. This method
+     * will be called automatically by other (public) methods when required.
+     * <p>
+     * This method exists to allow subclasses to add code before or after the
+     * tokenization. For example, a subclass could alter the character array,
+     * offset or count to be parsed, or call the tokenizer multiple times on
+     * multiple strings. It is also be possible to filter the results.
+     * <p>
+     * <code>StrTokenizer</code> will always pass a zero offset and a count
+     * equal to the length of the array to this method, however a subclass
+     * may pass other values, or even an entirely different array.
+     *
+     * @param srcChars  the character array being tokenized, may be null
+     * @param offset  the start position within the character array, must be valid
+     * @param count  the number of characters to tokenize, must be valid
+     * @return the modifiable list of String tokens, unmodifiable if null array or zero count
+     */
+    protected List<String> tokenize(final char[] srcChars, final int offset, final int count) {
+        if (srcChars == null || count == 0) {
+            return Collections.emptyList();
+        }
+        final StrBuilder buf = new StrBuilder();
+        final List<String> tokenList = new ArrayList<>();
+        int pos = offset;
+
+        // loop around the entire buffer
+        while (pos >= 0 && pos < count) {
+            // find next token
+            pos = readNextToken(srcChars, pos, count, buf, tokenList);
+
+            // handle case where end of string is a delimiter
+            if (pos >= count) {
+                addToken(tokenList, "");
+            }
+        }
+        return tokenList;
+    }
+
+    /**
+     * Adds a token to a list, paying attention to the parameters we've set.
+     *
+     * @param list  the list to add to
+     * @param tok  the token to add
+     */
+    private void addToken(final List<String> list, String tok) {
+        if (tok == null || tok.length() == 0) {
+            if (isIgnoreEmptyTokens()) {
+                return;
+            }
+            if (isEmptyTokenAsNull()) {
+                tok = null;
+            }
+        }
+        list.add(tok);
+    }
+
+    /**
+     * Reads character by character through the String to get the next token.
+     *
+     * @param srcChars  the character array being tokenized
+     * @param start  the first character of field
+     * @param len  the length of the character array being tokenized
+     * @param workArea  a temporary work area
+     * @param tokenList  the list of parsed tokens
+     * @return the starting position of the next field (the character
+     *  immediately after the delimiter), or -1 if end of string found
+     */
+    private int readNextToken(final char[] srcChars, int start, final int len, final StrBuilder workArea, final List<String> tokenList) {
+        // skip all leading whitespace, unless it is the
+        // field delimiter or the quote character
+        while (start < len) {
+            final int removeLen = Math.max(
+                    getIgnoredMatcher().isMatch(srcChars, start, start, len),
+                    getTrimmerMatcher().isMatch(srcChars, start, start, len));
+            if (removeLen == 0 ||
+                getDelimiterMatcher().isMatch(srcChars, start, start, len) > 0 ||
+                getQuoteMatcher().isMatch(srcChars, start, start, len) > 0) {
+                break;
+            }
+            start += removeLen;
+        }
+
+        // handle reaching end
+        if (start >= len) {
+            addToken(tokenList, "");
+            return -1;
+        }
+
+        // handle empty token
+        final int delimLen = getDelimiterMatcher().isMatch(srcChars, start, start, len);
+        if (delimLen > 0) {
+            addToken(tokenList, "");
+            return start + delimLen;
+        }
+
+        // handle found token
+        final int quoteLen = getQuoteMatcher().isMatch(srcChars, start, start, len);
+        if (quoteLen > 0) {
+            return readWithQuotes(srcChars, start + quoteLen, len, workArea, tokenList, start, quoteLen);
+        }
+        return readWithQuotes(srcChars, start, len, workArea, tokenList, 0, 0);
+    }
+
+    /**
+     * Reads a possibly quoted string token.
+     *
+     * @param srcChars  the character array being tokenized
+     * @param start  the first character of field
+     * @param len  the length of the character array being tokenized
+     * @param workArea  a temporary work area
+     * @param tokenList  the list of parsed tokens
+     * @param quoteStart  the start position of the matched quote, 0 if no quoting
+     * @param quoteLen  the length of the matched quote, 0 if no quoting
+     * @return the starting position of the next field (the character
+     *  immediately after the delimiter, or if end of string found,
+     *  then the length of string
+     */
+    private int readWithQuotes(final char[] srcChars, final int start, final int len, final StrBuilder workArea,
+                               final List<String> tokenList, final int quoteStart, final int quoteLen) {
+        // Loop until we've found the end of the quoted
+        // string or the end of the input
+        workArea.clear();
+        int pos = start;
+        boolean quoting = quoteLen > 0;
+        int trimStart = 0;
+
+        while (pos < len) {
+            // quoting mode can occur several times throughout a string
+            // we must switch between quoting and non-quoting until we
+            // encounter a non-quoted delimiter, or end of string
+            if (quoting) {
+                // In quoting mode
+
+                // If we've found a quote character, see if it's
+                // followed by a second quote.  If so, then we need
+                // to actually put the quote character into the token
+                // rather than end the token.
+                if (isQuote(srcChars, pos, len, quoteStart, quoteLen)) {
+                    if (isQuote(srcChars, pos + quoteLen, len, quoteStart, quoteLen)) {
+                        // matched pair of quotes, thus an escaped quote
+                        workArea.append(srcChars, pos, quoteLen);
+                        pos += quoteLen * 2;
+                        trimStart = workArea.size();
+                        continue;
+                    }
+
+                    // end of quoting
+                    quoting = false;
+                    pos += quoteLen;
+                    continue;
+                }
+
+                // copy regular character from inside quotes
+                workArea.append(srcChars[pos++]);
+                trimStart = workArea.size();
+
+            } else {
+                // Not in quoting mode
+
+                // check for delimiter, and thus end of token
+                final int delimLen = getDelimiterMatcher().isMatch(srcChars, pos, start, len);
+                if (delimLen > 0) {
+                    // return condition when end of token found
+                    addToken(tokenList, workArea.substring(0, trimStart));
+                    return pos + delimLen;
+                }
+
+                // check for quote, and thus back into quoting mode
+                if (quoteLen > 0 && isQuote(srcChars, pos, len, quoteStart, quoteLen)) {
+                    quoting = true;
+                    pos += quoteLen;
+                    continue;
+                }
+
+                // check for ignored (outside quotes), and ignore
+                final int ignoredLen = getIgnoredMatcher().isMatch(srcChars, pos, start, len);
+                if (ignoredLen > 0) {
+                    pos += ignoredLen;
+                    continue;
+                }
+
+                // check for trimmed character
+                // don't yet know if its at the end, so copy to workArea
+                // use trimStart to keep track of trim at the end
+                final int trimmedLen = getTrimmerMatcher().isMatch(srcChars, pos, start, len);
+                if (trimmedLen > 0) {
+                    workArea.append(srcChars, pos, trimmedLen);
+                    pos += trimmedLen;
+                    continue;
+                }
+
+                // copy regular character from outside quotes
+                workArea.append(srcChars[pos++]);
+                trimStart = workArea.size();
+            }
+        }
+
+        // return condition when end of string found
+        addToken(tokenList, workArea.substring(0, trimStart));
+        return -1;
+    }
+
+    /**
+     * Checks if the characters at the index specified match the quote
+     * already matched in readNextToken().
+     *
+     * @param srcChars  the character array being tokenized
+     * @param pos  the position to check for a quote
+     * @param len  the length of the character array being tokenized
+     * @param quoteStart  the start position of the matched quote, 0 if no quoting
+     * @param quoteLen  the length of the matched quote, 0 if no quoting
+     * @return true if a quote is matched
+     */
+    private boolean isQuote(final char[] srcChars, final int pos, final int len, final int quoteStart, final int quoteLen) {
+        for (int i = 0; i < quoteLen; i++) {
+            if (pos + i >= len || srcChars[pos + i] != srcChars[quoteStart + i]) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    // Delimiter
+    //-----------------------------------------------------------------------
+    /**
+     * Gets the field delimiter matcher.
+     *
+     * @return the delimiter matcher in use
+     */
+    public StrMatcher getDelimiterMatcher() {
+        return this.delimMatcher;
+    }
+
+    /**
+     * Sets the field delimiter matcher.
+     * <p>
+     * The delimitier is used to separate one token from another.
+     *
+     * @param delim  the delimiter matcher to use
+     * @return this, to enable chaining
+     */
+    public StrTokenizer setDelimiterMatcher(final StrMatcher delim) {
+        if (delim == null) {
+            this.delimMatcher = StrMatcher.noneMatcher();
+        } else {
+            this.delimMatcher = delim;
+        }
+        return this;
+    }
+
+    /**
+     * Sets the field delimiter character.
+     *
+     * @param delim  the delimiter character to use
+     * @return this, to enable chaining
+     */
+    public StrTokenizer setDelimiterChar(final char delim) {
+        return setDelimiterMatcher(StrMatcher.charMatcher(delim));
+    }
+
+    /**
+     * Sets the field delimiter string.
+     *
+     * @param delim  the delimiter string to use
+     * @return this, to enable chaining
+     */
+    public StrTokenizer setDelimiterString(final String delim) {
+        return setDelimiterMatcher(StrMatcher.stringMatcher(delim));
+    }
+
+    // Quote
+    //-----------------------------------------------------------------------
+    /**
+     * Gets the quote matcher currently in use.
+     * <p>
+     * The quote character is used to wrap data between the tokens.
+     * This enables delimiters to be entered as data.
+     * The default value is '"' (double quote).
+     *
+     * @return the quote matcher in use
+     */
+    public StrMatcher getQuoteMatcher() {
+        return quoteMatcher;
+    }
+
+    /**
+     * Set the quote matcher to use.
+     * <p>
+     * The quote character is used to wrap data between the tokens.
+     * This enables delimiters to be entered as data.
+     *
+     * @param quote  the quote matcher to use, null ignored
+     * @return this, to enable chaining
+     */
+    public StrTokenizer setQuoteMatcher(final StrMatcher quote) {
+        if (quote != null) {
+            this.quoteMatcher = quote;
+        }
+        return this;
+    }
+
+    /**
+     * Sets the quote character to use.
+     * <p>
+     * The quote character is used to wrap data between the tokens.
+     * This enables delimiters to be entered as data.
+     *
+     * @param quote  the quote character to use
+     * @return this, to enable chaining
+     */
+    public StrTokenizer setQuoteChar(final char quote) {
+        return setQuoteMatcher(StrMatcher.charMatcher(quote));
+    }
+
+    // Ignored
+    //-----------------------------------------------------------------------
+    /**
+     * Gets the ignored character matcher.
+     * <p>
+     * These characters are ignored when parsing the String, unless they are
+     * within a quoted region.
+     * The default value is not to ignore anything.
+     *
+     * @return the ignored matcher in use
+     */
+    public StrMatcher getIgnoredMatcher() {
+        return ignoredMatcher;
+    }
+
+    /**
+     * Set the matcher for characters to ignore.
+     * <p>
+     * These characters are ignored when parsing the String, unless they are
+     * within a quoted region.
+     *
+     * @param ignored  the ignored matcher to use, null ignored
+     * @return this, to enable chaining
+     */
+    public StrTokenizer setIgnoredMatcher(final StrMatcher ignored) {
+        if (ignored != null) {
+            this.ignoredMatcher = ignored;
+        }
+        return this;
+    }
+
+    /**
+     * Set the character to ignore.
+     * <p>
+     * This character is ignored when parsing the String, unless it is
+     * within a quoted region.
+     *
+     * @param ignored  the ignored character to use
+     * @return this, to enable chaining
+     */
+    public StrTokenizer setIgnoredChar(final char ignored) {
+        return setIgnoredMatcher(StrMatcher.charMatcher(ignored));
+    }
+
+    // Trimmer
+    //-----------------------------------------------------------------------
+    /**
+     * Gets the trimmer character matcher.
+     * <p>
+     * These characters are trimmed off on each side of the delimiter
+     * until the token or quote is found.
+     * The default value is not to trim anything.
+     *
+     * @return the trimmer matcher in use
+     */
+    public StrMatcher getTrimmerMatcher() {
+        return trimmerMatcher;
+    }
+
+    /**
+     * Sets the matcher for characters to trim.
+     * <p>
+     * These characters are trimmed off on each side of the delimiter
+     * until the token or quote is found.
+     *
+     * @param trimmer  the trimmer matcher to use, null ignored
+     * @return this, to enable chaining
+     */
+    public StrTokenizer setTrimmerMatcher(final StrMatcher trimmer) {
+        if (trimmer != null) {
+            this.trimmerMatcher = trimmer;
+        }
+        return this;
+    }
+
+    //-----------------------------------------------------------------------
+    /**
+     * Gets whether the tokenizer currently returns empty tokens as null.
+     * The default for this property is false.
+     *
+     * @return true if empty tokens are returned as null
+     */
+    public boolean isEmptyTokenAsNull() {
+        return this.emptyAsNull;
+    }
+
+    /**
+     * Sets whether the tokenizer should return empty tokens as null.
+     * The default for this property is false.
+     *
+     * @param emptyAsNull  whether empty tokens are returned as null
+     * @return this, to enable chaining
+     */
+    public StrTokenizer setEmptyTokenAsNull(final boolean emptyAsNull) {
+        this.emptyAsNull = emptyAsNull;
+        return this;
+    }
+
+    //-----------------------------------------------------------------------
+    /**
+     * Gets whether the tokenizer currently ignores empty tokens.
+     * The default for this property is true.
+     *
+     * @return true if empty tokens are not returned
+     */
+    public boolean isIgnoreEmptyTokens() {
+        return ignoreEmptyTokens;
+    }
+
+    /**
+     * Sets whether the tokenizer should ignore and not return empty tokens.
+     * The default for this property is true.
+     *
+     * @param ignoreEmptyTokens  whether empty tokens are not returned
+     * @return this, to enable chaining
+     */
+    public StrTokenizer setIgnoreEmptyTokens(final boolean ignoreEmptyTokens) {
+        this.ignoreEmptyTokens = ignoreEmptyTokens;
+        return this;
+    }
+
+    //-----------------------------------------------------------------------
+    /**
+     * Gets the String content that the tokenizer is parsing.
+     *
+     * @return the string content being parsed
+     */
+    public String getContent() {
+        if (chars == null) {
+            return null;
+        }
+        return new String(chars);
+    }
+
+    //-----------------------------------------------------------------------
+    /**
+     * Creates a new instance of this Tokenizer. The new instance is reset so
+     * that it will be at the start of the token list.
+     * If a {@link CloneNotSupportedException} is caught, return <code>null</code>.
+     * 
+     * @return a new instance of this Tokenizer which has been reset.
+     */
+    @Override
+    public Object clone() {
+        try {
+            return cloneReset();
+        } catch (final CloneNotSupportedException ex) {
+            return null;
+        }
+    }
+
+    /**
+     * Creates a new instance of this Tokenizer. The new instance is reset so that
+     * it will be at the start of the token list.
+     * 
+     * @return a new instance of this Tokenizer which has been reset.
+     * @throws CloneNotSupportedException if there is a problem cloning
+     */
+    Object cloneReset() throws CloneNotSupportedException {
+        // this method exists to enable 100% test coverage
+        final StrTokenizer cloned = (StrTokenizer) super.clone();
+        if (cloned.chars != null) {
+            cloned.chars = cloned.chars.clone();
+        }
+        cloned.reset();
+        return cloned;
+    }
+
+    //-----------------------------------------------------------------------
+    /**
+     * Gets the String content that the tokenizer is parsing.
+     *
+     * @return the string content being parsed
+     */
+    @Override
+    public String toString() {
+        if (tokens == null) {
+            return "StrTokenizer[not tokenized yet]";
+        }
+        return "StrTokenizer" + getTokenList();
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/commons-text/blob/c7cf533d/src/main/java/org/apache/commons/text/StringEscapeUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/StringEscapeUtils.java b/src/main/java/org/apache/commons/text/StringEscapeUtils.java
new file mode 100644
index 0000000..aa6b071
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/StringEscapeUtils.java
@@ -0,0 +1,959 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.commons.text.translate.AggregateTranslator;
+import org.apache.commons.text.translate.CharSequenceTranslator;
+import org.apache.commons.text.translate.CsvTranslators;
+import org.apache.commons.text.translate.EntityArrays;
+import org.apache.commons.text.translate.JavaUnicodeEscaper;
+import org.apache.commons.text.translate.LookupTranslator;
+import org.apache.commons.text.translate.NumericEntityEscaper;
+import org.apache.commons.text.translate.NumericEntityUnescaper;
+import org.apache.commons.text.translate.OctalUnescaper;
+import org.apache.commons.text.translate.SingleLookupTranslator;
+import org.apache.commons.text.translate.UnicodeUnescaper;
+import org.apache.commons.text.translate.UnicodeUnpairedSurrogateRemover;
+
+import java.io.IOException;
+import java.io.Writer;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * <p>Escapes and unescapes {@code String}s for
+ * Java, Java Script, HTML and XML.</p>
+ *
+ * <p>#ThreadSafe#</p>
+ *
+ *
+ * <p>
+ * This code has been adapted from Apache Commons Lang 3.5.
+ * </p>
+ *
+ * @since 1.0
+ */
+public class StringEscapeUtils {
+
+    /* ESCAPE TRANSLATORS */
+
+    /**
+     * Translator object for escaping Java.
+     *
+     * While {@link #escapeJava(String)} is the expected method of use, this
+     * object allows the Java escaping functionality to be used
+     * as the foundation for a custom translator.
+     */
+    public static final CharSequenceTranslator ESCAPE_JAVA;
+    static {
+        Map<CharSequence, CharSequence> escapeJavaMap = new HashMap<>();
+        escapeJavaMap.put("\"", "\\\"");
+        escapeJavaMap.put("\\", "\\\\");
+        ESCAPE_JAVA = new AggregateTranslator(
+                new LookupTranslator(Collections.unmodifiableMap(escapeJavaMap)),
+                new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE),
+                JavaUnicodeEscaper.outsideOf(32, 0x7f)
+        );
+    }
+
+    /**
+     * Translator object for escaping EcmaScript/JavaScript.
+     *
+     * While {@link #escapeEcmaScript(String)} is the expected method of use, this
+     * object allows the EcmaScript escaping functionality to be used
+     * as the foundation for a custom translator.
+     */
+    public static final CharSequenceTranslator ESCAPE_ECMASCRIPT;
+    static {
+        Map<CharSequence, CharSequence> escapeEcmaScriptMap = new HashMap<>();
+        escapeEcmaScriptMap.put("'", "\\'");
+        escapeEcmaScriptMap.put("\"", "\\\"");
+        escapeEcmaScriptMap.put("\\", "\\\\");
+        escapeEcmaScriptMap.put("/", "\\/");
+        ESCAPE_ECMASCRIPT = new AggregateTranslator(
+                new LookupTranslator(Collections.unmodifiableMap(escapeEcmaScriptMap)),
+                new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE),
+                JavaUnicodeEscaper.outsideOf(32, 0x7f)
+        );
+    }
+
+    /**
+     * Translator object for escaping Json.
+     *
+     * While {@link #escapeJson(String)} is the expected method of use, this
+     * object allows the Json escaping functionality to be used
+     * as the foundation for a custom translator.
+     */
+    public static final CharSequenceTranslator ESCAPE_JSON;
+    static {
+        Map<CharSequence, CharSequence> escapeJsonMap = new HashMap<>();
+        escapeJsonMap.put("\"", "\\\"");
+        escapeJsonMap.put("\\", "\\\\");
+        escapeJsonMap.put("/", "\\/");
+        ESCAPE_JSON = new AggregateTranslator(
+                new LookupTranslator(Collections.unmodifiableMap(escapeJsonMap)),
+                new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE),
+                JavaUnicodeEscaper.outsideOf(32, 0x7f)
+        );
+    }
+
+    /**
+     * Translator object for escaping XML 1.0.
+     *
+     * While {@link #escapeXml10(String)} is the expected method of use, this
+     * object allows the XML escaping functionality to be used
+     * as the foundation for a custom translator.
+     */
+    public static final CharSequenceTranslator ESCAPE_XML10;
+    static {
+        Map<CharSequence, CharSequence> escapeXml10Map = new HashMap<>();
+        escapeXml10Map.put("\u0000", StringUtils.EMPTY);
+        escapeXml10Map.put("\u0001", StringUtils.EMPTY);
+        escapeXml10Map.put("\u0002", StringUtils.EMPTY);
+        escapeXml10Map.put("\u0003", StringUtils.EMPTY);
+        escapeXml10Map.put("\u0004", StringUtils.EMPTY);
+        escapeXml10Map.put("\u0005", StringUtils.EMPTY);
+        escapeXml10Map.put("\u0006", StringUtils.EMPTY);
+        escapeXml10Map.put("\u0007", StringUtils.EMPTY);
+        escapeXml10Map.put("\u0008", StringUtils.EMPTY);
+        escapeXml10Map.put("\u000b", StringUtils.EMPTY);
+        escapeXml10Map.put("\u000c", StringUtils.EMPTY);
+        escapeXml10Map.put("\u000e", StringUtils.EMPTY);
+        escapeXml10Map.put("\u000f", StringUtils.EMPTY);
+        escapeXml10Map.put("\u0010", StringUtils.EMPTY);
+        escapeXml10Map.put("\u0011", StringUtils.EMPTY);
+        escapeXml10Map.put("\u0012", StringUtils.EMPTY);
+        escapeXml10Map.put("\u0013", StringUtils.EMPTY);
+        escapeXml10Map.put("\u0014", StringUtils.EMPTY);
+        escapeXml10Map.put("\u0015", StringUtils.EMPTY);
+        escapeXml10Map.put("\u0016", StringUtils.EMPTY);
+        escapeXml10Map.put("\u0017", StringUtils.EMPTY);
+        escapeXml10Map.put("\u0018", StringUtils.EMPTY);
+        escapeXml10Map.put("\u0019", StringUtils.EMPTY);
+        escapeXml10Map.put("\u001a", StringUtils.EMPTY);
+        escapeXml10Map.put("\u001b", StringUtils.EMPTY);
+        escapeXml10Map.put("\u001c", StringUtils.EMPTY);
+        escapeXml10Map.put("\u001d", StringUtils.EMPTY);
+        escapeXml10Map.put("\u001e", StringUtils.EMPTY);
+        escapeXml10Map.put("\u001f", StringUtils.EMPTY);
+        escapeXml10Map.put("\ufffe", StringUtils.EMPTY);
+        escapeXml10Map.put("\uffff", StringUtils.EMPTY);
+        ESCAPE_XML10 = new AggregateTranslator(
+                new LookupTranslator(EntityArrays.BASIC_ESCAPE),
+                new LookupTranslator(EntityArrays.APOS_ESCAPE),
+                new LookupTranslator(Collections.unmodifiableMap(escapeXml10Map)),
+                NumericEntityEscaper.between(0x7f, 0x84),
+                NumericEntityEscaper.between(0x86, 0x9f),
+                new UnicodeUnpairedSurrogateRemover()
+        );
+    }
+
+    /**
+     * Translator object for escaping XML 1.1.
+     *
+     * While {@link #escapeXml11(String)} is the expected method of use, this
+     * object allows the XML escaping functionality to be used
+     * as the foundation for a custom translator.
+     */
+    public static final CharSequenceTranslator ESCAPE_XML11;
+    static {
+        Map<CharSequence, CharSequence> escapeXml11Map = new HashMap<>();
+        escapeXml11Map.put("\u0000", StringUtils.EMPTY);
+        escapeXml11Map.put("\u000b", "&#11;");
+        escapeXml11Map.put("\u000c", "&#12;");
+        escapeXml11Map.put("\ufffe", StringUtils.EMPTY);
+        escapeXml11Map.put("\uffff", StringUtils.EMPTY);
+        ESCAPE_XML11 = new AggregateTranslator(
+                new LookupTranslator(EntityArrays.BASIC_ESCAPE),
+                new LookupTranslator(EntityArrays.APOS_ESCAPE),
+                new LookupTranslator(Collections.unmodifiableMap(escapeXml11Map)),
+                NumericEntityEscaper.between(0x1, 0x8),
+                NumericEntityEscaper.between(0xe, 0x1f),
+                NumericEntityEscaper.between(0x7f, 0x84),
+                NumericEntityEscaper.between(0x86, 0x9f),
+                new UnicodeUnpairedSurrogateRemover()
+        );
+    }
+
+    /**
+     * Translator object for escaping HTML version 3.0.
+     *
+     * While {@link #escapeHtml3(String)} is the expected method of use, this
+     * object allows the HTML escaping functionality to be used
+     * as the foundation for a custom translator.
+     */
+    public static final CharSequenceTranslator ESCAPE_HTML3 =
+            new AggregateTranslator(
+                    new LookupTranslator(EntityArrays.BASIC_ESCAPE),
+                    new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE)
+            );
+
+    /**
+     * The improved translator object for escaping HTML version 3.0.
+     * The 'improved' part of this translator is that it checks if the html is already translated.
+     * This check prevents double, triple, or recursive translations.
+     *
+     * While {@link #escapeHtml3Once(String)} is the expected method of use, this
+     * object allows the HTML escaping functionality to be used
+     * as the foundation for a custom translator.
+     *
+     * Note that, multiple lookup tables should be passed to this translator
+     * instead of passing multiple instances of this translator to the
+     * AggregateTranslator. Because, a SingleLookupTranslator only checks the values of the
+     * lookup table passed to that instance while deciding whether a value is
+     * already translated or not.
+     */
+    public static final CharSequenceTranslator ESCAPE_HTML3_ONCE =
+            new SingleLookupTranslator(EntityArrays.BASIC_ESCAPE, EntityArrays.ISO8859_1_ESCAPE);
+
+
+    /**
+     * Translator object for escaping HTML version 4.0.
+     *
+     * While {@link #escapeHtml4(String)} is the expected method of use, this
+     * object allows the HTML escaping functionality to be used
+     * as the foundation for a custom translator.
+     */
+    public static final CharSequenceTranslator ESCAPE_HTML4 =
+            new AggregateTranslator(
+                    new LookupTranslator(EntityArrays.BASIC_ESCAPE),
+                    new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE),
+                    new LookupTranslator(EntityArrays.HTML40_EXTENDED_ESCAPE)
+            );
+
+    /**
+     * The improved translator object for escaping HTML version 4.0.
+     * The 'improved' part of this translator is that it checks if the html is already translated.
+     * This check prevents double, triple, or recursive translations.
+     *
+     * While {@link #escapeHtml4Once(String)} is the expected method of use, this
+     * object allows the HTML escaping functionality to be used
+     * as the foundation for a custom translator.
+     *
+     * Note that, multiple lookup tables should be passed to this translator
+     * instead of passing multiple instances of this translator to the
+     * AggregateTranslator. Because, a SingleLookupTranslator only checks the values of the
+     * lookup table passed to that instance while deciding whether a value is
+     * already translated or not.
+     */
+    public static final CharSequenceTranslator ESCAPE_HTML4_ONCE =
+            new SingleLookupTranslator(
+                    EntityArrays.BASIC_ESCAPE,
+                    EntityArrays.ISO8859_1_ESCAPE,
+                    EntityArrays.HTML40_EXTENDED_ESCAPE
+            );
+
+    /**
+     * Translator object for escaping individual Comma Separated Values.
+     *
+     * While {@link #escapeCsv(String)} is the expected method of use, this
+     * object allows the CSV escaping functionality to be used
+     * as the foundation for a custom translator.
+     */
+    public static final CharSequenceTranslator ESCAPE_CSV = new CsvTranslators.CsvEscaper();
+
+    /**
+     * Translator object for escaping Shell command language.
+     *
+     * @see <a href="http://pubs.opengroup.org/onlinepubs/7908799/xcu/chap2.html">Shell Command Language</a>
+     */
+    public static final CharSequenceTranslator ESCAPE_XSI;
+    static {
+        Map<CharSequence, CharSequence> escapeXsiMap = new HashMap<>();
+        escapeXsiMap.put("|", "\\|");
+        escapeXsiMap.put("&", "\\&");
+        escapeXsiMap.put(";", "\\;");
+        escapeXsiMap.put("<", "\\<");
+        escapeXsiMap.put(">", "\\>");
+        escapeXsiMap.put("(", "\\(");
+        escapeXsiMap.put(")", "\\)");
+        escapeXsiMap.put("$", "\\$");
+        escapeXsiMap.put("`", "\\`");
+        escapeXsiMap.put("\\", "\\\\");
+        escapeXsiMap.put("\"", "\\\"");
+        escapeXsiMap.put("'", "\\'");
+        escapeXsiMap.put(" ", "\\ ");
+        escapeXsiMap.put("\t", "\\\t");
+        escapeXsiMap.put("\r\n", "");
+        escapeXsiMap.put("\n", "");
+        escapeXsiMap.put("*", "\\*");
+        escapeXsiMap.put("?", "\\?");
+        escapeXsiMap.put("[", "\\[");
+        escapeXsiMap.put("#", "\\#");
+        escapeXsiMap.put("~", "\\~");
+        escapeXsiMap.put("=", "\\=");
+        escapeXsiMap.put("%", "\\%");
+        ESCAPE_XSI = new LookupTranslator(
+                Collections.unmodifiableMap(escapeXsiMap)
+        );
+    }
+
+    /* UNESCAPE TRANSLATORS */
+
+    /**
+     * Translator object for unescaping escaped Java.
+     *
+     * While {@link #unescapeJava(String)} is the expected method of use, this
+     * object allows the Java unescaping functionality to be used
+     * as the foundation for a custom translator.
+     */
+    // TODO: throw "illegal character: \92" as an Exception if a \ on the end of the Java (as per the compiler)?
+    public static final CharSequenceTranslator UNESCAPE_JAVA;
+    static {
+        Map<CharSequence, CharSequence> unescapeJavaMap = new HashMap<>();
+        unescapeJavaMap.put("\\\\", "\\");
+        unescapeJavaMap.put("\\\"", "\"");
+        unescapeJavaMap.put("\\'", "'");
+        unescapeJavaMap.put("\\", "");
+        UNESCAPE_JAVA = new AggregateTranslator(
+                new OctalUnescaper(),     // .between('\1', '\377'),
+                new UnicodeUnescaper(),
+                new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_UNESCAPE),
+                new LookupTranslator(Collections.unmodifiableMap(unescapeJavaMap))
+        );
+    }
+
+    /**
+     * Translator object for unescaping escaped EcmaScript.
+     *
+     * While {@link #unescapeEcmaScript(String)} is the expected method of use, this
+     * object allows the EcmaScript unescaping functionality to be used
+     * as the foundation for a custom translator.
+     */
+    public static final CharSequenceTranslator UNESCAPE_ECMASCRIPT = UNESCAPE_JAVA;
+
+    /**
+     * Translator object for unescaping escaped Json.
+     *
+     * While {@link #unescapeJson(String)} is the expected method of use, this
+     * object allows the Json unescaping functionality to be used
+     * as the foundation for a custom translator.
+     */
+    public static final CharSequenceTranslator UNESCAPE_JSON = UNESCAPE_JAVA;
+
+    /**
+     * Translator object for unescaping escaped HTML 3.0.
+     *
+     * While {@link #unescapeHtml3(String)} is the expected method of use, this
+     * object allows the HTML unescaping functionality to be used
+     * as the foundation for a custom translator.
+     */
+    public static final CharSequenceTranslator UNESCAPE_HTML3 =
+            new AggregateTranslator(
+                    new LookupTranslator(EntityArrays.BASIC_UNESCAPE),
+                    new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE),
+                    new NumericEntityUnescaper()
+            );
+
+    /**
+     * Translator object for unescaping escaped HTML 4.0.
+     *
+     * While {@link #unescapeHtml4(String)} is the expected method of use, this
+     * object allows the HTML unescaping functionality to be used
+     * as the foundation for a custom translator.
+     */
+    public static final CharSequenceTranslator UNESCAPE_HTML4 =
+            new AggregateTranslator(
+                    new LookupTranslator(EntityArrays.BASIC_UNESCAPE),
+                    new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE),
+                    new LookupTranslator(EntityArrays.HTML40_EXTENDED_UNESCAPE),
+                    new NumericEntityUnescaper()
+            );
+
+    /**
+     * Translator object for unescaping escaped XML.
+     *
+     * While {@link #unescapeXml(String)} is the expected method of use, this
+     * object allows the XML unescaping functionality to be used
+     * as the foundation for a custom translator.
+     */
+    public static final CharSequenceTranslator UNESCAPE_XML =
+            new AggregateTranslator(
+                    new LookupTranslator(EntityArrays.BASIC_UNESCAPE),
+                    new LookupTranslator(EntityArrays.APOS_UNESCAPE),
+                    new NumericEntityUnescaper()
+            );
+
+    /**
+     * Translator object for unescaping escaped Comma Separated Value entries.
+     *
+     * While {@link #unescapeCsv(String)} is the expected method of use, this
+     * object allows the CSV unescaping functionality to be used
+     * as the foundation for a custom translator.
+     */
+    public static final CharSequenceTranslator UNESCAPE_CSV = new CsvTranslators.CsvUnescaper();
+
+    /**
+     * Translator object for unescaping escaped XSI Value entries.
+     *
+     * While {@link #unescapeXSI(String)}  is the expected method of use, this
+     * object allows the XSI unescaping functionality to be used
+     * as the foundation for a custom translator.
+     */
+    public static final CharSequenceTranslator UNESCAPE_XSI = new XsiUnescaper();
+
+    /**
+     * Translator object for unescaping backslash escaped entries.
+     */
+    static class XsiUnescaper extends CharSequenceTranslator {
+
+        /**
+         * Escaped backslash constant.
+         */
+        private static final char BACKSLASH = '\\';
+
+        @Override
+        public int translate(final CharSequence input, final int index, final Writer out) throws IOException {
+
+            if (index != 0) {
+                throw new IllegalStateException("XsiUnescaper should never reach the [1] index");
+            }
+
+            String s = input.toString();
+
+            int segmentStart = 0;
+            int searchOffset = 0;
+            while (true) {
+                int pos = s.indexOf(BACKSLASH, searchOffset);
+                if (pos == -1) {
+                    if (segmentStart < s.length()) {
+                        out.write(s.substring(segmentStart));
+                    }
+                    break;
+                }
+                if (pos > segmentStart) {
+                    out.write(s.substring(segmentStart, pos));
+                }
+                segmentStart = pos + 1;
+                searchOffset = pos + 2;
+            }
+
+            return Character.codePointCount(input, 0, input.length());
+        }
+    }
+
+    /* Helper functions */
+
+    /**
+     * <p>{@code StringEscapeUtils} instances should NOT be constructed in
+     * standard programming.</p>
+     *
+     * <p>Instead, the class should be used as:</p>
+     * <pre>StringEscapeUtils.escapeJava("foo");</pre>
+     *
+     * <p>This constructor is public to permit tools that require a JavaBean
+     * instance to operate.</p>
+     */
+    public StringEscapeUtils() {
+        super();
+    }
+
+    /**
+     * <p>Convenience wrapper for {@link java.lang.StringBuilder} providing escape methods.</p>
+     *
+     * <p>Example:</p>
+     * <pre>
+     * new Builder(ESCAPE_HTML4)
+     *      .append("&lt;p&gt;")
+     *      .escape("This is paragraph 1 and special chars like &amp; get escaped.")
+     *      .append("&lt;/p&gt;&lt;p&gt;")
+     *      .escape("This is paragraph 2 &amp; more...")
+     *      .append("&lt;/p&gt;")
+     *      .toString()
+     * </pre>
+     *
+     */
+    public static final class Builder {
+
+        /**
+         * StringBuilder to be used in the Builder class.
+         */
+        private final StringBuilder sb;
+
+        /**
+         * CharSequenceTranslator to be used in the Builder class.
+         */
+        private final CharSequenceTranslator translator;
+
+        /**
+         * Builder constructor.
+         *
+         * @param translator a CharSequenceTranslator.
+         */
+        private Builder(final CharSequenceTranslator translator) {
+            this.sb = new StringBuilder();
+            this.translator = translator;
+        }
+
+        /**
+         * <p>Escape {@code input} according to the given {@link CharSequenceTranslator}.</p>
+         *
+         * @param input the String to escape
+         * @return {@code this}, to enable chaining
+         */
+        public Builder escape(final String input) {
+            sb.append(translator.translate(input));
+            return this;
+        }
+
+        /**
+         * Literal append, no escaping being done.
+         *
+         * @param input the String to append
+         * @return {@code this}, to enable chaining
+         */
+        public Builder append(final String input) {
+            sb.append(input);
+            return this;
+        }
+
+        /**
+         * <p>Return the escaped string.</p>
+         *
+         * @return the escaped string
+         */
+        @Override
+        public String toString() {
+            return sb.toString();
+        }
+    }
+
+    /**
+     * Get a {@link Builder}.
+     * @param translator the text translator
+     * @return {@link Builder}
+     */
+    public static StringEscapeUtils.Builder builder(final CharSequenceTranslator translator) {
+        return new Builder(translator);
+    }
+
+    // Java and JavaScript
+    //--------------------------------------------------------------------------
+    /**
+     * <p>Escapes the characters in a {@code String} using Java String rules.</p>
+     *
+     * <p>Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
+     *
+     * <p>So a tab becomes the characters {@code '\\'} and
+     * {@code 't'}.</p>
+     *
+     * <p>The only difference between Java strings and JavaScript strings
+     * is that in JavaScript, a single quote and forward-slash (/) are escaped.</p>
+     *
+     * <p>Example:</p>
+     * <pre>
+     * input string: He didn't say, "Stop!"
+     * output string: He didn't say, \"Stop!\"
+     * </pre>
+     *
+     * @param input  String to escape values in, may be null
+     * @return String with escaped values, {@code null} if null string input
+     */
+    public static final String escapeJava(final String input) {
+        return ESCAPE_JAVA.translate(input);
+    }
+
+    /**
+     * <p>Escapes the characters in a {@code String} using EcmaScript String rules.</p>
+     * <p>Escapes any values it finds into their EcmaScript String form.
+     * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
+     *
+     * <p>So a tab becomes the characters {@code '\\'} and
+     * {@code 't'}.</p>
+     *
+     * <p>The only difference between Java strings and EcmaScript strings
+     * is that in EcmaScript, a single quote and forward-slash (/) are escaped.</p>
+     *
+     * <p>Note that EcmaScript is best known by the JavaScript and ActionScript dialects. </p>
+     *
+     * <p>Example:</p>
+     * <pre>
+     * input string: He didn't say, "Stop!"
+     * output string: He didn\'t say, \"Stop!\"
+     * </pre>
+     *
+     * <b>Security Note.</b> We only provide backslash escaping in this method. For example, {@code '\"'} has the output
+     * {@code '\\\"'} which could result in potential issues in the case where the string being escaped is being used
+     * in an HTML tag like {@code <select onmouseover="..." />}. If you wish to have more rigorous string escaping, you
+     * may consider the
+     * <a href="https://www.owasp.org/index.php/Category:OWASP_Enterprise_Security_API_JAVA">ESAPI Libraries</a>.
+     * Further, you can view the <a href="https://github.com/esapi">ESAPI GitHub Org</a>.
+     *
+     * @param input  String to escape values in, may be null
+     * @return String with escaped values, {@code null} if null string input
+     */
+    public static final String escapeEcmaScript(final String input) {
+        return ESCAPE_ECMASCRIPT.translate(input);
+    }
+
+    /**
+     * <p>Escapes the characters in a {@code String} using Json String rules.</p>
+     * <p>Escapes any values it finds into their Json String form.
+     * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
+     *
+     * <p>So a tab becomes the characters {@code '\\'} and
+     * {@code 't'}.</p>
+     *
+     * <p>The only difference between Java strings and Json strings
+     * is that in Json, forward-slash (/) is escaped.</p>
+     *
+     * <p>See http://www.ietf.org/rfc/rfc4627.txt for further details. </p>
+     *
+     * <p>Example:</p>
+     * <pre>
+     * input string: He didn't say, "Stop!"
+     * output string: He didn't say, \"Stop!\"
+     * </pre>
+     *
+     * @param input  String to escape values in, may be null
+     * @return String with escaped values, {@code null} if null string input
+     */
+    public static final String escapeJson(final String input) {
+        return ESCAPE_JSON.translate(input);
+    }
+
+    /**
+     * <p>Unescapes any Java literals found in the {@code String}.
+     * For example, it will turn a sequence of {@code '\'} and
+     * {@code 'n'} into a newline character, unless the {@code '\'}
+     * is preceded by another {@code '\'}.</p>
+     *
+     * @param input  the {@code String} to unescape, may be null
+     * @return a new unescaped {@code String}, {@code null} if null string input
+     */
+    public static final String unescapeJava(final String input) {
+        return UNESCAPE_JAVA.translate(input);
+    }
+
+    /**
+     * <p>Unescapes any EcmaScript literals found in the {@code String}.</p>
+     *
+     * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'}
+     * into a newline character, unless the {@code '\'} is preceded by another
+     * {@code '\'}.</p>
+     *
+     * @see #unescapeJava(String)
+     * @param input  the {@code String} to unescape, may be null
+     * @return A new unescaped {@code String}, {@code null} if null string input
+     */
+    public static final String unescapeEcmaScript(final String input) {
+        return UNESCAPE_ECMASCRIPT.translate(input);
+    }
+
+    /**
+     * <p>Unescapes any Json literals found in the {@code String}.</p>
+     *
+     * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'}
+     * into a newline character, unless the {@code '\'} is preceded by another
+     * {@code '\'}.</p>
+     *
+     * @see #unescapeJava(String)
+     * @param input  the {@code String} to unescape, may be null
+     * @return A new unescaped {@code String}, {@code null} if null string input
+     */
+    public static final String unescapeJson(final String input) {
+        return UNESCAPE_JSON.translate(input);
+    }
+
+    // HTML and XML
+    //--------------------------------------------------------------------------
+    /**
+     * <p>Escapes the characters in a {@code String} using HTML entities.</p>
+     *
+     * <p>
+     * For example:
+     * </p>
+     * <p><code>"bread" &amp; "butter"</code></p>
+     * becomes:
+     * <p>
+     * <code>&amp;quot;bread&amp;quot; &amp;amp; &amp;quot;butter&amp;quot;</code>.
+     * </p>
+     *
+     * <p>Supports all known HTML 4.0 entities, including funky accents.
+     * Note that the commonly used apostrophe escape character (&amp;apos;)
+     * is not a legal entity and so is not supported). </p>
+     *
+     * @param input  the {@code String} to escape, may be null
+     * @return a new escaped {@code String}, {@code null} if null string input
+     *
+     * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a>
+     * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a>
+     * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a>
+     * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a>
+     * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a>
+     */
+    public static final String escapeHtml4(final String input) {
+        return ESCAPE_HTML4.translate(input);
+    }
+
+    /**
+     * <p>Escapes the characters in a {@code String} using HTML entities.
+     * But escapes them only once. i.e. does not escape already escaped characters.</p>
+     *
+     * <p>
+     * For example:
+     * </p>
+     * <p><code>"bread" &amp; "butter"</code></p>
+     * becomes:
+     * <p>
+     * <code>&amp;quot;bread&amp;quot; &amp;amp; &amp;quot;butter&amp;quot;</code>.
+     * </p>
+     *
+     * <p>
+     * But:
+     * </p>
+     * <p><code>&amp;quot;bread&amp;quot; &amp;amp; &amp;quot;butter&amp;quot;</code></p>
+     * remains unaffected.
+     *
+     * <p>Supports all known HTML 4.0 entities, including funky accents.
+     * Note that the commonly used apostrophe escape character (&amp;apos;)
+     * is not a legal entity and so is not supported). </p>
+     *
+     * @param input  the {@code String} to escape, may be null
+     * @return a new escaped {@code String}, {@code null} if null string input
+     *
+     * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a>
+     * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a>
+     * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a>
+     * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a>
+     * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a>
+     */
+    public static final String escapeHtml4Once(final String input) {
+        return ESCAPE_HTML4_ONCE.translate(input);
+    }
+
+
+    /**
+     * <p>Escapes the characters in a {@code String} using HTML entities.</p>
+     * <p>Supports only the HTML 3.0 entities. </p>
+     *
+     * @param input  the {@code String} to escape, may be null
+     * @return a new escaped {@code String}, {@code null} if null string input
+     */
+    public static final String escapeHtml3(final String input) {
+        return ESCAPE_HTML3.translate(input);
+    }
+
+    /**
+     * <p>Escapes the characters in a {@code String} using HTML entities.
+     * But escapes them only once. i.e. does not escape already escaped characters.</p>
+     * <p>Supports only the HTML 3.0 entities. </p>
+     *
+     * @param input  the {@code String} to escape, may be null
+     * @return a new escaped {@code String}, {@code null} if null string input
+     */
+    public static final String escapeHtml3Once(final String input) {
+        return ESCAPE_HTML3_ONCE.translate(input);
+    }
+
+    //-----------------------------------------------------------------------
+    /**
+     * <p>Unescapes a string containing entity escapes to a string
+     * containing the actual Unicode characters corresponding to the
+     * escapes. Supports HTML 4.0 entities.</p>
+     *
+     * <p>For example, the string {@code "&lt;Fran&ccedil;ais&gt;"}
+     * will become {@code "<Fran´┐Żais>"}</p>
+     *
+     * <p>If an entity is unrecognized, it is left alone, and inserted
+     * verbatim into the result string. e.g. {@code "&gt;&zzzz;x"} will
+     * become {@code ">&zzzz;x"}.</p>
+     *
+     * @param input  the {@code String} to unescape, may be null
+     * @return a new unescaped {@code String}, {@code null} if null string input
+     */
+    public static final String unescapeHtml4(final String input) {
+        return UNESCAPE_HTML4.translate(input);
+    }
+
+    /**
+     * <p>Unescapes a string containing entity escapes to a string
+     * containing the actual Unicode characters corresponding to the
+     * escapes. Supports only HTML 3.0 entities.</p>
+     *
+     * @param input  the {@code String} to unescape, may be null
+     * @return a new unescaped {@code String}, {@code null} if null string input
+     */
+    public static final String unescapeHtml3(final String input) {
+        return UNESCAPE_HTML3.translate(input);
+    }
+
+    /**
+     * <p>Escapes the characters in a {@code String} using XML entities.</p>
+     *
+     * <p>For example: {@code "bread" & "butter"} =&gt;
+     * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
+     * </p>
+     *
+     * <p>Note that XML 1.0 is a text-only format: it cannot represent control
+     * characters or unpaired Unicode surrogate codepoints, even after escaping.
+     * {@code escapeXml10} will remove characters that do not fit in the
+     * following ranges:</p>
+     *
+     * <p>{@code #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p>
+     *
+     * <p>Though not strictly necessary, {@code escapeXml10} will escape
+     * characters in the following ranges:</p>
+     *
+     * <p>{@code [#x7F-#x84] | [#x86-#x9F]}</p>
+     *
+     * <p>The returned string can be inserted into a valid XML 1.0 or XML 1.1
+     * document. If you want to allow more non-text characters in an XML 1.1
+     * document, use {@link #escapeXml11(String)}.</p>
+     *
+     * @param input  the {@code String} to escape, may be null
+     * @return a new escaped {@code String}, {@code null} if null string input
+     * @see #unescapeXml(java.lang.String)
+     */
+    public static String escapeXml10(final String input) {
+        return ESCAPE_XML10.translate(input);
+    }
+
+    /**
+     * <p>Escapes the characters in a {@code String} using XML entities.</p>
+     *
+     * <p>For example: {@code "bread" & "butter"} =&gt;
+     * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
+     * </p>
+     *
+     * <p>XML 1.1 can represent certain control characters, but it cannot represent
+     * the null byte or unpaired Unicode surrogate codepoints, even after escaping.
+     * {@code escapeXml11} will remove characters that do not fit in the following
+     * ranges:</p>
+     *
+     * <p>{@code [#x1-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p>
+     *
+     * <p>{@code escapeXml11} will escape characters in the following ranges:</p>
+     *
+     * <p>{@code [#x1-#x8] | [#xB-#xC] | [#xE-#x1F] | [#x7F-#x84] | [#x86-#x9F]}</p>
+     *
+     * <p>The returned string can be inserted into a valid XML 1.1 document. Do not
+     * use it for XML 1.0 documents.</p>
+     *
+     * @param input  the {@code String} to escape, may be null
+     * @return a new escaped {@code String}, {@code null} if null string input
+     * @see #unescapeXml(java.lang.String)
+     */
+    public static String escapeXml11(final String input) {
+        return ESCAPE_XML11.translate(input);
+    }
+
+    //-----------------------------------------------------------------------
+    /**
+     * <p>Unescapes a string containing XML entity escapes to a string
+     * containing the actual Unicode characters corresponding to the
+     * escapes.</p>
+     *
+     * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
+     * Does not support DTDs or external entities.</p>
+     *
+     * <p>Note that numerical \\u Unicode codes are unescaped to their respective
+     *    Unicode characters. This may change in future releases. </p>
+     *
+     * @param input  the {@code String} to unescape, may be null
+     * @return a new unescaped {@code String}, {@code null} if null string input
+     * @see #escapeXml10(String)
+     * @see #escapeXml11(String)
+     */
+    public static final String unescapeXml(final String input) {
+        return UNESCAPE_XML.translate(input);
+    }
+
+    //-----------------------------------------------------------------------
+
+    /**
+     * <p>Returns a {@code String} value for a CSV column enclosed in double quotes,
+     * if required.</p>
+     *
+     * <p>If the value contains a comma, newline or double quote, then the
+     *    String value is returned enclosed in double quotes.</p>
+     *
+     * <p>Any double quote characters in the value are escaped with another double quote.</p>
+     *
+     * <p>If the value does not contain a comma, newline or double quote, then the
+     *    String value is returned unchanged.</p>
+     *
+     * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
+     * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>.
+     *
+     * @param input the input CSV column String, may be null
+     * @return the input String, enclosed in double quotes if the value contains a comma,
+     * newline or double quote, {@code null} if null string input
+     */
+    public static final String escapeCsv(final String input) {
+        return ESCAPE_CSV.translate(input);
+    }
+
+    /**
+     * <p>Returns a {@code String} value for an unescaped CSV column. </p>
+     *
+     * <p>If the value is enclosed in double quotes, and contains a comma, newline
+     *    or double quote, then quotes are removed.
+     * </p>
+     *
+     * <p>Any double quote escaped characters (a pair of double quotes) are unescaped
+     *    to just one double quote. </p>
+     *
+     * <p>If the value is not enclosed in double quotes, or is and does not contain a
+     *    comma, newline or double quote, then the String value is returned unchanged.</p>
+     *
+     * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
+     * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>.
+     *
+     * @param input the input CSV column String, may be null
+     * @return the input String, with enclosing double quotes removed and embedded double
+     * quotes unescaped, {@code null} if null string input
+     */
+    public static final String unescapeCsv(final String input) {
+        return UNESCAPE_CSV.translate(input);
+    }
+
+    /**
+     * <p>Escapes the characters in a {@code String} using XSI rules.</p>
+     *
+     * <p><b>Beware!</b> In most cases you don't want to escape shell commands but use multi-argument
+     * methods provided by {@link java.lang.ProcessBuilder} or {@link java.lang.Runtime#exec(String[])}
+     * instead.</p>
+     *
+     * <p>Example:</p>
+     * <pre>
+     * input string: He didn't say, "Stop!"
+     * output string: He\ didn\'t\ say,\ \"Stop!\"
+     * </pre>
+     *
+     * @see <a href="http://pubs.opengroup.org/onlinepubs/7908799/xcu/chap2.html">Shell Command Language</a>
+     * @param input  String to escape values in, may be null
+     * @return String with escaped values, {@code null} if null string input
+     */
+    public static final String escapeXSI(final String input) {
+        return ESCAPE_XSI.translate(input);
+    }
+
+    /**
+     * <p>Unescapes the characters in a {@code String} using XSI rules.</p>
+     *
+     * @see StringEscapeUtils#escapeXSI(String)
+     * @param input  the {@code String} to unescape, may be null
+     * @return a new unescaped {@code String}, {@code null} if null string input
+     */
+    public static final String unescapeXSI(final String input) {
+        return UNESCAPE_XSI.translate(input);
+    }
+
+}


Mime
View raw message