Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id AFF7E200C50 for ; Sat, 4 Mar 2017 01:59:34 +0100 (CET) Received: by cust-asf.ponee.io (Postfix) id AEA9E160B8A; Sat, 4 Mar 2017 00:59:34 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 8D9C9160B87 for ; Sat, 4 Mar 2017 01:59:32 +0100 (CET) Received: (qmail 83139 invoked by uid 500); 4 Mar 2017 00:59:28 -0000 Mailing-List: contact commits-help@commons.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@commons.apache.org Delivered-To: mailing list commits@commons.apache.org Received: (qmail 81627 invoked by uid 99); 4 Mar 2017 00:59:27 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Sat, 04 Mar 2017 00:59:27 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 094F2E0A38; Sat, 4 Mar 2017 00:59:27 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: chtompki@apache.org To: commits@commons.apache.org Date: Sat, 04 Mar 2017 00:59:37 -0000 Message-Id: <311262f5deae4264975fe1e96bd71511@git.apache.org> In-Reply-To: <0121dac168374761a8d75526dc08d9f8@git.apache.org> References: <0121dac168374761a8d75526dc08d9f8@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [12/50] [abbrv] [text] chore: update packages back to org.apache.commons.text.* archived-at: Sat, 04 Mar 2017 00:59:34 -0000 http://git-wip-us.apache.org/repos/asf/commons-text/blob/c7cf533d/src/test/java/org/apache/commons/text/StrSubstitutorTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/text/StrSubstitutorTest.java b/src/test/java/org/apache/commons/text/StrSubstitutorTest.java new file mode 100644 index 0000000..e382691 --- /dev/null +++ b/src/test/java/org/apache/commons/text/StrSubstitutorTest.java @@ -0,0 +1,740 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.text; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertSame; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.util.HashMap; +import java.util.Map; +import java.util.Properties; + +import org.apache.commons.lang3.mutable.MutableObject; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +/** + * Test class for {@link StrSubstitutor}. + */ +public class StrSubstitutorTest { + + private Map values; + + @Before + public void setUp() throws Exception { + values = new HashMap<>(); + values.put("animal", "quick brown fox"); + values.put("target", "lazy dog"); + } + + @After + public void tearDown() throws Exception { + values = null; + } + + //----------------------------------------------------------------------- + /** + * Tests simple key replace. + */ + @Test + public void testReplaceSimple() { + doTestReplace("The quick brown fox jumps over the lazy dog.", "The ${animal} jumps over the ${target}.", true); + } + + /** + * Tests simple key replace. + */ + @Test + public void testReplaceSolo() { + doTestReplace("quick brown fox", "${animal}", false); + } + + /** + * Tests replace with no variables. + */ + @Test + public void testReplaceNoVariables() { + doTestNoReplace("The balloon arrived."); + } + + /** + * Tests replace with null. + */ + @Test + public void testReplaceNull() { + doTestNoReplace(null); + } + + /** + * Tests replace with null. + */ + @Test + public void testReplaceEmpty() { + doTestNoReplace(""); + } + + /** + * Tests key replace changing map after initialization (not recommended). + */ + @Test + public void testReplaceChangedMap() { + final StrSubstitutor sub = new StrSubstitutor(values); + values.put("target", "moon"); + assertEquals("The quick brown fox jumps over the moon.", sub.replace("The ${animal} jumps over the ${target}.")); + } + + /** + * Tests unknown key replace. + */ + @Test + public void testReplaceUnknownKey() { + doTestReplace("The ${person} jumps over the lazy dog.", "The ${person} jumps over the ${target}.", true); + doTestReplace("The ${person} jumps over the lazy dog. 1234567890.", "The ${person} jumps over the ${target}. ${undefined.number:-1234567890}.", true); + } + + /** + * Tests adjacent keys. + */ + @Test + public void testReplaceAdjacentAtStart() { + values.put("code", "GBP"); + values.put("amount", "12.50"); + final StrSubstitutor sub = new StrSubstitutor(values); + assertEquals("GBP12.50 charged", sub.replace("${code}${amount} charged")); + } + + /** + * Tests adjacent keys. + */ + @Test + public void testReplaceAdjacentAtEnd() { + values.put("code", "GBP"); + values.put("amount", "12.50"); + final StrSubstitutor sub = new StrSubstitutor(values); + assertEquals("Amount is GBP12.50", sub.replace("Amount is ${code}${amount}")); + } + + /** + * Tests simple recursive replace. + */ + @Test + public void testReplaceRecursive() { + values.put("animal", "${critter}"); + values.put("target", "${pet}"); + values.put("pet", "${petCharacteristic} dog"); + values.put("petCharacteristic", "lazy"); + values.put("critter", "${critterSpeed} ${critterColor} ${critterType}"); + values.put("critterSpeed", "quick"); + values.put("critterColor", "brown"); + values.put("critterType", "fox"); + doTestReplace("The quick brown fox jumps over the lazy dog.", "The ${animal} jumps over the ${target}.", true); + + values.put("pet", "${petCharacteristicUnknown:-lazy} dog"); + doTestReplace("The quick brown fox jumps over the lazy dog.", "The ${animal} jumps over the ${target}.", true); + } + + /** + * Tests escaping. + */ + @Test + public void testReplaceEscaping() { + doTestReplace("The ${animal} jumps over the lazy dog.", "The $${animal} jumps over the ${target}.", true); + } + + /** + * Tests escaping. + */ + @Test + public void testReplaceSoloEscaping() { + doTestReplace("${animal}", "$${animal}", false); + } + + /** + * Tests complex escaping. + */ + @Test + public void testReplaceComplexEscaping() { + doTestReplace("The ${quick brown fox} jumps over the lazy dog.", "The $${${animal}} jumps over the ${target}.", true); + doTestReplace("The ${quick brown fox} jumps over the lazy dog. ${1234567890}.", "The $${${animal}} jumps over the ${target}. $${${undefined.number:-1234567890}}.", true); + } + + /** + * Tests when no prefix or suffix. + */ + @Test + public void testReplaceNoPrefixNoSuffix() { + doTestReplace("The animal jumps over the lazy dog.", "The animal jumps over the ${target}.", true); + } + + /** + * Tests when no incomplete prefix. + */ + @Test + public void testReplaceIncompletePrefix() { + doTestReplace("The {animal} jumps over the lazy dog.", "The {animal} jumps over the ${target}.", true); + } + + /** + * Tests when prefix but no suffix. + */ + @Test + public void testReplacePrefixNoSuffix() { + doTestReplace("The ${animal jumps over the ${target} lazy dog.", "The ${animal jumps over the ${target} ${target}.", true); + } + + /** + * Tests when suffix but no prefix. + */ + @Test + public void testReplaceNoPrefixSuffix() { + doTestReplace("The animal} jumps over the lazy dog.", "The animal} jumps over the ${target}.", true); + } + + /** + * Tests when no variable name. + */ + @Test + public void testReplaceEmptyKeys() { + doTestReplace("The ${} jumps over the lazy dog.", "The ${} jumps over the ${target}.", true); + doTestReplace("The animal jumps over the lazy dog.", "The ${:-animal} jumps over the ${target}.", true); + } + + /** + * Tests replace creates output same as input. + */ + @Test + public void testReplaceToIdentical() { + values.put("animal", "$${${thing}}"); + values.put("thing", "animal"); + doTestReplace("The ${animal} jumps.", "The ${animal} jumps.", true); + } + + /** + * Tests a cyclic replace operation. + * The cycle should be detected and cause an exception to be thrown. + */ + @Test + public void testCyclicReplacement() { + final Map map = new HashMap<>(); + map.put("animal", "${critter}"); + map.put("target", "${pet}"); + map.put("pet", "${petCharacteristic} dog"); + map.put("petCharacteristic", "lazy"); + map.put("critter", "${critterSpeed} ${critterColor} ${critterType}"); + map.put("critterSpeed", "quick"); + map.put("critterColor", "brown"); + map.put("critterType", "${animal}"); + StrSubstitutor sub = new StrSubstitutor(map); + try { + sub.replace("The ${animal} jumps over the ${target}."); + fail("Cyclic replacement was not detected!"); + } catch (final IllegalStateException ex) { + // expected + } + + // also check even when default value is set. + map.put("critterType", "${animal:-fox}"); + sub = new StrSubstitutor(map); + try { + sub.replace("The ${animal} jumps over the ${target}."); + fail("Cyclic replacement was not detected!"); + } catch (final IllegalStateException ex) { + // expected + } + } + + /** + * Tests interpolation with weird boundary patterns. + */ + @Test + public void testReplaceWeirdPattens() { + doTestNoReplace(""); + doTestNoReplace("${}"); + doTestNoReplace("${ }"); + doTestNoReplace("${\t}"); + doTestNoReplace("${\n}"); + doTestNoReplace("${\b}"); + doTestNoReplace("${"); + doTestNoReplace("$}"); + doTestNoReplace("}"); + doTestNoReplace("${}$"); + doTestNoReplace("${${"); + doTestNoReplace("${${}}"); + doTestNoReplace("${$${}}"); + doTestNoReplace("${$$${}}"); + doTestNoReplace("${$$${$}}"); + doTestNoReplace("${${}}"); + doTestNoReplace("${${ }}"); + } + + /** + * Tests simple key replace. + */ + @Test + public void testReplacePartialString_noReplace() { + final StrSubstitutor sub = new StrSubstitutor(); + assertEquals("${animal} jumps", sub.replace("The ${animal} jumps over the ${target}.", 4, 15)); + } + + /** + * Tests whether a variable can be replaced in a variable name. + */ + @Test + public void testReplaceInVariable() { + values.put("animal.1", "fox"); + values.put("animal.2", "mouse"); + values.put("species", "2"); + final StrSubstitutor sub = new StrSubstitutor(values); + sub.setEnableSubstitutionInVariables(true); + assertEquals( + "Wrong result (1)", + "The mouse jumps over the lazy dog.", + sub.replace("The ${animal.${species}} jumps over the ${target}.")); + values.put("species", "1"); + assertEquals( + "Wrong result (2)", + "The fox jumps over the lazy dog.", + sub.replace("The ${animal.${species}} jumps over the ${target}.")); + assertEquals( + "Wrong result (3)", + "The fox jumps over the lazy dog.", + sub.replace("The ${unknown.animal.${unknown.species:-1}:-fox} jumps over the ${unknow.target:-lazy dog}.")); + } + + /** + * Tests whether substitution in variable names is disabled per default. + */ + @Test + public void testReplaceInVariableDisabled() { + values.put("animal.1", "fox"); + values.put("animal.2", "mouse"); + values.put("species", "2"); + final StrSubstitutor sub = new StrSubstitutor(values); + assertEquals( + "Wrong result (1)", + "The ${animal.${species}} jumps over the lazy dog.", + sub.replace("The ${animal.${species}} jumps over the ${target}.")); + assertEquals( + "Wrong result (2)", + "The ${animal.${species:-1}} jumps over the lazy dog.", + sub.replace("The ${animal.${species:-1}} jumps over the ${target}.")); + } + + /** + * Tests complex and recursive substitution in variable names. + */ + @Test + public void testReplaceInVariableRecursive() { + values.put("animal.2", "brown fox"); + values.put("animal.1", "white mouse"); + values.put("color", "white"); + values.put("species.white", "1"); + values.put("species.brown", "2"); + final StrSubstitutor sub = new StrSubstitutor(values); + sub.setEnableSubstitutionInVariables(true); + assertEquals( + "Wrong result (1)", + "The white mouse jumps over the lazy dog.", + sub.replace("The ${animal.${species.${color}}} jumps over the ${target}.")); + assertEquals( + "Wrong result (2)", + "The brown fox jumps over the lazy dog.", + sub.replace("The ${animal.${species.${unknownColor:-brown}}} jumps over the ${target}.")); + } + + @Test + public void testDefaultValueDelimiters() { + final Map map = new HashMap<>(); + map.put("animal", "fox"); + map.put("target", "dog"); + + StrSubstitutor sub = new StrSubstitutor(map, "${", "}", '$'); + assertEquals("The fox jumps over the lazy dog. 1234567890.", + sub.replace("The ${animal} jumps over the lazy ${target}. ${undefined.number:-1234567890}.")); + + sub = new StrSubstitutor(map, "${", "}", '$', "?:"); + assertEquals("The fox jumps over the lazy dog. 1234567890.", + sub.replace("The ${animal} jumps over the lazy ${target}. ${undefined.number?:1234567890}.")); + + sub = new StrSubstitutor(map, "${", "}", '$', "||"); + assertEquals("The fox jumps over the lazy dog. 1234567890.", + sub.replace("The ${animal} jumps over the lazy ${target}. ${undefined.number||1234567890}.")); + + sub = new StrSubstitutor(map, "${", "}", '$', "!"); + assertEquals("The fox jumps over the lazy dog. 1234567890.", + sub.replace("The ${animal} jumps over the lazy ${target}. ${undefined.number!1234567890}.")); + + sub = new StrSubstitutor(map, "${", "}", '$', ""); + sub.setValueDelimiterMatcher(null); + assertEquals("The fox jumps over the lazy dog. ${undefined.number!1234567890}.", + sub.replace("The ${animal} jumps over the lazy ${target}. ${undefined.number!1234567890}.")); + + sub = new StrSubstitutor(map, "${", "}", '$'); + sub.setValueDelimiterMatcher(null); + assertEquals("The fox jumps over the lazy dog. ${undefined.number!1234567890}.", + sub.replace("The ${animal} jumps over the lazy ${target}. ${undefined.number!1234567890}.")); + } + + //----------------------------------------------------------------------- + /** + * Tests protected. + */ + @Test + public void testResolveVariable() { + final StrBuilder builder = new StrBuilder("Hi ${name}!"); + final Map map = new HashMap<>(); + map.put("name", "commons"); + final StrSubstitutor sub = new StrSubstitutor(map) { + @Override + protected String resolveVariable(final String variableName, final StrBuilder buf, final int startPos, final int endPos) { + assertEquals("name", variableName); + assertSame(builder, buf); + assertEquals(3, startPos); + assertEquals(10, endPos); + return "jakarta"; + } + }; + sub.replaceIn(builder); + assertEquals("Hi jakarta!", builder.toString()); + } + + //----------------------------------------------------------------------- + /** + * Tests constructor. + */ + @Test + public void testConstructorNoArgs() { + final StrSubstitutor sub = new StrSubstitutor(); + assertEquals("Hi ${name}", sub.replace("Hi ${name}")); + } + + /** + * Tests constructor. + */ + @Test + public void testConstructorMapPrefixSuffix() { + final Map map = new HashMap<>(); + map.put("name", "commons"); + final StrSubstitutor sub = new StrSubstitutor(map, "<", ">"); + assertEquals("Hi < commons", sub.replace("Hi $< ")); + } + + /** + * Tests constructor. + */ + @Test + public void testConstructorMapFull() { + final Map map = new HashMap<>(); + map.put("name", "commons"); + StrSubstitutor sub = new StrSubstitutor(map, "<", ">", '!'); + assertEquals("Hi < commons", sub.replace("Hi !< ")); + sub = new StrSubstitutor(map, "<", ">", '!', "||"); + assertEquals("Hi < commons", sub.replace("Hi !< ")); + } + + //----------------------------------------------------------------------- + /** + * Tests get set. + */ + @Test + public void testGetSetEscape() { + final StrSubstitutor sub = new StrSubstitutor(); + assertEquals('$', sub.getEscapeChar()); + sub.setEscapeChar('<'); + assertEquals('<', sub.getEscapeChar()); + } + + /** + * Tests get set. + */ + @Test + public void testGetSetPrefix() { + final StrSubstitutor sub = new StrSubstitutor(); + assertTrue(sub.getVariablePrefixMatcher() instanceof StrMatcher.StringMatcher); + sub.setVariablePrefix('<'); + assertTrue(sub.getVariablePrefixMatcher() instanceof StrMatcher.CharMatcher); + + sub.setVariablePrefix("<<"); + assertTrue(sub.getVariablePrefixMatcher() instanceof StrMatcher.StringMatcher); + try { + sub.setVariablePrefix((String) null); + fail(); + } catch (final IllegalArgumentException ex) { + // expected + } + assertTrue(sub.getVariablePrefixMatcher() instanceof StrMatcher.StringMatcher); + + final StrMatcher matcher = StrMatcher.commaMatcher(); + sub.setVariablePrefixMatcher(matcher); + assertSame(matcher, sub.getVariablePrefixMatcher()); + try { + sub.setVariablePrefixMatcher((StrMatcher) null); + fail(); + } catch (final IllegalArgumentException ex) { + // expected + } + assertSame(matcher, sub.getVariablePrefixMatcher()); + } + + /** + * Tests get set. + */ + @Test + public void testGetSetSuffix() { + final StrSubstitutor sub = new StrSubstitutor(); + assertTrue(sub.getVariableSuffixMatcher() instanceof StrMatcher.StringMatcher); + sub.setVariableSuffix('<'); + assertTrue(sub.getVariableSuffixMatcher() instanceof StrMatcher.CharMatcher); + + sub.setVariableSuffix("<<"); + assertTrue(sub.getVariableSuffixMatcher() instanceof StrMatcher.StringMatcher); + try { + sub.setVariableSuffix((String) null); + fail(); + } catch (final IllegalArgumentException ex) { + // expected + } + assertTrue(sub.getVariableSuffixMatcher() instanceof StrMatcher.StringMatcher); + + final StrMatcher matcher = StrMatcher.commaMatcher(); + sub.setVariableSuffixMatcher(matcher); + assertSame(matcher, sub.getVariableSuffixMatcher()); + try { + sub.setVariableSuffixMatcher((StrMatcher) null); + fail(); + } catch (final IllegalArgumentException ex) { + // expected + } + assertSame(matcher, sub.getVariableSuffixMatcher()); + } + + /** + * Tests get set. + */ + @Test + public void testGetSetValueDelimiter() { + final StrSubstitutor sub = new StrSubstitutor(); + assertTrue(sub.getValueDelimiterMatcher() instanceof StrMatcher.StringMatcher); + sub.setValueDelimiter(':'); + assertTrue(sub.getValueDelimiterMatcher() instanceof StrMatcher.CharMatcher); + + sub.setValueDelimiter("||"); + assertTrue(sub.getValueDelimiterMatcher() instanceof StrMatcher.StringMatcher); + sub.setValueDelimiter((String) null); + assertNull(sub.getValueDelimiterMatcher()); + + final StrMatcher matcher = StrMatcher.commaMatcher(); + sub.setValueDelimiterMatcher(matcher); + assertSame(matcher, sub.getValueDelimiterMatcher()); + sub.setValueDelimiterMatcher((StrMatcher) null); + assertNull(sub.getValueDelimiterMatcher()); + } + + //----------------------------------------------------------------------- + /** + * Tests static. + */ + @Test + public void testStaticReplace() { + final Map map = new HashMap<>(); + map.put("name", "commons"); + assertEquals("Hi commons!", StrSubstitutor.replace("Hi ${name}!", map)); + } + + /** + * Tests static. + */ + @Test + public void testStaticReplacePrefixSuffix() { + final Map map = new HashMap<>(); + map.put("name", "commons"); + assertEquals("Hi commons!", StrSubstitutor.replace("Hi !", map, "<", ">")); + } + + /** + * Tests interpolation with system properties. + */ + @Test + public void testStaticReplaceSystemProperties() { + final StrBuilder buf = new StrBuilder(); + buf.append("Hi ").append(System.getProperty("user.name")); + buf.append(", you are working with "); + buf.append(System.getProperty("os.name")); + buf.append(", your home directory is "); + buf.append(System.getProperty("user.home")).append('.'); + assertEquals(buf.toString(), StrSubstitutor.replaceSystemProperties("Hi ${user.name}, you are " + + "working with ${os.name}, your home " + + "directory is ${user.home}.")); + } + + /** + * Test for LANG-1055: StrSubstitutor.replaceSystemProperties does not work consistently + */ + @Test + public void testLANG1055() { + System.setProperty("test_key", "test_value"); + + final String expected = StrSubstitutor.replace("test_key=${test_key}", System.getProperties()); + final String actual = StrSubstitutor.replaceSystemProperties("test_key=${test_key}"); + assertEquals(expected, actual); + } + + /** + * Test the replace of a properties object + */ + @Test + public void testSubstituteDefaultProperties(){ + final String org = "${doesnotwork}"; + System.setProperty("doesnotwork", "It works!"); + + // create a new Properties object with the System.getProperties as default + final Properties props = new Properties(System.getProperties()); + + assertEquals("It works!", StrSubstitutor.replace(org, props)); + } + + @Test + public void testSamePrefixAndSuffix() { + final Map map = new HashMap<>(); + map.put("greeting", "Hello"); + map.put(" there ", "XXX"); + map.put("name", "commons"); + assertEquals("Hi commons!", StrSubstitutor.replace("Hi @name@!", map, "@", "@")); + assertEquals("Hello there commons!", StrSubstitutor.replace("@greeting@ there @name@!", map, "@", "@")); + } + + @Test + public void testSubstitutePreserveEscape() { + final String org = "${not-escaped} $${escaped}"; + final Map map = new HashMap<>(); + map.put("not-escaped", "value"); + + final StrSubstitutor sub = new StrSubstitutor(map, "${", "}", '$'); + assertFalse(sub.isPreserveEscapes()); + assertEquals("value ${escaped}", sub.replace(org)); + + sub.setPreserveEscapes(true); + assertTrue(sub.isPreserveEscapes()); + assertEquals("value $${escaped}", sub.replace(org)); + } + + //----------------------------------------------------------------------- + private void doTestReplace(final String expectedResult, final String replaceTemplate, final boolean substring) { + final String expectedShortResult = expectedResult.substring(1, expectedResult.length() - 1); + final StrSubstitutor sub = new StrSubstitutor(values); + + // replace using String + assertEquals(expectedResult, sub.replace(replaceTemplate)); + if (substring) { + assertEquals(expectedShortResult, sub.replace(replaceTemplate, 1, replaceTemplate.length() - 2)); + } + + // replace using char[] + final char[] chars = replaceTemplate.toCharArray(); + assertEquals(expectedResult, sub.replace(chars)); + if (substring) { + assertEquals(expectedShortResult, sub.replace(chars, 1, chars.length - 2)); + } + + // replace using StringBuffer + StringBuffer buf = new StringBuffer(replaceTemplate); + assertEquals(expectedResult, sub.replace(buf)); + if (substring) { + assertEquals(expectedShortResult, sub.replace(buf, 1, buf.length() - 2)); + } + + // replace using StringBuilder + StringBuilder builder = new StringBuilder(replaceTemplate); + assertEquals(expectedResult, sub.replace(builder)); + if (substring) { + assertEquals(expectedShortResult, sub.replace(builder, 1, builder.length() - 2)); + } + + // replace using StrBuilder + StrBuilder bld = new StrBuilder(replaceTemplate); + assertEquals(expectedResult, sub.replace(bld)); + if (substring) { + assertEquals(expectedShortResult, sub.replace(bld, 1, bld.length() - 2)); + } + + // replace using object + final MutableObject obj = new MutableObject<>(replaceTemplate); // toString returns template + assertEquals(expectedResult, sub.replace(obj)); + + // replace in StringBuffer + buf = new StringBuffer(replaceTemplate); + assertTrue(sub.replaceIn(buf)); + assertEquals(expectedResult, buf.toString()); + if (substring) { + buf = new StringBuffer(replaceTemplate); + assertTrue(sub.replaceIn(buf, 1, buf.length() - 2)); + assertEquals(expectedResult, buf.toString()); // expect full result as remainder is untouched + } + + // replace in StringBuilder + builder = new StringBuilder(replaceTemplate); + assertTrue(sub.replaceIn(builder)); + assertEquals(expectedResult, builder.toString()); + if (substring) { + builder = new StringBuilder(replaceTemplate); + assertTrue(sub.replaceIn(builder, 1, builder.length() - 2)); + assertEquals(expectedResult, builder.toString()); // expect full result as remainder is untouched + } + + // replace in StrBuilder + bld = new StrBuilder(replaceTemplate); + assertTrue(sub.replaceIn(bld)); + assertEquals(expectedResult, bld.toString()); + if (substring) { + bld = new StrBuilder(replaceTemplate); + assertTrue(sub.replaceIn(bld, 1, bld.length() - 2)); + assertEquals(expectedResult, bld.toString()); // expect full result as remainder is untouched + } + } + + private void doTestNoReplace(final String replaceTemplate) { + final StrSubstitutor sub = new StrSubstitutor(values); + + if (replaceTemplate == null) { + assertEquals(null, sub.replace((String) null)); + assertEquals(null, sub.replace((String) null, 0, 100)); + assertEquals(null, sub.replace((char[]) null)); + assertEquals(null, sub.replace((char[]) null, 0, 100)); + assertEquals(null, sub.replace((StringBuffer) null)); + assertEquals(null, sub.replace((StringBuffer) null, 0, 100)); + assertEquals(null, sub.replace((StrBuilder) null)); + assertEquals(null, sub.replace((StrBuilder) null, 0, 100)); + assertEquals(null, sub.replace((Object) null)); + assertFalse(sub.replaceIn((StringBuffer) null)); + assertFalse(sub.replaceIn((StringBuffer) null, 0, 100)); + assertFalse(sub.replaceIn((StrBuilder) null)); + assertFalse(sub.replaceIn((StrBuilder) null, 0, 100)); + } else { + assertEquals(replaceTemplate, sub.replace(replaceTemplate)); + final StrBuilder bld = new StrBuilder(replaceTemplate); + assertFalse(sub.replaceIn(bld)); + assertEquals(replaceTemplate, bld.toString()); + } + } + +} http://git-wip-us.apache.org/repos/asf/commons-text/blob/c7cf533d/src/test/java/org/apache/commons/text/StrTokenizerTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/text/StrTokenizerTest.java b/src/test/java/org/apache/commons/text/StrTokenizerTest.java new file mode 100644 index 0000000..7e86084 --- /dev/null +++ b/src/test/java/org/apache/commons/text/StrTokenizerTest.java @@ -0,0 +1,913 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.text; + +import org.junit.Test; + +import static org.junit.Assert.*; + +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.NoSuchElementException; + +/** + * Unit test for {@link StrTokenizer}. + */ +public class StrTokenizerTest { + + private static final String CSV_SIMPLE_FIXTURE = "A,b,c"; + + private static final String TSV_SIMPLE_FIXTURE = "A\tb\tc"; + + private void checkClone(final StrTokenizer tokenizer) { + assertFalse(StrTokenizer.getCSVInstance() == tokenizer); + assertFalse(StrTokenizer.getTSVInstance() == tokenizer); + } + + // ----------------------------------------------------------------------- + @Test + public void test1() { + + final String input = "a;b;c;\"d;\"\"e\";f; ; ; "; + final StrTokenizer tok = new StrTokenizer(input); + tok.setDelimiterChar(';'); + tok.setQuoteChar('"'); + tok.setIgnoredMatcher(StrMatcher.trimMatcher()); + tok.setIgnoreEmptyTokens(false); + final String tokens[] = tok.getTokenArray(); + + final String expected[] = new String[]{"a", "b", "c", "d;\"e", "f", "", "", "",}; + + assertEquals(Arrays.toString(tokens), expected.length, tokens.length); + for (int i = 0; i < expected.length; i++) { + assertEquals("token[" + i + "] was '" + tokens[i] + "' but was expected to be '" + expected[i] + "'", + expected[i], tokens[i]); + } + + } + + @Test + public void test2() { + + final String input = "a;b;c ;\"d;\"\"e\";f; ; ;"; + final StrTokenizer tok = new StrTokenizer(input); + tok.setDelimiterChar(';'); + tok.setQuoteChar('"'); + tok.setIgnoredMatcher(StrMatcher.noneMatcher()); + tok.setIgnoreEmptyTokens(false); + final String tokens[] = tok.getTokenArray(); + + final String expected[] = new String[]{"a", "b", "c ", "d;\"e", "f", " ", " ", "",}; + + assertEquals(Arrays.toString(tokens), expected.length, tokens.length); + for (int i = 0; i < expected.length; i++) { + assertEquals("token[" + i + "] was '" + tokens[i] + "' but was expected to be '" + expected[i] + "'", + expected[i], tokens[i]); + } + + } + + @Test + public void test3() { + + final String input = "a;b; c;\"d;\"\"e\";f; ; ;"; + final StrTokenizer tok = new StrTokenizer(input); + tok.setDelimiterChar(';'); + tok.setQuoteChar('"'); + tok.setIgnoredMatcher(StrMatcher.noneMatcher()); + tok.setIgnoreEmptyTokens(false); + final String tokens[] = tok.getTokenArray(); + + final String expected[] = new String[]{"a", "b", " c", "d;\"e", "f", " ", " ", "",}; + + assertEquals(Arrays.toString(tokens), expected.length, tokens.length); + for (int i = 0; i < expected.length; i++) { + assertEquals("token[" + i + "] was '" + tokens[i] + "' but was expected to be '" + expected[i] + "'", + expected[i], tokens[i]); + } + + } + + @Test + public void test4() { + + final String input = "a;b; c;\"d;\"\"e\";f; ; ;"; + final StrTokenizer tok = new StrTokenizer(input); + tok.setDelimiterChar(';'); + tok.setQuoteChar('"'); + tok.setIgnoredMatcher(StrMatcher.trimMatcher()); + tok.setIgnoreEmptyTokens(true); + final String tokens[] = tok.getTokenArray(); + + final String expected[] = new String[]{"a", "b", "c", "d;\"e", "f",}; + + assertEquals(Arrays.toString(tokens), expected.length, tokens.length); + for (int i = 0; i < expected.length; i++) { + assertEquals("token[" + i + "] was '" + tokens[i] + "' but was expected to be '" + expected[i] + "'", + expected[i], tokens[i]); + } + + } + + @Test + public void test5() { + + final String input = "a;b; c;\"d;\"\"e\";f; ; ;"; + final StrTokenizer tok = new StrTokenizer(input); + tok.setDelimiterChar(';'); + tok.setQuoteChar('"'); + tok.setIgnoredMatcher(StrMatcher.trimMatcher()); + tok.setIgnoreEmptyTokens(false); + tok.setEmptyTokenAsNull(true); + final String tokens[] = tok.getTokenArray(); + + final String expected[] = new String[]{"a", "b", "c", "d;\"e", "f", null, null, null,}; + + assertEquals(Arrays.toString(tokens), expected.length, tokens.length); + for (int i = 0; i < expected.length; i++) { + assertEquals("token[" + i + "] was '" + tokens[i] + "' but was expected to be '" + expected[i] + "'", + expected[i], tokens[i]); + } + + } + + @Test + public void test6() { + + final String input = "a;b; c;\"d;\"\"e\";f; ; ;"; + final StrTokenizer tok = new StrTokenizer(input); + tok.setDelimiterChar(';'); + tok.setQuoteChar('"'); + tok.setIgnoredMatcher(StrMatcher.trimMatcher()); + tok.setIgnoreEmptyTokens(false); + // tok.setTreatingEmptyAsNull(true); + final String tokens[] = tok.getTokenArray(); + + final String expected[] = new String[]{"a", "b", " c", "d;\"e", "f", null, null, null,}; + + int nextCount = 0; + while (tok.hasNext()) { + tok.next(); + nextCount++; + } + + int prevCount = 0; + while (tok.hasPrevious()) { + tok.previous(); + prevCount++; + } + + assertEquals(Arrays.toString(tokens), expected.length, tokens.length); + + assertTrue("could not cycle through entire token list" + " using the 'hasNext' and 'next' methods", + nextCount == expected.length); + + assertTrue("could not cycle through entire token list" + " using the 'hasPrevious' and 'previous' methods", + prevCount == expected.length); + + } + + @Test + public void test7() { + + final String input = "a b c \"d e\" f "; + final StrTokenizer tok = new StrTokenizer(input); + tok.setDelimiterMatcher(StrMatcher.spaceMatcher()); + tok.setQuoteMatcher(StrMatcher.doubleQuoteMatcher()); + tok.setIgnoredMatcher(StrMatcher.noneMatcher()); + tok.setIgnoreEmptyTokens(false); + final String tokens[] = tok.getTokenArray(); + + final String expected[] = new String[]{"a", "", "", "b", "c", "d e", "f", "",}; + + assertEquals(Arrays.toString(tokens), expected.length, tokens.length); + for (int i = 0; i < expected.length; i++) { + assertEquals("token[" + i + "] was '" + tokens[i] + "' but was expected to be '" + expected[i] + "'", + expected[i], tokens[i]); + } + + } + + @Test + public void test8() { + + final String input = "a b c \"d e\" f "; + final StrTokenizer tok = new StrTokenizer(input); + tok.setDelimiterMatcher(StrMatcher.spaceMatcher()); + tok.setQuoteMatcher(StrMatcher.doubleQuoteMatcher()); + tok.setIgnoredMatcher(StrMatcher.noneMatcher()); + tok.setIgnoreEmptyTokens(true); + final String tokens[] = tok.getTokenArray(); + + final String expected[] = new String[]{"a", "b", "c", "d e", "f",}; + + assertEquals(Arrays.toString(tokens), expected.length, tokens.length); + for (int i = 0; i < expected.length; i++) { + assertEquals("token[" + i + "] was '" + tokens[i] + "' but was expected to be '" + expected[i] + "'", + expected[i], tokens[i]); + } + + } + + @Test + public void testBasic1() { + final String input = "a b c"; + final StrTokenizer tok = new StrTokenizer(input); + assertEquals("a", tok.next()); + assertEquals("b", tok.next()); + assertEquals("c", tok.next()); + assertFalse(tok.hasNext()); + } + + @Test + public void testBasic2() { + final String input = "a \nb\fc"; + final StrTokenizer tok = new StrTokenizer(input); + assertEquals("a", tok.next()); + assertEquals("b", tok.next()); + assertEquals("c", tok.next()); + assertFalse(tok.hasNext()); + } + + @Test + public void testBasic3() { + final String input = "a \nb\u0001\fc"; + final StrTokenizer tok = new StrTokenizer(input); + assertEquals("a", tok.next()); + assertEquals("b\u0001", tok.next()); + assertEquals("c", tok.next()); + assertFalse(tok.hasNext()); + } + + @Test + public void testBasic4() { + final String input = "a \"b\" c"; + final StrTokenizer tok = new StrTokenizer(input); + assertEquals("a", tok.next()); + assertEquals("\"b\"", tok.next()); + assertEquals("c", tok.next()); + assertFalse(tok.hasNext()); + } + + @Test + public void testBasic5() { + final String input = "a:b':c"; + final StrTokenizer tok = new StrTokenizer(input, ':', '\''); + assertEquals("a", tok.next()); + assertEquals("b'", tok.next()); + assertEquals("c", tok.next()); + assertFalse(tok.hasNext()); + } + + @Test + public void testBasicDelim1() { + final String input = "a:b:c"; + final StrTokenizer tok = new StrTokenizer(input, ':'); + assertEquals("a", tok.next()); + assertEquals("b", tok.next()); + assertEquals("c", tok.next()); + assertFalse(tok.hasNext()); + } + + @Test + public void testBasicDelim2() { + final String input = "a:b:c"; + final StrTokenizer tok = new StrTokenizer(input, ','); + assertEquals("a:b:c", tok.next()); + assertFalse(tok.hasNext()); + } + + @Test + public void testDelimString() { + final String input = "a##b##c"; + final StrTokenizer tok = new StrTokenizer(input, "##"); + + assertEquals("a", tok.next()); + assertEquals("b", tok.next()); + assertEquals("c", tok.next()); + assertFalse(tok.hasNext()); + } + + @Test + public void testDelimMatcher() { + final String input = "a/b\\c"; + final StrMatcher delimMatcher = new StrMatcher.CharSetMatcher(new char[]{'/', '\\'}); + + final StrTokenizer tok = new StrTokenizer(input, delimMatcher); + assertEquals("a", tok.next()); + assertEquals("b", tok.next()); + assertEquals("c", tok.next()); + assertFalse(tok.hasNext()); + } + + @Test + public void testDelimMatcherQuoteMatcher() { + final String input = "`a`;`b`;`c`"; + final StrMatcher delimMatcher = new StrMatcher.CharSetMatcher(new char[]{';'}); + final StrMatcher quoteMatcher = new StrMatcher.CharSetMatcher(new char[]{'`'}); + + final StrTokenizer tok = new StrTokenizer(input, delimMatcher, quoteMatcher); + assertEquals("a", tok.next()); + assertEquals("b", tok.next()); + assertEquals("c", tok.next()); + assertFalse(tok.hasNext()); + } + + @Test + public void testBasicEmpty1() { + final String input = "a b c"; + final StrTokenizer tok = new StrTokenizer(input); + tok.setIgnoreEmptyTokens(false); + assertEquals("a", tok.next()); + assertEquals("", tok.next()); + assertEquals("b", tok.next()); + assertEquals("c", tok.next()); + assertFalse(tok.hasNext()); + } + + @Test + public void testBasicEmpty2() { + final String input = "a b c"; + final StrTokenizer tok = new StrTokenizer(input); + tok.setIgnoreEmptyTokens(false); + tok.setEmptyTokenAsNull(true); + assertEquals("a", tok.next()); + assertEquals(null, tok.next()); + assertEquals("b", tok.next()); + assertEquals("c", tok.next()); + assertFalse(tok.hasNext()); + } + + @Test + public void testBasicQuoted1() { + final String input = "a 'b' c"; + final StrTokenizer tok = new StrTokenizer(input, ' ', '\''); + assertEquals("a", tok.next()); + assertEquals("b", tok.next()); + assertEquals("c", tok.next()); + assertFalse(tok.hasNext()); + } + + @Test + public void testBasicQuoted2() { + final String input = "a:'b':"; + final StrTokenizer tok = new StrTokenizer(input, ':', '\''); + tok.setIgnoreEmptyTokens(false); + tok.setEmptyTokenAsNull(true); + assertEquals("a", tok.next()); + assertEquals("b", tok.next()); + assertEquals(null, tok.next()); + assertFalse(tok.hasNext()); + } + + @Test + public void testBasicQuoted3() { + final String input = "a:'b''c'"; + final StrTokenizer tok = new StrTokenizer(input, ':', '\''); + tok.setIgnoreEmptyTokens(false); + tok.setEmptyTokenAsNull(true); + assertEquals("a", tok.next()); + assertEquals("b'c", tok.next()); + assertFalse(tok.hasNext()); + } + + @Test + public void testBasicQuoted4() { + final String input = "a: 'b' 'c' :d"; + final StrTokenizer tok = new StrTokenizer(input, ':', '\''); + tok.setTrimmerMatcher(StrMatcher.trimMatcher()); + tok.setIgnoreEmptyTokens(false); + tok.setEmptyTokenAsNull(true); + assertEquals("a", tok.next()); + assertEquals("b c", tok.next()); + assertEquals("d", tok.next()); + assertFalse(tok.hasNext()); + } + + @Test + public void testBasicQuoted5() { + final String input = "a: 'b'x'c' :d"; + final StrTokenizer tok = new StrTokenizer(input, ':', '\''); + tok.setTrimmerMatcher(StrMatcher.trimMatcher()); + tok.setIgnoreEmptyTokens(false); + tok.setEmptyTokenAsNull(true); + assertEquals("a", tok.next()); + assertEquals("bxc", tok.next()); + assertEquals("d", tok.next()); + assertFalse(tok.hasNext()); + } + + @Test + public void testBasicQuoted6() { + final String input = "a:'b'\"c':d"; + final StrTokenizer tok = new StrTokenizer(input, ':'); + tok.setQuoteMatcher(StrMatcher.quoteMatcher()); + assertEquals("a", tok.next()); + assertEquals("b\"c:d", tok.next()); + assertFalse(tok.hasNext()); + } + + @Test + public void testBasicQuoted7() { + final String input = "a:\"There's a reason here\":b"; + final StrTokenizer tok = new StrTokenizer(input, ':'); + tok.setQuoteMatcher(StrMatcher.quoteMatcher()); + assertEquals("a", tok.next()); + assertEquals("There's a reason here", tok.next()); + assertEquals("b", tok.next()); + assertFalse(tok.hasNext()); + } + + @Test + public void testBasicQuotedTrimmed1() { + final String input = "a: 'b' :"; + final StrTokenizer tok = new StrTokenizer(input, ':', '\''); + tok.setTrimmerMatcher(StrMatcher.trimMatcher()); + tok.setIgnoreEmptyTokens(false); + tok.setEmptyTokenAsNull(true); + assertEquals("a", tok.next()); + assertEquals("b", tok.next()); + assertEquals(null, tok.next()); + assertFalse(tok.hasNext()); + } + + @Test + public void testBasicTrimmed1() { + final String input = "a: b : "; + final StrTokenizer tok = new StrTokenizer(input, ':'); + tok.setTrimmerMatcher(StrMatcher.trimMatcher()); + tok.setIgnoreEmptyTokens(false); + tok.setEmptyTokenAsNull(true); + assertEquals("a", tok.next()); + assertEquals("b", tok.next()); + assertEquals(null, tok.next()); + assertFalse(tok.hasNext()); + } + + @Test + public void testBasicTrimmed2() { + final String input = "a: b :"; + final StrTokenizer tok = new StrTokenizer(input, ':'); + tok.setTrimmerMatcher(StrMatcher.stringMatcher(" ")); + tok.setIgnoreEmptyTokens(false); + tok.setEmptyTokenAsNull(true); + assertEquals("a", tok.next()); + assertEquals("b", tok.next()); + assertEquals(null, tok.next()); + assertFalse(tok.hasNext()); + } + + @Test + public void testBasicIgnoreTrimmed1() { + final String input = "a: bIGNOREc : "; + final StrTokenizer tok = new StrTokenizer(input, ':'); + tok.setIgnoredMatcher(StrMatcher.stringMatcher("IGNORE")); + tok.setTrimmerMatcher(StrMatcher.trimMatcher()); + tok.setIgnoreEmptyTokens(false); + tok.setEmptyTokenAsNull(true); + assertEquals("a", tok.next()); + assertEquals("bc", tok.next()); + assertEquals(null, tok.next()); + assertFalse(tok.hasNext()); + } + + @Test + public void testBasicIgnoreTrimmed2() { + final String input = "IGNOREaIGNORE: IGNORE bIGNOREc IGNORE : IGNORE "; + final StrTokenizer tok = new StrTokenizer(input, ':'); + tok.setIgnoredMatcher(StrMatcher.stringMatcher("IGNORE")); + tok.setTrimmerMatcher(StrMatcher.trimMatcher()); + tok.setIgnoreEmptyTokens(false); + tok.setEmptyTokenAsNull(true); + assertEquals("a", tok.next()); + assertEquals("bc", tok.next()); + assertEquals(null, tok.next()); + assertFalse(tok.hasNext()); + } + + @Test + public void testBasicIgnoreTrimmed3() { + final String input = "IGNOREaIGNORE: IGNORE bIGNOREc IGNORE : IGNORE "; + final StrTokenizer tok = new StrTokenizer(input, ':'); + tok.setIgnoredMatcher(StrMatcher.stringMatcher("IGNORE")); + tok.setIgnoreEmptyTokens(false); + tok.setEmptyTokenAsNull(true); + assertEquals("a", tok.next()); + assertEquals(" bc ", tok.next()); + assertEquals(" ", tok.next()); + assertFalse(tok.hasNext()); + } + + @Test + public void testBasicIgnoreTrimmed4() { + final String input = "IGNOREaIGNORE: IGNORE 'bIGNOREc'IGNORE'd' IGNORE : IGNORE "; + final StrTokenizer tok = new StrTokenizer(input, ':', '\''); + tok.setIgnoredMatcher(StrMatcher.stringMatcher("IGNORE")); + tok.setTrimmerMatcher(StrMatcher.trimMatcher()); + tok.setIgnoreEmptyTokens(false); + tok.setEmptyTokenAsNull(true); + assertEquals("a", tok.next()); + assertEquals("bIGNOREcd", tok.next()); + assertEquals(null, tok.next()); + assertFalse(tok.hasNext()); + } + + //----------------------------------------------------------------------- + @Test + public void testListArray() { + final String input = "a b c"; + final StrTokenizer tok = new StrTokenizer(input); + final String[] array = tok.getTokenArray(); + final List list = tok.getTokenList(); + + assertEquals(Arrays.asList(array), list); + assertEquals(3, list.size()); + } + + //----------------------------------------------------------------------- + private void testCSV(final String data) { + this.testXSVAbc(StrTokenizer.getCSVInstance(data)); + this.testXSVAbc(StrTokenizer.getCSVInstance(data.toCharArray())); + } + + @Test + public void testCSVEmpty() { + this.testEmpty(StrTokenizer.getCSVInstance()); + this.testEmpty(StrTokenizer.getCSVInstance("")); + } + + @Test + public void testCSVSimple() { + this.testCSV(CSV_SIMPLE_FIXTURE); + } + + @Test + public void testCSVSimpleNeedsTrim() { + this.testCSV(" " + CSV_SIMPLE_FIXTURE); + this.testCSV(" \n\t " + CSV_SIMPLE_FIXTURE); + this.testCSV(" \n " + CSV_SIMPLE_FIXTURE + "\n\n\r"); + } + + void testEmpty(final StrTokenizer tokenizer) { + this.checkClone(tokenizer); + assertFalse(tokenizer.hasNext()); + assertFalse(tokenizer.hasPrevious()); + assertEquals(null, tokenizer.nextToken()); + assertEquals(0, tokenizer.size()); + try { + tokenizer.next(); + fail(); + } catch (final NoSuchElementException ex) {} + } + + @Test + public void testGetContent() { + final String input = "a b c \"d e\" f "; + StrTokenizer tok = new StrTokenizer(input); + assertEquals(input, tok.getContent()); + + tok = new StrTokenizer(input.toCharArray()); + assertEquals(input, tok.getContent()); + + tok = new StrTokenizer(); + assertEquals(null, tok.getContent()); + } + + //----------------------------------------------------------------------- + @Test + public void testChaining() { + final StrTokenizer tok = new StrTokenizer(); + assertEquals(tok, tok.reset()); + assertEquals(tok, tok.reset("")); + assertEquals(tok, tok.reset(new char[0])); + assertEquals(tok, tok.setDelimiterChar(' ')); + assertEquals(tok, tok.setDelimiterString(" ")); + assertEquals(tok, tok.setDelimiterMatcher(null)); + assertEquals(tok, tok.setQuoteChar(' ')); + assertEquals(tok, tok.setQuoteMatcher(null)); + assertEquals(tok, tok.setIgnoredChar(' ')); + assertEquals(tok, tok.setIgnoredMatcher(null)); + assertEquals(tok, tok.setTrimmerMatcher(null)); + assertEquals(tok, tok.setEmptyTokenAsNull(false)); + assertEquals(tok, tok.setIgnoreEmptyTokens(false)); + } + + /** + * Tests that the {@link StrTokenizer#clone()} clone method catches {@link CloneNotSupportedException} and returns + * null. + */ + @Test + public void testCloneNotSupportedException() { + final Object notCloned = new StrTokenizer() { + @Override + Object cloneReset() throws CloneNotSupportedException { + throw new CloneNotSupportedException("test"); + } + }.clone(); + assertNull(notCloned); + } + + @Test + public void testCloneNull() { + final StrTokenizer tokenizer = new StrTokenizer((char[]) null); + // Start sanity check + assertEquals(null, tokenizer.nextToken()); + tokenizer.reset(); + assertEquals(null, tokenizer.nextToken()); + // End sanity check + final StrTokenizer clonedTokenizer = (StrTokenizer) tokenizer.clone(); + tokenizer.reset(); + assertEquals(null, tokenizer.nextToken()); + assertEquals(null, clonedTokenizer.nextToken()); + } + + @Test + public void testCloneReset() { + final char[] input = new char[]{'a'}; + final StrTokenizer tokenizer = new StrTokenizer(input); + // Start sanity check + assertEquals("a", tokenizer.nextToken()); + tokenizer.reset(input); + assertEquals("a", tokenizer.nextToken()); + // End sanity check + final StrTokenizer clonedTokenizer = (StrTokenizer) tokenizer.clone(); + input[0] = 'b'; + tokenizer.reset(input); + assertEquals("b", tokenizer.nextToken()); + assertEquals("a", clonedTokenizer.nextToken()); + } + + // ----------------------------------------------------------------------- + @Test + public void testConstructor_String() { + StrTokenizer tok = new StrTokenizer("a b"); + assertEquals("a", tok.next()); + assertEquals("b", tok.next()); + assertFalse(tok.hasNext()); + + tok = new StrTokenizer(""); + assertFalse(tok.hasNext()); + + tok = new StrTokenizer((String) null); + assertFalse(tok.hasNext()); + } + + //----------------------------------------------------------------------- + @Test + public void testConstructor_String_char() { + StrTokenizer tok = new StrTokenizer("a b", ' '); + assertEquals(1, tok.getDelimiterMatcher().isMatch(" ".toCharArray(), 0, 0, 1)); + assertEquals("a", tok.next()); + assertEquals("b", tok.next()); + assertFalse(tok.hasNext()); + + tok = new StrTokenizer("", ' '); + assertFalse(tok.hasNext()); + + tok = new StrTokenizer((String) null, ' '); + assertFalse(tok.hasNext()); + } + + //----------------------------------------------------------------------- + @Test + public void testConstructor_String_char_char() { + StrTokenizer tok = new StrTokenizer("a b", ' ', '"'); + assertEquals(1, tok.getDelimiterMatcher().isMatch(" ".toCharArray(), 0, 0, 1)); + assertEquals(1, tok.getQuoteMatcher().isMatch("\"".toCharArray(), 0, 0, 1)); + assertEquals("a", tok.next()); + assertEquals("b", tok.next()); + assertFalse(tok.hasNext()); + + tok = new StrTokenizer("", ' ', '"'); + assertFalse(tok.hasNext()); + + tok = new StrTokenizer((String) null, ' ', '"'); + assertFalse(tok.hasNext()); + } + + //----------------------------------------------------------------------- + @Test + public void testConstructor_charArray() { + StrTokenizer tok = new StrTokenizer("a b".toCharArray()); + assertEquals("a", tok.next()); + assertEquals("b", tok.next()); + assertFalse(tok.hasNext()); + + tok = new StrTokenizer(new char[0]); + assertFalse(tok.hasNext()); + + tok = new StrTokenizer((char[]) null); + assertFalse(tok.hasNext()); + } + + //----------------------------------------------------------------------- + @Test + public void testConstructor_charArray_char() { + StrTokenizer tok = new StrTokenizer("a b".toCharArray(), ' '); + assertEquals(1, tok.getDelimiterMatcher().isMatch(" ".toCharArray(), 0, 0, 1)); + assertEquals("a", tok.next()); + assertEquals("b", tok.next()); + assertFalse(tok.hasNext()); + + tok = new StrTokenizer(new char[0], ' '); + assertFalse(tok.hasNext()); + + tok = new StrTokenizer((char[]) null, ' '); + assertFalse(tok.hasNext()); + } + + //----------------------------------------------------------------------- + @Test + public void testConstructor_charArray_char_char() { + StrTokenizer tok = new StrTokenizer("a b".toCharArray(), ' ', '"'); + assertEquals(1, tok.getDelimiterMatcher().isMatch(" ".toCharArray(), 0, 0, 1)); + assertEquals(1, tok.getQuoteMatcher().isMatch("\"".toCharArray(), 0, 0, 1)); + assertEquals("a", tok.next()); + assertEquals("b", tok.next()); + assertFalse(tok.hasNext()); + + tok = new StrTokenizer(new char[0], ' ', '"'); + assertFalse(tok.hasNext()); + + tok = new StrTokenizer((char[]) null, ' ', '"'); + assertFalse(tok.hasNext()); + } + + //----------------------------------------------------------------------- + @Test + public void testReset() { + final StrTokenizer tok = new StrTokenizer("a b c"); + assertEquals("a", tok.next()); + assertEquals("b", tok.next()); + assertEquals("c", tok.next()); + assertFalse(tok.hasNext()); + + tok.reset(); + assertEquals("a", tok.next()); + assertEquals("b", tok.next()); + assertEquals("c", tok.next()); + assertFalse(tok.hasNext()); + } + + //----------------------------------------------------------------------- + @Test + public void testReset_String() { + final StrTokenizer tok = new StrTokenizer("x x x"); + tok.reset("d e"); + assertEquals("d", tok.next()); + assertEquals("e", tok.next()); + assertFalse(tok.hasNext()); + + tok.reset((String) null); + assertFalse(tok.hasNext()); + } + + //----------------------------------------------------------------------- + @Test + public void testReset_charArray() { + final StrTokenizer tok = new StrTokenizer("x x x"); + + final char[] array = new char[] {'a', 'b', 'c'}; + tok.reset(array); + assertEquals("abc", tok.next()); + assertFalse(tok.hasNext()); + + tok.reset((char[]) null); + assertFalse(tok.hasNext()); + } + + //----------------------------------------------------------------------- + @Test + public void testTSV() { + this.testXSVAbc(StrTokenizer.getTSVInstance(TSV_SIMPLE_FIXTURE)); + this.testXSVAbc(StrTokenizer.getTSVInstance(TSV_SIMPLE_FIXTURE.toCharArray())); + } + + @Test + public void testTSVEmpty() { + this.testEmpty(StrTokenizer.getTSVInstance()); + this.testEmpty(StrTokenizer.getTSVInstance("")); + } + + void testXSVAbc(final StrTokenizer tokenizer) { + this.checkClone(tokenizer); + assertEquals(-1, tokenizer.previousIndex()); + assertEquals(0, tokenizer.nextIndex()); + assertEquals(null, tokenizer.previousToken()); + assertEquals("A", tokenizer.nextToken()); + assertEquals(1, tokenizer.nextIndex()); + assertEquals("b", tokenizer.nextToken()); + assertEquals(2, tokenizer.nextIndex()); + assertEquals("c", tokenizer.nextToken()); + assertEquals(3, tokenizer.nextIndex()); + assertEquals(null, tokenizer.nextToken()); + assertEquals(3, tokenizer.nextIndex()); + assertEquals("c", tokenizer.previousToken()); + assertEquals(2, tokenizer.nextIndex()); + assertEquals("b", tokenizer.previousToken()); + assertEquals(1, tokenizer.nextIndex()); + assertEquals("A", tokenizer.previousToken()); + assertEquals(0, tokenizer.nextIndex()); + assertEquals(null, tokenizer.previousToken()); + assertEquals(0, tokenizer.nextIndex()); + assertEquals(-1, tokenizer.previousIndex()); + assertEquals(3, tokenizer.size()); + } + + @Test + public void testIteration() { + final StrTokenizer tkn = new StrTokenizer("a b c"); + assertFalse(tkn.hasPrevious()); + try { + tkn.previous(); + fail(); + } catch (final NoSuchElementException ex) {} + assertTrue(tkn.hasNext()); + + assertEquals("a", tkn.next()); + try { + tkn.remove(); + fail(); + } catch (final UnsupportedOperationException ex) {} + try { + tkn.set("x"); + fail(); + } catch (final UnsupportedOperationException ex) {} + try { + tkn.add("y"); + fail(); + } catch (final UnsupportedOperationException ex) {} + assertTrue(tkn.hasPrevious()); + assertTrue(tkn.hasNext()); + + assertEquals("b", tkn.next()); + assertTrue(tkn.hasPrevious()); + assertTrue(tkn.hasNext()); + + assertEquals("c", tkn.next()); + assertTrue(tkn.hasPrevious()); + assertFalse(tkn.hasNext()); + + try { + tkn.next(); + fail(); + } catch (final NoSuchElementException ex) {} + assertTrue(tkn.hasPrevious()); + assertFalse(tkn.hasNext()); + } + + //----------------------------------------------------------------------- + @Test + public void testTokenizeSubclassInputChange() { + final StrTokenizer tkn = new StrTokenizer("a b c d e") { + @Override + protected List tokenize(final char[] chars, final int offset, final int count) { + return super.tokenize("w x y z".toCharArray(), 2, 5); + } + }; + assertEquals("x", tkn.next()); + assertEquals("y", tkn.next()); + } + + //----------------------------------------------------------------------- + @Test + public void testTokenizeSubclassOutputChange() { + final StrTokenizer tkn = new StrTokenizer("a b c") { + @Override + protected List tokenize(final char[] chars, final int offset, final int count) { + final List list = super.tokenize(chars, offset, count); + Collections.reverse(list); + return list; + } + }; + assertEquals("c", tkn.next()); + assertEquals("b", tkn.next()); + assertEquals("a", tkn.next()); + } + + //----------------------------------------------------------------------- + @Test + public void testToString() { + final StrTokenizer tkn = new StrTokenizer("a b c d e"); + assertEquals("StrTokenizer[not tokenized yet]", tkn.toString()); + tkn.next(); + assertEquals("StrTokenizer[a, b, c, d, e]", tkn.toString()); + } + +} http://git-wip-us.apache.org/repos/asf/commons-text/blob/c7cf533d/src/test/java/org/apache/commons/text/StringEscapeUtilsTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/text/StringEscapeUtilsTest.java b/src/test/java/org/apache/commons/text/StringEscapeUtilsTest.java new file mode 100644 index 0000000..f716763 --- /dev/null +++ b/src/test/java/org/apache/commons/text/StringEscapeUtilsTest.java @@ -0,0 +1,601 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.text; + +import org.junit.Test; + +import java.io.IOException; +import java.io.StringWriter; +import java.lang.reflect.Constructor; +import java.lang.reflect.Modifier; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Paths; + +import static org.apache.commons.text.StringEscapeUtils.escapeXSI; +import static org.apache.commons.text.StringEscapeUtils.unescapeXSI; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +/** + * Unit tests for {@link StringEscapeUtils}. + * + *

+ * This code has been adapted from Apache Commons Lang 3.5. + *

+ * + */ +public class StringEscapeUtilsTest { + private final static String FOO = "foo"; + + @Test + public void testConstructor() { + assertNotNull(new StringEscapeUtils()); + final Constructor[] cons = StringEscapeUtils.class.getDeclaredConstructors(); + assertEquals(1, cons.length); + assertTrue(Modifier.isPublic(cons[0].getModifiers())); + assertTrue(Modifier.isPublic(StringEscapeUtils.class.getModifiers())); + assertFalse(Modifier.isFinal(StringEscapeUtils.class.getModifiers())); + } + + @Test + public void testEscapeJava() throws IOException { + assertEquals(null, StringEscapeUtils.escapeJava(null)); + try { + StringEscapeUtils.ESCAPE_JAVA.translate(null, null); + fail(); + } catch (final IOException ex) { + fail(); + } catch (final IllegalArgumentException ex) { + } + try { + StringEscapeUtils.ESCAPE_JAVA.translate("", null); + fail(); + } catch (final IOException ex) { + fail(); + } catch (final IllegalArgumentException ex) { + } + + assertEscapeJava("empty string", "", ""); + assertEscapeJava(FOO, FOO); + assertEscapeJava("tab", "\\t", "\t"); + assertEscapeJava("backslash", "\\\\", "\\"); + assertEscapeJava("single quote should not be escaped", "'", "'"); + assertEscapeJava("\\\\\\b\\t\\r", "\\\b\t\r"); + assertEscapeJava("\\u1234", "\u1234"); + assertEscapeJava("\\u0234", "\u0234"); + assertEscapeJava("\\u00EF", "\u00ef"); + assertEscapeJava("\\u0001", "\u0001"); + assertEscapeJava("Should use capitalized Unicode hex", "\\uABCD", "\uabcd"); + + assertEscapeJava("He didn't say, \\\"stop!\\\"", + "He didn't say, \"stop!\""); + assertEscapeJava("non-breaking space", "This space is non-breaking:" + "\\u00A0", + "This space is non-breaking:\u00a0"); + assertEscapeJava("\\uABCD\\u1234\\u012C", + "\uABCD\u1234\u012C"); + } + + /** + * Tests https://issues.apache.org/jira/browse/LANG-421 + */ + @Test + public void testEscapeJavaWithSlash() { + final String input = "String with a slash (/) in it"; + + final String expected = input; + final String actual = StringEscapeUtils.escapeJava(input); + + /** + * In 2.4 StringEscapeUtils.escapeJava(String) escapes '/' characters, which are not a valid character to escape + * in a Java string. + */ + assertEquals(expected, actual); + } + + private void assertEscapeJava(final String escaped, final String original) throws IOException { + assertEscapeJava(null, escaped, original); + } + + private void assertEscapeJava(String message, final String expected, final String original) throws IOException { + final String converted = StringEscapeUtils.escapeJava(original); + message = "escapeJava(String) failed" + (message == null ? "" : (": " + message)); + assertEquals(message, expected, converted); + + final StringWriter writer = new StringWriter(); + StringEscapeUtils.ESCAPE_JAVA.translate(original, writer); + assertEquals(expected, writer.toString()); + } + + @Test + public void testUnescapeJava() throws IOException { + assertEquals(null, StringEscapeUtils.unescapeJava(null)); + try { + StringEscapeUtils.UNESCAPE_JAVA.translate(null, null); + fail(); + } catch (final IOException ex) { + fail(); + } catch (final IllegalArgumentException ex) { + } + try { + StringEscapeUtils.UNESCAPE_JAVA.translate("", null); + fail(); + } catch (final IOException ex) { + fail(); + } catch (final IllegalArgumentException ex) { + } + try { + StringEscapeUtils.unescapeJava("\\u02-3"); + fail(); + } catch (final RuntimeException ex) { + } + + assertUnescapeJava("", ""); + assertUnescapeJava("test", "test"); + assertUnescapeJava("\ntest\b", "\\ntest\\b"); + assertUnescapeJava("\u123425foo\ntest\b", "\\u123425foo\\ntest\\b"); + assertUnescapeJava("'\foo\teste\r", "\\'\\foo\\teste\\r"); + assertUnescapeJava("", "\\"); + //foo + assertUnescapeJava("lowercase Unicode", "\uABCDx", "\\uabcdx"); + assertUnescapeJava("uppercase Unicode", "\uABCDx", "\\uABCDx"); + assertUnescapeJava("Unicode as final character", "\uABCD", "\\uabcd"); + } + + private void assertUnescapeJava(final String unescaped, final String original) throws IOException { + assertUnescapeJava(null, unescaped, original); + } + + private void assertUnescapeJava(final String message, final String unescaped, final String original) throws IOException { + final String expected = unescaped; + final String actual = StringEscapeUtils.unescapeJava(original); + + assertEquals("unescape(String) failed" + + (message == null ? "" : (": " + message)) + + ": expected '" + StringEscapeUtils.escapeJava(expected) + + // we escape this so we can see it in the error message + "' actual '" + StringEscapeUtils.escapeJava(actual) + "'", + expected, actual); + + final StringWriter writer = new StringWriter(); + StringEscapeUtils.UNESCAPE_JAVA.translate(original, writer); + assertEquals(unescaped, writer.toString()); + + } + + @Test + public void testEscapeEcmaScript() { + assertEquals(null, StringEscapeUtils.escapeEcmaScript(null)); + try { + StringEscapeUtils.ESCAPE_ECMASCRIPT.translate(null, null); + fail(); + } catch (final IOException ex) { + fail(); + } catch (final IllegalArgumentException ex) { + } + try { + StringEscapeUtils.ESCAPE_ECMASCRIPT.translate("", null); + fail(); + } catch (final IOException ex) { + fail(); + } catch (final IllegalArgumentException ex) { + } + + assertEquals("He didn\\'t say, \\\"stop!\\\"", StringEscapeUtils.escapeEcmaScript("He didn't say, \"stop!\"")); + assertEquals("document.getElementById(\\\"test\\\").value = \\'';")); + } + + + // HTML and XML + //-------------------------------------------------------------- + + private static final String[][] HTML_ESCAPES = { + {"no escaping", "plain text", "plain text"}, + {"no escaping", "plain text", "plain text"}, + {"empty string", "", ""}, + {"null", null, null}, + {"ampersand", "bread & butter", "bread & butter"}, + {"quotes", ""bread" & butter", "\"bread\" & butter"}, + {"final character only", "greater than >", "greater than >"}, + {"first character only", "< less than", "< less than"}, + {"apostrophe", "Huntington's chorea", "Huntington's chorea"}, + {"languages", "English,Français,\u65E5\u672C\u8A9E (nihongo)", "English,Fran\u00E7ais,\u65E5\u672C\u8A9E (nihongo)"}, + {"8-bit ascii shouldn't number-escape", "\u0080\u009F", "\u0080\u009F"}, + }; + + @Test + public void testEscapeHtml() { + for (final String[] element : HTML_ESCAPES) { + final String message = element[0]; + final String expected = element[1]; + final String original = element[2]; + assertEquals(message, expected, StringEscapeUtils.escapeHtml4(original)); + final StringWriter sw = new StringWriter(); + try { + StringEscapeUtils.ESCAPE_HTML4.translate(original, sw); + } catch (final IOException e) { + } + final String actual = original == null ? null : sw.toString(); + assertEquals(message, expected, actual); + } + } + + @Test + public void testEscapeHtml4Once() { + for (final String[] element : HTML_ESCAPES) { + final String message = element[0]; + final String expected = element[1]; + final String original = element[2]; + assertEquals(message, expected, StringEscapeUtils.escapeHtml4Once(original)); + assertEquals(message, expected, StringEscapeUtils.escapeHtml4Once(expected)); + final StringWriter sw = new StringWriter(); + try { + StringEscapeUtils.ESCAPE_HTML4_ONCE.translate(original, sw); + } catch (final IOException e) { + } + final String actual = original == null ? null : sw.toString(); + assertEquals(message, expected, actual); + final StringWriter sw2 = new StringWriter(); + try { + StringEscapeUtils.ESCAPE_HTML4_ONCE.translate(expected, sw2); + } catch (final IOException e) { + } + final String actual2 = original == null ? null : sw2.toString(); + assertEquals(message, expected, actual2); + } + } + + @Test + public void testEscapeHtml3Once() { + for (final String[] element : HTML_ESCAPES) { + final String message = element[0]; + final String expected = element[1]; + final String original = element[2]; + assertEquals(message, expected, StringEscapeUtils.escapeHtml3Once(original)); + assertEquals(message, expected, StringEscapeUtils.escapeHtml3Once(expected)); + final StringWriter sw = new StringWriter(); + try { + StringEscapeUtils.ESCAPE_HTML3_ONCE.translate(original, sw); + } catch (final IOException e) { + } + final String actual = original == null ? null : sw.toString(); + assertEquals(message, expected, actual); + final StringWriter sw2 = new StringWriter(); + try { + StringEscapeUtils.ESCAPE_HTML3_ONCE.translate(expected, sw2); + } catch (final IOException e) { + } + final String actual2 = original == null ? null : sw2.toString(); + assertEquals(message, expected, actual2); + } + } + + @Test + public void testUnescapeHtml4() { + for (final String[] element : HTML_ESCAPES) { + final String message = element[0]; + final String expected = element[2]; + final String original = element[1]; + assertEquals(message, expected, StringEscapeUtils.unescapeHtml4(original)); + + final StringWriter sw = new StringWriter(); + try { + StringEscapeUtils.UNESCAPE_HTML4.translate(original, sw); + } catch (final IOException e) { + } + final String actual = original == null ? null : sw.toString(); + assertEquals(message, expected, actual); + } + // \u00E7 is a cedilla (c with wiggle under) + // note that the test string must be 7-bit-clean (Unicode escaped) or else it will compile incorrectly + // on some locales + assertEquals("funny chars pass through OK", "Fran\u00E7ais", StringEscapeUtils.unescapeHtml4("Fran\u00E7ais")); + + assertEquals("Hello&;World", StringEscapeUtils.unescapeHtml4("Hello&;World")); + assertEquals("Hello&#;World", StringEscapeUtils.unescapeHtml4("Hello&#;World")); + assertEquals("Hello&# ;World", StringEscapeUtils.unescapeHtml4("Hello&# ;World")); + assertEquals("Hello&##;World", StringEscapeUtils.unescapeHtml4("Hello&##;World")); + } + + @Test + public void testUnescapeHexCharsHtml() { + // Simple easy to grok test + assertEquals("hex number unescape", "\u0080\u009F", StringEscapeUtils.unescapeHtml4("€Ÿ")); + assertEquals("hex number unescape", "\u0080\u009F", StringEscapeUtils.unescapeHtml4("€Ÿ")); + // Test all Character values: + for (char i = Character.MIN_VALUE; i < Character.MAX_VALUE; i++) { + final Character c1 = new Character(i); + final Character c2 = new Character((char)(i+1)); + final String expected = c1.toString() + c2.toString(); + final String escapedC1 = "&#x" + Integer.toHexString((c1.charValue())) + ";"; + final String escapedC2 = "&#x" + Integer.toHexString((c2.charValue())) + ";"; + assertEquals("hex number unescape index " + (int)i, expected, StringEscapeUtils.unescapeHtml4(escapedC1 + escapedC2)); + } + } + + @Test + public void testUnescapeUnknownEntity() throws Exception { + assertEquals("&zzzz;", StringEscapeUtils.unescapeHtml4("&zzzz;")); + } + + @Test + public void testEscapeHtmlVersions() throws Exception { + assertEquals("Β", StringEscapeUtils.escapeHtml4("\u0392")); + assertEquals("\u0392", StringEscapeUtils.unescapeHtml4("Β")); + + // TODO: refine API for escaping/unescaping specific HTML versions + } + + + + @Test + public void testEscapeXml10() throws Exception { + assertEquals("a<b>c"d'e&f", StringEscapeUtils.escapeXml10("ac\"d'e&f")); + assertEquals("XML 1.0 should not escape \t \n \r", + "a\tb\rc\nd", StringEscapeUtils.escapeXml10("a\tb\rc\nd")); + assertEquals("XML 1.0 should omit most #x0-x8 | #xb | #xc | #xe-#x19", + "ab", StringEscapeUtils.escapeXml10("a\u0000\u0001\u0008\u000b\u000c\u000e\u001fb")); + assertEquals("XML 1.0 should omit #xd800-#xdfff", + "a\ud7ff \ue000b", StringEscapeUtils.escapeXml10("a\ud7ff\ud800 \udfff \ue000b")); + assertEquals("XML 1.0 should omit #xfffe | #xffff", + "a\ufffdb", StringEscapeUtils.escapeXml10("a\ufffd\ufffe\uffffb")); + assertEquals("XML 1.0 should escape #x7f-#x84 | #x86 - #x9f, for XML 1.1 compatibility", + "a\u007e„\u0085†Ÿ\u00a0b", StringEscapeUtils.escapeXml10("a\u007e\u007f\u0084\u0085\u0086\u009f\u00a0b")); + } + + @Test + public void testEscapeXml11() throws Exception { + assertEquals("a<b>c"d'e&f", StringEscapeUtils.escapeXml11("ac\"d'e&f")); + assertEquals("XML 1.1 should not escape \t \n \r", + "a\tb\rc\nd", StringEscapeUtils.escapeXml11("a\tb\rc\nd")); + assertEquals("XML 1.1 should omit #x0", + "ab", StringEscapeUtils.escapeXml11("a\u0000b")); + assertEquals("XML 1.1 should escape #x1-x8 | #xb | #xc | #xe-#x19", + "a b", StringEscapeUtils.escapeXml11("a\u0001\u0008\u000b\u000c\u000e\u001fb")); + assertEquals("XML 1.1 should escape #x7F-#x84 | #x86-#x9F", + "a\u007e„\u0085†Ÿ\u00a0b", StringEscapeUtils.escapeXml11("a\u007e\u007f\u0084\u0085\u0086\u009f\u00a0b")); + assertEquals("XML 1.1 should omit #xd800-#xdfff", + "a\ud7ff \ue000b", StringEscapeUtils.escapeXml11("a\ud7ff\ud800 \udfff \ue000b")); + assertEquals("XML 1.1 should omit #xfffe | #xffff", + "a\ufffdb", StringEscapeUtils.escapeXml11("a\ufffd\ufffe\uffffb")); + } + + /** + * Reverse of the above. + * + * @see LANG-729 + */ + @Test + public void testUnescapeXmlSupplementaryCharacters() { + assertEquals("Supplementary character must be represented using a single escape", "\uD84C\uDFB4", + StringEscapeUtils.unescapeXml("𣎴") ); + + assertEquals("Supplementary characters mixed with basic characters should be decoded correctly", "a b c \uD84C\uDFB4", + StringEscapeUtils.unescapeXml("a b c 𣎴") ); + } + + // Tests issue #38569 + // http://issues.apache.org/bugzilla/show_bug.cgi?id=38569 + @Test + public void testStandaloneAmphersand() { + assertEquals("", StringEscapeUtils.unescapeHtml4("<P&O>")); + assertEquals("test & <", StringEscapeUtils.unescapeHtml4("test & <")); + assertEquals("", StringEscapeUtils.unescapeXml("<P&O>")); + assertEquals("test & <", StringEscapeUtils.unescapeXml("test & <")); + } + + @Test + public void testLang313() { + assertEquals("& &", StringEscapeUtils.unescapeHtml4("& &")); + } + + @Test + public void testEscapeCsvString() throws Exception { + assertEquals("foo.bar", StringEscapeUtils.escapeCsv("foo.bar")); + assertEquals("\"foo,bar\"", StringEscapeUtils.escapeCsv("foo,bar")); + assertEquals("\"foo\nbar\"", StringEscapeUtils.escapeCsv("foo\nbar")); + assertEquals("\"foo\rbar\"", StringEscapeUtils.escapeCsv("foo\rbar")); + assertEquals("\"foo\"\"bar\"", StringEscapeUtils.escapeCsv("foo\"bar")); + assertEquals("foo\uD84C\uDFB4bar", StringEscapeUtils.escapeCsv("foo\uD84C\uDFB4bar")); + assertEquals("", StringEscapeUtils.escapeCsv("")); + assertEquals(null, StringEscapeUtils.escapeCsv(null)); + } + + @Test + public void testEscapeCsvWriter() throws Exception { + checkCsvEscapeWriter("foo.bar", "foo.bar"); + checkCsvEscapeWriter("\"foo,bar\"", "foo,bar"); + checkCsvEscapeWriter("\"foo\nbar\"", "foo\nbar"); + checkCsvEscapeWriter("\"foo\rbar\"", "foo\rbar"); + checkCsvEscapeWriter("\"foo\"\"bar\"", "foo\"bar"); + checkCsvEscapeWriter("foo\uD84C\uDFB4bar", "foo\uD84C\uDFB4bar"); + checkCsvEscapeWriter("", null); + checkCsvEscapeWriter("", ""); + } + + private void checkCsvEscapeWriter(final String expected, final String value) { + try { + final StringWriter writer = new StringWriter(); + StringEscapeUtils.ESCAPE_CSV.translate(value, writer); + assertEquals(expected, writer.toString()); + } catch (final IOException e) { + fail("Threw: " + e); + } + } + + @Test + public void testUnescapeCsvString() throws Exception { + assertEquals("foo.bar", StringEscapeUtils.unescapeCsv("foo.bar")); + assertEquals("foo,bar", StringEscapeUtils.unescapeCsv("\"foo,bar\"")); + assertEquals("foo\nbar", StringEscapeUtils.unescapeCsv("\"foo\nbar\"")); + assertEquals("foo\rbar", StringEscapeUtils.unescapeCsv("\"foo\rbar\"")); + assertEquals("foo\"bar", StringEscapeUtils.unescapeCsv("\"foo\"\"bar\"")); + assertEquals("foo\uD84C\uDFB4bar", StringEscapeUtils.unescapeCsv("foo\uD84C\uDFB4bar")); + assertEquals("", StringEscapeUtils.unescapeCsv("")); + assertEquals(null, StringEscapeUtils.unescapeCsv(null)); + + assertEquals("\"foo.bar\"", StringEscapeUtils.unescapeCsv("\"foo.bar\"")); + } + + @Test + public void testUnescapeCsvWriter() throws Exception { + checkCsvUnescapeWriter("foo.bar", "foo.bar"); + checkCsvUnescapeWriter("foo,bar", "\"foo,bar\""); + checkCsvUnescapeWriter("foo\nbar", "\"foo\nbar\""); + checkCsvUnescapeWriter("foo\rbar", "\"foo\rbar\""); + checkCsvUnescapeWriter("foo\"bar", "\"foo\"\"bar\""); + checkCsvUnescapeWriter("foo\uD84C\uDFB4bar", "foo\uD84C\uDFB4bar"); + checkCsvUnescapeWriter("", null); + checkCsvUnescapeWriter("", ""); + + checkCsvUnescapeWriter("\"foo.bar\"", "\"foo.bar\""); + } + + private void checkCsvUnescapeWriter(final String expected, final String value) { + try { + final StringWriter writer = new StringWriter(); + StringEscapeUtils.UNESCAPE_CSV.translate(value, writer); + assertEquals(expected, writer.toString()); + } catch (final IOException e) { + fail("Threw: " + e); + } + } + + /** + * Tests // https://issues.apache.org/jira/browse/LANG-480 + */ + @Test + public void testEscapeHtmlHighUnicode() { + // this is the utf8 representation of the character: + // COUNTING ROD UNIT DIGIT THREE + // in Unicode + // codepoint: U+1D362 + final byte[] data = new byte[] { (byte)0xF0, (byte)0x9D, (byte)0x8D, (byte)0xA2 }; + + final String original = new String(data, Charset.forName("UTF8")); + + final String escaped = StringEscapeUtils.escapeHtml4( original ); + assertEquals( "High Unicode should not have been escaped", original, escaped); + + final String unescaped = StringEscapeUtils.unescapeHtml4( escaped ); + assertEquals( "High Unicode should have been unchanged", original, unescaped); + + // TODO: I think this should hold, needs further investigation + // String unescapedFromEntity = StringEscapeUtils.unescapeHtml4( "𝍢" ); + // assertEquals( "High Unicode should have been unescaped", original, unescapedFromEntity); + } + + /** + * Tests https://issues.apache.org/jira/browse/LANG-339 + */ + @Test + public void testEscapeHiragana() { + // Some random Japanese Unicode characters + final String original = "\u304B\u304C\u3068"; + final String escaped = StringEscapeUtils.escapeHtml4(original); + assertEquals( "Hiragana character Unicode behaviour should not be being escaped by escapeHtml4", + original, escaped); + + final String unescaped = StringEscapeUtils.unescapeHtml4( escaped ); + + assertEquals( "Hiragana character Unicode behaviour has changed - expected no unescaping", escaped, unescaped); + } + + /** + * Tests https://issues.apache.org/jira/browse/LANG-708 + * + * @throws IOException + * if an I/O error occurs + */ + @Test + public void testLang708() throws IOException { + byte[] inputBytes = Files.readAllBytes(Paths.get("src/test/resources/stringEscapeUtilsTestData.txt")); + final String input = new String(inputBytes, StandardCharsets.UTF_8); + final String escaped = StringEscapeUtils.escapeEcmaScript(input); + // just the end: + assertTrue(escaped, escaped.endsWith("}]")); + // a little more: + assertTrue(escaped, escaped.endsWith("\"valueCode\\\":\\\"\\\"}]")); + } + + /** + * Tests https://issues.apache.org/jira/browse/LANG-911 + */ + @Test + public void testLang911() { + final String bellsTest = "\ud83d\udc80\ud83d\udd14"; + final String value = StringEscapeUtils.escapeJava(bellsTest); + final String valueTest = StringEscapeUtils.unescapeJava(value); + assertEquals(bellsTest, valueTest); + } + + @Test + public void testEscapeJson() { + assertEquals(null, StringEscapeUtils.escapeJson(null)); + try { + StringEscapeUtils.ESCAPE_JSON.translate(null, null); + fail(); + } catch (final IOException ex) { + fail(); + } catch (final IllegalArgumentException ex) { + } + try { + StringEscapeUtils.ESCAPE_JSON.translate("", null); + fail(); + } catch (final IOException ex) { + fail(); + } catch (final IllegalArgumentException ex) { + } + + assertEquals("He didn't say, \\\"stop!\\\"", StringEscapeUtils.escapeJson("He didn't say, \"stop!\"")); + + final String expected = "\\\"foo\\\" isn't \\\"bar\\\". specials: \\b\\r\\n\\f\\t\\\\\\/"; + final String input ="\"foo\" isn't \"bar\". specials: \b\r\n\f\t\\/"; + + assertEquals(expected, StringEscapeUtils.escapeJson(input)); + } + + @Test + public void testBuilder() { + String result = StringEscapeUtils.builder(StringEscapeUtils.ESCAPE_XML10).escape("<").append(">").toString(); + assertEquals("<>", result); + } + + @Test + public void testEscapeXSI() { + assertNull(null, escapeXSI(null)); + assertEquals("He\\ didn\\'t\\ say,\\ \\\"Stop!\\\"", escapeXSI("He didn't say, \"Stop!\"")); + assertEquals("\\\\", escapeXSI("\\")); + assertEquals("", escapeXSI("\n")); + } + + @Test + public void testUnscapeXSI() { + assertNull(null, unescapeXSI(null)); + assertEquals("\"", unescapeXSI("\\\"")); + assertEquals("He didn't say, \"Stop!\"", unescapeXSI("He\\ didn\\'t\\ say,\\ \\\"Stop!\\\"")); + assertEquals("\\", unescapeXSI("\\\\")); + assertEquals("", unescapeXSI("\\")); + } + +} \ No newline at end of file