commons-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From chtom...@apache.org
Subject [1/2] [text] Added support for UTF-16 with surrogate pairs
Date Tue, 18 Jul 2017 19:37:12 GMT
Repository: commons-text
Updated Branches:
  refs/heads/master aaf4aba36 -> ce4f20e26


Added support for UTF-16 with surrogate pairs


Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/15c2e4b2
Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/15c2e4b2
Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/15c2e4b2

Branch: refs/heads/master
Commit: 15c2e4b28686edf6f0807304367dba82ac3d359d
Parents: aaf4aba
Author: Arun Vinud <arunvinud.sivasubramaniansurianarayanan@capitalone.com>
Authored: Wed Jul 12 15:47:02 2017 -0400
Committer: Arun Vinud <arunvinud.sivasubramaniansurianarayanan@capitalone.com>
Committed: Wed Jul 12 15:47:25 2017 -0400

----------------------------------------------------------------------
 .../java/org/apache/commons/text/WordUtils.java | 27 +++++++++++---------
 .../org/apache/commons/text/WordUtilsTest.java  | 26 ++++++++++++++-----
 2 files changed, 35 insertions(+), 18 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-text/blob/15c2e4b2/src/main/java/org/apache/commons/text/WordUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/WordUtils.java b/src/main/java/org/apache/commons/text/WordUtils.java
index 8e96553..123243f 100644
--- a/src/main/java/org/apache/commons/text/WordUtils.java
+++ b/src/main/java/org/apache/commons/text/WordUtils.java
@@ -24,11 +24,14 @@ import org.apache.commons.lang3.StringUtils;
 import org.apache.commons.lang3.Validate;
 
 /**
- * <p>Operations on Strings that contain words.</p>
+ * <p>
+ * Operations on Strings that contain words.
+ * </p>
  *
- * <p>This class tries to handle <code>null</code> input gracefully.
- * An exception will not be thrown for a <code>null</code> input.
- * Each method documents its behaviour in more detail.</p>
+ * <p>
+ * This class tries to handle <code>null</code> input gracefully. An exception will not be thrown for a
+ * <code>null</code> input. Each method documents its behavior in more detail.
+ * </p>
  *
  * @since 1.1
  */
@@ -688,22 +691,22 @@ public class WordUtils {
             return "";
         }
         final int strLen = str.length();
-        final char[] buf = new char[strLen / 2 + 1];
+        final int [] newCodePoints = new int[strLen / 2 + 1];
         int count = 0;
         boolean lastWasGap = true;
-        for (int i = 0; i < strLen; i++) {
-            final char ch = str.charAt(i);
+        for (int i = 0; i < strLen;) {
+            final int codePoint = str.codePointAt(i);
 
-            if (isDelimiter(ch, delimiters)) {
+            if (isDelimiter(codePoint, delimiters)) {
                 lastWasGap = true;
             } else if (lastWasGap) {
-                buf[count++] = ch;
+                newCodePoints[count++] = codePoint;
                 lastWasGap = false;
-            } else {
-                continue; // ignore ch
             }
+
+            i += Character.charCount(codePoint);
         }
-        return new String(buf, 0, count);
+        return new String(newCodePoints, 0, count);
     }
 
     //-----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/commons-text/blob/15c2e4b2/src/test/java/org/apache/commons/text/WordUtilsTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/WordUtilsTest.java b/src/test/java/org/apache/commons/text/WordUtilsTest.java
index 271a8f0..beb063a 100644
--- a/src/test/java/org/apache/commons/text/WordUtilsTest.java
+++ b/src/test/java/org/apache/commons/text/WordUtilsTest.java
@@ -16,16 +16,13 @@
  */
 package org.apache.commons.text;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertTrue;
+import org.apache.commons.lang3.StringUtils;
+import org.junit.Test;
 
 import java.lang.reflect.Constructor;
 import java.lang.reflect.Modifier;
 
-import org.apache.commons.lang3.StringUtils;
-import org.junit.Test;
+import static org.junit.Assert.*;
 
 /**
  * Unit tests for {@link WordUtils} class.
@@ -412,6 +409,23 @@ public class WordUtilsTest {
         assertEquals("i2", WordUtils.initials("i am here 123", array));
     }
 
+    @Test
+    public void testInitialsSurrogatePairs() {
+        //Tests with space as default delimiter
+        assertEquals("\uD800\uDF00\uD800\uDF02", WordUtils.initials("\uD800\uDF00\uD800\uDF01 \uD800\uDF02\uD800\uDF03"));
+        assertEquals("\uD800\uDF00\uD800\uDF02", WordUtils.initials("\uD800\uDF00\uD800\uDF01 \uD800\uDF02\uD800\uDF03", null));
+        assertEquals("\uD800\uDF00\uD800\uDF02", WordUtils.initials("\uD800\uDF00 \uD800\uDF02 ", null));
+
+        //Tests with UTF-16 as delimiters
+        assertEquals("\uD800\uDF00\uD800\uDF02", WordUtils.initials("\uD800\uDF00\uD800\uDF01.\uD800\uDF02\uD800\uDF03", new char[]{'.'}));
+        assertEquals("\uD800\uDF00\uD800\uDF02", WordUtils.initials("\uD800\uDF00\uD800\uDF01A\uD800\uDF02\uD800\uDF03", new char[]{'A'}));
+
+        //Tests with UTF-32 as delimiters
+        assertEquals("\uD800\uDF00\uD800\uDF02", WordUtils.initials("\uD800\uDF00\uD800\uDF01\uD800\uDF14\uD800\uDF02\uD800\uDF03", new char[]{'\uD800', '\uDF14'}));
+        assertEquals("\uD800\uDF00\uD800\uDF02", WordUtils.initials("\uD800\uDF00\uD800\uDF01\uD800\uDF14\uD800\uDF18\uD800\uDF02\uD800\uDF03", new char[]{'\uD800', '\uDF14', '\uD800', '\uDF18'}));
+
+    }
+
     // -----------------------------------------------------------------------
     @Test
     public void testSwapCase_String() {


Mime
View raw message