asterixdb-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jianf...@apache.org
Subject [6/7] incubator-asterixdb-hyracks git commit: ASTERIXDB-1102: VarSize Encoding to store length of String and ByteArray
Date Thu, 29 Oct 2015 00:25:08 GMT
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/RewindableDataOutputStream.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/RewindableDataOutputStream.java b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/RewindableDataOutputStream.java
new file mode 100644
index 0000000..dcd5458
--- /dev/null
+++ b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/RewindableDataOutputStream.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.data.std.util;
+
+import java.io.DataOutputStream;
+import java.io.FilterOutputStream;
+import java.io.OutputStream;
+
+public class RewindableDataOutputStream extends DataOutputStream {
+    /**
+     * Creates a new data output stream to write data to the specified
+     * underlying output stream. The counter <code>written</code> is
+     * set to zero.
+     *
+     * @param out the underlying output stream, to be saved for later
+     *            use.
+     * @see FilterOutputStream#out
+     */
+    public RewindableDataOutputStream(OutputStream out) {
+        super(out);
+    }
+
+    /**
+     * Rewinds the current position by {@code delta} bytes to a previous position.
+     * This function is used to drop the last {@code delta} bytes that were already written.
+     * In some cases we write some bytes and afterwards find that we have written more than expected;
+     * we then need to fix the position by rewinding the current position to the expected one.
+     * Currently, it is used by the {@link AbstractVarLenObjectBuilder}, which may take more space than required
+     * at the beginning, and which shifts the data and fixes the position whenever required.
+     *
+     * @param delta the number of bytes to subtract from the {@code written} counter
+     */
+    public void rewindWrittenBy(int delta) {
+        if (written < delta) {
+            throw new IndexOutOfBoundsException();
+        }
+        written -= delta;
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/UTF8CharSequence.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/UTF8CharSequence.java b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/UTF8CharSequence.java
new file mode 100644
index 0000000..9dafef1
--- /dev/null
+++ b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/UTF8CharSequence.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hyracks.data.std.util;
+
+import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
+
+public class UTF8CharSequence implements CharSequence {
+
+    private char[] buf;
+    private int length;
+
+    @Override
+    public char charAt(int index) {
+        if (index >= length || index < 0) {
+            throw new IndexOutOfBoundsException("No index " + index + " for string of length " + length);
+        }
+        return buf[index];
+    }
+
+    @Override
+    public int length() {
+        return length;
+    }
+
+    @Override
+    public CharSequence subSequence(int start, int end) {
+        if (start < 0 || start > end || end > length) { // range check required by the CharSequence contract
+            throw new IndexOutOfBoundsException("No range [" + start + ", " + end + ") for string of length " + length);
+        }
+        UTF8CharSequence charSeq = new UTF8CharSequence();
+        charSeq.length = end - start;
+        charSeq.buf = end == start ? new char[0] : java.util.Arrays.copyOfRange(buf, start, end); // never null
+        return charSeq;
+    }
+
+    public void reset(UTF8StringPointable valuePtr) {
+        int utfLen = valuePtr.getUTF8Length();
+        if (buf == null || buf.length < utfLen) {
+            buf = new char[utfLen];
+        }
+        int bytePos = 0;
+        int charPos = 0;
+        while (bytePos < utfLen) {
+            buf[charPos++] = valuePtr.charAt(valuePtr.getMetaDataLength() + bytePos);
+            bytePos += valuePtr.charSize(valuePtr.getMetaDataLength() + bytePos);
+        }
+        this.length = charPos;
+    }
+
+    @Override
+    public String toString() {
+        return buf == null ? "" : new String(buf, 0, length); // buf may still be null if nothing was loaded yet
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/UTF8StringBuilder.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/UTF8StringBuilder.java b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/UTF8StringBuilder.java
new file mode 100644
index 0000000..eb29a98
--- /dev/null
+++ b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/UTF8StringBuilder.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hyracks.data.std.util;
+
+import java.io.IOException;
+
+import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
+import org.apache.hyracks.util.string.UTF8StringUtil;
+
+public class UTF8StringBuilder extends AbstractVarLenObjectBuilder {
+
+    public void appendChar(char ch) throws IOException {
+        UTF8StringUtil.writeCharAsModifiedUTF8(ch, out);
+    }
+
+    public void appendString(String string) throws IOException {
+        for (int i = 0; i < string.length(); i++) {
+            appendChar(string.charAt(i));
+        }
+    }
+
+    public void appendUtf8StringPointable(UTF8StringPointable src, int byteStartOffset, int byteLength) throws IOException {
+        out.write(src.getByteArray(), byteStartOffset, byteLength);
+    }
+
+    public void appendUtf8StringPointable(UTF8StringPointable src) throws IOException {
+        appendUtf8StringPointable(src, src.getCharStartOffset(), src.getUTF8Length());
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/UTF8StringCharacterIterator.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/UTF8StringCharacterIterator.java b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/UTF8StringCharacterIterator.java
new file mode 100644
index 0000000..317527e
--- /dev/null
+++ b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/UTF8StringCharacterIterator.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.data.std.util;
+
+import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
+
+public class UTF8StringCharacterIterator implements ICharIterator {
+
+    private UTF8StringPointable utf8Ptr;
+    private int pos;
+
+    public UTF8StringCharacterIterator reset(UTF8StringPointable utf8Ptr) {
+        this.utf8Ptr = utf8Ptr;
+        return reset();
+    }
+
+    public UTF8StringCharacterIterator reset() {
+        this.pos = utf8Ptr.getMetaDataLength();
+        return this;
+    }
+
+    @Override
+    public boolean hasNext() {
+        return pos < utf8Ptr.getMetaDataLength() + utf8Ptr.getUTF8Length();
+    }
+
+    @Override
+    public char next() {
+        char ret = utf8Ptr.charAt(pos);
+        pos += utf8Ptr.charSize(pos);
+        return ret;
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/primitive/ByteArrayPointableTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/primitive/ByteArrayPointableTest.java b/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/primitive/ByteArrayPointableTest.java
index f58c8da..1713467 100644
--- a/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/primitive/ByteArrayPointableTest.java
+++ b/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/primitive/ByteArrayPointableTest.java
@@ -19,51 +19,39 @@
 
 package org.apache.hyracks.data.std.primitive;
 
-import org.junit.Test;
-
-import javax.xml.bind.DatatypeConverter;
+import static org.junit.Assert.assertTrue;
 
-import static org.junit.Assert.*;
+import org.junit.Test;
 
 public class ByteArrayPointableTest {
 
-    public static byte[] generatePointableBytes(byte[] bytes){
-        byte[] ret = new byte[bytes.length + ByteArrayPointable.SIZE_OF_LENGTH];
-        for (int i = 0; i < bytes.length; ++i){
-            ret[i+ ByteArrayPointable.SIZE_OF_LENGTH] = bytes[i];
-        }
-        ByteArrayPointable.putLength(bytes.length, ret, 0);
-        return ret;
-    }
-
     @Test
     public void testCompareTo() throws Exception {
-        byte [] bytes = generatePointableBytes(new byte[] { 1, 2, 3, 4});
-        ByteArrayPointable byteArrayPointable = new ByteArrayPointable();
-        byteArrayPointable.set(bytes, 0, bytes.length);
+        ByteArrayPointable byteArrayPointable = ByteArrayPointable
+                .generatePointableFromPureBytes(new byte[] { 1, 2, 3, 4 });
 
-        testEqual(byteArrayPointable, generatePointableBytes(new byte[] { 1,2 ,3,4}));
+        testEqual(byteArrayPointable, ByteArrayPointable.generatePointableFromPureBytes(new byte[] { 1, 2, 3, 4 }));
 
-        testLessThan(byteArrayPointable, generatePointableBytes(new byte[] {2}));
-        testLessThan(byteArrayPointable, generatePointableBytes(new byte[] {1,2,3,5}));
-        testLessThan(byteArrayPointable, generatePointableBytes(new byte[] {1,2,3,4,5}));
+        testLessThan(byteArrayPointable, ByteArrayPointable.generatePointableFromPureBytes(new byte[] { 2 }, 0, 1));
+        testLessThan(byteArrayPointable, ByteArrayPointable.generatePointableFromPureBytes(new byte[] { 1, 2, 3, 5 }));
+        testLessThan(byteArrayPointable,
+                ByteArrayPointable.generatePointableFromPureBytes(new byte[] { 1, 2, 3, 4, 5 }));
 
-        testGreaterThan(byteArrayPointable, generatePointableBytes(new byte[] { }));
-        testGreaterThan(byteArrayPointable, generatePointableBytes(new byte[] { 0}));
-        testGreaterThan(byteArrayPointable, generatePointableBytes(new byte[] { 1,2,3}));
+        testGreaterThan(byteArrayPointable, ByteArrayPointable.generatePointableFromPureBytes(new byte[] {}));
+        testGreaterThan(byteArrayPointable, ByteArrayPointable.generatePointableFromPureBytes(new byte[] { 0 }));
+        testGreaterThan(byteArrayPointable, ByteArrayPointable.generatePointableFromPureBytes(new byte[] { 1, 2, 3 }));
 
     }
 
-
-    void testEqual(ByteArrayPointable pointable, byte [] bytes){
-        assertTrue(pointable.compareTo(bytes, 0, bytes.length) == 0);
+    void testEqual(ByteArrayPointable pointable, ByteArrayPointable bytes) {
+        assertTrue(pointable.compareTo(bytes) == 0);
     }
 
-    void testLessThan(ByteArrayPointable pointable, byte[] bytes){
-        assertTrue(pointable.compareTo(bytes, 0, bytes.length) < 0);
+    void testLessThan(ByteArrayPointable pointable, ByteArrayPointable bytes) {
+        assertTrue(pointable.compareTo(bytes) < 0);
     }
 
-    void testGreaterThan(ByteArrayPointable pointable, byte[] bytes){
-        assertTrue(pointable.compareTo(bytes, 0, bytes.length) > 0);
+    void testGreaterThan(ByteArrayPointable pointable, ByteArrayPointable bytes) {
+        assertTrue(pointable.compareTo(bytes) > 0);
     }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/primitive/UTF8StringPointableTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/primitive/UTF8StringPointableTest.java b/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/primitive/UTF8StringPointableTest.java
new file mode 100644
index 0000000..f134718
--- /dev/null
+++ b/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/primitive/UTF8StringPointableTest.java
@@ -0,0 +1,193 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.data.std.primitive;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.hyracks.data.std.util.GrowableArray;
+import org.apache.hyracks.data.std.util.UTF8StringBuilder;
+import org.apache.hyracks.util.string.UTF8StringSample;
+import org.apache.hyracks.util.string.UTF8StringUtil;
+import org.junit.Test;
+
+public class UTF8StringPointableTest {
+    public static UTF8StringPointable STRING_EMPTY = UTF8StringPointable
+            .generateUTF8Pointable(UTF8StringSample.EMPTY_STRING);
+    public static UTF8StringPointable STRING_UTF8_MIX = UTF8StringPointable
+            .generateUTF8Pointable(UTF8StringSample.STRING_UTF8_MIX);
+    public static UTF8StringPointable STRING_UTF8_MIX_LOWERCASE = UTF8StringPointable.generateUTF8Pointable(
+            UTF8StringSample.STRING_UTF8_MIX_LOWERCASE);
+
+    public static UTF8StringPointable STRING_LEN_127 = UTF8StringPointable
+            .generateUTF8Pointable(UTF8StringSample.STRING_LEN_127);
+    public static UTF8StringPointable STRING_LEN_128 = UTF8StringPointable
+            .generateUTF8Pointable(UTF8StringSample.STRING_LEN_128);
+
+    @Test
+    public void testGetStringLength() throws Exception {
+        UTF8StringPointable utf8Ptr = UTF8StringPointable.generateUTF8Pointable(UTF8StringSample.STRING_LEN_127);
+        assertEquals(127, utf8Ptr.getUTF8Length());
+        assertEquals(1, utf8Ptr.getMetaDataLength());
+        assertEquals(127, utf8Ptr.getStringLength());
+
+        byte[] bytes = UTF8StringUtil.writeStringToBytes(UTF8StringSample.STRING_LEN_128);
+        utf8Ptr.set(bytes, 0, bytes.length);
+        assertEquals(128, utf8Ptr.getUTF8Length());
+        assertEquals(2, utf8Ptr.getMetaDataLength());
+        assertEquals(128, utf8Ptr.getStringLength());
+    }
+
+    @Test
+    public void testContains() throws Exception {
+        assertTrue(STRING_UTF8_MIX.contains(STRING_UTF8_MIX, false));
+        assertTrue(STRING_UTF8_MIX.contains(STRING_UTF8_MIX, true));
+        assertTrue(STRING_UTF8_MIX.contains(STRING_EMPTY, true));
+
+        assertTrue(STRING_UTF8_MIX.contains(STRING_UTF8_MIX_LOWERCASE, true));
+        assertTrue(STRING_UTF8_MIX_LOWERCASE.contains(STRING_UTF8_MIX, true));
+    }
+
+    @Test
+    public void testStartsWith() throws Exception {
+        assertTrue(STRING_LEN_128.startsWith(STRING_LEN_127, true));
+        assertFalse(STRING_LEN_127.startsWith(STRING_LEN_128, true));
+
+        assertTrue(STRING_LEN_127.startsWith(STRING_EMPTY, true));
+    }
+
+    @Test
+    public void testEndsWith() throws Exception {
+        assertTrue(STRING_LEN_128.endsWith(STRING_LEN_127, true));
+        assertFalse(STRING_LEN_127.endsWith(STRING_LEN_128, true));
+
+        assertTrue(STRING_LEN_127.endsWith(STRING_EMPTY, true)); // every string ends with the empty string
+    }
+
+    @Test
+    public void testConcat() throws Exception {
+        UTF8StringPointable expected = UTF8StringPointable.generateUTF8Pointable(
+                UTF8StringSample.generateStringRepeatBy(UTF8StringSample.ONE_ASCII_CHAR, 127 + 128));
+
+        GrowableArray storage = new GrowableArray();
+        UTF8StringBuilder builder = new UTF8StringBuilder();
+        STRING_LEN_127.concat(STRING_LEN_128, builder, storage);
+
+        UTF8StringPointable actual = new UTF8StringPointable();
+        actual.set(storage.getByteArray(), 0, storage.getLength());
+
+        assertEquals(0, expected.compareTo(actual));
+
+        storage.reset();
+        STRING_LEN_127.concat(STRING_EMPTY, builder, storage);
+        actual.set(storage.getByteArray(), 0, storage.getLength());
+
+        assertEquals(0, STRING_LEN_127.compareTo(actual));
+    }
+
+    @Test
+    public void testSubstr() throws Exception {
+        GrowableArray storage = new GrowableArray();
+        UTF8StringBuilder builder = new UTF8StringBuilder();
+
+        STRING_LEN_128.substr(1, 127, builder, storage);
+        UTF8StringPointable result = new UTF8StringPointable();
+        result.set(storage.getByteArray(), 0, storage.getLength());
+
+        assertEquals(0, STRING_LEN_127.compareTo(result));
+
+        storage.reset();
+        STRING_UTF8_MIX.substr(0, UTF8StringSample.STRING_UTF8_MIX.length(), builder, storage);
+        result.set(storage.getByteArray(), 0, storage.getLength());
+        assertEquals(0, STRING_UTF8_MIX.compareTo(result));
+    }
+
+    @Test
+    public void testSubstrBefore() throws Exception {
+        UTF8StringBuilder builder = new UTF8StringBuilder();
+        GrowableArray storage = new GrowableArray();
+
+        STRING_LEN_128.substrBefore(STRING_LEN_127, builder, storage);
+        UTF8StringPointable result = new UTF8StringPointable();
+        result.set(storage.getByteArray(), 0, storage.getLength());
+
+        assertEquals(0, STRING_EMPTY.compareTo(result));
+
+        storage.reset();
+        UTF8StringPointable testPtr = UTF8StringPointable.generateUTF8Pointable("Mix中文123");
+        UTF8StringPointable pattern = UTF8StringPointable.generateUTF8Pointable("文");
+        UTF8StringPointable expect = UTF8StringPointable.generateUTF8Pointable("Mix中");
+        testPtr.substrBefore(pattern, builder, storage);
+        result.set(storage.getByteArray(), 0, storage.getLength());
+        assertEquals(0, expect.compareTo(result));
+    }
+
+    @Test
+    public void testSubstrAfter() throws Exception {
+        UTF8StringBuilder builder = new UTF8StringBuilder();
+        GrowableArray storage = new GrowableArray();
+
+        STRING_LEN_128.substrAfter(STRING_LEN_127, builder, storage);
+        UTF8StringPointable result = new UTF8StringPointable();
+        result.set(storage.getByteArray(), 0, storage.getLength());
+
+        UTF8StringPointable expect = UTF8StringPointable
+                .generateUTF8Pointable(Character.toString(UTF8StringSample.ONE_ASCII_CHAR));
+        assertEquals(0, expect.compareTo(result));
+
+        storage.reset();
+        UTF8StringPointable testPtr = UTF8StringPointable.generateUTF8Pointable("Mix中文123");
+        UTF8StringPointable pattern = UTF8StringPointable.generateUTF8Pointable("文");
+        expect = UTF8StringPointable.generateUTF8Pointable("123");
+        testPtr.substrAfter(pattern, builder, storage);
+        result.set(storage.getByteArray(), 0, storage.getLength());
+        assertEquals(0, expect.compareTo(result));
+    }
+
+    @Test
+    public void testLowercase() throws Exception {
+        UTF8StringBuilder builder = new UTF8StringBuilder();
+        GrowableArray storage = new GrowableArray();
+
+        UTF8StringPointable result = new UTF8StringPointable();
+        STRING_UTF8_MIX.lowercase(builder, storage);
+
+        result.set(storage.getByteArray(), 0, storage.getLength());
+
+        assertEquals(0, STRING_UTF8_MIX_LOWERCASE.compareTo(result));
+    }
+
+    @Test
+    public void testUppercase() throws Exception {
+        UTF8StringBuilder builder = new UTF8StringBuilder();
+        GrowableArray storage = new GrowableArray();
+
+        UTF8StringPointable result = new UTF8StringPointable();
+        STRING_UTF8_MIX_LOWERCASE.uppercase(builder, storage);
+
+        result.set(storage.getByteArray(), 0, storage.getLength());
+
+        UTF8StringPointable expected = UTF8StringPointable
+                .generateUTF8Pointable(UTF8StringSample.STRING_UTF8_MIX_LOWERCASE.toUpperCase());
+        assertEquals(0, expected.compareTo(result));
+
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/util/UTF8StringBuilderTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/util/UTF8StringBuilderTest.java b/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/util/UTF8StringBuilderTest.java
new file mode 100644
index 0000000..bc0c629
--- /dev/null
+++ b/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/util/UTF8StringBuilderTest.java
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.data.std.util;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+
+import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
+import org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder;
+import org.apache.hyracks.util.string.UTF8StringSample;
+import org.apache.hyracks.util.string.UTF8StringUtil;
+import org.junit.Test;
+
+public class UTF8StringBuilderTest {
+
+    UTF8StringBuilder utf8StringBuilder = new UTF8StringBuilder();
+    GrowableArray storage = new GrowableArray();
+
+    @Test
+    public void testNormalBuilder() throws IOException {
+        testOneString(UTF8StringSample.EMPTY_STRING, 0);
+        testOneString(UTF8StringSample.EMPTY_STRING, 127);
+
+        testOneString(UTF8StringSample.STRING_UTF8_MIX, 127);
+        testOneString(UTF8StringSample.STRING_LEN_128, 128);
+
+        testOneString(UTF8StringSample.STRING_LEN_MEDIUM, VarLenIntEncoderDecoder.BOUND_TWO_BYTE);
+        testOneString(UTF8StringSample.STRING_LEN_LARGE, VarLenIntEncoderDecoder.BOUND_THREE_BYTE);
+    }
+
+    @Test
+    public void testShrinkAfterFinish() throws IOException {
+        testOneString(UTF8StringSample.STRING_LEN_127, VarLenIntEncoderDecoder.BOUND_TWO_BYTE);
+        testOneString(UTF8StringSample.STRING_LEN_127, VarLenIntEncoderDecoder.BOUND_THREE_BYTE);
+        testOneString(UTF8StringSample.STRING_LEN_127, VarLenIntEncoderDecoder.BOUND_FOUR_BYTE);
+    }
+
+    @Test
+    public void testIncreaseAfterFinish() throws IOException {
+        testOneString(UTF8StringSample.STRING_LEN_128, VarLenIntEncoderDecoder.BOUND_ONE_BYTE);
+        testOneString(UTF8StringSample.STRING_LEN_MEDIUM, VarLenIntEncoderDecoder.BOUND_ONE_BYTE);
+        testOneString(UTF8StringSample.STRING_LEN_LARGE, VarLenIntEncoderDecoder.BOUND_TWO_BYTE);
+    }
+
+    public void testOneString(String testString, int estimateLength) throws IOException {
+        storage.reset();
+        utf8StringBuilder.reset(storage, estimateLength);
+        for (char c : testString.toCharArray()) {
+            utf8StringBuilder.appendChar(c);
+        }
+        utf8StringBuilder.finish();
+        assertEquals(testString, UTF8StringUtil.toString(new StringBuilder(), storage.getByteArray(), 0).toString());
+
+        UTF8StringPointable hyracksUtf = new UTF8StringPointable();
+        hyracksUtf.set(storage.getByteArray(), 0, storage.getLength());
+
+        GrowableArray storage2 = new GrowableArray(); // second pass: rebuild the string from the pointable
+        utf8StringBuilder.reset(storage2, estimateLength);
+        utf8StringBuilder.appendUtf8StringPointable(hyracksUtf);
+        utf8StringBuilder.finish();
+        assertEquals(testString, UTF8StringUtil.toString(new StringBuilder(), storage2.getByteArray(), 0).toString());
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/util/UTF8StringCharacterIteratorTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/util/UTF8StringCharacterIteratorTest.java b/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/util/UTF8StringCharacterIteratorTest.java
new file mode 100644
index 0000000..5268c82
--- /dev/null
+++ b/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/util/UTF8StringCharacterIteratorTest.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.data.std.util;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
+import org.apache.hyracks.util.string.UTF8StringSample;
+import org.junit.Test;
+
+public class UTF8StringCharacterIteratorTest {
+
+    private UTF8StringCharacterIterator iterator = new UTF8StringCharacterIterator();
+
+    private void testEachIterator(String testString) {
+        UTF8StringPointable ptr = UTF8StringPointable.generateUTF8Pointable(testString);
+        iterator.reset(ptr);
+        for (char ch : testString.toCharArray()) {
+            assertTrue(iterator.hasNext());
+            assertEquals(ch, iterator.next());
+        }
+        assertFalse(iterator.hasNext());
+
+        iterator.reset();
+        for (char ch : testString.toCharArray()) {
+            assertTrue(iterator.hasNext());
+            assertEquals(ch, iterator.next());
+        }
+        assertFalse(iterator.hasNext());
+    }
+
+    @Test
+    public void testIterator() {
+        testEachIterator(UTF8StringSample.EMPTY_STRING);
+        testEachIterator(UTF8StringSample.STRING_UTF8_MIX);
+        testEachIterator(UTF8StringSample.STRING_LEN_127); // largest length that fits a one-byte length prefix
+        testEachIterator(UTF8StringSample.STRING_LEN_128);
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-dataflow-common/pom.xml
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-dataflow-common/pom.xml b/hyracks/hyracks-dataflow-common/pom.xml
index c6e85cd..ad4dfa7 100644
--- a/hyracks/hyracks-dataflow-common/pom.xml
+++ b/hyracks/hyracks-dataflow-common/pom.xml
@@ -17,48 +17,61 @@
  ! under the License.
  !-->
 
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
-  <modelVersion>4.0.0</modelVersion>
-  <artifactId>hyracks-dataflow-common</artifactId>
-  <name>hyracks-dataflow-common</name>
-  <parent>
-    <groupId>org.apache.hyracks</groupId>
-    <artifactId>hyracks</artifactId>
-    <version>0.2.17-SNAPSHOT</version>
-  </parent>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <artifactId>hyracks-dataflow-common</artifactId>
+    <name>hyracks-dataflow-common</name>
+    <parent>
+        <groupId>org.apache.hyracks</groupId>
+        <artifactId>hyracks</artifactId>
+        <version>0.2.17-SNAPSHOT</version>
+    </parent>
 
-  <licenses>
-    <license>
-      <name>Apache License, Version 2.0</name>
-      <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
-      <distribution>repo</distribution>
-      <comments>A business-friendly OSS license</comments>
-    </license>
-  </licenses>
+    <licenses>
+        <license>
+            <name>Apache License, Version 2.0</name>
+            <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
+            <distribution>repo</distribution>
+            <comments>A business-friendly OSS license</comments>
+        </license>
+    </licenses>
 
 
-  <dependencies>
-  	<dependency>
-  		<groupId>org.apache.hyracks</groupId>
-  		<artifactId>hyracks-api</artifactId>
-  		<version>0.2.17-SNAPSHOT</version>
-  		<type>jar</type>
-  		<scope>compile</scope>
-  	</dependency>
-  	<dependency>
-  		<groupId>org.apache.hyracks</groupId>
-  		<artifactId>hyracks-data-std</artifactId>
-  		<version>0.2.17-SNAPSHOT</version>
-    </dependency>
-       <dependency>
-  		<groupId>org.apache.hyracks</groupId>
-  		<artifactId>hyracks-control-nc</artifactId>
-  		<version>0.2.17-SNAPSHOT</version>
-        <scope>test</scope>
-    </dependency>
-    <dependency>
-         <groupId>commons-io</groupId>
-         <artifactId>commons-io</artifactId>
-    </dependency>
-  </dependencies>
+    <dependencies>
+        <dependency>
+            <groupId>org.apache.hyracks</groupId>
+            <artifactId>hyracks-util</artifactId>
+            <version>0.2.17-SNAPSHOT</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.hyracks</groupId>
+            <artifactId>hyracks-util</artifactId>
+            <version>0.2.17-SNAPSHOT</version>
+            <type>test-jar</type>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.hyracks</groupId>
+            <artifactId>hyracks-api</artifactId>
+            <version>0.2.17-SNAPSHOT</version>
+            <type>jar</type>
+            <scope>compile</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.hyracks</groupId>
+            <artifactId>hyracks-data-std</artifactId>
+            <version>0.2.17-SNAPSHOT</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.hyracks</groupId>
+            <artifactId>hyracks-control-nc</artifactId>
+            <version>0.2.17-SNAPSHOT</version>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>commons-io</groupId>
+            <artifactId>commons-io</artifactId>
+        </dependency>
+    </dependencies>
 </project>

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/marshalling/ByteArraySerializerDeserializer.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/marshalling/ByteArraySerializerDeserializer.java b/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/marshalling/ByteArraySerializerDeserializer.java
index 4c8bc1e..d16fca7 100644
--- a/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/marshalling/ByteArraySerializerDeserializer.java
+++ b/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/marshalling/ByteArraySerializerDeserializer.java
@@ -19,57 +19,71 @@
 
 package org.apache.hyracks.dataflow.common.data.marshalling;
 
-import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.data.std.primitive.ByteArrayPointable;
-
 import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
 
+import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.data.std.primitive.ByteArrayPointable;
+import org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder;
+
 public class ByteArraySerializerDeserializer implements ISerializerDeserializer<byte[]> {
 
     private static final long serialVersionUID = 1L;
 
-    public final static ByteArraySerializerDeserializer INSTANCE = new ByteArraySerializerDeserializer();
-
-    private ByteArraySerializerDeserializer() {
+    public ByteArraySerializerDeserializer() {
     }
 
+    private byte[] metaBuffer = new byte[5];
+
+    /**
+     * Return a pure byte array which doesn't have the length encoding prefix
+     *
+     * @param in - Stream to read instance from.
+     * @return the content bytes read from the stream, without the length prefix
+     * @throws HyracksDataException if reading from the stream fails
+     */
     @Override
     public byte[] deserialize(DataInput in) throws HyracksDataException {
         try {
-            int length = in.readUnsignedShort();
-            byte[] bytes = new byte[length + ByteArrayPointable.SIZE_OF_LENGTH];
-            in.readFully(bytes, ByteArrayPointable.SIZE_OF_LENGTH, length);
-            ByteArrayPointable.putLength(length, bytes, 0);
+            int contentLength = VarLenIntEncoderDecoder.decode(in);
+            byte[] bytes = new byte[contentLength];
+            in.readFully(bytes, 0, contentLength);
             return bytes;
         } catch (IOException e) {
             throw new HyracksDataException(e);
         }
     }
 
+    /**
+     * Expects a content-only byte array, without the encoded length at the beginning.
+     * Writes the encoded length followed by the entire array to {@code out}.
+     */
     @Override
     public void serialize(byte[] instance, DataOutput out) throws HyracksDataException {
-
-        if (instance.length > ByteArrayPointable.MAX_LENGTH) {
-            throw new HyracksDataException(
-                    "encoded byte array too long: " + instance.length + " bytes");
+        try {
+            int metaLength = VarLenIntEncoderDecoder.encode(instance.length, metaBuffer, 0);
+            out.write(metaBuffer, 0, metaLength);
+            out.write(instance);
+        } catch (IOException e) {
+            throw new HyracksDataException(e);
         }
+    }
+
+    public void serialize(ByteArrayPointable byteArrayPtr, DataOutput out) throws HyracksDataException {
         try {
-            int realLength = ByteArrayPointable.getFullLength(instance, 0);
-            out.write(instance, 0, realLength);
+            out.write(byteArrayPtr.getByteArray(), byteArrayPtr.getStartOffset(), byteArrayPtr.getLength());
         } catch (IOException e) {
             throw new HyracksDataException(e);
         }
     }
 
+    // Takes a pure byte array without a length prefix; writes the encoded length, then the given range
     public void serialize(byte[] instance, int start, int length, DataOutput out) throws HyracksDataException {
-        if (length > ByteArrayPointable.MAX_LENGTH) {
-            throw new HyracksDataException(
-                    "encoded byte array too long: " + instance.length + " bytes");
-        }
+        int metaLength = VarLenIntEncoderDecoder.encode(length, metaBuffer, 0);
         try {
+            out.write(metaBuffer, 0, metaLength);
             out.write(instance, start, length);
         } catch (IOException e) {
             throw new HyracksDataException(e);

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/marshalling/UTF8StringSerializerDeserializer.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/marshalling/UTF8StringSerializerDeserializer.java b/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/marshalling/UTF8StringSerializerDeserializer.java
index 2435672..aee11bc 100644
--- a/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/marshalling/UTF8StringSerializerDeserializer.java
+++ b/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/marshalling/UTF8StringSerializerDeserializer.java
@@ -24,19 +24,21 @@ import java.io.IOException;
 
 import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.util.string.UTF8StringReader;
+import org.apache.hyracks.util.string.UTF8StringWriter;
 
 public class UTF8StringSerializerDeserializer implements ISerializerDeserializer<String> {
-    public static final UTF8StringSerializerDeserializer INSTANCE = new UTF8StringSerializerDeserializer();
 
     private static final long serialVersionUID = 1L;
+    private UTF8StringReader reader = new UTF8StringReader();
+    private UTF8StringWriter writer = new UTF8StringWriter();
 
-    private UTF8StringSerializerDeserializer() {
-    }
+    public UTF8StringSerializerDeserializer() {}
 
     @Override
     public String deserialize(DataInput in) throws HyracksDataException {
         try {
-            return in.readUTF();
+            return reader.readUTF(in);
         } catch (IOException e) {
             throw new HyracksDataException(e);
         }
@@ -45,7 +47,7 @@ public class UTF8StringSerializerDeserializer implements ISerializerDeserializer
     @Override
     public void serialize(String instance, DataOutput out) throws HyracksDataException {
         try {
-            out.writeUTF(instance);
+            writer.writeUTF8(instance, out);
         } catch (IOException e) {
             throw new HyracksDataException(e);
         }

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/normalizers/ByteArrayNormalizedKeyComputerFactory.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/normalizers/ByteArrayNormalizedKeyComputerFactory.java b/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/normalizers/ByteArrayNormalizedKeyComputerFactory.java
index b7d302b..3d081af 100644
--- a/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/normalizers/ByteArrayNormalizedKeyComputerFactory.java
+++ b/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/normalizers/ByteArrayNormalizedKeyComputerFactory.java
@@ -26,23 +26,12 @@ import org.apache.hyracks.data.std.primitive.ByteArrayPointable;
 public class ByteArrayNormalizedKeyComputerFactory implements INormalizedKeyComputerFactory {
     public static ByteArrayNormalizedKeyComputerFactory INSTANCE = new ByteArrayNormalizedKeyComputerFactory();
 
-    @Override public INormalizedKeyComputer createNormalizedKeyComputer() {
+    @Override
+    public INormalizedKeyComputer createNormalizedKeyComputer() {
         return new INormalizedKeyComputer() {
-            @Override public int normalize(byte[] bytes, int start, int length) {
-                int normalizedKey = 0;
-                int realLength = ByteArrayPointable.getLength(bytes, start);
-                for (int i = 0; i < 3; ++i) {
-                    normalizedKey <<= 8;
-                    if (i < realLength) {
-                        normalizedKey += bytes[start + ByteArrayPointable.SIZE_OF_LENGTH + i] & 0xff;
-                    }
-                }
-                // last byte, shift 7 instead of 8 to avoid negative number
-                normalizedKey <<= 7;
-                if (3 < realLength) {
-                    normalizedKey += (bytes[start + ByteArrayPointable.SIZE_OF_LENGTH + 3] & 0xfe) >> 1;
-                }
-                return normalizedKey;
+            @Override
+            public int normalize(byte[] bytes, int start, int length) {
+                return ByteArrayPointable.normalize(bytes, start);
             }
         };
     }

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/normalizers/UTF8StringNormalizedKeyComputerFactory.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/normalizers/UTF8StringNormalizedKeyComputerFactory.java b/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/normalizers/UTF8StringNormalizedKeyComputerFactory.java
index 941afda..79936de 100644
--- a/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/normalizers/UTF8StringNormalizedKeyComputerFactory.java
+++ b/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/normalizers/UTF8StringNormalizedKeyComputerFactory.java
@@ -20,7 +20,7 @@ package org.apache.hyracks.dataflow.common.data.normalizers;
 
 import org.apache.hyracks.api.dataflow.value.INormalizedKeyComputer;
 import org.apache.hyracks.api.dataflow.value.INormalizedKeyComputerFactory;
-import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
+import org.apache.hyracks.util.string.UTF8StringUtil;
 
 public class UTF8StringNormalizedKeyComputerFactory implements INormalizedKeyComputerFactory {
     private static final long serialVersionUID = 1L;
@@ -30,17 +30,7 @@ public class UTF8StringNormalizedKeyComputerFactory implements INormalizedKeyCom
         return new INormalizedKeyComputer() {
             @Override
             public int normalize(byte[] bytes, int start, int length) {
-                int len = UTF8StringPointable.getUTFLength(bytes, start);
-                int nk = 0;
-                int offset = start + 2;
-                for (int i = 0; i < 2; ++i) {
-                    nk <<= 16;
-                    if (i < len) {
-                        nk += ((int) UTF8StringPointable.charAt(bytes, offset)) & 0xffff;
-                        offset += UTF8StringPointable.charSize(bytes, offset);
-                    }
-                }
-                return nk;
+                return UTF8StringUtil.normalize(bytes, start);
             }
         };
     }

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayBase64ParserFactory.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayBase64ParserFactory.java b/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayBase64ParserFactory.java
index c71950b..c85d1b2 100644
--- a/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayBase64ParserFactory.java
+++ b/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayBase64ParserFactory.java
@@ -19,12 +19,12 @@
 
 package org.apache.hyracks.dataflow.common.data.parsers;
 
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.data.std.primitive.ByteArrayPointable;
-
 import java.io.DataOutput;
 import java.io.IOException;
-import java.util.Arrays;
+
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.dataflow.common.data.marshalling.ByteArraySerializerDeserializer;
+import org.apache.hyracks.util.bytes.Base64Parser;
 
 public class ByteArrayBase64ParserFactory implements IValueParserFactory {
 
@@ -33,20 +33,19 @@ public class ByteArrayBase64ParserFactory implements IValueParserFactory {
     private ByteArrayBase64ParserFactory() {
     }
 
-    @Override public IValueParser createValueParser() {
+    @Override
+    public IValueParser createValueParser() {
         return new IValueParser() {
-            private byte[] buffer;
-            private byte[] quadruplet = new byte[4];
+            Base64Parser parser = new Base64Parser();
+            ByteArraySerializerDeserializer serializer = new ByteArraySerializerDeserializer();
 
-            @Override public void parse(char[] input, int start, int length, DataOutput out)
+            @Override
+            public void parse(char[] input, int start, int length, DataOutput out)
                     throws HyracksDataException {
-                if (length % 4 != 0) {
-                    throw new HyracksDataException(
-                            "Invalid Base64 string, the length of the string should be a multiple of 4");
-                }
-                buffer = extractPointableArrayFromBase64String(input, start, length, buffer, quadruplet);
+
+                parser.generatePureByteArrayFromBase64String(input, start, length);
                 try {
-                    out.write(buffer, 0, ByteArrayPointable.getFullLength(buffer, 0));
+                    serializer.serialize(parser.getByteArray(), 0, parser.getLength(), out);
                 } catch (IOException e) {
                     throw new HyracksDataException(e);
                 }
@@ -54,194 +53,4 @@ public class ByteArrayBase64ParserFactory implements IValueParserFactory {
         };
     }
 
-    // The following base64 related implementation is copied/changed base on javax.xml.bind.DatatypeConverterImpl.java
-    private static final byte[] decodeMap = initDecodeMap();
-    private static final byte PADDING = 127;
-
-    private static byte[] initDecodeMap() {
-        byte[] map = new byte[128];
-        Arrays.fill(map, (byte) -1);
-
-        int i;
-        for (i = 'A'; i <= 'Z'; i++) {
-            map[i] = (byte) (i - 'A');
-        }
-        for (i = 'a'; i <= 'z'; i++) {
-            map[i] = (byte) (i - 'a' + 26);
-        }
-        for (i = '0'; i <= '9'; i++) {
-            map[i] = (byte) (i - '0' + 52);
-        }
-        map['+'] = 62;
-        map['/'] = 63;
-        map['='] = PADDING;
-
-        return map;
-    }
-
-    /**
-     * computes the length of binary data speculatively.
-     * Our requirement is to create byte[] of the exact length to store the binary data.
-     * If we do this in a straight-forward way, it takes two passes over the data.
-     * Experiments show that this is a non-trivial overhead (35% or so is spent on
-     * the first pass in calculating the length.)
-     * So the approach here is that we compute the length speculatively, without looking
-     * at the whole contents. The obtained speculative value is never less than the
-     * actual length of the binary data, but it may be bigger. So if the speculation
-     * goes wrong, we'll pay the cost of reallocation and buffer copying.
-     * If the base64 text is tightly packed with no indentation nor illegal char
-     * (like what most web services produce), then the speculation of this method
-     * will be correct, so we get the performance benefit.
-     */
-    private static int guessLength(char[] chars, int start, int length) {
-
-        // compute the tail '=' chars
-        int j = length - 1;
-        for (; j >= 0; j--) {
-            byte code = decodeMap[chars[start + j]];
-            if (code == PADDING) {
-                continue;
-            }
-            if (code == -1) // most likely this base64 text is indented. go with the upper bound
-            {
-                return length / 4 * 3;
-            }
-            break;
-        }
-
-        j++;    // text.charAt(j) is now at some base64 char, so +1 to make it the size
-        int padSize = length - j;
-        if (padSize > 2) // something is wrong with base64. be safe and go with the upper bound
-        {
-            return length / 4 * 3;
-        }
-
-        // so far this base64 looks like it's unindented tightly packed base64.
-        // take a chance and create an array with the expected size
-        return length / 4 * 3 - padSize;
-    }
-
-    private static int guessLength(byte[] chars, int start, int length) {
-
-        // compute the tail '=' chars
-        int j = length - 1;
-        for (; j >= 0; j--) {
-            byte code = decodeMap[chars[start + j]];
-            if (code == PADDING) {
-                continue;
-            }
-            if (code == -1) // most likely this base64 text is indented. go with the upper bound
-            {
-                return length / 4 * 3;
-            }
-            break;
-        }
-
-        j++;    // text.charAt(j) is now at some base64 char, so +1 to make it the size
-        int padSize = length - j;
-        if (padSize > 2) // something is wrong with base64. be safe and go with the upper bound
-        {
-            return length / 4 * 3;
-        }
-
-        // so far this base64 looks like it's unindented tightly packed base64.
-        // take a chance and create an array with the expected size
-        return length / 4 * 3 - padSize;
-    }
-
-    public static byte[] extractPointableArrayFromBase64String(byte[] input, int start, int length,
-            byte[] bufferNeedToReset, byte[] quadruplet)
-            throws HyracksDataException {
-        int contentOffset = ByteArrayPointable.SIZE_OF_LENGTH;
-        final int buflen = guessLength(input, start, length) + contentOffset;
-        bufferNeedToReset = ByteArrayHexParserFactory.ensureCapacity(buflen, bufferNeedToReset);
-        int byteArrayLength = parseBase64String(input, start, length, bufferNeedToReset, contentOffset,
-                quadruplet);
-        if (byteArrayLength > ByteArrayPointable.MAX_LENGTH) {
-            throw new HyracksDataException("The decoded byte array is too long.");
-        }
-        ByteArrayPointable.putLength(byteArrayLength, bufferNeedToReset, 0);
-        return bufferNeedToReset;
-    }
-
-    public static byte[] extractPointableArrayFromBase64String(char[] input, int start, int length,
-            byte[] bufferNeedToReset, byte[] quadruplet)
-            throws HyracksDataException {
-        int contentOffset = ByteArrayPointable.SIZE_OF_LENGTH;
-        final int buflen = guessLength(input, start, length) + contentOffset;
-        bufferNeedToReset = ByteArrayHexParserFactory.ensureCapacity(buflen, bufferNeedToReset);
-        int byteArrayLength = parseBase64String(input, start, length, bufferNeedToReset, contentOffset,
-                quadruplet);
-        if (byteArrayLength > ByteArrayPointable.MAX_LENGTH) {
-            throw new HyracksDataException("The decoded byte array is too long.");
-        }
-        ByteArrayPointable.putLength(byteArrayLength, bufferNeedToReset, 0);
-        return bufferNeedToReset;
-    }
-
-    static int parseBase64String(char[] input, int start, int length, byte[] out, int offset,
-            byte[] quadruplet) throws HyracksDataException {
-        int outLength = 0;
-
-        int i;
-        int q = 0;
-
-        // convert each quadruplet to three bytes.
-        for (i = 0; i < length; i++) {
-            char ch = input[start + i];
-            byte v = decodeMap[ch];
-
-            if (v == -1) {
-                throw new HyracksDataException("Invalid Base64 character");
-            }
-            quadruplet[q++] = v;
-
-            if (q == 4) {
-                // quadruplet is now filled.
-                out[offset + outLength++] = (byte) ((quadruplet[0] << 2) | (quadruplet[1] >> 4));
-                if (quadruplet[2] != PADDING) {
-                    out[offset + outLength++] = (byte) ((quadruplet[1] << 4) | (quadruplet[2] >> 2));
-                }
-                if (quadruplet[3] != PADDING) {
-                    out[offset + outLength++] = (byte) ((quadruplet[2] << 6) | (quadruplet[3]));
-                }
-                q = 0;
-            }
-        }
-
-        return outLength;
-    }
-
-    static int parseBase64String(byte[] input, int start, int length, byte[] out, int offset,
-            byte[] quadruplet) throws HyracksDataException {
-        int outLength = 0;
-
-        int i;
-        int q = 0;
-
-        // convert each quadruplet to three bytes.
-        for (i = 0; i < length; i++) {
-            char ch = (char)input[start + i];
-            byte v = decodeMap[ch];
-
-            if (v == -1) {
-                throw new HyracksDataException("Invalid Base64 character");
-            }
-            quadruplet[q++] = v;
-
-            if (q == 4) {
-                // quadruplet is now filled.
-                out[offset + outLength++] = (byte) ((quadruplet[0] << 2) | (quadruplet[1] >> 4));
-                if (quadruplet[2] != PADDING) {
-                    out[offset + outLength++] = (byte) ((quadruplet[1] << 4) | (quadruplet[2] >> 2));
-                }
-                if (quadruplet[3] != PADDING) {
-                    out[offset + outLength++] = (byte) ((quadruplet[2] << 6) | (quadruplet[3]));
-                }
-                q = 0;
-            }
-        }
-
-        return outLength;
-    }
 }

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayHexParserFactory.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayHexParserFactory.java b/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayHexParserFactory.java
index ec249f3..f1f1eb1 100644
--- a/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayHexParserFactory.java
+++ b/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayHexParserFactory.java
@@ -19,12 +19,12 @@
 
 package org.apache.hyracks.dataflow.common.data.parsers;
 
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.data.std.primitive.ByteArrayPointable;
-
 import java.io.DataOutput;
 import java.io.IOException;
-import java.util.Arrays;
+
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.dataflow.common.data.marshalling.ByteArraySerializerDeserializer;
+import org.apache.hyracks.util.bytes.HexParser;
 
 public class ByteArrayHexParserFactory implements IValueParserFactory {
     public static ByteArrayHexParserFactory INSTANCE = new ByteArrayHexParserFactory();
@@ -32,15 +32,18 @@ public class ByteArrayHexParserFactory implements IValueParserFactory {
     private ByteArrayHexParserFactory() {
     }
 
-    @Override public IValueParser createValueParser() {
+    @Override
+    public IValueParser createValueParser() {
         return new IValueParser() {
-            private byte[] buffer = new byte[] { };
+            HexParser parser = new HexParser();
+            ByteArraySerializerDeserializer serializer = new ByteArraySerializerDeserializer();
 
-            @Override public void parse(char[] input, int start, int length, DataOutput out)
+            @Override
+            public void parse(char[] input, int start, int length, DataOutput out)
                     throws HyracksDataException {
                 try {
-                    buffer = extractPointableArrayFromHexString(input, start, length, buffer);
-                    out.write(buffer, 0, ByteArrayPointable.getFullLength(buffer, 0));
+                    parser.generateByteArrayFromHexString(input, start, length);
+                    serializer.serialize(parser.getByteArray(), 0, parser.getLength(), out);
                 } catch (IOException e) {
                     throw new HyracksDataException(e);
                 }
@@ -48,85 +51,4 @@ public class ByteArrayHexParserFactory implements IValueParserFactory {
         };
     }
 
-    public static boolean isValidHexChar(char c) {
-        if (c >= '0' && c <= '9'
-                || c >= 'a' && c <= 'f'
-                || c >= 'A' && c <= 'F') {
-            return true;
-        }
-        return false;
-    }
-
-    public static byte[] extractPointableArrayFromHexString(char[] input, int start, int length,
-            byte[] bufferNeedToReset) throws HyracksDataException {
-        if (length % 2 != 0) {
-            throw new HyracksDataException(
-                    "Invalid hex string for binary type: the string length should be a muliple of 2.");
-        }
-        int byteLength = length / 2;
-        bufferNeedToReset = ensureCapacity(byteLength + ByteArrayPointable.SIZE_OF_LENGTH, bufferNeedToReset);
-        extractByteArrayFromHexString(input, start, length, bufferNeedToReset,
-                ByteArrayPointable.SIZE_OF_LENGTH);
-        if (byteLength > ByteArrayPointable.MAX_LENGTH) {
-            throw new HyracksDataException("The decoded byte array is too long.");
-        }
-        ByteArrayPointable.putLength(byteLength, bufferNeedToReset, 0);
-        return bufferNeedToReset;
-    }
-
-    public static byte[] extractPointableArrayFromHexString(byte[] input, int start, int length,
-            byte[] bufferNeedToReset) throws HyracksDataException {
-        if (length % 2 != 0) {
-            throw new HyracksDataException(
-                    "Invalid hex string for binary type: the string length should be a muliple of 2.");
-        }
-        int byteLength = length / 2;
-        bufferNeedToReset = ensureCapacity(byteLength + ByteArrayPointable.SIZE_OF_LENGTH, bufferNeedToReset);
-        extractByteArrayFromHexString(input, start, length, bufferNeedToReset,
-                ByteArrayPointable.SIZE_OF_LENGTH);
-        if (byteLength > ByteArrayPointable.MAX_LENGTH) {
-            throw new HyracksDataException("The decoded byte array is too long.");
-        }
-        ByteArrayPointable.putLength(byteLength, bufferNeedToReset, 0);
-        return bufferNeedToReset;
-    }
-
-    static byte[] ensureCapacity(int capacity, byte[] original) {
-        if (original == null) {
-            return new byte[capacity];
-        }
-        if (original.length < capacity) {
-            return Arrays.copyOf(original, capacity);
-        }
-        return original;
-    }
-
-    private static int getValueFromValidHexChar(char c) throws HyracksDataException {
-        if (!isValidHexChar(c)) {
-            throw new HyracksDataException("Invalid hex character : " + c);
-        }
-        if (c >= '0' && c <= '9') {
-            return c - '0';
-        }
-        if (c >= 'a' && c <= 'f') {
-            return 10 + c - 'a';
-        }
-        return 10 + c - 'A';
-    }
-
-    private static void extractByteArrayFromHexString(char[] input, int start, int length, byte[] output,
-            int offset) throws HyracksDataException {
-        for (int i = 0; i < length; i += 2) {
-            output[offset + i / 2] = (byte) ((getValueFromValidHexChar(input[start + i]) << 4) +
-                    getValueFromValidHexChar(input[start + i + 1]));
-        }
-    }
-
-    private static void extractByteArrayFromHexString(byte[] input, int start, int length, byte[] output,
-            int offset) throws HyracksDataException {
-        for (int i = 0; i < length; i += 2) {
-            output[offset + i / 2] = (byte) ((getValueFromValidHexChar((char)input[start + i]) << 4) +
-                    getValueFromValidHexChar((char)input[start + i + 1]));
-        }
-    }
 }

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/UTF8StringParserFactory.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/UTF8StringParserFactory.java b/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/UTF8StringParserFactory.java
index 7294e2d..58ee687 100644
--- a/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/UTF8StringParserFactory.java
+++ b/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/UTF8StringParserFactory.java
@@ -22,6 +22,7 @@ import java.io.DataOutput;
 import java.io.IOException;
 
 import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.util.string.UTF8StringWriter;
 
 public class UTF8StringParserFactory implements IValueParserFactory {
     public static final IValueParserFactory INSTANCE = new UTF8StringParserFactory();
@@ -34,53 +35,12 @@ public class UTF8StringParserFactory implements IValueParserFactory {
     @Override
     public IValueParser createValueParser() {
         return new IValueParser() {
-            private byte[] utf8;
+            private UTF8StringWriter writer = new UTF8StringWriter();
 
             @Override
             public void parse(char[] buffer, int start, int length, DataOutput out) throws HyracksDataException {
-                int utflen = 0;
-                for (int i = 0; i < length; i++) {
-                    char ch = buffer[i + start];
-                    if ((ch >= 0x0001) && (ch <= 0x007F)) {
-                        utflen++;
-                    } else if (ch > 0x07ff) {
-                        utflen += 3;
-                    } else {
-                        utflen += 2;
-                    }
-                }
-
-                if (utf8 == null || utf8.length < utflen + 2) {
-                    utf8 = new byte[utflen + 2];
-                }
-
-                int count = 0;
-                utf8[count++] = (byte) ((utflen >>> 8) & 0xff);
-                utf8[count++] = (byte) ((utflen >>> 0) & 0xff);
-
-                int i = 0;
-                for (i = 0; i < length; i++) {
-                    char ch = buffer[i + start];
-                    if (!((ch >= 0x0001) && (ch <= 0x007F)))
-                        break;
-                    utf8[count++] = (byte) ch;
-                }
-
-                for (; i < length; i++) {
-                    char ch = buffer[i + start];
-                    if ((ch >= 0x0001) && (ch <= 0x007F)) {
-                        utf8[count++] = (byte) ch;
-                    } else if (ch > 0x07FF) {
-                        utf8[count++] = (byte) (0xE0 | ((ch >> 12) & 0x0F));
-                        utf8[count++] = (byte) (0x80 | ((ch >> 6) & 0x3F));
-                        utf8[count++] = (byte) (0x80 | ((ch >> 0) & 0x3F));
-                    } else {
-                        utf8[count++] = (byte) (0xC0 | ((ch >> 6) & 0x1F));
-                        utf8[count++] = (byte) (0x80 | ((ch >> 0) & 0x3F));
-                    }
-                }
                 try {
-                    out.write(utf8, 0, utflen + 2);
+                    writer.writeUTF8(buffer, start, length, out);
                 } catch (IOException e) {
                     throw new HyracksDataException(e);
                 }

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/util/StringUtils.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/util/StringUtils.java b/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/util/StringUtils.java
deleted file mode 100644
index 3b05824..0000000
--- a/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/util/StringUtils.java
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.hyracks.dataflow.common.data.util;
-
-import java.io.DataOutput;
-import java.io.IOException;
-
-public class StringUtils {
-    public static int writeCharAsModifiedUTF8(char c, DataOutput dos) throws IOException {
-        if (c >= 0x0000 && c <= 0x007F) {
-            dos.writeByte(c);
-            return 1;
-        } else if (c <= 0x07FF) {
-            dos.writeByte((byte) (0xC0 | ((c >> 6) & 0x3F)));
-            dos.writeByte((byte) (0x80 | (c & 0x3F)));
-            return 2;
-        } else {
-            dos.writeByte((byte) (0xE0 | ((c >> 12) & 0x0F)));
-            dos.writeByte((byte) (0x80 | ((c >> 6) & 0x3F)));
-            dos.writeByte((byte) (0x80 | (c & 0x3F)));
-            return 3;
-        }
-    }
-
-    public static void writeUTF8Len(int len, DataOutput dos) throws IOException {
-        dos.write((len >>> 8) & 0xFF);
-        dos.write((len >>> 0) & 0xFF);
-    }
-
-
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/comm/io/largeobject/FrameFixedFieldTupleAppenderTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/comm/io/largeobject/FrameFixedFieldTupleAppenderTest.java b/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/comm/io/largeobject/FrameFixedFieldTupleAppenderTest.java
index ad4461d..05710ad 100644
--- a/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/comm/io/largeobject/FrameFixedFieldTupleAppenderTest.java
+++ b/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/comm/io/largeobject/FrameFixedFieldTupleAppenderTest.java
@@ -53,9 +53,9 @@ public class FrameFixedFieldTupleAppenderTest {
     FrameFixedFieldAppender appender;
     static ISerializerDeserializer[] fields = new ISerializerDeserializer[] {
             IntegerSerializerDeserializer.INSTANCE,
-            UTF8StringSerializerDeserializer.INSTANCE,
+            new UTF8StringSerializerDeserializer(),
             IntegerSerializerDeserializer.INSTANCE,
-            UTF8StringSerializerDeserializer.INSTANCE,
+            new UTF8StringSerializerDeserializer(),
     };
     static RecordDescriptor recordDescriptor = new RecordDescriptor(fields);
     static ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(recordDescriptor.getFieldCount());

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/marshalling/ByteArraySerializerDeserializerTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/marshalling/ByteArraySerializerDeserializerTest.java b/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/marshalling/ByteArraySerializerDeserializerTest.java
index 8534388..f0e831a 100644
--- a/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/marshalling/ByteArraySerializerDeserializerTest.java
+++ b/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/marshalling/ByteArraySerializerDeserializerTest.java
@@ -19,58 +19,44 @@
 
 package org.apache.hyracks.dataflow.common.data.marshalling;
 
-import org.apache.hyracks.data.std.primitive.ByteArrayPointable;
-import org.junit.Test;
+import static org.junit.Assert.assertTrue;
 
 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
 import java.io.DataInputStream;
 import java.io.DataOutputStream;
 import java.util.Arrays;
-import java.util.Random;
 
-import static org.junit.Assert.assertTrue;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.data.std.primitive.ByteArrayPointable;
+import org.apache.hyracks.util.string.UTF8StringSample;
+import org.junit.Test;
 
 public class ByteArraySerializerDeserializerTest {
-    Random random = new Random();
 
-    public static byte[] generateRandomBytes(int maxSize, Random random) {
-        int size = random.nextInt(maxSize);
-        byte[] bytes = new byte[size + ByteArrayPointable.SIZE_OF_LENGTH];
-        random.nextBytes(bytes);
-        ByteArrayPointable.putLength(size, bytes, 0);
-        return bytes;
-    }
+    ByteArrayPointable bytePtr = new ByteArrayPointable();
+    ByteArraySerializerDeserializer serder = new ByteArraySerializerDeserializer();
 
     @Test
     public void testSerializeDeserializeRandomBytes() throws Exception {
-        for (int i = 0; i < 10; ++i) {
-            ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
-            byte[] randomBytes = generateRandomBytes(ByteArrayPointable.MAX_LENGTH + 1, random);
+        testOneByteArray(UTF8StringSample.EMPTY_STRING.getBytes());
+        testOneByteArray(UTF8StringSample.STRING_UTF8_MIX.getBytes());
+        testOneByteArray(UTF8StringSample.STRING_LEN_128.getBytes());
+        testOneByteArray(UTF8StringSample.STRING_LEN_MEDIUM.getBytes());
+        testOneByteArray(UTF8StringSample.STRING_LEN_LARGE.getBytes());
+    }
 
-            ByteArraySerializerDeserializer.INSTANCE.serialize(randomBytes, new DataOutputStream(outputStream));
-            byte[] result = outputStream.toByteArray();
-            assertTrue(Arrays.equals(randomBytes, result));
+    void testOneByteArray(byte[] testBytes) throws HyracksDataException {
+        ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
 
-            ByteArrayInputStream inputStream = new ByteArrayInputStream(result);
-            assertTrue(Arrays.equals(randomBytes,
-                    ByteArraySerializerDeserializer.INSTANCE.deserialize(new DataInputStream(inputStream))));
-        }
+        serder.serialize(testBytes, new DataOutputStream(outputStream));
 
-    }
+        bytePtr.set(outputStream.toByteArray(), 0, outputStream.size());
+        assertTrue(Arrays.equals(testBytes, ByteArrayPointable.copyContent(bytePtr)));
+
+        ByteArrayInputStream inputStream = new ByteArrayInputStream(outputStream.toByteArray());
+        assertTrue(Arrays.equals(testBytes, serder.deserialize(new DataInputStream(inputStream))));
 
-    @Test
-    public void testPutGetLength() throws Exception {
-        final int size = 5;
-        byte[] newBytes = new byte[size];
-        for (int i = 0; i < 10; ++i) {
-            int length = random.nextInt(ByteArrayPointable.MAX_LENGTH +1);
-            for (int j = 0; j < size - 1; ++j) {
-                ByteArrayPointable.putLength(length, newBytes, j);
-                int result = ByteArrayPointable.getLength(newBytes, j);
-                assertTrue(result == length);
-            }
-        }
     }
 
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/normalizers/ByteArrayNormalizedKeyComputerFactoryTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/normalizers/ByteArrayNormalizedKeyComputerFactoryTest.java b/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/normalizers/ByteArrayNormalizedKeyComputerFactoryTest.java
index 1645631..4d3eb49 100644
--- a/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/normalizers/ByteArrayNormalizedKeyComputerFactoryTest.java
+++ b/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/normalizers/ByteArrayNormalizedKeyComputerFactoryTest.java
@@ -19,14 +19,13 @@
 
 package org.apache.hyracks.dataflow.common.data.normalizers;
 
-import org.apache.hyracks.api.dataflow.value.INormalizedKeyComputer;
-import org.apache.hyracks.data.std.primitive.ByteArrayPointable;
-import org.apache.hyracks.dataflow.common.data.marshalling.ByteArraySerializerDeserializerTest;
-import org.junit.Test;
+import static junit.framework.Assert.assertTrue;
 
 import java.util.Random;
 
-import static junit.framework.Assert.assertTrue;
+import org.apache.hyracks.api.dataflow.value.INormalizedKeyComputer;
+import org.apache.hyracks.data.std.primitive.ByteArrayPointable;
+import org.junit.Test;
 
 public class ByteArrayNormalizedKeyComputerFactoryTest {
 
@@ -34,33 +33,21 @@ public class ByteArrayNormalizedKeyComputerFactoryTest {
 
     INormalizedKeyComputer computer = ByteArrayNormalizedKeyComputerFactory.INSTANCE.createNormalizedKeyComputer();
 
-    public static ByteArrayPointable generateRandomByteArrayPointable(int maxSize, Random random) {
-        byte[] bytes = ByteArraySerializerDeserializerTest
-                .generateRandomBytes(maxSize, random);
-        ByteArrayPointable pointable = new ByteArrayPointable();
-        pointable.set(bytes, 0, bytes.length);
-        return pointable;
-    }
-
     @Test
     public void testRandomNormalizedKey() {
         for (int i = 0; i < 10; ++i) {
-            ByteArrayPointable pointable1 = generateRandomByteArrayPointable(ByteArrayPointable.MAX_LENGTH + 1,
-                    random);
-
-            ByteArrayPointable pointable2 = generateRandomByteArrayPointable(ByteArrayPointable.MAX_LENGTH + 1,
-                    random);
+            ByteArrayPointable pointable1 = generateRandomByteArrayPointableWithFixLength(
+                    Math.abs(random.nextInt((i + 1) * 10)), random);
+            ByteArrayPointable pointable2 = generateRandomByteArrayPointableWithFixLength(
+                    Math.abs(random.nextInt((i + 1) * 10)), random);
             assertNormalizeValue(pointable1, pointable2, computer);
         }
     }
 
     public static ByteArrayPointable generateRandomByteArrayPointableWithFixLength(int length, Random random) {
-        byte[] bytes = new byte[length + ByteArrayPointable.SIZE_OF_LENGTH];
+        byte[] bytes = new byte[length];
         random.nextBytes(bytes);
-        ByteArrayPointable pointable = new ByteArrayPointable();
-        ByteArrayPointable.putLength(length, bytes, 0);
-        pointable.set(bytes, 0, bytes.length);
-        return pointable;
+        return ByteArrayPointable.generatePointableFromPureBytes(bytes);
     }
 
     public static void assertNormalizeValue(ByteArrayPointable pointable1, ByteArrayPointable pointable2,
@@ -82,11 +69,12 @@ public class ByteArrayNormalizedKeyComputerFactoryTest {
             assertNormalizeValue(pointable1, pointable2, computer);
         }
 
-        byte[] bytes1 = new byte[] { 0, 4, 0, 25, 34, 42 };
-        byte[] bytes2 = new byte[] { 0, 4, (byte) 130, 25, 34, 42 };
+        ByteArrayPointable ptr1 = ByteArrayPointable.generatePointableFromPureBytes(new byte[] { 0, 25, 34, 42 });
+        ByteArrayPointable ptr2 = ByteArrayPointable.generatePointableFromPureBytes(
+                new byte[] { (byte) 130, 25, 34, 42 });
 
-        int n1 = computer.normalize(bytes1, 0, bytes1.length);
-        int n2 = computer.normalize(bytes2, 0, bytes2.length);
+        int n1 = computer.normalize(ptr1.getByteArray(), ptr1.getStartOffset(), ptr1.getLength());
+        int n2 = computer.normalize(ptr2.getByteArray(), ptr2.getStartOffset(), ptr2.getLength());
         assertTrue(n1 < n2);
 
     }

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayBase64ParserFactoryTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayBase64ParserFactoryTest.java b/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayBase64ParserFactoryTest.java
index fe8b03b..cec6add 100644
--- a/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayBase64ParserFactoryTest.java
+++ b/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayBase64ParserFactoryTest.java
@@ -19,32 +19,25 @@
 
 package org.apache.hyracks.dataflow.common.data.parsers;
 
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.data.std.primitive.ByteArrayPointable;
-import junit.framework.TestCase;
-import org.junit.Test;
+import static org.apache.hyracks.data.std.primitive.ByteArrayPointable.copyContent;
 
-import javax.xml.bind.DatatypeConverter;
 import java.io.ByteArrayOutputStream;
 import java.io.DataOutputStream;
 import java.util.Arrays;
 
-import static org.apache.hyracks.dataflow.common.data.parsers.ByteArrayHexParserFactoryTest.subArray;
+import javax.xml.bind.DatatypeConverter;
+
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.data.std.primitive.ByteArrayPointable;
+import org.junit.Test;
+
+import junit.framework.TestCase;
 
 public class ByteArrayBase64ParserFactoryTest extends TestCase {
 
     @Test
     public void testParseBase64String() throws HyracksDataException {
-        IValueParser parser = ByteArrayBase64ParserFactory.INSTANCE.createValueParser();
-        ByteArrayOutputStream bos = new ByteArrayOutputStream();
-        DataOutputStream outputStream = new DataOutputStream(bos);
-        String empty = "";
-
-        parser.parse(empty.toCharArray(), 0, empty.length(), outputStream);
-
-        byte[] cache = bos.toByteArray();
-        assertTrue(ByteArrayPointable.getLength(cache, 0) == 0);
-        assertTrue(DatatypeConverter.printBase64Binary(subArray(cache, 2)).equalsIgnoreCase(empty));
+        testOneString("");
 
         StringBuilder everyChar = new StringBuilder();
         for (char c = 'a'; c <= 'z'; c++) {
@@ -58,21 +51,26 @@ public class ByteArrayBase64ParserFactoryTest extends TestCase {
         }
         everyChar.append("+/");
 
-        bos.reset();
-        parser.parse(everyChar.toString().toCharArray(), 0, everyChar.length(), outputStream);
-        cache = bos.toByteArray();
-        byte[] answer = DatatypeConverter.parseBase64Binary(everyChar.toString());
-        assertTrue(ByteArrayPointable.getLength(cache, 0) == answer.length);
-        assertTrue(Arrays.equals(answer, subArray(cache, 2)));
+        testOneString(everyChar.toString());
+
+        byte[] longBytes = new byte[65536];
+        Arrays.fill(longBytes, (byte) 0xff);
+        String maxString = DatatypeConverter.printBase64Binary(longBytes);
 
-        byte[] maxBytes = new byte[ByteArrayPointable.MAX_LENGTH];
-        Arrays.fill(maxBytes, (byte) 0xff);
-        String maxString = DatatypeConverter.printBase64Binary(maxBytes);
-        bos.reset();
-        parser.parse(maxString.toCharArray(), 0, maxString.length(), outputStream);
-        cache = bos.toByteArray();
-        assertTrue(ByteArrayPointable.getLength(cache, 0) == maxBytes.length);
-        assertTrue(Arrays.equals(maxBytes, subArray(cache, 2)));
+        testOneString(maxString);
     }
 
+    void testOneString(String test) throws HyracksDataException {
+        IValueParser parser = ByteArrayBase64ParserFactory.INSTANCE.createValueParser();
+        ByteArrayOutputStream bos = new ByteArrayOutputStream();
+        DataOutputStream outputStream = new DataOutputStream(bos);
+        ByteArrayPointable bytePtr = new ByteArrayPointable();
+
+        parser.parse(test.toCharArray(), 0, test.length(), outputStream);
+        bytePtr.set(bos.toByteArray(), 0, bos.size());
+
+        byte[] answer = DatatypeConverter.parseBase64Binary(test);
+        assertTrue(bytePtr.getContentLength() == answer.length);
+        assertTrue(Arrays.equals(answer, copyContent(bytePtr)));
+    }
 }
\ No newline at end of file


Mime
View raw message