phoenix-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jamestay...@apache.org
Subject [3/4] phoenix git commit: PHOENIX-1287 Use the joni byte[] regex engine in place of j.u.regex (Shuxiong Ye)
Date Tue, 14 Apr 2015 19:40:32 GMT
http://git-wip-us.apache.org/repos/asf/phoenix/blob/52b0f23d/phoenix-core/src/main/java/org/apache/phoenix/parse/RegexpSubstrParseNode.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/main/java/org/apache/phoenix/parse/RegexpSubstrParseNode.java b/phoenix-core/src/main/java/org/apache/phoenix/parse/RegexpSubstrParseNode.java
new file mode 100644
index 0000000..a975550
--- /dev/null
+++ b/phoenix-core/src/main/java/org/apache/phoenix/parse/RegexpSubstrParseNode.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.parse;
+
+import java.sql.SQLException;
+import java.util.List;
+
+import org.apache.phoenix.compile.StatementContext;
+import org.apache.phoenix.expression.Expression;
+import org.apache.phoenix.expression.function.ByteBasedRegexpSubstrFunction;
+import org.apache.phoenix.expression.function.RegexpSubstrFunction;
+import org.apache.phoenix.expression.function.StringBasedRegexpSubstrFunction;
+import org.apache.phoenix.query.QueryServices;
+import org.apache.phoenix.query.QueryServicesOptions;
+
+/**
+ * Parse node for {@link RegexpSubstrFunction}. Besides representing the call in the parse tree, it
+ * is the factory that chooses between the byte-based and the string-based implementation, driven
+ * by the QueryServices.USE_BYTE_BASED_REGEX_ATTRIB property.
+ */
+public class RegexpSubstrParseNode extends FunctionParseNode {
+
+    RegexpSubstrParseNode(String name, List<ParseNode> children, BuiltInFunctionInfo info) {
+        super(name, children, info);
+    }
+
+    /**
+     * Creates the expression for this node.
+     *
+     * @param children the argument expressions, handed unchanged to the chosen function
+     * @param context supplies the connection whose query-services properties are consulted
+     * @return a ByteBasedRegexpSubstrFunction when the property reads true, otherwise a
+     *         StringBasedRegexpSubstrFunction
+     */
+    @Override
+    public Expression create(List<Expression> children, StatementContext context)
+            throws SQLException {
+        // QueryServicesOptions.DEFAULT_USE_BYTE_BASED_REGEX is passed as the lookup's default.
+        boolean byteBased =
+                context.getConnection().getQueryServices().getProps().getBoolean(
+                    QueryServices.USE_BYTE_BASED_REGEX_ATTRIB,
+                    QueryServicesOptions.DEFAULT_USE_BYTE_BASED_REGEX);
+        return byteBased
+                ? new ByteBasedRegexpSubstrFunction(children)
+                : new StringBasedRegexpSubstrFunction(children);
+    }
+}

http://git-wip-us.apache.org/repos/asf/phoenix/blob/52b0f23d/phoenix-core/src/main/java/org/apache/phoenix/query/QueryServices.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/main/java/org/apache/phoenix/query/QueryServices.java b/phoenix-core/src/main/java/org/apache/phoenix/query/QueryServices.java
index 7a911e7..23f3288 100644
--- a/phoenix-core/src/main/java/org/apache/phoenix/query/QueryServices.java
+++ b/phoenix-core/src/main/java/org/apache/phoenix/query/QueryServices.java
@@ -48,6 +48,8 @@ public interface QueryServices extends SQLCloseable {
     public static final String HBASE_CLIENT_PRINCIPAL = "hbase.myclient.principal";
     public static final String SPOOL_DIRECTORY = "phoenix.spool.directory";
     public static final String AUTO_COMMIT_ATTRIB = "phoenix.connection.autoCommit";
+    // joni byte regex engine setting
+    public static final String USE_BYTE_BASED_REGEX_ATTRIB = "phoenix.regex.byteBased";
 
     /**
 	 * max size to spool the the result into

http://git-wip-us.apache.org/repos/asf/phoenix/blob/52b0f23d/phoenix-core/src/main/java/org/apache/phoenix/query/QueryServicesOptions.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/main/java/org/apache/phoenix/query/QueryServicesOptions.java b/phoenix-core/src/main/java/org/apache/phoenix/query/QueryServicesOptions.java
index 3561663..b003b11 100644
--- a/phoenix-core/src/main/java/org/apache/phoenix/query/QueryServicesOptions.java
+++ b/phoenix-core/src/main/java/org/apache/phoenix/query/QueryServicesOptions.java
@@ -61,16 +61,15 @@ import static org.apache.phoenix.query.QueryServices.STATS_UPDATE_FREQ_MS_ATTRIB
 import static org.apache.phoenix.query.QueryServices.STATS_USE_CURRENT_TIME_ATTRIB;
 import static org.apache.phoenix.query.QueryServices.THREAD_POOL_SIZE_ATTRIB;
 import static org.apache.phoenix.query.QueryServices.THREAD_TIMEOUT_MS_ATTRIB;
+import static org.apache.phoenix.query.QueryServices.USE_BYTE_BASED_REGEX_ATTRIB;
 import static org.apache.phoenix.query.QueryServices.USE_INDEXES_ATTRIB;
 
 import java.util.Map.Entry;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.Coprocessor;
-import org.apache.hadoop.hbase.ipc.PhoenixRpcSchedulerFactory;
 import org.apache.hadoop.hbase.ipc.RpcControllerFactory;
 import org.apache.hadoop.hbase.ipc.controller.ClientRpcControllerFactory;
-import org.apache.hadoop.hbase.ipc.controller.ServerRpcControllerFactory;
 import org.apache.hadoop.hbase.regionserver.wal.WALCellCodec;
 import org.apache.phoenix.schema.SaltingUtil;
 import org.apache.phoenix.trace.util.Tracing;
@@ -190,7 +189,8 @@ public class QueryServicesOptions {
     public static final boolean DEFAULT_IS_METRICS_ENABLED = true;
     
     private static final String DEFAULT_CLIENT_RPC_CONTROLLER_FACTORY = ClientRpcControllerFactory.class.getName();
-    
+    public static final boolean DEFAULT_USE_BYTE_BASED_REGEX = true;
+
     private final Configuration config;
 
     private QueryServicesOptions(Configuration config) {
@@ -244,6 +244,7 @@ public class QueryServicesOptions {
             .setIfUnset(DELAY_FOR_SCHEMA_UPDATE_CHECK, DEFAULT_DELAY_FOR_SCHEMA_UPDATE_CHECK)
             .setIfUnset(METRICS_ENABLED, DEFAULT_IS_METRICS_ENABLED)
             .setIfUnset(RpcControllerFactory.CUSTOM_CONTROLLER_CONF_KEY, DEFAULT_CLIENT_RPC_CONTROLLER_FACTORY)
+            .setIfUnset(USE_BYTE_BASED_REGEX_ATTRIB, DEFAULT_USE_BYTE_BASED_REGEX)
             ;
         // HBase sets this to 1, so we reset it to something more appropriate.
         // Hopefully HBase will change this, because we can't know if a user set
@@ -447,6 +448,10 @@ public class QueryServicesOptions {
         return config.getBoolean(METRICS_ENABLED, DEFAULT_IS_METRICS_ENABLED);
     }
     
+    public boolean isUseByteBasedRegex() { // reads USE_BYTE_BASED_REGEX_ATTRIB from config
+        return config.getBoolean(USE_BYTE_BASED_REGEX_ATTRIB, DEFAULT_USE_BYTE_BASED_REGEX);
+    }
+
     public QueryServicesOptions setMaxServerCacheTTLMs(int ttl) {
         return set(MAX_SERVER_CACHE_TIME_TO_LIVE_MS_ATTRIB, ttl);
     }
@@ -521,4 +526,8 @@ public class QueryServicesOptions {
         return this;
     }
 
+    public QueryServicesOptions setUseByteBasedRegex(boolean flag) { // stores flag in config
+        config.setBoolean(USE_BYTE_BASED_REGEX_ATTRIB, flag);
+        return this; // returned so that setter calls can be chained
+    }
 }

http://git-wip-us.apache.org/repos/asf/phoenix/blob/52b0f23d/phoenix-core/src/main/java/org/apache/phoenix/schema/types/PArrayDataType.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/main/java/org/apache/phoenix/schema/types/PArrayDataType.java b/phoenix-core/src/main/java/org/apache/phoenix/schema/types/PArrayDataType.java
index b6dce34..c6861f7 100644
--- a/phoenix-core/src/main/java/org/apache/phoenix/schema/types/PArrayDataType.java
+++ b/phoenix-core/src/main/java/org/apache/phoenix/schema/types/PArrayDataType.java
@@ -21,6 +21,8 @@ import java.io.DataOutputStream;
 import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.text.Format;
+import java.util.LinkedList;
+import java.util.List;
 
 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
 import org.apache.hadoop.hbase.util.Bytes;
@@ -755,4 +757,93 @@ public abstract class PArrayDataType<T> extends PDataType<T> {
         buf.append(']');
         return buf.toString();
     }
+
+    /**
+     * Builds the serialized bytes of an array value one already-encoded element at a time. Call
+     * appendElem for each element, then getBytesAndClose once; after that the builder is closed:
+     * appendElem returns false and getBytesAndClose returns null.
+     */
+    public static class PArrayDataTypeBytesArrayBuilder<T> {
+        private static final int BYTE_ARRAY_DEFAULT_SIZE = 128;
+
+        private final PDataType baseType;
+        private final SortOrder sortOrder;
+        // stream position at which each appended element starts (variable-width types only)
+        private final List<Integer> offsetPos;
+        private TrustedByteArrayOutputStream byteStream;
+        private DataOutputStream oStream;
+        // empty elements seen since the last call to serializeNulls
+        private int nulls;
+
+        public PArrayDataTypeBytesArrayBuilder(PDataType baseType, SortOrder sortOrder) {
+            this.baseType = baseType;
+            this.sortOrder = sortOrder;
+            offsetPos = new LinkedList<Integer>();
+            byteStream = new TrustedByteArrayOutputStream(BYTE_ARRAY_DEFAULT_SIZE);
+            oStream = new DataOutputStream(byteStream);
+            nulls = 0;
+        }
+
+        // Closes the streams and marks the builder closed, even when closing throws.
+        private void close() {
+            try {
+                // closing the DataOutputStream also closes the byteStream it wraps
+                if (oStream != null) oStream.close();
+            } catch (IOException ignored) {
+                // best effort, as before: a failure while closing is not reported
+            } finally {
+                byteStream = null;
+                oStream = null;
+            }
+        }
+
+        /** Appends all of bytes as the next element; see appendElem(byte[], int, int). */
+        public boolean appendElem(byte[] bytes) {
+            return appendElem(bytes, 0, bytes.length);
+        }
+
+        /**
+         * Appends bytes[offset, offset + len) as the next element. For a variable-width base type
+         * a zero-length element is counted as a null. The caller's array is never modified.
+         *
+         * @return true on success; false if the builder is closed or the write failed
+         */
+        public boolean appendElem(byte[] bytes, int offset, int len) {
+            if (oStream == null || byteStream == null) return false;
+            try {
+                boolean varWidth = !baseType.isFixedWidth();
+                if (varWidth && len == 0) {
+                    offsetPos.add(byteStream.size());
+                    nulls++;
+                    return true;
+                }
+                if (varWidth) {
+                    nulls = serializeNulls(oStream, nulls);
+                    offsetPos.add(byteStream.size());
+                }
+                if (sortOrder == SortOrder.DESC) {
+                    // Invert into a scratch copy: inverting in place would silently rewrite the
+                    // caller's buffer, which may still be read elsewhere.
+                    byte[] inverted = new byte[len];
+                    SortOrder.invert(bytes, offset, inverted, 0, len);
+                    bytes = inverted;
+                    offset = 0;
+                }
+                oStream.write(bytes, offset, len);
+                if (varWidth) oStream.write(QueryConstants.SEPARATOR_BYTE);
+                return true;
+            } catch (IOException ignored) {
+                return false; // the failure is reported through the return value
+            }
+        }
+
+        /**
+         * Serializes everything appended so far and closes the builder.
+         *
+         * @return the array bytes (an empty array when a variable-width array has no elements),
+         *         or null if the builder was already closed or serialization failed
+         */
+        public byte[] getBytesAndClose() {
+            if (oStream == null || byteStream == null) return null;
+            try {
+                if (!baseType.isFixedWidth()) {
+                    int noOfElements = offsetPos.size();
+                    // With no elements there is no last offset to look up below.
+                    if (noOfElements == 0) return new byte[0];
+                    int[] offsetPosArray = new int[noOfElements];
+                    int index = 0;
+                    for (int pos : offsetPos) {
+                        offsetPosArray[index++] = pos;
+                    }
+                    PArrayDataType.writeEndSeperatorForVarLengthArray(oStream);
+                    noOfElements =
+                            PArrayDataType.serailizeOffsetArrayIntoStream(oStream, byteStream,
+                                noOfElements, offsetPosArray[noOfElements - 1], offsetPosArray);
+                    serializeHeaderInfoIntoStream(oStream, noOfElements);
+                }
+                ImmutableBytesWritable ptr = new ImmutableBytesWritable();
+                ptr.set(byteStream.getBuffer(), 0, byteStream.size());
+                return ByteUtil.copyKeyBytesIfNecessary(ptr);
+            } catch (IOException ignored) {
+                return null; // the failure is reported through the null return
+            } finally {
+                close();
+            }
+        }
+    }
 }

http://git-wip-us.apache.org/repos/asf/phoenix/blob/52b0f23d/phoenix-core/src/main/java/org/apache/phoenix/util/StringUtil.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/main/java/org/apache/phoenix/util/StringUtil.java b/phoenix-core/src/main/java/org/apache/phoenix/util/StringUtil.java
index 4a7ae38..89ae43b 100644
--- a/phoenix-core/src/main/java/org/apache/phoenix/util/StringUtil.java
+++ b/phoenix-core/src/main/java/org/apache/phoenix/util/StringUtil.java
@@ -115,7 +115,13 @@ public class StringUtil {
     }
 
     public static int getBytesInChar(byte b, SortOrder sortOrder) {
-    	Preconditions.checkNotNull(sortOrder);
+        int ret = getBytesInCharNoException(b, sortOrder); // -1 marks an invalid first byte
+        if (ret == -1) throw new UndecodableByteException(b); // this variant still throws
+        return ret;
+    }
+
+    private static int getBytesInCharNoException(byte b, SortOrder sortOrder) {
+        Preconditions.checkNotNull(sortOrder);
         if (sortOrder == SortOrder.DESC) {
             b = SortOrder.invert(b);
         }
@@ -128,8 +134,7 @@ public class StringUtil {
             return 3;
         if ((c & BYTES_4_MASK) == 0xF0)
             return 4;
-        // Any thing else in the first byte is invalid
-        throw new UndecodableByteException(b);
+        return -1;
     }
 
     public static int calculateUTF8Length(byte[] bytes, int offset, int length, SortOrder sortOrder) {
@@ -143,6 +148,63 @@ public class StringUtil {
         return length;
     }
 
+    /**
+     * Returns the byte offset of the character that follows the one starting at curPos.
+     *
+     * @param bytes array holding UTF-8 encoded text
+     * @param curPos offset of the first byte of the current character
+     * @param range exclusive end of the text inside bytes
+     * @param sortOrder sort order of the stored bytes
+     * @return the next character's offset, or -1 when curPos is already at or past range, the
+     *         byte at curPos is not a valid leading byte, or no character starts before range
+     */
+    private static int calculateNextCharOffset(byte[] bytes, int curPos, int range,
+            SortOrder sortOrder) {
+        // Nothing to decode at or past the end. Without this check an empty string that ends at
+        // bytes.length made bytes[curPos] throw ArrayIndexOutOfBoundsException.
+        if (curPos >= range) return -1;
+        int width = getBytesInCharNoException(bytes[curPos], sortOrder);
+        if (width == -1) return -1;
+        int next = curPos + width;
+        return next < range ? next : -1;
+    }
+
+    /**
+     * Returns the byte offset of the character that precedes the one starting at curPos.
+     *
+     * @param bytes array holding UTF-8 encoded text
+     * @param curPos offset at which the current character starts
+     * @param offset offset at which the text starts; bytes before it are never inspected
+     * @param sortOrder sort order of the stored bytes
+     * @return the previous character's offset, or -1 if none of the up to four preceding bytes
+     *         is a leading byte announcing exactly the distance back to curPos
+     */
+    private static int calculatePreCharOffset(byte[] bytes, int curPos, int offset,
+            SortOrder sortOrder) {
+        final int last = curPos - 1;
+        // a UTF-8 character occupies at most four bytes, so look back no further than that
+        final int lowest = Math.max(offset, last - 3);
+        for (int pos = last; pos >= lowest; --pos) {
+            int distance = last - pos + 1;
+            if (getBytesInCharNoException(bytes[pos], sortOrder) == distance) {
+                return pos;
+            }
+        }
+        return -1;
+    }
+
+    /**
+     * Translates a character offset into a byte offset within UTF-8 encoded text.
+     *
+     * @param bytes array holding the UTF-8 encoded string
+     * @param offset index in bytes at which the string starts
+     * @param length number of bytes the string occupies
+     * @param sortOrder sort order of the stored bytes
+     * @param offsetInStr character offset into the string; a negative value counts backwards
+     *        from the end of the string
+     * @return the byte offset corresponding to offsetInStr, or -1 if offsetInStr is out of range
+     *         or the position cannot be reached because a byte is undecodable
+     */
+    public static int calculateUTF8Offset(byte[] bytes, int offset, int length,
+            SortOrder sortOrder, int offsetInStr) {
+        final int end = offset + length;
+        int pos;
+        if (offsetInStr >= 0) {
+            // walk forward one character at a time; zero steps leaves pos at offset
+            pos = offset;
+            for (int remaining = offsetInStr; remaining > 0; --remaining) {
+                pos = calculateNextCharOffset(bytes, pos, end, sortOrder);
+                if (pos == -1) return -1;
+            }
+        } else {
+            // walk backward from just past the last byte
+            pos = end;
+            for (int remaining = offsetInStr; remaining < 0; ++remaining) {
+                pos = calculatePreCharOffset(bytes, pos, offset, sortOrder);
+                // calculatePreCharOffset reports failure as -1, which is smaller than offset
+                if (pos < offset) return -1;
+            }
+        }
+        return pos;
+    }
+
     // Given an array of bytes containing encoding utf-8 encoded strings, the offset and a length
     // parameter, return the actual index into the byte array which would represent a substring
     // of <length> starting from the character at <offset>. We assume the <offset> is the start

http://git-wip-us.apache.org/repos/asf/phoenix/blob/52b0f23d/phoenix-core/src/test/java/org/apache/phoenix/compile/WhereOptimizerTest.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/test/java/org/apache/phoenix/compile/WhereOptimizerTest.java b/phoenix-core/src/test/java/org/apache/phoenix/compile/WhereOptimizerTest.java
index 94b25d0..f40afc3 100644
--- a/phoenix-core/src/test/java/org/apache/phoenix/compile/WhereOptimizerTest.java
+++ b/phoenix-core/src/test/java/org/apache/phoenix/compile/WhereOptimizerTest.java
@@ -731,7 +731,8 @@ public class WhereOptimizerTest extends BaseConnectionlessQueryTest {
         assertEquals(
                 rowKeyFilter(like(
                     ENTITY_ID,
-                    likeArg)),
+                    likeArg,
+                    context)),
                 filter);
 
         byte[] startRow = ByteUtil.concat(
@@ -757,7 +758,8 @@ public class WhereOptimizerTest extends BaseConnectionlessQueryTest {
         assertEquals(
                 rowKeyFilter(like(
                     ENTITY_ID,
-                    likeArg)),
+                    likeArg,
+                    context)),
                 filter);
 
         byte[] startRow = ByteUtil.concat(
@@ -783,7 +785,8 @@ public class WhereOptimizerTest extends BaseConnectionlessQueryTest {
         assertEquals(
                 rowKeyFilter(like(
                     substr(ENTITY_ID,1,10),
-                    likeArg)),
+                    likeArg,
+                    context)),
                 filter);
 
         byte[] startRow = ByteUtil.concat(
@@ -809,7 +812,8 @@ public class WhereOptimizerTest extends BaseConnectionlessQueryTest {
         assertEquals(
                 rowKeyFilter(like(
                     substr(ENTITY_ID,4,10),
-                    likeArg)),
+                    likeArg,
+                    context)),
                 filter);
 
         byte[] startRow = PVarchar.INSTANCE.toBytes(tenantId);
@@ -832,7 +836,8 @@ public class WhereOptimizerTest extends BaseConnectionlessQueryTest {
         assertEquals(
                 rowKeyFilter(like(
                     ENTITY_ID,
-                    likeArg)),
+                    likeArg,
+                    context)),
                 filter);
 
         byte[] startRow = PVarchar.INSTANCE.toBytes(tenantId);
@@ -855,7 +860,8 @@ public class WhereOptimizerTest extends BaseConnectionlessQueryTest {
         assertEquals(
                 rowKeyFilter(not(like(
                     ENTITY_ID,
-                    likeArg))),
+                    likeArg,
+                    context))),
                 filter);
 
         byte[] startRow = PVarchar.INSTANCE.toBytes(tenantId);

http://git-wip-us.apache.org/repos/asf/phoenix/blob/52b0f23d/phoenix-core/src/test/java/org/apache/phoenix/expression/ILikeExpressionTest.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/test/java/org/apache/phoenix/expression/ILikeExpressionTest.java b/phoenix-core/src/test/java/org/apache/phoenix/expression/ILikeExpressionTest.java
index 3033edf..e66ad13 100644
--- a/phoenix-core/src/test/java/org/apache/phoenix/expression/ILikeExpressionTest.java
+++ b/phoenix-core/src/test/java/org/apache/phoenix/expression/ILikeExpressionTest.java
@@ -20,24 +20,40 @@ package org.apache.phoenix.expression;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 
+import java.sql.SQLException;
 import java.util.Arrays;
 import java.util.List;
 
 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
 import org.apache.phoenix.parse.LikeParseNode.LikeType;
+import org.apache.phoenix.schema.SortOrder;
+import org.apache.phoenix.schema.types.PVarchar;
 import org.junit.Test;
 
 public class ILikeExpressionTest {
-    public boolean testExpression (String value, String expression) {
-      LiteralExpression v = LiteralExpression.newConstant(value);
-      LiteralExpression p = LiteralExpression.newConstant(expression);
+    private boolean testExpression (String value, String expression, SortOrder sortorder)
+            throws SQLException {
+      LiteralExpression v = LiteralExpression.newConstant(value, PVarchar.INSTANCE, sortorder);
+      LiteralExpression p = LiteralExpression.newConstant(expression, PVarchar.INSTANCE, sortorder);
       List<Expression> children = Arrays.<Expression>asList(v,p);
-      LikeExpression e = LikeExpression.create(children, LikeType.CASE_INSENSITIVE);
+      LikeExpression e1 = ByteBasedLikeExpression.create(children, LikeType.CASE_INSENSITIVE); // byte-based variant
+      LikeExpression e2 = StringBasedLikeExpression.create(children, LikeType.CASE_INSENSITIVE); // string-based variant
       ImmutableBytesWritable ptr = new ImmutableBytesWritable();
-      boolean evaluated = e.evaluate(null, ptr);
-      Boolean result = (Boolean)e.getDataType().toObject(ptr);
-      assertTrue(evaluated);
-      return result;
+      boolean evaluated1 = e1.evaluate(null, ptr); // ptr is reused for the second evaluation below
+      Boolean result1 = (Boolean)e1.getDataType().toObject(ptr);
+      assertTrue(evaluated1);
+      boolean evaluated2 = e2.evaluate(null, ptr);
+      Boolean result2 = (Boolean)e2.getDataType().toObject(ptr);
+      assertTrue(evaluated2);
+      assertEquals(result1, result2); // both variants must produce the same answer
+      return result1;
+    }
+
+    // Runs the three-argument variant under both sort orders and requires the outcomes to agree.
+    private boolean testExpression(String value, String expression) throws SQLException {
+        final boolean ascending = testExpression(value, expression, SortOrder.ASC);
+        final boolean descending = testExpression(value, expression, SortOrder.DESC);
+        assertEquals(ascending, descending);
+        return ascending;
     }
 
     @Test

http://git-wip-us.apache.org/repos/asf/phoenix/blob/52b0f23d/phoenix-core/src/test/java/org/apache/phoenix/expression/LikeExpressionTest.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/test/java/org/apache/phoenix/expression/LikeExpressionTest.java b/phoenix-core/src/test/java/org/apache/phoenix/expression/LikeExpressionTest.java
index 27e6547..0bf8b06 100644
--- a/phoenix-core/src/test/java/org/apache/phoenix/expression/LikeExpressionTest.java
+++ b/phoenix-core/src/test/java/org/apache/phoenix/expression/LikeExpressionTest.java
@@ -20,25 +20,42 @@ package org.apache.phoenix.expression;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 
+import java.sql.SQLException;
 import java.util.Arrays;
 import java.util.List;
 
 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
 import org.apache.phoenix.parse.LikeParseNode.LikeType;
+import org.apache.phoenix.schema.SortOrder;
+import org.apache.phoenix.schema.types.PVarchar;
 import org.junit.Test;
 
 public class LikeExpressionTest {
-    public boolean testExpression (String value, String expression) {
-      LiteralExpression v = LiteralExpression.newConstant(value);
-      LiteralExpression p = LiteralExpression.newConstant(expression);
+    private boolean testExpression(String value, String expression, SortOrder sortorder)
+            throws SQLException {
+      LiteralExpression v = LiteralExpression.newConstant(value, PVarchar.INSTANCE, sortorder);
+      LiteralExpression p = LiteralExpression.newConstant(expression, PVarchar.INSTANCE, sortorder);
       List<Expression> children = Arrays.<Expression>asList(v,p);
-      LikeExpression e = LikeExpression.create(children, LikeType.CASE_SENSITIVE);
+      LikeExpression e1 = ByteBasedLikeExpression.create(children, LikeType.CASE_SENSITIVE); // byte-based variant
+      LikeExpression e2 = StringBasedLikeExpression.create(children, LikeType.CASE_SENSITIVE); // string-based variant
       ImmutableBytesWritable ptr = new ImmutableBytesWritable();
-      boolean evaluated = e.evaluate(null, ptr);
-      Boolean result = (Boolean)e.getDataType().toObject(ptr);
-      assertTrue(evaluated);
-      return result;
+      boolean evaluated1 = e1.evaluate(null, ptr); // ptr is reused for the second evaluation below
+      Boolean result1 = (Boolean)e1.getDataType().toObject(ptr);
+      assertTrue(evaluated1);
+      boolean evaluated2 = e2.evaluate(null, ptr);
+      Boolean result2 = (Boolean)e2.getDataType().toObject(ptr);
+      assertTrue(evaluated2);
+      assertEquals(result1, result2); // both variants must produce the same answer
+      return result1;
     }
+
+    // Runs the three-argument variant under both sort orders and requires the outcomes to agree.
+    private boolean testExpression(String value, String expression) throws SQLException {
+        final boolean ascending = testExpression(value, expression, SortOrder.ASC);
+        final boolean descending = testExpression(value, expression, SortOrder.DESC);
+        assertEquals(ascending, descending);
+        return ascending;
+    }
+
     @Test
     public void testStartWildcard() throws Exception {
         assertEquals(Boolean.FALSE, testExpression ("149na7-app1-2-", "%-w"));
@@ -58,4 +75,10 @@ public class LikeExpressionTest {
         assertEquals(Boolean.TRUE, testExpression ("test", "%s%"));
         assertEquals(Boolean.FALSE, testExpression ("test", "%S%"));
     }
+
+    @Test
+    public void testEmptySourceStr() throws Exception {
+        assertEquals(Boolean.TRUE, testExpression ("", "%")); // "%" is expected to match the empty string
+        assertEquals(Boolean.FALSE, testExpression ("", "_")); // "_" is expected not to match it
+    }
  }

http://git-wip-us.apache.org/repos/asf/phoenix/blob/52b0f23d/phoenix-core/src/test/java/org/apache/phoenix/expression/RegexpReplaceFunctionTest.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/test/java/org/apache/phoenix/expression/RegexpReplaceFunctionTest.java b/phoenix-core/src/test/java/org/apache/phoenix/expression/RegexpReplaceFunctionTest.java
new file mode 100644
index 0000000..ad11c1b
--- /dev/null
+++ b/phoenix-core/src/test/java/org/apache/phoenix/expression/RegexpReplaceFunctionTest.java
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.expression;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.sql.SQLException;
+import java.util.List;
+
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.phoenix.expression.function.ByteBasedRegexpReplaceFunction;
+import org.apache.phoenix.expression.function.StringBasedRegexpReplaceFunction;
+import org.apache.phoenix.schema.SortOrder;
+import org.apache.phoenix.schema.types.PVarchar;
+import org.junit.Test;
+
+import com.google.common.collect.Lists;
+
+/**
+ * Exercises the regexp replace functions. Every case is evaluated by both the byte-based and the
+ * string-based implementation, under ascending and descending sort order, and all four results
+ * must be equal.
+ */
+public class RegexpReplaceFunctionTest {
+    private final static PVarchar TYPE = PVarchar.INSTANCE;
+
+    /** Evaluates exp, asserts that evaluation succeeded, and returns the result as a String. */
+    private String evalExp(Expression exp) {
+        ImmutableBytesWritable ptr = new ImmutableBytesWritable();
+        assertTrue(exp.evaluate(null, ptr));
+        return (String) exp.getDataType().toObject(ptr);
+    }
+
+    /** Evaluates both implementations on literals of the given sort order; they must agree. */
+    private String testExpression(String srcStr, String patternStr, String replaceStr,
+            SortOrder sortOrder) throws SQLException {
+        Expression source = LiteralExpression.newConstant(srcStr, TYPE, sortOrder);
+        Expression pattern = LiteralExpression.newConstant(patternStr, TYPE, sortOrder);
+        Expression replacement = LiteralExpression.newConstant(replaceStr, TYPE, sortOrder);
+        List<Expression> args = Lists.newArrayList(source, pattern, replacement);
+        String byteBased = evalExp(new ByteBasedRegexpReplaceFunction(args));
+        String stringBased = evalExp(new StringBasedRegexpReplaceFunction(args));
+        assertEquals(byteBased, stringBased);
+        return byteBased;
+    }
+
+    /** Runs the case under both sort orders and requires the same result from each. */
+    private String testExpression(String srcStr, String patternStr, String replaceStr)
+            throws SQLException {
+        String ascending = testExpression(srcStr, patternStr, replaceStr, SortOrder.ASC);
+        String descending = testExpression(srcStr, patternStr, replaceStr, SortOrder.DESC);
+        assertEquals(ascending, descending);
+        return ascending;
+    }
+
+    /** Runs the case and compares the common result with expectedStr. */
+    private void testExpression(String srcStr, String patternStr, String replaceStr,
+            String expectedStr) throws SQLException {
+        assertEquals(expectedStr, testExpression(srcStr, patternStr, replaceStr));
+    }
+
+    @Test
+    public void test() throws Exception {
+        final String source = "aa11bb22cc33dd44ee";
+        testExpression(source, "[0-9]+", "*", "aa*bb*cc*dd*ee");
+        testExpression(source, "[0-9]+", "", "aabbccddee");
+        testExpression(source, "[a-z][0-9]", "", "a1b2c3d4ee");
+        // replacing everything leaves nothing behind, which is expected to come back as null
+        testExpression(source, "[a-z0-9]+", "", (String) null);
+    }
+}

http://git-wip-us.apache.org/repos/asf/phoenix/blob/52b0f23d/phoenix-core/src/test/java/org/apache/phoenix/expression/RegexpSplitFunctionTest.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/test/java/org/apache/phoenix/expression/RegexpSplitFunctionTest.java b/phoenix-core/src/test/java/org/apache/phoenix/expression/RegexpSplitFunctionTest.java
new file mode 100644
index 0000000..6157ce0
--- /dev/null
+++ b/phoenix-core/src/test/java/org/apache/phoenix/expression/RegexpSplitFunctionTest.java
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.expression;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.sql.SQLException;
+import java.util.List;
+
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.phoenix.expression.function.ByteBasedRegexpSplitFunction;
+import org.apache.phoenix.expression.function.StringBasedRegexpSplitFunction;
+import org.apache.phoenix.schema.SortOrder;
+import org.apache.phoenix.schema.types.PVarchar;
+import org.apache.phoenix.schema.types.PhoenixArray;
+import org.junit.Test;
+
+import com.google.common.collect.Lists;
+
+// Cross-checks the byte-based and string-based regexp split functions against each other.
+public class RegexpSplitFunctionTest {
+    private final static PVarchar TYPE = PVarchar.INSTANCE;
+
+    // Evaluates exp (which must report success) and unwraps the resulting array.
+    private String[] evalExp(Expression exp) throws SQLException {
+        ImmutableBytesWritable ptr = new ImmutableBytesWritable();
+        assertTrue(exp.evaluate(null, ptr));
+        PhoenixArray evalRes = (PhoenixArray) exp.getDataType().toObject(ptr);
+        return (String[]) evalRes.getArray();
+    }
+
+    // Splits srcStr with both implementations for one sort order; the results must match.
+    private String[] testExpression(String srcStr, String patternStr, SortOrder sortOrder)
+            throws SQLException {
+        Expression srcExp = LiteralExpression.newConstant(srcStr, TYPE, sortOrder);
+        Expression patternExp = LiteralExpression.newConstant(patternStr, TYPE, sortOrder);
+        List<Expression> expressions = Lists.newArrayList(srcExp, patternExp);
+        String[] res1 = evalExp(new ByteBasedRegexpSplitFunction(expressions));
+        String[] res2 = evalExp(new StringBasedRegexpSplitFunction(expressions));
+        testEqual(res2, res1);
+        return res1;
+    }
+
+    // Repeats the split for ASC and DESC sort order and requires identical output.
+    private String[] testExpression(String srcStr, String patternStr) throws SQLException {
+        String[] result1 = testExpression(srcStr, patternStr, SortOrder.ASC);
+        testEqual(result1, testExpression(srcStr, patternStr, SortOrder.DESC));
+        return result1;
+    }
+
+    // Fails when exactly one array is null; otherwise compares length and every element.
+    private void testEqual(String[] expectedStr, String[] result) {
+        assertTrue("exactly one array is null", (expectedStr == null) == (result == null));
+        if (expectedStr == null) return;
+        assertEquals(expectedStr.length, result.length);
+        for (int i = 0; i < expectedStr.length; ++i)
+            assertEquals(expectedStr[i], result[i]);
+    }
+
+    // Asserts that splitting srcStr by patternStr yields expectedStr.
+    private void testExpression(String srcStr, String patternStr, String[] expectedStr)
+            throws SQLException {
+        testEqual(expectedStr, testExpression(srcStr, patternStr));
+    }
+
+    @Test
+    public void test() throws Exception {
+        String[] res = new String[] { "ONE", "TWO", "THREE" };
+        testExpression("ONE:TWO:THREE", ":", res);
+        testExpression("ONE,TWO,THREE", ",", res);
+        testExpression("12ONE34TWO56THREE78", "[0-9]+", new String[] { null, "ONE", "TWO", "THREE",
+                null });
+        testExpression("ONE34TWO56THREE78", "[0-9]+", new String[] { "ONE", "TWO", "THREE", null });
+        testExpression("123ONE34TWO56THREE", "[0-9]+", new String[] { null, "ONE", "TWO", "THREE" });
+        testExpression("123", "[0-9]+", new String[] { null, null });
+        testExpression("ONE", "[0-9]+", new String[] { "ONE" });
+    }
+}

http://git-wip-us.apache.org/repos/asf/phoenix/blob/52b0f23d/phoenix-core/src/test/java/org/apache/phoenix/expression/RegexpSubstrFunctionTest.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/test/java/org/apache/phoenix/expression/RegexpSubstrFunctionTest.java b/phoenix-core/src/test/java/org/apache/phoenix/expression/RegexpSubstrFunctionTest.java
new file mode 100644
index 0000000..c2889b3
--- /dev/null
+++ b/phoenix-core/src/test/java/org/apache/phoenix/expression/RegexpSubstrFunctionTest.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.expression;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.sql.SQLException;
+import java.util.List;
+
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.phoenix.expression.function.ByteBasedRegexpSubstrFunction;
+import org.apache.phoenix.expression.function.StringBasedRegexpSubstrFunction;
+import org.apache.phoenix.schema.SortOrder;
+import org.apache.phoenix.schema.types.PInteger;
+import org.apache.phoenix.schema.types.PVarchar;
+import org.junit.Test;
+
+import com.google.common.collect.Lists;
+
+// Checks that the byte-based and string-based regexp substr functions agree.
+public class RegexpSubstrFunctionTest {
+    private final static PVarchar TYPE = PVarchar.INSTANCE;
+
+    // Evaluates exp, which must report success, and decodes the pointer as a String.
+    private String evalExp(Expression exp) {
+        ImmutableBytesWritable ptr = new ImmutableBytesWritable();
+        assertTrue(exp.evaluate(null, ptr));
+        return (String) exp.getDataType().toObject(ptr);
+    }
+
+    // Evaluates both implementations for one sort order and requires equal results.
+    private String testExpression(String srcStr, String patternStr, int offset,
+            SortOrder sortOrder) throws SQLException {
+        Expression srcExp = LiteralExpression.newConstant(srcStr, TYPE, sortOrder);
+        Expression patternExp = LiteralExpression.newConstant(patternStr, TYPE, sortOrder);
+        Expression offsetExp = LiteralExpression.newConstant(offset, PInteger.INSTANCE, sortOrder);
+        List<Expression> expressions = Lists.newArrayList(srcExp, patternExp, offsetExp);
+        String byteResult = evalExp(new ByteBasedRegexpSubstrFunction(expressions));
+        assertEquals(byteResult, evalExp(new StringBasedRegexpSubstrFunction(expressions)));
+        return byteResult;
+    }
+
+    // Runs the ASC and DESC variants and requires them to agree.
+    private String testExpression(String srcStr, String patternStr, int offset) throws SQLException {
+        String ascending = testExpression(srcStr, patternStr, offset, SortOrder.ASC);
+        assertEquals(ascending, testExpression(srcStr, patternStr, offset, SortOrder.DESC));
+        return ascending;
+    }
+
+    // Asserts that the substring matched from the given offset equals expectedStr.
+    private void testExpression(String srcStr, String patternStr, int offset, String expectedStr)
+            throws SQLException {
+        assertEquals(expectedStr, testExpression(srcStr, patternStr, offset));
+    }
+
+    @Test
+    public void test() throws Exception {
+        // Expected values below treat the offset as 1-based; negative offsets count from the end.
+        testExpression("Report1?1", "[^\\\\?]+", 1, "Report1");
+        testExpression("Report1?2", "[^\\\\?]+", 1, "Report1");
+        testExpression("Report2?1", "[^\\\\?]+", 1, "Report2");
+        testExpression("Report3?2", "[^\\\\?]+", 1, "Report3");
+        testExpression("Report3?2", "[4-9]+", 0, (String) null);
+        testExpression("Report3?2", "[^\\\\?]+", 2, "eport3");
+        testExpression("Report3?2", "[^\\\\?]+", -5, "rt3");
+    }
+}

http://git-wip-us.apache.org/repos/asf/phoenix/blob/52b0f23d/phoenix-core/src/test/java/org/apache/phoenix/expression/SortOrderExpressionTest.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/test/java/org/apache/phoenix/expression/SortOrderExpressionTest.java b/phoenix-core/src/test/java/org/apache/phoenix/expression/SortOrderExpressionTest.java
index 8fb1a6c..b9ee0eb 100644
--- a/phoenix-core/src/test/java/org/apache/phoenix/expression/SortOrderExpressionTest.java
+++ b/phoenix-core/src/test/java/org/apache/phoenix/expression/SortOrderExpressionTest.java
@@ -30,16 +30,18 @@ import java.util.TimeZone;
 
 import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.phoenix.expression.function.ByteBasedRegexpReplaceFunction;
+import org.apache.phoenix.expression.function.ByteBasedRegexpSubstrFunction;
 import org.apache.phoenix.expression.function.FunctionArgumentType;
 import org.apache.phoenix.expression.function.LTrimFunction;
 import org.apache.phoenix.expression.function.LengthFunction;
 import org.apache.phoenix.expression.function.LowerFunction;
 import org.apache.phoenix.expression.function.LpadFunction;
 import org.apache.phoenix.expression.function.RTrimFunction;
-import org.apache.phoenix.expression.function.RegexpReplaceFunction;
-import org.apache.phoenix.expression.function.RegexpSubstrFunction;
 import org.apache.phoenix.expression.function.RoundDateExpression;
 import org.apache.phoenix.expression.function.SqlTypeNameFunction;
+import org.apache.phoenix.expression.function.StringBasedRegexpReplaceFunction;
+import org.apache.phoenix.expression.function.StringBasedRegexpSubstrFunction;
 import org.apache.phoenix.expression.function.SubstrFunction;
 import org.apache.phoenix.expression.function.ToCharFunction;
 import org.apache.phoenix.expression.function.ToDateFunction;
@@ -80,13 +82,15 @@ public class SortOrderExpressionTest {
     @Test
     public void regexpSubstr() throws Exception {
         List<Expression> args = Lists.newArrayList(getInvertedLiteral("blah", PChar.INSTANCE), getLiteral("l.h"), getLiteral(2));
-        evaluateAndAssertResult(new RegexpSubstrFunction(args), "lah");
+        evaluateAndAssertResult(new StringBasedRegexpSubstrFunction(args), "lah");
+        evaluateAndAssertResult(new ByteBasedRegexpSubstrFunction(args), "lah");
     }
     
     @Test
     public void regexpReplace() throws Exception {
         List<Expression> args = Lists.newArrayList(getInvertedLiteral("blah", PChar.INSTANCE), getLiteral("l.h"), getLiteral("foo"));
-        evaluateAndAssertResult(new RegexpReplaceFunction(args), "bfoo");
+        evaluateAndAssertResult(new ByteBasedRegexpReplaceFunction(args), "bfoo");
+        evaluateAndAssertResult(new StringBasedRegexpReplaceFunction(args), "bfoo");
     }
     
     @Test

http://git-wip-us.apache.org/repos/asf/phoenix/blob/52b0f23d/phoenix-core/src/test/java/org/apache/phoenix/expression/util/regex/PatternPerformanceTest.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/test/java/org/apache/phoenix/expression/util/regex/PatternPerformanceTest.java b/phoenix-core/src/test/java/org/apache/phoenix/expression/util/regex/PatternPerformanceTest.java
new file mode 100644
index 0000000..908c662
--- /dev/null
+++ b/phoenix-core/src/test/java/org/apache/phoenix/expression/util/regex/PatternPerformanceTest.java
@@ -0,0 +1,144 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.expression.util.regex;
+
+import static org.junit.Assert.assertTrue;
+
+import java.sql.SQLException;
+
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.phoenix.schema.types.PBoolean;
+import org.apache.phoenix.schema.types.PVarchar;
+import org.apache.phoenix.schema.types.PVarcharArray;
+import org.apache.phoenix.schema.types.PhoenixArray;
+import org.junit.Test;
+
+// Timing harness for JavaPattern/GuavaSplitter vs JONIPattern; every run in test() is commented out.
+public class PatternPerformanceTest {
+    static private class Timer { // stopwatch on System.nanoTime(), which is monotonic
+        private long startNanos;
+
+        public void reset() {
+            startNanos = System.nanoTime();
+        }
+
+        public double currentTime() { // seconds elapsed since the last reset()
+            return (System.nanoTime() - startNanos) / 1e9;
+        }
+
+        public void printTime(String hint) {
+            System.out.println(hint + " Time=" + currentTime());
+        }
+    }
+
+    private String[] data = new String[] { "ONE:TWO:THREE", "ABC:DEF", "PKU:THU:FDU" };
+    private ImmutableBytesWritable[] dataPtr = new ImmutableBytesWritable[] { getPtr(data[0]),
+            getPtr(data[1]), getPtr(data[2]) };
+    private String patternString;
+    private ImmutableBytesWritable resultPtr = new ImmutableBytesWritable();
+    private int maxTimes = 10000000; // iterations of each timed loop
+    private Timer timer = new Timer();
+    private static final boolean ENABLE_ASSERT = false; // true: also verify every result
+
+    private static ImmutableBytesWritable getPtr(String str) {
+        return new ImmutableBytesWritable(PVarchar.INSTANCE.toBytes(str));
+    }
+
+    private void testReplaceAll(ImmutableBytesWritable replacePtr, AbstractBasePattern pattern,
+            String name) {
+        timer.reset();
+        for (int i = 0; i < maxTimes; ++i) {
+            pattern.replaceAll(dataPtr[i % 3], replacePtr, resultPtr); // cycles over the 3 inputs
+            if (ENABLE_ASSERT) {
+                String result = (String) PVarchar.INSTANCE.toObject(resultPtr);
+                assertTrue((i % 3 == 1 && ":".equals(result))
+                        || (i % 3 != 1 && "::".equals(result)));
+            }
+        }
+        timer.printTime(name);
+    }
+
+    public void testReplaceAll() {
+        patternString = "[A-Z]+";
+        ImmutableBytesWritable replacePtr = getPtr("");
+        testReplaceAll(replacePtr, new JavaPattern(patternString), "Java replaceAll");
+        testReplaceAll(replacePtr, new JONIPattern(patternString), "JONI replaceAll");
+    }
+
+    private void testLike(AbstractBasePattern pattern, String name) {
+        timer.reset();
+        for (int i = 0; i < maxTimes; ++i) {
+            pattern.matches(dataPtr[i % 3], resultPtr);
+            if (ENABLE_ASSERT) {
+                Boolean b = (Boolean) PBoolean.INSTANCE.toObject(resultPtr);
+                assertTrue(i % 3 != 2 || b.booleanValue());
+            }
+        }
+        timer.printTime(name);
+    }
+
+    public void testLike() {
+        patternString = "\\Q\\E.*\\QU\\E.*\\QU\\E.*\\QU\\E.*\\Q\\E";
+        testLike(new JavaPattern(patternString), "Java Like");
+        testLike(new JONIPattern(patternString), "JONI Like");
+    }
+
+    private void testSubstr(AbstractBasePattern pattern, String name) {
+        timer.reset();
+        for (int i = 0; i < maxTimes; ++i) {
+            boolean ret = pattern.substr(dataPtr[i % 3], 0, resultPtr);
+            if (ENABLE_ASSERT) {
+                assertTrue(ret
+                        && (i % 3 != 2 || ":THU".equals(PVarchar.INSTANCE.toObject(resultPtr))));
+            }
+        }
+        timer.printTime(name);
+    }
+
+    public void testSubstr() {
+        patternString = "\\:[A-Z]+";
+        testSubstr(new JavaPattern(patternString), "Java Substr");
+        testSubstr(new JONIPattern(patternString), "JONI Substr");
+    }
+
+    private void testSplit(AbstractBaseSplitter pattern, String name) throws SQLException {
+        timer.reset();
+        for (int i = 0; i < maxTimes; ++i) {
+            boolean ret = pattern.split(dataPtr[i % 3], resultPtr);
+            if (ENABLE_ASSERT) {
+                PhoenixArray array = (PhoenixArray) PVarcharArray.INSTANCE.toObject(resultPtr);
+                assertTrue(ret && (i % 3 != 1 || ((String[]) array.getArray()).length == 2));
+            }
+        }
+        timer.printTime(name);
+    }
+
+    public void testSplit() throws SQLException {
+        patternString = "\\:";
+        testSplit(new GuavaSplitter(patternString), "GuavaSplit");
+        testSplit(new JONIPattern(patternString), "JONI Split");
+    }
+
+    @Test
+    public void test() throws Exception {
+        // testLike();
+        // testReplaceAll();
+        // testSubstr();
+        // testSplit();
+    }
+}

http://git-wip-us.apache.org/repos/asf/phoenix/blob/52b0f23d/phoenix-core/src/test/java/org/apache/phoenix/util/StringUtilTest.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/test/java/org/apache/phoenix/util/StringUtilTest.java b/phoenix-core/src/test/java/org/apache/phoenix/util/StringUtilTest.java
index 9c218fb..6d00562 100644
--- a/phoenix-core/src/test/java/org/apache/phoenix/util/StringUtilTest.java
+++ b/phoenix-core/src/test/java/org/apache/phoenix/util/StringUtilTest.java
@@ -17,7 +17,9 @@
 package org.apache.phoenix.util;
 
 import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
 
+import org.apache.phoenix.schema.SortOrder;
 import org.junit.Test;
 
 public class StringUtilTest {
@@ -48,5 +50,33 @@ public class StringUtilTest {
     public void testLpadZeroPadding() throws Exception {
         testLpad("ABCD", 4, "1234", "ABCD");
     }
-    
+
+    @Test
+    public void testCalculateUTF8Offset() throws Exception {
+        String tmp, padding = "padding", data = "零一二三四五六七八九", trailing = "trailing";
+        byte[] bytes = (padding + data + trailing).getBytes("UTF-8"); // not the platform default
+        int ret, offset = padding.getBytes("UTF-8").length, length = data.getBytes("UTF-8").length;
+        // Index i in [0, data.length()): expect the byte offset at which character i starts.
+        tmp = padding;
+        for (int i = 0; i < data.length(); ++i) {
+            ret = StringUtil.calculateUTF8Offset(bytes, offset, length, SortOrder.ASC, i);
+            assertEquals(tmp.getBytes("UTF-8").length, ret);
+            tmp = tmp + data.charAt(i);
+        }
+        for (int i = data.length(); i < data.length() + 10; ++i) {
+            ret = StringUtil.calculateUTF8Offset(bytes, offset, length, SortOrder.ASC, i);
+            assertEquals(-1, ret);
+        }
+        // Negative index i: expected to address character i + data.length(); out of range gives -1.
+        for (int i = -data.length() - 10; i < -data.length(); ++i) {
+            ret = StringUtil.calculateUTF8Offset(bytes, offset, length, SortOrder.ASC, i);
+            assertEquals(-1, ret);
+        }
+        tmp = padding;
+        for (int i = -data.length(); i <= -1; ++i) {
+            ret = StringUtil.calculateUTF8Offset(bytes, offset, length, SortOrder.ASC, i);
+            assertEquals("i=" + i, tmp.getBytes("UTF-8").length, ret);
+            tmp = tmp + data.charAt(i + data.length());
+        }
+    }
 }

http://git-wip-us.apache.org/repos/asf/phoenix/blob/52b0f23d/phoenix-core/src/test/java/org/apache/phoenix/util/TestUtil.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/test/java/org/apache/phoenix/util/TestUtil.java b/phoenix-core/src/test/java/org/apache/phoenix/util/TestUtil.java
index 872c318..66695f8 100644
--- a/phoenix-core/src/test/java/org/apache/phoenix/util/TestUtil.java
+++ b/phoenix-core/src/test/java/org/apache/phoenix/util/TestUtil.java
@@ -56,15 +56,16 @@ import org.apache.phoenix.coprocessor.generated.MetaDataProtos.ClearCacheRequest
 import org.apache.phoenix.coprocessor.generated.MetaDataProtos.ClearCacheResponse;
 import org.apache.phoenix.coprocessor.generated.MetaDataProtos.MetaDataService;
 import org.apache.phoenix.expression.AndExpression;
+import org.apache.phoenix.expression.ByteBasedLikeExpression;
 import org.apache.phoenix.expression.ComparisonExpression;
 import org.apache.phoenix.expression.Expression;
 import org.apache.phoenix.expression.InListExpression;
 import org.apache.phoenix.expression.KeyValueColumnExpression;
-import org.apache.phoenix.expression.LikeExpression;
 import org.apache.phoenix.expression.LiteralExpression;
 import org.apache.phoenix.expression.NotExpression;
 import org.apache.phoenix.expression.OrExpression;
 import org.apache.phoenix.expression.RowKeyColumnExpression;
+import org.apache.phoenix.expression.StringBasedLikeExpression;
 import org.apache.phoenix.expression.function.SubstrFunction;
 import org.apache.phoenix.filter.MultiCQKeyValueComparisonFilter;
 import org.apache.phoenix.filter.MultiKeyValueComparisonFilter;
@@ -77,6 +78,8 @@ import org.apache.phoenix.jdbc.PhoenixPreparedStatement;
 import org.apache.phoenix.parse.LikeParseNode.LikeType;
 import org.apache.phoenix.query.KeyRange;
 import org.apache.phoenix.query.QueryConstants;
+import org.apache.phoenix.query.QueryServices;
+import org.apache.phoenix.query.QueryServicesOptions;
 import org.apache.phoenix.schema.PColumn;
 import org.apache.phoenix.schema.RowKeyValueAccessor;
 import org.apache.phoenix.schema.TableRef;
@@ -264,13 +267,26 @@ public class TestUtil {
         return  new ComparisonExpression(Arrays.asList(e, LiteralExpression.newConstant(o)), op);
     }
 
-    public static Expression like(Expression e, Object o) {
-        return LikeExpression.create(Arrays.asList(e, LiteralExpression.newConstant(o)), LikeType.CASE_SENSITIVE);
+    private static boolean useByteBasedRegex(StatementContext context) { // reads the USE_BYTE_BASED_REGEX_ATTRIB prop
+        return context
+                .getConnection()
+                .getQueryServices()
+                .getProps()
+                .getBoolean(QueryServices.USE_BYTE_BASED_REGEX_ATTRIB,
+                    QueryServicesOptions.DEFAULT_USE_BYTE_BASED_REGEX);
     }
 
-    public static Expression ilike(Expression e, Object o) {
-      return LikeExpression.create(Arrays.asList(e, LiteralExpression.newConstant(o)), LikeType.CASE_INSENSITIVE);
-  }
+    public static Expression like(Expression e, Object o, StatementContext context) { // case-sensitive LIKE
+        return useByteBasedRegex(context)?
+               ByteBasedLikeExpression.create(Arrays.asList(e, LiteralExpression.newConstant(o)), LikeType.CASE_SENSITIVE):
+               StringBasedLikeExpression.create(Arrays.asList(e, LiteralExpression.newConstant(o)), LikeType.CASE_SENSITIVE);
+    }
+
+    public static Expression ilike(Expression e, Object o, StatementContext context) { // case-insensitive LIKE
+        return useByteBasedRegex(context)?
+                ByteBasedLikeExpression.create(Arrays.asList(e, LiteralExpression.newConstant(o)), LikeType.CASE_INSENSITIVE):
+                StringBasedLikeExpression.create(Arrays.asList(e, LiteralExpression.newConstant(o)), LikeType.CASE_INSENSITIVE);
+    }
 
     public static Expression substr(Expression e, Object offset, Object length) {
         return  new SubstrFunction(Arrays.asList(e, LiteralExpression.newConstant(offset), LiteralExpression.newConstant(length)));

http://git-wip-us.apache.org/repos/asf/phoenix/blob/52b0f23d/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index da5e13a..3981466 100644
--- a/pom.xml
+++ b/pom.xml
@@ -105,6 +105,7 @@
     <htrace.version>2.04</htrace.version>
     <collections.version>3.2.1</collections.version>
     <jodatime.version>2.7</jodatime.version>
+    <joni.version>2.1.2</joni.version>
 
     <!-- Test Dependencies -->
     <mockito-all.version>1.8.5</mockito-all.version>


Mime
View raw message