tajo-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From hyun...@apache.org
Subject git commit: TAJO-947: ColPartitionStoreExec can cause URISyntaxException due to special characters. (Mai Hai Thanh via hyunsik)
Date Mon, 11 Aug 2014 05:04:41 GMT
Repository: tajo
Updated Branches:
  refs/heads/master ddfc3f330 -> 87e7ba214


TAJO-947: ColPartitionStoreExec can cause URISyntaxException due to special characters. (Mai Hai Thanh via hyunsik)


Project: http://git-wip-us.apache.org/repos/asf/tajo/repo
Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/87e7ba21
Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/87e7ba21
Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/87e7ba21

Branch: refs/heads/master
Commit: 87e7ba21491ac8a5a6a56357d7b4185c94f5dfd6
Parents: ddfc3f3
Author: Hyunsik Choi <hyunsik@apache.org>
Authored: Mon Aug 11 14:04:07 2014 +0900
Committer: Hyunsik Choi <hyunsik@apache.org>
Committed: Mon Aug 11 14:04:07 2014 +0900

----------------------------------------------------------------------
 CHANGES                                         |  3 +
 .../java/org/apache/tajo/util/StringUtils.java  | 96 ++++++++++++++++++++
 .../HashBasedColPartitionStoreExec.java         |  3 +-
 .../SortBasedColPartitionStoreExec.java         |  7 +-
 .../org/apache/tajo/engine/utils/TupleUtil.java |  3 +-
 .../tajo/engine/query/TestTablePartitions.java  | 43 +++++++++
 .../TestTablePartitions/lineitemspecial.tbl     |  5 +
 .../TestTablePartitions/lineitemspecial_ddl.sql |  3 +
 .../TestSpecialCharPartitionKeys1.result        |  4 +
 .../TestSpecialCharPartitionKeys2.result        |  3 +
 10 files changed, 165 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tajo/blob/87e7ba21/CHANGES
----------------------------------------------------------------------
diff --git a/CHANGES b/CHANGES
index 182d07d..8788b17 100644
--- a/CHANGES
+++ b/CHANGES
@@ -109,6 +109,9 @@ Release 0.9.0 - unreleased
 
   BUG FIXES
 
+    TAJO-947: ColPartitionStoreExec can cause URISyntaxException due 
+    to special characters. (Mai Hai Thanh via hyunsik)
+
     TAJO-999: SequenceFile key class need to be compatible. (jaehwa)
 
     TAJO-994: 'count(distinct x)' function counts first null value. (hyunsik)

http://git-wip-us.apache.org/repos/asf/tajo/blob/87e7ba21/tajo-common/src/main/java/org/apache/tajo/util/StringUtils.java
----------------------------------------------------------------------
diff --git a/tajo-common/src/main/java/org/apache/tajo/util/StringUtils.java b/tajo-common/src/main/java/org/apache/tajo/util/StringUtils.java
index 41ea153..90391a8 100644
--- a/tajo-common/src/main/java/org/apache/tajo/util/StringUtils.java
+++ b/tajo-common/src/main/java/org/apache/tajo/util/StringUtils.java
@@ -21,10 +21,12 @@ package org.apache.tajo.util;
 import org.apache.commons.lang.CharUtils;
 import org.apache.commons.lang.StringEscapeUtils;
 import org.apache.commons.lang.SystemUtils;
+import org.apache.hadoop.util.Shell;
 import org.apache.hadoop.util.ShutdownHookManager;
 import org.apache.hadoop.util.SignalLogger;
 
 import java.util.Arrays;
+import java.util.BitSet;
 
 public class StringUtils {
 
@@ -180,4 +182,98 @@ public class StringUtils {
   public static String unicodeEscapedDelimiter(char c) {
     return CharUtils.unicodeEscaped(c);
   }
+
+  /**
+   * The following lines of code that deals with escape characters is mostly copied from HIVE's FileUtils.java
+   */
+
+  static BitSet charToEscape = new BitSet(128);
+  static {
+    for (char c = 0; c < ' '; c++) {
+      charToEscape.set(c);
+    }
+
+    /**
+     * ASCII 01-1F are HTTP control characters that need to be escaped.
+     * \u000A and \u000D are \n and \r, respectively.
+     */
+    char[] clist = new char[] {'\u0001', '\u0002', '\u0003', '\u0004',
+        '\u0005', '\u0006', '\u0007', '\u0008', '\u0009', '\n', '\u000B',
+        '\u000C', '\r', '\u000E', '\u000F', '\u0010', '\u0011', '\u0012',
+        '\u0013', '\u0014', '\u0015', '\u0016', '\u0017', '\u0018', '\u0019',
+        '\u001A', '\u001B', '\u001C', '\u001D', '\u001E', '\u001F',
+        '"', '#', '%', '\'', '*', '/', ':', '=', '?', '\\', '\u007F', '{',
+        '[', ']', '^'};
+
+    for (char c : clist) {
+      charToEscape.set(c);
+    }
+
+    if(Shell.WINDOWS){
+      // On windows, following chars need to be escaped as well
+      char [] winClist = {' ', '<','>','|'};
+      for (char c : winClist) {
+        charToEscape.set(c);
+      }
+    }
+  }
+
+  static boolean needsEscaping(char c) {
+    return c >= 0 && c < charToEscape.size() && charToEscape.get(c);
+  }
+
+  public static String escapePathName(String path) {
+    return escapePathName(path, null);
+  }
+
+  /**
+   * Escapes a path name.
+   * @param path The path to escape.
+   * @param defaultPath
+   * The default name for the path, if the given path is empty or null.
+   * @return An escaped path name.
+   */
+  public static String escapePathName(String path, String defaultPath) {
+    if (path == null || path.length() == 0) {
+      if (defaultPath == null) {
+        return "__TAJO_DEFAULT_PARTITION__";
+      } else {
+        return defaultPath;
+      }
+    }
+
+    StringBuilder sb = new StringBuilder();
+    for (int i = 0; i < path.length(); i++) {
+      char c = path.charAt(i);
+      if (needsEscaping(c)) {
+        sb.append('%');
+        sb.append(String.format("%1$02X", (int) c));
+      } else {
+        sb.append(c);
+      }
+    }
+    return sb.toString();
+  }
+
+  public static String unescapePathName(String path) {
+    StringBuilder sb = new StringBuilder();
+    for (int i = 0; i < path.length(); i++) {
+      char c = path.charAt(i);
+      if (c == '%' && i + 2 < path.length()) {
+        int code = -1;
+        try {
+          code = Integer.valueOf(path.substring(i + 1, i + 3), 16);
+        } catch (Exception e) {
+          code = -1;
+        }
+        if (code >= 0) {
+          sb.append((char) code);
+          i += 2;
+          continue;
+        }
+      }
+      sb.append(c);
+    }
+    return sb.toString();
+  }
 }

http://git-wip-us.apache.org/repos/asf/tajo/blob/87e7ba21/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/HashBasedColPartitionStoreExec.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/HashBasedColPartitionStoreExec.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/HashBasedColPartitionStoreExec.java
index 6cef22e..44d1270 100644
--- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/HashBasedColPartitionStoreExec.java
+++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/HashBasedColPartitionStoreExec.java
@@ -26,6 +26,7 @@ import org.apache.tajo.datum.Datum;
 import org.apache.tajo.engine.planner.logical.StoreTableNode;
 import org.apache.tajo.storage.Appender;
 import org.apache.tajo.storage.Tuple;
+import org.apache.tajo.util.StringUtils;
 import org.apache.tajo.worker.TaskAttemptContext;
 
 import java.io.IOException;
@@ -79,7 +80,7 @@ public class HashBasedColPartitionStoreExec extends ColPartitionStoreExec {
           if(i > 0)
             sb.append("/");
           sb.append(keyNames[i]).append("=");
-          sb.append(datum.asChars());
+          sb.append(StringUtils.escapePathName(datum.asChars()));
         }
       }
 

http://git-wip-us.apache.org/repos/asf/tajo/blob/87e7ba21/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SortBasedColPartitionStoreExec.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SortBasedColPartitionStoreExec.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SortBasedColPartitionStoreExec.java
index d09e296..9ce455f 100644
--- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SortBasedColPartitionStoreExec.java
+++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SortBasedColPartitionStoreExec.java
@@ -30,6 +30,7 @@ import org.apache.tajo.engine.planner.logical.StoreTableNode;
 import org.apache.tajo.storage.Appender;
 import org.apache.tajo.storage.Tuple;
 import org.apache.tajo.storage.VTuple;
+import org.apache.tajo.util.StringUtils;
 import org.apache.tajo.worker.TaskAttemptContext;
 
 import java.io.IOException;
@@ -78,8 +79,8 @@ public class SortBasedColPartitionStoreExec extends ColPartitionStoreExec {
       if(i > 0) {
         sb.append("/");
       }
-      sb.append(keyNames[i]).append("=");
-      sb.append(datum.asChars());
+      sb.append(keyNames[i]).append("=");      
+      sb.append(StringUtils.escapePathName(datum.asChars()));
     }
     return sb.toString();
   }
@@ -95,7 +96,7 @@ public class SortBasedColPartitionStoreExec extends ColPartitionStoreExec {
         appender = getAppender(getSubdirectory(currentKey));
         prevKey = new VTuple(currentKey);
       } else {
-        if (!prevKey.equals(currentKey)) {
+        if (!prevKey.equals(currentKey) && !getSubdirectory(prevKey).equalsIgnoreCase(getSubdirectory(currentKey))) {
           appender.close();
           StatisticsUtil.aggregateTableStat(aggregated, appender.getStats());
 

http://git-wip-us.apache.org/repos/asf/tajo/blob/87e7ba21/tajo-core/src/main/java/org/apache/tajo/engine/utils/TupleUtil.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/utils/TupleUtil.java b/tajo-core/src/main/java/org/apache/tajo/engine/utils/TupleUtil.java
index f2e47bc..0752e11 100644
--- a/tajo-core/src/main/java/org/apache/tajo/engine/utils/TupleUtil.java
+++ b/tajo-core/src/main/java/org/apache/tajo/engine/utils/TupleUtil.java
@@ -37,6 +37,7 @@ import org.apache.tajo.storage.RowStoreUtil.RowStoreEncoder;
 import org.apache.tajo.storage.Tuple;
 import org.apache.tajo.storage.TupleRange;
 import org.apache.tajo.storage.VTuple;
+import org.apache.tajo.util.StringUtils;
 
 import java.io.UnsupportedEncodingException;
 import java.net.URLEncoder;
@@ -258,7 +259,7 @@ public class TupleUtil {
       }
       int columnId = partitionColumnSchema.getColumnIdByName(parts[0]);
       Column keyColumn = partitionColumnSchema.getColumn(columnId);
-      tuple.put(columnId, DatumFactory.createFromString(keyColumn.getDataType(), parts[1]));
+      tuple.put(columnId, DatumFactory.createFromString(keyColumn.getDataType(), StringUtils.unescapePathName(parts[1])));
     }
     for (; i < partitionColumnSchema.size(); i++) {
       tuple.put(i, NullDatum.get());

http://git-wip-us.apache.org/repos/asf/tajo/blob/87e7ba21/tajo-core/src/test/java/org/apache/tajo/engine/query/TestTablePartitions.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestTablePartitions.java b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestTablePartitions.java
index d9aea53..d80fdb5 100644
--- a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestTablePartitions.java
+++ b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestTablePartitions.java
@@ -786,4 +786,47 @@ public class TestTablePartitions extends QueryTestCaseBase {
     fail("Can't find query from workers" + queryId);
     return null;
   }
+
+  @Test
+  public final void TestSpecialCharPartitionKeys1() throws Exception {
+    // See - TAJO-947: ColPartitionStoreExec can cause URISyntaxException due to special characters.
+
+    executeDDL("lineitemspecial_ddl.sql", "lineitemspecial.tbl");
+
+    executeString("CREATE TABLE IF NOT EXISTS pTable947 (id int, name text) PARTITION BY COLUMN (type text)")
+        .close();
+    executeString("INSERT OVERWRITE INTO pTable947 SELECT l_orderkey, l_shipinstruct, l_shipmode FROM lineitemspecial")
+        .close();
+    ResultSet res = executeString("select * from pTable947 where type='RA:*?><I/L#%S' or type='AIR'");
+
+    String resStr = resultSetToString(res);
+    String expected =
+        "id,name,type\n" +
+            "-------------------------------\n"
+            + "3,NONE,AIR\n"
+            + "3,TEST SPECIAL CHARS,RA:*?><I/L#%S\n";
+
+    assertEquals(expected, resStr);
+    cleanupQuery(res);
+  }
+
+  @Test
+  public final void TestSpecialCharPartitionKeys2() throws Exception {
+    // See - TAJO-947: ColPartitionStoreExec can cause URISyntaxException due to special characters.
+
+    executeDDL("lineitemspecial_ddl.sql", "lineitemspecial.tbl");
+
+    executeString("CREATE TABLE IF NOT EXISTS pTable947 (id int, name text) PARTITION BY COLUMN (type text)")
+        .close();
+    executeString("INSERT OVERWRITE INTO pTable947 SELECT l_orderkey, l_shipinstruct, l_shipmode FROM lineitemspecial")
+        .close();
+
+    ResultSet res = executeString("select * from pTable947 where type='RA:*?><I/L#%S'");
+    assertResultSet(res);
+    cleanupQuery(res);
+
+    res = executeString("select * from pTable947 where type='RA:*?><I/L#%S' or type='AIR01'");
+    assertResultSet(res);
+    cleanupQuery(res);
+  }
 }

http://git-wip-us.apache.org/repos/asf/tajo/blob/87e7ba21/tajo-core/src/test/resources/dataset/TestTablePartitions/lineitemspecial.tbl
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/dataset/TestTablePartitions/lineitemspecial.tbl b/tajo-core/src/test/resources/dataset/TestTablePartitions/lineitemspecial.tbl
new file mode 100644
index 0000000..2241213
--- /dev/null
+++ b/tajo-core/src/test/resources/dataset/TestTablePartitions/lineitemspecial.tbl
@@ -0,0 +1,5 @@
+1|DELIVER IN PERSON|TRUCK|
+1|TAKE BACK RETURN|MAIL|
+2|TAKE BACK RETURN|RAIL|
+3|NONE|AIR|
+3|TEST SPECIAL CHARS|RA:*?><I/L#%S|

http://git-wip-us.apache.org/repos/asf/tajo/blob/87e7ba21/tajo-core/src/test/resources/queries/TestTablePartitions/lineitemspecial_ddl.sql
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/queries/TestTablePartitions/lineitemspecial_ddl.sql b/tajo-core/src/test/resources/queries/TestTablePartitions/lineitemspecial_ddl.sql
new file mode 100644
index 0000000..77e76d5
--- /dev/null
+++ b/tajo-core/src/test/resources/queries/TestTablePartitions/lineitemspecial_ddl.sql
@@ -0,0 +1,3 @@
+create external table if not exists lineitemspecial (
+    l_orderkey INT4, l_shipinstruct TEXT, l_shipmode TEXT)
+using csv with ('csvfile.delimiter'='|', 'csvfile.null'='NULL') location ${table.path};

http://git-wip-us.apache.org/repos/asf/tajo/blob/87e7ba21/tajo-core/src/test/resources/results/TestTablePartitions/TestSpecialCharPartitionKeys1.result
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/results/TestTablePartitions/TestSpecialCharPartitionKeys1.result b/tajo-core/src/test/resources/results/TestTablePartitions/TestSpecialCharPartitionKeys1.result
new file mode 100644
index 0000000..98af4ec
--- /dev/null
+++ b/tajo-core/src/test/resources/results/TestTablePartitions/TestSpecialCharPartitionKeys1.result
@@ -0,0 +1,4 @@
+id,name,type
+-------------------------------
+3,NONE,AIR
+3,TEST SPECIAL CHARS,RA:*?><I/L#%S
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/87e7ba21/tajo-core/src/test/resources/results/TestTablePartitions/TestSpecialCharPartitionKeys2.result
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/results/TestTablePartitions/TestSpecialCharPartitionKeys2.result b/tajo-core/src/test/resources/results/TestTablePartitions/TestSpecialCharPartitionKeys2.result
new file mode 100644
index 0000000..d20fff7
--- /dev/null
+++ b/tajo-core/src/test/resources/results/TestTablePartitions/TestSpecialCharPartitionKeys2.result
@@ -0,0 +1,3 @@
+id,name,type
+-------------------------------
+3,TEST SPECIAL CHARS,RA:*?><I/L#%S
\ No newline at end of file


Mime
View raw message