drill-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From meh...@apache.org
Subject drill git commit: DRILL-3718: After TextReader finishes reading a field surrounded by double quotes, the reader would skip whitespaces only if those whitespaces are not used as delimiter
Date Tue, 15 Sep 2015 21:57:29 GMT
Repository: drill
Updated Branches:
  refs/heads/master 0c1b293d9 -> 48bc0b9a8


DRILL-3718: After TextReader finishes reading a field surrounded by double quotes, the reader
would skip whitespaces only if those whitespaces are not used as delimiter


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/48bc0b9a
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/48bc0b9a
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/48bc0b9a

Branch: refs/heads/master
Commit: 48bc0b9a8916af7191b0a99351c27fd5b69786c3
Parents: 0c1b293
Author: Hsuan-Yi Chu <hsuanyi@usc.edu>
Authored: Thu Aug 27 17:02:10 2015 -0700
Committer: Hsuan-Yi Chu <hsuanyi@usc.edu>
Committed: Tue Sep 15 12:28:55 2015 -0700

----------------------------------------------------------------------
 .../store/easy/text/compliant/TextReader.java   | 10 ++--
 .../org/apache/drill/TestExampleQueries.java    |  2 +-
 .../exec/store/text/TestNewTextReader.java      | 52 ++++++++++++++++++++
 .../resources/bootstrap-storage-plugins.json    |  5 ++
 .../src/test/resources/store/text/WithQuote.ssv |  3 ++
 .../src/test/resources/store/text/WithQuote.tbl |  3 ++
 .../src/test/resources/store/text/WithQuote.tsv |  3 ++
 pom.xml                                         |  1 +
 8 files changed, 74 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/drill/blob/48bc0b9a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/compliant/TextReader.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/compliant/TextReader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/compliant/TextReader.java
index 3899509..5d41254 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/compliant/TextReader.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/compliant/TextReader.java
@@ -259,9 +259,11 @@ final class TextReader {
       ch = input.nextChar();
     }
 
-    // handles whitespaces after quoted value: whitespaces are ignored. Content after whitespaces may be parsed if
-    // 'parseUnescapedQuotes' is enabled.
-    if (ch != newLine && ch <= ' ') {
+    // Handles whitespaces after quoted value:
+    // Whitespaces are ignored (i.e., ch <= ' ') if they are not used as delimiters (i.e., ch != ' ')
+    // For example, in tab-separated files (TSV files), '\t' is used as delimiter and should not be ignored
+    // Content after whitespaces may be parsed if 'parseUnescapedQuotes' is enabled.
+    if (ch != newLine && ch <= ' ' && ch != delimiter) {
       final DrillBuf workBuf = this.workBuf;
       workBuf.resetWriterIndex();
       do {
@@ -272,7 +274,7 @@ final class TextReader {
         if (ch == newLine) {
           return;
         }
-      } while (ch <= ' ');
+      } while (ch <= ' ' && ch != delimiter);
 
       // there's more stuff after the quoted value, not only empty spaces.
       if (!(ch == delimiter || ch == newLine) && parseUnescapedQuotes) {

http://git-wip-us.apache.org/repos/asf/drill/blob/48bc0b9a/exec/java-exec/src/test/java/org/apache/drill/TestExampleQueries.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/TestExampleQueries.java b/exec/java-exec/src/test/java/org/apache/drill/TestExampleQueries.java
index e88e2b3..eebffc1 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/TestExampleQueries.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/TestExampleQueries.java
@@ -1191,4 +1191,4 @@ public class TestExampleQueries extends BaseTestQuery {
         .build()
         .run();
   }
-}
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/drill/blob/48bc0b9a/exec/java-exec/src/test/java/org/apache/drill/exec/store/text/TestNewTextReader.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/text/TestNewTextReader.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/text/TestNewTextReader.java
index e63e528..6b8e16a 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/text/TestNewTextReader.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/text/TestNewTextReader.java
@@ -23,6 +23,7 @@ import static org.junit.Assert.fail;
 
 import org.apache.drill.BaseTestQuery;
 import org.apache.drill.common.exceptions.UserRemoteException;
+import org.apache.drill.common.util.FileUtils;
 import org.apache.drill.exec.proto.UserBitShared.DrillPBError.ErrorType;
 import org.junit.Test;
 
@@ -60,4 +61,55 @@ public class TestNewTextReader extends BaseTestQuery {
       assertTrue("Error message should contain " + COL_NAME, ex.getMessage().contains(COL_NAME));
     }
   }
+
+  @Test // see DRILL-3718
+  public void testTabSeparatedWithQuote() throws Exception {
+    final String root = FileUtils.getResourceAsFile("/store/text/WithQuote.tsv").toURI().toString();
+    final String query = String.format("select columns[0] as c0, columns[1] as c1, columns[2] as c2 \n" +
+        "from dfs_test.`%s` ", root);
+
+    testBuilder()
+        .sqlQuery(query)
+        .unOrdered()
+        .baselineColumns("c0", "c1", "c2")
+        .baselineValues("a", "a", "a")
+        .baselineValues("a", "a", "a")
+        .baselineValues("a", "a", "a")
+        .build()
+        .run();
+  }
+
+  @Test // see DRILL-3718
+  public void testSpaceSeparatedWithQuote() throws Exception {
+    final String root = FileUtils.getResourceAsFile("/store/text/WithQuote.ssv").toURI().toString();
+    final String query = String.format("select columns[0] as c0, columns[1] as c1, columns[2] as c2 \n" +
+        "from dfs_test.`%s` ", root);
+
+    testBuilder()
+        .sqlQuery(query)
+        .unOrdered()
+        .baselineColumns("c0", "c1", "c2")
+        .baselineValues("a", "a", "a")
+        .baselineValues("a", "a", "a")
+        .baselineValues("a", "a", "a")
+        .build()
+        .run();
+  }
+
+  @Test // see DRILL-3718
+  public void testPipSeparatedWithQuote() throws Exception {
+    final String root = FileUtils.getResourceAsFile("/store/text/WithQuote.tbl").toURI().toString();
+    final String query = String.format("select columns[0] as c0, columns[1] as c1, columns[2] as c2 \n" +
+            "from dfs_test.`%s` ", root);
+
+    testBuilder()
+        .sqlQuery(query)
+        .unOrdered()
+        .baselineColumns("c0", "c1", "c2")
+        .baselineValues("a", "a", "a")
+        .baselineValues("a", "a", "a")
+        .baselineValues("a", "a", "a")
+        .build()
+        .run();
+  }
 }

http://git-wip-us.apache.org/repos/asf/drill/blob/48bc0b9a/exec/java-exec/src/test/resources/bootstrap-storage-plugins.json
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/bootstrap-storage-plugins.json b/exec/java-exec/src/test/resources/bootstrap-storage-plugins.json
index 4a7a53f..452baaf 100644
--- a/exec/java-exec/src/test/resources/bootstrap-storage-plugins.json
+++ b/exec/java-exec/src/test/resources/bootstrap-storage-plugins.json
@@ -29,6 +29,11 @@
           extensions: [ "tsv" ],
           delimiter: "\t"
         },
+        "ssv" : {
+          type: "text",
+          extensions: [ "ssv" ],
+          delimiter: " "
+        },
         "parquet" : {
           type: "parquet"
         },

http://git-wip-us.apache.org/repos/asf/drill/blob/48bc0b9a/exec/java-exec/src/test/resources/store/text/WithQuote.ssv
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/store/text/WithQuote.ssv b/exec/java-exec/src/test/resources/store/text/WithQuote.ssv
new file mode 100644
index 0000000..442f7db
--- /dev/null
+++ b/exec/java-exec/src/test/resources/store/text/WithQuote.ssv
@@ -0,0 +1,3 @@
+"a" a a
+a "a" a
+a a "a"
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/drill/blob/48bc0b9a/exec/java-exec/src/test/resources/store/text/WithQuote.tbl
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/store/text/WithQuote.tbl b/exec/java-exec/src/test/resources/store/text/WithQuote.tbl
new file mode 100644
index 0000000..f4ee282
--- /dev/null
+++ b/exec/java-exec/src/test/resources/store/text/WithQuote.tbl
@@ -0,0 +1,3 @@
+"a"|a|a
+a|"a"|a
+a|a|"a"
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/drill/blob/48bc0b9a/exec/java-exec/src/test/resources/store/text/WithQuote.tsv
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/store/text/WithQuote.tsv b/exec/java-exec/src/test/resources/store/text/WithQuote.tsv
new file mode 100644
index 0000000..607d067
--- /dev/null
+++ b/exec/java-exec/src/test/resources/store/text/WithQuote.tsv
@@ -0,0 +1,3 @@
+"a"	a	a
+a	"a"	a
+a	a	"a"
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/drill/blob/48bc0b9a/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index c17e612..93f423a 100644
--- a/pom.xml
+++ b/pom.xml
@@ -170,6 +170,7 @@
             <exclude>**/*.csv</exclude>
             <exclude>**/*.tsv</exclude>
             <exclude>**/*.txt</exclude>
+            <exclude>**/*.ssv</exclude>
             <exclude>**/drill-*.conf</exclude>
             <exclude>**/.buildpath</exclude>
             <exclude>**/*.proto</exclude>


Mime
View raw message