drill-commits mailing list archives

From jacq...@apache.org
Subject [2/5] drill git commit: DRILL-4048: Fix reading required dictionary encoded varbinary data in parquet files after recent update
Date Sat, 07 Nov 2015 05:41:55 GMT
DRILL-4048: Fix reading required dictionary encoded varbinary data in parquet files after recent update

The fix itself was small; this update is a little larger than necessary because I had hoped to create a unit test by modifying the one added in the earlier patch with the version upgrade. Unfortunately, we do not currently have a good way to generate Parquet files with required columns from unit tests, so I instead added a smaller subset of the binary file that was posted on the JIRA issue. The refactoring of the earlier test was still useful for readability, so I kept it in.


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/a5a1aa6b
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/a5a1aa6b
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/a5a1aa6b

Branch: refs/heads/master
Commit: a5a1aa6b487d0642641ce70f2c2500465956f9ec
Parents: ffe0240
Author: Jason Altekruse <altekrusejason@gmail.com>
Authored: Fri Nov 6 19:24:28 2015 -0800
Committer: Jacques Nadeau <jacques@apache.org>
Committed: Fri Nov 6 21:41:07 2015 -0800

----------------------------------------------------------------------
 .../columnreaders/VarLengthColumnReaders.java   |   3 +-
 .../physical/impl/writer/TestParquetWriter.java |  71 ++++++++++++++-----
 .../parquet/required_dictionary.parquet         | Bin 0 -> 8020 bytes
 3 files changed, 54 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/drill/blob/a5a1aa6b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLengthColumnReaders.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLengthColumnReaders.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLengthColumnReaders.java
index ba126d2..39cad0e 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLengthColumnReaders.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/columnreaders/VarLengthColumnReaders.java
@@ -187,7 +187,8 @@ public class VarLengthColumnReaders {
 
       if (usingDictionary) {
         currDictValToWrite = pageReader.dictionaryValueReader.readBytes();
-        mutator.setSafe(index, currDictValToWrite.toByteBuffer(), 0, currDictValToWrite.length());
+        ByteBuffer buf = currDictValToWrite.toByteBuffer();
+        mutator.setSafe(index, buf, buf.position(), currDictValToWrite.length());
       } else {
         mutator.setSafe(index, start, start + length, bytebuf);
       }

http://git-wip-us.apache.org/repos/asf/drill/blob/a5a1aa6b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestParquetWriter.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestParquetWriter.java b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestParquetWriter.java
index 51d5d08..007561d 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestParquetWriter.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/writer/TestParquetWriter.java
@@ -21,7 +21,12 @@ import java.io.File;
 import java.io.FileWriter;
 import java.math.BigDecimal;
 import java.sql.Date;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
 
+import com.google.common.base.Joiner;
 import org.apache.drill.BaseTestQuery;
 import org.apache.drill.exec.ExecConstants;
 import org.apache.drill.exec.fn.interp.TestConstantFolding;
@@ -45,25 +50,46 @@ public class TestParquetWriter extends BaseTestQuery {
   public TemporaryFolder folder = new TemporaryFolder();
   static FileSystem fs;
 
-  private String allTypesSelection =
-      "cast( `int_col` AS             int)                `int_col`, " +
-          "cast( `bigint_col` AS          bigint)             `bigint_col`, " +
-          // TODO(DRILL-3367)
-//          "cast( `decimal9_col` AS        decimal(9, 4))      `decimal9_col`, " +
-//          "cast( `decimal18_col` AS       decimal(18,9))      `decimal18_col`, " +
-//          "cast( `decimal28sparse_col` AS decimal(28, 14))    `decimal28sparse_col`, " +
-//          "cast( `decimal38sparse_col` AS decimal(38, 19))    `decimal38sparse_col`, " +
-          "cast( `date_col` AS            date)               `date_col`, " +
-          "cast( `timestamp_col` AS       timestamp)          `timestamp_col`, " +
-          "cast( `float4_col` AS          float)              `float4_col`, " +
-          "cast( `float8_col` AS          double)             `float8_col`, " +
-          "cast( `varbinary_col` AS       varbinary(65000))   `varbinary_col`, " +
-          // TODO(DRILL-2297)
-//        "cast( `intervalyear_col` AS    interval year)      `intervalyear_col`, " +
-          "cast( `intervalday_col` AS     interval day)       `intervalday_col`, " +
-          "cast( `bit_col` AS             boolean)            `bit_col`, " +
-          "      `varchar_col`                                `varchar_col`, " +
-          "cast( `time_col` AS            time)               `time_col` ";
+  // Map storing a convenient name as well as the cast type necessary
+  // to produce it casting from a varchar
+  private static final Map<String, String> allTypes = new HashMap<>();
+
+  // Select statement for all supported Drill types, for use in conjunction with
+  // the file parquet/alltypes.json in the resources directory
+  private static final String allTypesSelection;
+
+  static {
+    allTypes.put("int",                "int");
+    allTypes.put("bigint",             "bigint");
+    // TODO(DRILL-3367)
+//    allTypes.put("decimal(9, 4)",      "decimal9");
+//    allTypes.put("decimal(18,9)",      "decimal18");
+//    allTypes.put("decimal(28, 14)",    "decimal28sparse");
+//    allTypes.put("decimal(38, 19)",    "decimal38sparse");
+    allTypes.put("date",               "date");
+    allTypes.put("timestamp",          "timestamp");
+    allTypes.put("float",              "float4");
+    allTypes.put("double",             "float8");
+    allTypes.put("varbinary(65000)",   "varbinary");
+    // TODO(DRILL-2297)
+//    allTypes.put("interval year",      "intervalyear");
+    allTypes.put("interval day",       "intervalday");
+    allTypes.put("boolean",            "bit");
+    allTypes.put("varchar",            "varchar");
+    allTypes.put("time",               "time");
+
+    List<String> allTypeSelectsAndCasts = new ArrayList<>();
+    for (String s : allTypes.keySet()) {
+      // don't need to cast a varchar, just add the column reference
+      if (s.equals("varchar")) {
+        allTypeSelectsAndCasts.add(String.format("`%s_col`", allTypes.get(s)));
+        continue;
+      }
+      allTypeSelectsAndCasts.add(String.format("cast(`%s_col` AS %S) `%s_col`", allTypes.get(s), s, allTypes.get(s)));
+    }
+    allTypesSelection = Joiner.on(",").join(allTypeSelectsAndCasts);
+  }
+
 
   private String allTypesTable = "cp.`/parquet/alltypes.json`";
 
@@ -158,6 +184,12 @@ public class TestParquetWriter extends BaseTestQuery {
   }
 
   @Test
+  public void testDictionaryError() throws Exception {
+    compareParquetReadersColumnar("*", "cp.`parquet/required_dictionary.parquet`");
+    runTestAndValidate("*", "*", "cp.`parquet/required_dictionary.parquet`", "required_dictionary");
+  }
+
+  @Test
   public void testDictionaryEncoding() throws Exception {
     String selection = "type";
     String inputTable = "cp.`donuts.json`";
@@ -668,6 +700,7 @@ public class TestParquetWriter extends BaseTestQuery {
 
   public void runTestAndValidate(String selection, String validationSelection, String inputTable, String outputFile) throws Exception {
     try {
+      deleteTableIfExists(outputFile);
       test("use dfs_test.tmp");
   //    test("ALTER SESSION SET `planner.add_producer_consumer` = false");
       String query = String.format("SELECT %s FROM %s", selection, inputTable);
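
As an aside, not part of the patch: the map-driven static block above just assembles a comma-separated projection list. A standalone sketch of the shape of that output, using only a small subset of the types (java.lang.String.join stands in for the Guava Joiner used in the test, and the class name is made up):

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class SelectionSketch {
  public static void main(String[] args) {
    // cast type -> column-name stem, mirroring the test's allTypes map (subset only)
    Map<String, String> types = new LinkedHashMap<>();
    types.put("int", "int");
    types.put("varchar", "varchar");
    types.put("time", "time");

    List<String> cols = new ArrayList<>();
    for (Map.Entry<String, String> e : types.entrySet()) {
      if (e.getKey().equals("varchar")) {
        cols.add(String.format("`%s_col`", e.getValue()));          // varchar needs no cast
      } else {
        cols.add(String.format("cast(`%s_col` AS %s) `%s_col`",
            e.getValue(), e.getKey(), e.getValue()));
      }
    }
    System.out.println(String.join(", ", cols));
    // -> cast(`int_col` AS int) `int_col`, `varchar_col`, cast(`time_col` AS time) `time_col`
  }
}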

http://git-wip-us.apache.org/repos/asf/drill/blob/a5a1aa6b/exec/java-exec/src/test/resources/parquet/required_dictionary.parquet
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/parquet/required_dictionary.parquet b/exec/java-exec/src/test/resources/parquet/required_dictionary.parquet
new file mode 100644
index 0000000..c837192
Binary files /dev/null and b/exec/java-exec/src/test/resources/parquet/required_dictionary.parquet differ

