drill-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From s..@apache.org
Subject drill git commit: DRILL-1818: Fix complex projection pushdown
Date Sun, 07 Dec 2014 23:07:16 GMT
Repository: drill
Updated Branches:
  refs/heads/0.7.0 142e57761 -> cd74ccede


DRILL-1818: Fix complex projection pushdown

Also fixes:
- An issue where the converted type was lost when projecting two elements of a repeated type
- An issue when selecting multiple elements of a repeated type while using inconsistent casing


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/cd74cced
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/cd74cced
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/cd74cced

Branch: refs/heads/0.7.0
Commit: cd74ccedef56e52d6811151b45be17ae7224ae58
Parents: 142e577
Author: Steven Phillips <sphillips@maprtech.com>
Authored: Sun Dec 7 03:33:51 2014 -0800
Committer: Steven Phillips <sphillips@maprtech.com>
Committed: Sun Dec 7 04:05:58 2014 -0800

----------------------------------------------------------------------
 .../exec/store/parquet2/DrillParquetReader.java |  48 ++++++--
 .../drill/exec/vector/complex/MapVector.java    |   4 +-
 .../exec/store/parquet/TestParquetComplex.java  |  88 +++++++++++++++
 .../store/parquet/complex/baseline.json         | 109 +++++++++++++++++++
 .../store/parquet/complex/baseline2.json        |  11 ++
 .../store/parquet/complex/baseline3.json        |  10 ++
 .../store/parquet/complex/baseline4.json        |  16 +++
 .../store/parquet/complex/complex.parquet       | Bin 0 -> 2055 bytes
 8 files changed, 272 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/drill/blob/cd74cced/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet2/DrillParquetReader.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet2/DrillParquetReader.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet2/DrillParquetReader.java
index 4455c50..dec5222 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet2/DrillParquetReader.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet2/DrillParquetReader.java
@@ -24,7 +24,9 @@ import java.util.Collection;
 import java.util.ArrayList;
 import java.util.Map;
 import java.util.HashMap;
+import java.util.Set;
 
+import com.google.common.collect.Sets;
 import org.apache.drill.common.exceptions.ExecutionSetupException;
 import org.apache.drill.common.expression.PathSegment;
 import org.apache.drill.common.expression.SchemaPath;
@@ -125,28 +127,41 @@ public class DrillParquetReader extends AbstractRecordReader {
 
     String messageName = schema.getName();
     List<ColumnDescriptor> schemaColumns = schema.getColumns();
+    // parquet type.union() seems to lose ConvertedType info when merging two columns that are the same type. This can
+    // happen when selecting two elements from an array. So to work around this, we use a set of SchemaPath to avoid duplicates
+    // and then merge the types at the end
+    Set<SchemaPath> schemaPaths = Sets.newLinkedHashSet(); // Use LinkedHashSet to preserve ordering, otherwise DrillParquetGroupConverter breaks
 
     for (SchemaPath path : columns) {
       boolean colNotFound=true;
-      for (ColumnDescriptor colDesc: schemaColumns) {
+      schemaColumnLoop: for (ColumnDescriptor colDesc: schemaColumns) {
         String[] schemaColDesc = Arrays.copyOf(colDesc.getPath(), colDesc.getPath().length);
         SchemaPath schemaPath = SchemaPath.getCompoundPath(schemaColDesc);
         PathSegment schemaSeg = schemaPath.getRootSegment();
         PathSegment colSeg = path.getRootSegment();
         List<String> segments = Lists.newArrayList();
-        while(schemaSeg != null && colSeg != null){
+        while(schemaSeg != null || colSeg != null){
+          // if one is null but not the other, this is not a match
+          if (schemaSeg == null || colSeg == null) {
+            continue schemaColumnLoop;
+          }
           if (colSeg.isNamed()) {
             // DRILL-1739 - Use case insensitive name comparison
             if(schemaSeg.getNameSegment().getPath().equalsIgnoreCase(colSeg.getNameSegment().getPath()))
{
               segments.add(schemaSeg.getNameSegment().getPath());
             }else{
-              break;
+              continue schemaColumnLoop;
             }
           }else{
             colSeg=colSeg.getChild();
             continue;
           }
-          colSeg = colSeg.getChild();
+          while ((colSeg = colSeg.getChild()) != null && colSeg.isArray()) {
+            colSeg = colSeg.getChild();
+            if (colSeg == null) {
+              break;
+            }
+          }
           schemaSeg = schemaSeg.getChild();
         }
         // Field exists in schema
@@ -154,14 +169,7 @@ public class DrillParquetReader extends AbstractRecordReader {
           String[] pathSegments = new String[segments.size()];
           segments.toArray(pathSegments);
           colNotFound=false;
-          // Use the field names from the schema otherwise we get an exception if the case
of the name doesn't match
-          Type t = getType(pathSegments, 0, schema);
-
-          if (projection == null) {
-            projection = new MessageType(messageName, t);
-          } else {
-            projection = projection.union(new MessageType(messageName, t));
-          }
+          schemaPaths.add(schemaPath);
           break;
         }
       }
@@ -169,6 +177,22 @@ public class DrillParquetReader extends AbstractRecordReader {
         columnsNotFound.add(path);
       }
     }
+    for (SchemaPath schemaPath : schemaPaths) {
+      List<String> segments = Lists.newArrayList();
+      PathSegment seg = schemaPath.getRootSegment();
+      do {
+        segments.add(seg.getNameSegment().getPath());
+      } while ((seg = seg.getChild()) != null);
+      String[] pathSegments = new String[segments.size()];
+      segments.toArray(pathSegments);
+      Type t = getType(pathSegments, 0, schema);
+
+      if (projection == null) {
+        projection = new MessageType(messageName, t);
+      } else {
+        projection = projection.union(new MessageType(messageName, t));
+      }
+    }
     return projection;
   }
 

http://git-wip-us.apache.org/repos/asf/drill/blob/cd74cced/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/MapVector.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/MapVector.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/MapVector.java
index 6e26547..513bfcb 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/MapVector.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/MapVector.java
@@ -137,7 +137,7 @@ public class MapVector extends AbstractContainerVector {
     if (vectors.put(name, vv) != null) {
       throw new IllegalStateException();
     }
-    vectorIds.put(name, new VectorWithOrdinal(vv, ordinal));
+    vectorIds.put(name.toLowerCase(), new VectorWithOrdinal(vv, ordinal));
     vectorsById.put(ordinal, vv);
     field.addChild(vv.getField());
   }
@@ -445,7 +445,7 @@ public class MapVector extends AbstractContainerVector {
 
   @Override
   public VectorWithOrdinal getVectorWithOrdinal(String name) {
-    return vectorIds.get(name);
+    return vectorIds.get(name.toLowerCase());
   }
 
 }

http://git-wip-us.apache.org/repos/asf/drill/blob/cd74cced/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetComplex.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetComplex.java
b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetComplex.java
new file mode 100644
index 0000000..9919d3b
--- /dev/null
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetComplex.java
@@ -0,0 +1,88 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.store.parquet;
+
+import org.apache.drill.BaseTestQuery;
+import org.junit.Test;
+
+public class TestParquetComplex extends BaseTestQuery {
+
+  private static final String DATAFILE = "cp.`store/parquet/complex/complex.parquet`";
+
+  @Test
+  public void testStar() throws Exception {
+    testBuilder()
+            .sqlQuery("select * from cp.`store/parquet/complex/complex.parquet`")
+            .ordered()
+            .jsonBaselineFile("store/parquet/complex/baseline.json")
+            .build()
+            .run();
+  }
+
+  @Test
+  public void missingColumnInMap() throws Exception {
+    String query = "select t.trans_info.keywords as keywords from cp.`store/parquet/complex/complex.parquet`
t";
+    String[] columns = {"keywords"};
+    testBuilder()
+            .sqlQuery(query)
+            .ordered()
+            .jsonBaselineFile("store/parquet/complex/baseline2.json")
+            .baselineColumns(columns)
+            .build()
+            .run();
+  }
+
+  @Test
+  public void secondElementInMap() throws Exception {
+    String query = String.format("select t.`marketing_info`.keywords as keywords from %s
t", DATAFILE);
+    String[] columns = {"keywords"};
+    testBuilder()
+            .sqlQuery(query)
+            .ordered()
+            .jsonBaselineFile("store/parquet/complex/baseline3.json")
+            .baselineColumns(columns)
+            .build()
+            .run();
+  }
+
+  @Test
+  public void elementsOfArray() throws Exception {
+    String query = String.format("select t.`marketing_info`.keywords[0] as keyword0, t.`marketing_info`.keywords[2]
as keyword2 from %s t", DATAFILE);
+    String[] columns = {"keyword0", "keyword2"};
+    testBuilder()
+            .sqlQuery(query)
+            .ordered()
+            .jsonBaselineFile("store/parquet/complex/baseline4.json")
+            .baselineColumns(columns)
+            .build()
+            .run();
+  }
+
+  @Test
+  public void elementsOfArrayCaseInsensitive() throws Exception {
+    String query = String.format("select t.`MARKETING_INFO`.keywords[0] as keyword0, t.`Marketing_Info`.Keywords[2]
as keyword2 from %s t", DATAFILE);
+    String[] columns = {"keyword0", "keyword2"};
+    testBuilder()
+            .sqlQuery(query)
+            .ordered()
+            .jsonBaselineFile("store/parquet/complex/baseline4.json")
+            .baselineColumns(columns)
+            .build()
+            .run();
+  }
+}

http://git-wip-us.apache.org/repos/asf/drill/blob/cd74cced/exec/java-exec/src/test/resources/store/parquet/complex/baseline.json
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/store/parquet/complex/baseline.json b/exec/java-exec/src/test/resources/store/parquet/complex/baseline.json
new file mode 100644
index 0000000..796f5b7
--- /dev/null
+++ b/exec/java-exec/src/test/resources/store/parquet/complex/baseline.json
@@ -0,0 +1,109 @@
+{ "amount" : 80.5,
+    "date" : "2013-07-26",
+    "marketing_info" : { "camp_id" : 4,
+        "keywords" : [ "go",
+            "to",
+            "thing",
+            "watch",
+            "made",
+            "laughing",
+            "might",
+            "pay",
+            "in",
+            "your",
+            "hold"
+        ]
+    },
+    "time" : "04:56:59",
+    "trans_id" : 0,
+    "trans_info" : { "prod_id" : [ 16 ],
+        "purch_flag" : "false"
+    },
+    "user_info" : { "cust_id" : 28,
+        "device" : "IOS5",
+        "state" : "mt"
+    }
+}
+{ "amount" : 100.40000000000001,
+  "date" : "2013-05-16",
+  "marketing_info" : { "camp_id" : 6,
+      "keywords" : [ "pronounce",
+          "tree",
+          "instead",
+          "games",
+          "sigh"
+        ]
+    },
+  "time" : "07:31:54",
+  "trans_id" : 1,
+  "trans_info" : { "prod_id" : [  ],
+      "purch_flag" : "false"
+    },
+  "user_info" : { "cust_id" : 86623,
+      "device" : "AOS4.2",
+      "state" : "mi"
+    }
+}
+{ "amount" : 20.25,
+  "date" : "2013-06-09",
+  "marketing_info" : { "camp_id" : 17,
+      "keywords" : [  ]
+    },
+  "time" : "15:31:45",
+  "trans_id" : 2,
+  "trans_info" : { "prod_id" : [ 293,
+          90
+        ],
+      "purch_flag" : "true"
+    },
+  "user_info" : { "cust_id" : 11,
+      "device" : "IOS5",
+      "state" : "la"
+    }
+}
+{ "amount" : 500.75,
+  "date" : "2013-07-19",
+  "marketing_info" : { "camp_id" : 17,
+      "keywords" : [ "it's" ]
+    },
+  "time" : "11:24:22",
+  "trans_id" : 3,
+  "trans_info" : { "prod_id" : [ 173,
+          18,
+          121,
+          84,
+          115,
+          226,
+          464,
+          525,
+          35,
+          11,
+          94,
+          45
+        ],
+      "purch_flag" : "false"
+    },
+  "user_info" : { "cust_id" : 666,
+      "device" : "IOS5",
+      "state" : "nj"
+    }
+}
+{ "amount" : 34.200000000000003,
+  "date" : "2013-07-21",
+  "marketing_info" : { "camp_id" : 8,
+      "keywords" : [ "fallout" ]
+    },
+  "time" : "08:01:13",
+  "trans_id" : 4,
+  "trans_info" : { "prod_id" : [ 311,
+          29,
+          5,
+          41
+        ],
+      "purch_flag" : "false"
+    },
+  "user_info" : { "cust_id" : 999,
+      "device" : "IOS7",
+      "state" : "ct"
+    }
+}

http://git-wip-us.apache.org/repos/asf/drill/blob/cd74cced/exec/java-exec/src/test/resources/store/parquet/complex/baseline2.json
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/store/parquet/complex/baseline2.json b/exec/java-exec/src/test/resources/store/parquet/complex/baseline2.json
new file mode 100644
index 0000000..62fc383
--- /dev/null
+++ b/exec/java-exec/src/test/resources/store/parquet/complex/baseline2.json
@@ -0,0 +1,11 @@
+{
+  "keywords" : null
+} {
+  "keywords" : null
+} {
+  "keywords" : null
+} {
+  "keywords" : null
+} {
+  "keywords" : null
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/drill/blob/cd74cced/exec/java-exec/src/test/resources/store/parquet/complex/baseline3.json
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/store/parquet/complex/baseline3.json b/exec/java-exec/src/test/resources/store/parquet/complex/baseline3.json
new file mode 100644
index 0000000..9ccfb5c
--- /dev/null
+++ b/exec/java-exec/src/test/resources/store/parquet/complex/baseline3.json
@@ -0,0 +1,10 @@
+{
+  "keywords" : [ "go", "to", "thing", "watch", "made", "laughing", "might", "pay", "in",
"your", "hold" ]
+} {
+  "keywords" : [ "pronounce", "tree", "instead", "games", "sigh" ]
+} {
+  "keywords" : [] } {
+  "keywords" : [ "it's" ]
+} {
+  "keywords" : [ "fallout" ]
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/drill/blob/cd74cced/exec/java-exec/src/test/resources/store/parquet/complex/baseline4.json
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/store/parquet/complex/baseline4.json b/exec/java-exec/src/test/resources/store/parquet/complex/baseline4.json
new file mode 100644
index 0000000..232f9db
--- /dev/null
+++ b/exec/java-exec/src/test/resources/store/parquet/complex/baseline4.json
@@ -0,0 +1,16 @@
+{
+  "keyword0" : "go",
+  "keyword2" : "thing"
+} {
+  "keyword0" : "pronounce",
+  "keyword2" : "instead"
+} {
+  "keyword0" : null,
+  "keyword2" : null
+} {
+  "keyword0" : "it's",
+  "keyword2" : null
+} {
+  "keyword0" : "fallout",
+  "keyword2" : null
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/drill/blob/cd74cced/exec/java-exec/src/test/resources/store/parquet/complex/complex.parquet
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/store/parquet/complex/complex.parquet b/exec/java-exec/src/test/resources/store/parquet/complex/complex.parquet
new file mode 100644
index 0000000..3d6327f
Binary files /dev/null and b/exec/java-exec/src/test/resources/store/parquet/complex/complex.parquet
differ


Mime
View raw message