Return-Path: X-Original-To: apmail-drill-commits-archive@www.apache.org Delivered-To: apmail-drill-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 3D2A110DBF for ; Sun, 7 Dec 2014 23:05:26 +0000 (UTC) Received: (qmail 63000 invoked by uid 500); 7 Dec 2014 23:05:25 -0000 Delivered-To: apmail-drill-commits-archive@drill.apache.org Received: (qmail 62972 invoked by uid 500); 7 Dec 2014 23:05:25 -0000 Mailing-List: contact commits-help@drill.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: commits@drill.apache.org Delivered-To: mailing list commits@drill.apache.org Received: (qmail 62962 invoked by uid 99); 7 Dec 2014 23:05:25 -0000 Received: from tyr.zones.apache.org (HELO tyr.zones.apache.org) (140.211.11.114) by apache.org (qpsmtpd/0.29) with ESMTP; Sun, 07 Dec 2014 23:05:25 +0000 Received: by tyr.zones.apache.org (Postfix, from userid 65534) id EFA799BEFF3; Sun, 7 Dec 2014 23:05:24 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: smp@apache.org To: commits@drill.apache.org Message-Id: <73b6243bc51941d2a37ac64190a1ed21@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: drill git commit: DRILL-1818: Fix complex projection pushdown Date: Sun, 7 Dec 2014 23:05:24 +0000 (UTC) Repository: drill Updated Branches: refs/heads/master 3abba58a2 -> 54ce3d375 DRILL-1818: Fix complex projection pushdown Also fixes: Issue where converted type was lost when projecting two elements in a repeated type Issue when selecting multiple elements in a repeated type, but using inconsistent casing Project: http://git-wip-us.apache.org/repos/asf/drill/repo Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/54ce3d37 Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/54ce3d37 Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/54ce3d37 Branch: refs/heads/master Commit: 54ce3d375258d973edb3f024fb79f4372ffdc708 Parents: 3abba58 Author: Steven Phillips Authored: Sun Dec 7 03:33:51 2014 -0800 Committer: Steven Phillips Committed: Sun Dec 7 15:04:40 2014 -0800 ---------------------------------------------------------------------- .../exec/store/parquet2/DrillParquetReader.java | 48 ++++++-- .../drill/exec/vector/complex/MapVector.java | 4 +- .../exec/store/parquet/TestParquetComplex.java | 88 +++++++++++++++ .../store/parquet/complex/baseline.json | 109 +++++++++++++++++++ .../store/parquet/complex/baseline2.json | 11 ++ .../store/parquet/complex/baseline3.json | 10 ++ .../store/parquet/complex/baseline4.json | 16 +++ .../store/parquet/complex/complex.parquet | Bin 0 -> 2055 bytes 8 files changed, 272 insertions(+), 14 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/drill/blob/54ce3d37/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet2/DrillParquetReader.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet2/DrillParquetReader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet2/DrillParquetReader.java index 4455c50..dec5222 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet2/DrillParquetReader.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet2/DrillParquetReader.java @@ -24,7 +24,9 @@ import java.util.Collection; import java.util.ArrayList; import java.util.Map; import java.util.HashMap; +import java.util.Set; +import com.google.common.collect.Sets; import org.apache.drill.common.exceptions.ExecutionSetupException; import org.apache.drill.common.expression.PathSegment; import org.apache.drill.common.expression.SchemaPath; @@ -125,28 +127,41 @@ public class DrillParquetReader extends AbstractRecordReader { String messageName = schema.getName(); List schemaColumns = schema.getColumns(); + // parquet type.union() seems to lose ConvertedType info when merging two columns that are the same type. This can + // happen when selecting two elements from an array. So to work around this, we use set of SchemaPath to avoid duplicates + // and then merge the types at the end + Set schemaPaths = Sets.newLinkedHashSet(); // Use LinkedHashSet to preserver ordering, otherwise DrillParquetGroupConverter breaks for (SchemaPath path : columns) { boolean colNotFound=true; - for (ColumnDescriptor colDesc: schemaColumns) { + schemaColumnLoop: for (ColumnDescriptor colDesc: schemaColumns) { String[] schemaColDesc = Arrays.copyOf(colDesc.getPath(), colDesc.getPath().length); SchemaPath schemaPath = SchemaPath.getCompoundPath(schemaColDesc); PathSegment schemaSeg = schemaPath.getRootSegment(); PathSegment colSeg = path.getRootSegment(); List segments = Lists.newArrayList(); - while(schemaSeg != null && colSeg != null){ + while(schemaSeg != null || colSeg != null){ + // if one is null but not the other, this is not a match + if (schemaSeg == null || colSeg == null) { + continue schemaColumnLoop; + } if (colSeg.isNamed()) { // DRILL-1739 - Use case insensitive name comparison if(schemaSeg.getNameSegment().getPath().equalsIgnoreCase(colSeg.getNameSegment().getPath())) { segments.add(schemaSeg.getNameSegment().getPath()); }else{ - break; + continue schemaColumnLoop; } }else{ colSeg=colSeg.getChild(); continue; } - colSeg = colSeg.getChild(); + while ((colSeg = colSeg.getChild()) != null && colSeg.isArray()) { + colSeg = colSeg.getChild(); + if (colSeg == null) { + break; + } + } schemaSeg = schemaSeg.getChild(); } // Field exists in schema @@ -154,14 +169,7 @@ public class DrillParquetReader extends AbstractRecordReader { String[] pathSegments = new String[segments.size()]; segments.toArray(pathSegments); colNotFound=false; - // Use the field names from the schema otherwise we get an exception if the case of the name doesn't match - Type t = getType(pathSegments, 0, schema); - - if (projection == null) { - projection = new MessageType(messageName, t); - } else { - projection = projection.union(new MessageType(messageName, t)); - } + schemaPaths.add(schemaPath); break; } } @@ -169,6 +177,22 @@ public class DrillParquetReader extends AbstractRecordReader { columnsNotFound.add(path); } } + for (SchemaPath schemaPath : schemaPaths) { + List segments = Lists.newArrayList(); + PathSegment seg = schemaPath.getRootSegment(); + do { + segments.add(seg.getNameSegment().getPath()); + } while ((seg = seg.getChild()) != null); + String[] pathSegments = new String[segments.size()]; + segments.toArray(pathSegments); + Type t = getType(pathSegments, 0, schema); + + if (projection == null) { + projection = new MessageType(messageName, t); + } else { + projection = projection.union(new MessageType(messageName, t)); + } + } return projection; } http://git-wip-us.apache.org/repos/asf/drill/blob/54ce3d37/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/MapVector.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/MapVector.java b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/MapVector.java index 6e26547..513bfcb 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/MapVector.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/MapVector.java @@ -137,7 +137,7 @@ public class MapVector extends AbstractContainerVector { if (vectors.put(name, vv) != null) { throw new IllegalStateException(); } - vectorIds.put(name, new VectorWithOrdinal(vv, ordinal)); + vectorIds.put(name.toLowerCase(), new VectorWithOrdinal(vv, ordinal)); vectorsById.put(ordinal, vv); field.addChild(vv.getField()); } @@ -445,7 +445,7 @@ public class MapVector extends AbstractContainerVector { @Override public VectorWithOrdinal getVectorWithOrdinal(String name) { - return vectorIds.get(name); + return vectorIds.get(name.toLowerCase()); } } http://git-wip-us.apache.org/repos/asf/drill/blob/54ce3d37/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetComplex.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetComplex.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetComplex.java new file mode 100644 index 0000000..9919d3b --- /dev/null +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetComplex.java @@ -0,0 +1,88 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.store.parquet; + +import org.apache.drill.BaseTestQuery; +import org.junit.Test; + +public class TestParquetComplex extends BaseTestQuery { + + private static final String DATAFILE = "cp.`store/parquet/complex/complex.parquet`"; + + @Test + public void testStar() throws Exception { + testBuilder() + .sqlQuery("select * from cp.`store/parquet/complex/complex.parquet`") + .ordered() + .jsonBaselineFile("store/parquet/complex/baseline.json") + .build() + .run(); + } + + @Test + public void missingColumnInMap() throws Exception { + String query = "select t.trans_info.keywords as keywords from cp.`store/parquet/complex/complex.parquet` t"; + String[] columns = {"keywords"}; + testBuilder() + .sqlQuery(query) + .ordered() + .jsonBaselineFile("store/parquet/complex/baseline2.json") + .baselineColumns(columns) + .build() + .run(); + } + + @Test + public void secondElementInMap() throws Exception { + String query = String.format("select t.`marketing_info`.keywords as keywords from %s t", DATAFILE); + String[] columns = {"keywords"}; + testBuilder() + .sqlQuery(query) + .ordered() + .jsonBaselineFile("store/parquet/complex/baseline3.json") + .baselineColumns(columns) + .build() + .run(); + } + + @Test + public void elementsOfArray() throws Exception { + String query = String.format("select t.`marketing_info`.keywords[0] as keyword0, t.`marketing_info`.keywords[2] as keyword2 from %s t", DATAFILE); + String[] columns = {"keyword0", "keyword2"}; + testBuilder() + .sqlQuery(query) + .ordered() + .jsonBaselineFile("store/parquet/complex/baseline4.json") + .baselineColumns(columns) + .build() + .run(); + } + + @Test + public void elementsOfArrayCaseInsensitive() throws Exception { + String query = String.format("select t.`MARKETING_INFO`.keywords[0] as keyword0, t.`Marketing_Info`.Keywords[2] as keyword2 from %s t", DATAFILE); + String[] columns = {"keyword0", "keyword2"}; + testBuilder() + .sqlQuery(query) + .ordered() + .jsonBaselineFile("store/parquet/complex/baseline4.json") + .baselineColumns(columns) + .build() + .run(); + } +} http://git-wip-us.apache.org/repos/asf/drill/blob/54ce3d37/exec/java-exec/src/test/resources/store/parquet/complex/baseline.json ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/resources/store/parquet/complex/baseline.json b/exec/java-exec/src/test/resources/store/parquet/complex/baseline.json new file mode 100644 index 0000000..796f5b7 --- /dev/null +++ b/exec/java-exec/src/test/resources/store/parquet/complex/baseline.json @@ -0,0 +1,109 @@ +{ "amount" : 80.5, + "date" : "2013-07-26", + "marketing_info" : { "camp_id" : 4, + "keywords" : [ "go", + "to", + "thing", + "watch", + "made", + "laughing", + "might", + "pay", + "in", + "your", + "hold" + ] + }, + "time" : "04:56:59", + "trans_id" : 0, + "trans_info" : { "prod_id" : [ 16 ], + "purch_flag" : "false" + }, + "user_info" : { "cust_id" : 28, + "device" : "IOS5", + "state" : "mt" + } +} +{ "amount" : 100.40000000000001, + "date" : "2013-05-16", + "marketing_info" : { "camp_id" : 6, + "keywords" : [ "pronounce", + "tree", + "instead", + "games", + "sigh" + ] + }, + "time" : "07:31:54", + "trans_id" : 1, + "trans_info" : { "prod_id" : [ ], + "purch_flag" : "false" + }, + "user_info" : { "cust_id" : 86623, + "device" : "AOS4.2", + "state" : "mi" + } +} +{ "amount" : 20.25, + "date" : "2013-06-09", + "marketing_info" : { "camp_id" : 17, + "keywords" : [ ] + }, + "time" : "15:31:45", + "trans_id" : 2, + "trans_info" : { "prod_id" : [ 293, + 90 + ], + "purch_flag" : "true" + }, + "user_info" : { "cust_id" : 11, + "device" : "IOS5", + "state" : "la" + } +} +{ "amount" : 500.75, + "date" : "2013-07-19", + "marketing_info" : { "camp_id" : 17, + "keywords" : [ "it's" ] + }, + "time" : "11:24:22", + "trans_id" : 3, + "trans_info" : { "prod_id" : [ 173, + 18, + 121, + 84, + 115, + 226, + 464, + 525, + 35, + 11, + 94, + 45 + ], + "purch_flag" : "false" + }, + "user_info" : { "cust_id" : 666, + "device" : "IOS5", + "state" : "nj" + } +} +{ "amount" : 34.200000000000003, + "date" : "2013-07-21", + "marketing_info" : { "camp_id" : 8, + "keywords" : [ "fallout" ] + }, + "time" : "08:01:13", + "trans_id" : 4, + "trans_info" : { "prod_id" : [ 311, + 29, + 5, + 41 + ], + "purch_flag" : "false" + }, + "user_info" : { "cust_id" : 999, + "device" : "IOS7", + "state" : "ct" + } +} http://git-wip-us.apache.org/repos/asf/drill/blob/54ce3d37/exec/java-exec/src/test/resources/store/parquet/complex/baseline2.json ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/resources/store/parquet/complex/baseline2.json b/exec/java-exec/src/test/resources/store/parquet/complex/baseline2.json new file mode 100644 index 0000000..62fc383 --- /dev/null +++ b/exec/java-exec/src/test/resources/store/parquet/complex/baseline2.json @@ -0,0 +1,11 @@ +{ + "keywords" : null +} { + "keywords" : null +} { + "keywords" : null +} { + "keywords" : null +} { + "keywords" : null +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/drill/blob/54ce3d37/exec/java-exec/src/test/resources/store/parquet/complex/baseline3.json ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/resources/store/parquet/complex/baseline3.json b/exec/java-exec/src/test/resources/store/parquet/complex/baseline3.json new file mode 100644 index 0000000..9ccfb5c --- /dev/null +++ b/exec/java-exec/src/test/resources/store/parquet/complex/baseline3.json @@ -0,0 +1,10 @@ +{ + "keywords" : [ "go", "to", "thing", "watch", "made", "laughing", "might", "pay", "in", "your", "hold" ] +} { + "keywords" : [ "pronounce", "tree", "instead", "games", "sigh" ] +} { + "keywords" : [] } { + "keywords" : [ "it's" ] +} { + "keywords" : [ "fallout" ] +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/drill/blob/54ce3d37/exec/java-exec/src/test/resources/store/parquet/complex/baseline4.json ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/resources/store/parquet/complex/baseline4.json b/exec/java-exec/src/test/resources/store/parquet/complex/baseline4.json new file mode 100644 index 0000000..232f9db --- /dev/null +++ b/exec/java-exec/src/test/resources/store/parquet/complex/baseline4.json @@ -0,0 +1,16 @@ +{ + "keyword0" : "go", + "keyword2" : "thing" +} { + "keyword0" : "pronounce", + "keyword2" : "instead" +} { + "keyword0" : null, + "keyword2" : null +} { + "keyword0" : "it's", + "keyword2" : null +} { + "keyword0" : "fallout", + "keyword2" : null +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/drill/blob/54ce3d37/exec/java-exec/src/test/resources/store/parquet/complex/complex.parquet ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/resources/store/parquet/complex/complex.parquet b/exec/java-exec/src/test/resources/store/parquet/complex/complex.parquet new file mode 100644 index 0000000..3d6327f Binary files /dev/null and b/exec/java-exec/src/test/resources/store/parquet/complex/complex.parquet differ