Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 78DC8200B39 for ; Sat, 25 Jun 2016 00:11:50 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id 77504160A5A; Fri, 24 Jun 2016 22:11:50 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 98038160A58 for ; Sat, 25 Jun 2016 00:11:49 +0200 (CEST) Received: (qmail 97344 invoked by uid 500); 24 Jun 2016 22:11:48 -0000 Mailing-List: contact commits-help@drill.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: commits@drill.apache.org Delivered-To: mailing list commits@drill.apache.org Received: (qmail 97335 invoked by uid 99); 24 Jun 2016 22:11:48 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 24 Jun 2016 22:11:48 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id A5B78E049D; Fri, 24 Jun 2016 22:11:48 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: amansinha@apache.org To: commits@drill.apache.org Message-Id: <5ed301a0758b4107bd90e00a9c3b5a39@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: drill git commit: DRILL-4574: Avro Plugin: Flatten does not work correctly on record items Date: Fri, 24 Jun 2016 22:11:48 +0000 (UTC) archived-at: Fri, 24 Jun 2016 22:11:50 -0000 Repository: drill Updated Branches: refs/heads/master 11602456b -> 1c9e92b0c DRILL-4574: Avro Plugin: Flatten does not work correctly on record items * When applied to arrays of record, the flatten function always returned the last item only in the array when querying avro 
records * The problem was that the RepeatedMapWriter's start() and end() methods were only called once and not for every array item * This commit adds tests that show the expected behaviour for both non-primitive and primitive arrays close apache/drill#459 Project: http://git-wip-us.apache.org/repos/asf/drill/repo Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/1c9e92b0 Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/1c9e92b0 Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/1c9e92b0 Branch: refs/heads/master Commit: 1c9e92b0cec18b4ee5a005fd6006ad329e3fa568 Parents: 1160245 Author: baunz Authored: Sun Apr 3 09:43:40 2016 +0200 Committer: Aman Sinha Committed: Fri Jun 24 15:07:25 2016 -0700 ---------------------------------------------------------------------- .../drill/exec/store/avro/AvroRecordReader.java | 5 +- .../drill/exec/store/avro/AvroFormatTest.java | 65 ++++++++++++++++++++ .../drill/exec/store/avro/AvroTestUtil.java | 40 ++++++++++++ 3 files changed, 108 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/drill/blob/1c9e92b0/exec/java-exec/src/main/java/org/apache/drill/exec/store/avro/AvroRecordReader.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/avro/AvroRecordReader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/avro/AvroRecordReader.java index 89e220c..bbc9b04 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/avro/AvroRecordReader.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/avro/AvroRecordReader.java @@ -195,6 +195,7 @@ public class AvroRecordReader extends AbstractRecordReader { } process(((GenericRecord) value).get(field.name()), field.schema(), field.name(), _writer, fieldSelection.getChild(field.name())); + } break; case ARRAY: @@ -207,11 +208,11 @@ public class 
AvroRecordReader extends AbstractRecordReader { } else { writer = (MapOrListWriterImpl) writer.list(fieldName); } - writer.start(); for (final Object o : array) { + writer.start(); process(o, elementSchema, fieldName, writer, fieldSelection.getChild(fieldName)); + writer.end(); } - writer.end(); break; case UNION: // currently supporting only nullable union (optional fields) like ["null", "some-type"]. http://git-wip-us.apache.org/repos/asf/drill/blob/1c9e92b0/exec/java-exec/src/test/java/org/apache/drill/exec/store/avro/AvroFormatTest.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/avro/AvroFormatTest.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/avro/AvroFormatTest.java index af4d0e6..f804e88 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/avro/AvroFormatTest.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/avro/AvroFormatTest.java @@ -326,6 +326,71 @@ public class AvroFormatTest extends BaseTestQuery { test(sql); } + /** + * See + * + */ + @Test + public void testFlattenPrimitiveArray() throws Exception { + final String file = AvroTestUtil.generateSimpleArraySchema_NoNullValues().getFilePath(); + + final String sql = "select a_string, flatten(c_string_array) as array_item " + + "from dfs_test.`" + file + "` t"; + + TestBuilder testBuilder = testBuilder().sqlQuery(sql).unOrdered() + .baselineColumns("a_string", "array_item"); + + for (int i = 0; i < AvroTestUtil.RECORD_COUNT; i++) { + + for (int j = 0; j < AvroTestUtil.ARRAY_SIZE; j++) { + testBuilder.baselineValues("a_" + i, "c_string_array_" + i + "_" + j); + } + } + + + testBuilder.go(); + + } + + private TestBuilder nestedArrayQueryTestBuilder(String file) { + + final String sql = "select rec_nr, array_item['nested_1_int'] as array_item_nested_int from " + + "(select a_int as rec_nr, flatten(t.b_array) as array_item " + "from dfs_test.`" + file + "` t) 
a"; + + TestBuilder testBuilder = testBuilder().sqlQuery(sql).unOrdered().baselineColumns("rec_nr", + "array_item_nested_int"); + + return testBuilder; + + } + + + /** + * See + */ + @Test + public void testFlattenComplexArray() throws Exception { + final String file = AvroTestUtil.generateNestedArraySchema().getFilePath(); + + TestBuilder testBuilder = nestedArrayQueryTestBuilder(file); + for (int i = 0; i < AvroTestUtil.RECORD_COUNT; i++) { + for (int j = 0; j < AvroTestUtil.ARRAY_SIZE; j++) { + testBuilder.baselineValues(i, j); + } + } + testBuilder.go(); + + } + /** + * See + */ + @Test + public void testFlattenEmptyComplexArrayMustYieldNoResults() throws Exception { + final String file = AvroTestUtil.generateNestedArraySchema(AvroTestUtil.RECORD_COUNT, 0).getFilePath(); + TestBuilder testBuilder = nestedArrayQueryTestBuilder(file); + testBuilder.expectsEmptyResultSet(); + } + @Test public void testNestedUnionArraySchema_withNullValues() throws Exception { http://git-wip-us.apache.org/repos/asf/drill/blob/1c9e92b0/exec/java-exec/src/test/java/org/apache/drill/exec/store/avro/AvroTestUtil.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/avro/AvroTestUtil.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/avro/AvroTestUtil.java index 96508d8..86d29ae 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/avro/AvroTestUtil.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/avro/AvroTestUtil.java @@ -22,6 +22,7 @@ import java.io.File; import java.io.IOException; import java.nio.ByteBuffer; import java.util.ArrayList; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -449,6 +450,45 @@ public class AvroTestUtil { return record; } + public static AvroTestRecordWriter generateNestedArraySchema() throws IOException { + return generateNestedArraySchema(RECORD_COUNT, 
ARRAY_SIZE); + } + + public static AvroTestRecordWriter generateNestedArraySchema(int numRecords, int numArrayItems) throws IOException { + + final File file = File.createTempFile("avro-nested-test", ".avro"); + file.deleteOnExit(); + + final Schema schema = SchemaBuilder.record("AvroRecordReaderTest").namespace("org.apache.drill.exec.store.avro") + .fields().name("a_int").type().intType().noDefault().name("b_array").type().array().items() + .record("my_record_1").namespace("foo.blah.org").fields().name("nested_1_int").type().optional().intType() + .endRecord().arrayDefault(Collections.emptyList()).endRecord(); + + final Schema arraySchema = schema.getField("b_array").schema(); + final Schema itemSchema = arraySchema.getElementType(); + + final AvroTestRecordWriter record = new AvroTestRecordWriter(schema, file); + try { + for (int i = 0; i < numRecords; i++) { + record.startRecord(); + record.put("a_int", i); + GenericArray array = new GenericData.Array<>(ARRAY_SIZE, arraySchema); + + for (int j = 0; j < numArrayItems; j++) { + final GenericRecord nestedRecord = new GenericData.Record(itemSchema); + nestedRecord.put("nested_1_int", j); + array.add(nestedRecord); + } + record.put("b_array", array); + record.endRecord(); + } + } finally { + record.close(); + } + + return record; + } + public static AvroTestRecordWriter generateMapSchema_withNullValues() throws Exception { final File file = File.createTempFile("avro-nested-test", ".avro");