drill-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From par...@apache.org
Subject drill git commit: DRILL-3537: When scanning files in ScanBatch, ignore all the empty files before reaching a non-empty file
Date Mon, 27 Jul 2015 21:30:20 GMT
Repository: drill
Updated Branches:
  refs/heads/master 1b69869d9 -> a219f8784


DRILL-3537: When scanning files in ScanBatch, ignore all the empty files before reaching a non-empty
file


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/a219f878
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/a219f878
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/a219f878

Branch: refs/heads/master
Commit: a219f8784c55ce3bc15b9bb3a19d7b33e4021c00
Parents: 1b69869
Author: Hsuan-Yi Chu <hsuanyi@usc.edu>
Authored: Thu Jul 23 17:20:17 2015 -0700
Committer: Parth Chandra <parthc@apache.org>
Committed: Mon Jul 27 14:23:33 2015 -0700

----------------------------------------------------------------------
 .../apache/drill/exec/physical/impl/ScanBatch.java  | 12 +++++++++++-
 .../exec/vector/complex/writer/TestJsonReader.java  | 16 ++++++++++++++++
 .../store/json/jsonDirectoryWithEmpyFile/a.json     |  0
 .../store/json/jsonDirectoryWithEmpyFile/b.json     |  3 +++
 4 files changed, 30 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/drill/blob/a219f878/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/ScanBatch.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/ScanBatch.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/ScanBatch.java
index 6bf1280..4b91e1f 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/ScanBatch.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/ScanBatch.java
@@ -83,7 +83,7 @@ public class ScanBatch implements CloseableRecordBatch {
   private String partitionColumnDesignator;
   private boolean done = false;
   private SchemaChangeCallBack callBack = new SchemaChangeCallBack();
-
+  private boolean hasReadNonEmptyFile = false;
   public ScanBatch(PhysicalOperator subScanConfig, FragmentContext context, OperatorContext
oContext,
                    Iterator<RecordReader> readers, List<String[]> partitionColumns,
List<Integer> selectedPartitionColumns) throws ExecutionSetupException {
     this.context = context;
@@ -186,6 +186,15 @@ public class ScanBatch implements CloseableRecordBatch {
             return IterOutcome.NONE;
           }
 
+          // If all the files we have read so far are just empty, the schema is not useful
+          if(!hasReadNonEmptyFile) {
+            container.clear();
+            for (ValueVector v : fieldVectorMap.values()) {
+              v.clear();
+            }
+            fieldVectorMap.clear();
+          }
+
           currentReader.cleanup();
           currentReader = readers.next();
           partitionValues = partitionColumns.hasNext() ? partitionColumns.next() : null;
@@ -208,6 +217,7 @@ public class ScanBatch implements CloseableRecordBatch {
         }
       }
 
+      hasReadNonEmptyFile = true;
       populatePartitionVectors();
 
       // this is a slight misuse of this metric but it will allow Readers to report how many
records they generated.

http://git-wip-us.apache.org/repos/asf/drill/blob/a219f878/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonReader.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonReader.java
b/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonReader.java
index 912a5f0..7d6c71c 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonReader.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonReader.java
@@ -296,6 +296,22 @@ public class TestJsonReader extends BaseTestQuery {
     batchLoader.clear();
   }
 
+  @Test
+  public void testJsonDirectoryWithEmptyFile() throws Exception {
+    String root = FileUtils.getResourceAsFile("/store/json/jsonDirectoryWithEmpyFile").toURI().toString();
+
+    String queryRightEmpty = String.format(
+        "select * from dfs_test.`%s`", root);
+
+    testBuilder()
+        .sqlQuery(queryRightEmpty)
+        .unOrdered()
+        .baselineColumns("a")
+        .baselineValues(1l)
+        .build()
+        .run();
+  }
+
   private void testExistentColumns(RecordBatchLoader batchLoader) throws SchemaChangeException
{
     VectorWrapper<?> vw = batchLoader.getValueAccessorById(
         RepeatedBigIntVector.class, //

http://git-wip-us.apache.org/repos/asf/drill/blob/a219f878/exec/java-exec/src/test/resources/store/json/jsonDirectoryWithEmpyFile/a.json
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/store/json/jsonDirectoryWithEmpyFile/a.json
b/exec/java-exec/src/test/resources/store/json/jsonDirectoryWithEmpyFile/a.json
new file mode 100644
index 0000000..e69de29

http://git-wip-us.apache.org/repos/asf/drill/blob/a219f878/exec/java-exec/src/test/resources/store/json/jsonDirectoryWithEmpyFile/b.json
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/store/json/jsonDirectoryWithEmpyFile/b.json
b/exec/java-exec/src/test/resources/store/json/jsonDirectoryWithEmpyFile/b.json
new file mode 100644
index 0000000..73d0e9c
--- /dev/null
+++ b/exec/java-exec/src/test/resources/store/json/jsonDirectoryWithEmpyFile/b.json
@@ -0,0 +1,3 @@
+{
+    a : 1
+}
\ No newline at end of file


Mime
View raw message