drill-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From par...@apache.org
Subject [4/4] drill git commit: DRILL-4919: Fix select count(1) / count(*) on csv with header
Date Sat, 14 Jan 2017 01:47:47 GMT
DRILL-4919: Fix select count(1) / count(*) on csv with header

This closes #714


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/34969583
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/34969583
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/34969583

Branch: refs/heads/master
Commit: 34969583bfab410c80cb14a1c20249f097d5f7a7
Parents: 535623b
Author: Arina Ielchiieva <arina.yelchiyeva@gmail.com>
Authored: Thu Dec 29 15:42:53 2016 +0000
Committer: Parth Chandra <parthc@apache.org>
Committed: Fri Jan 13 17:46:13 2017 -0800

----------------------------------------------------------------------
 .../compliant/CompliantTextRecordReader.java    | 18 +++++++++++++++-
 .../drill/exec/store/text/TestCsvHeader.java    | 22 ++++++++++++++++++--
 2 files changed, 37 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/drill/blob/34969583/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/compliant/CompliantTextRecordReader.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/compliant/CompliantTextRecordReader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/compliant/CompliantTextRecordReader.java
index d324270..ac4abb9 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/compliant/CompliantTextRecordReader.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/compliant/CompliantTextRecordReader.java
@@ -17,6 +17,7 @@
  */
 package org.apache.drill.exec.store.easy.text.compliant;
 
+import com.google.common.collect.ImmutableList;
 import com.google.common.collect.Maps;
 import com.univocity.parsers.common.TextParsingException;
 import io.netty.buffer.DrillBuf;
@@ -51,8 +52,12 @@ public class CompliantTextRecordReader extends AbstractRecordReader {
   static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(CompliantTextRecordReader.class);
 
   private static final int MAX_RECORDS_PER_BATCH = 8096;
-  static final int READ_BUFFER = 1024*1024;
+  private static final int READ_BUFFER = 1024*1024;
   private static final int WHITE_SPACE_BUFFER = 64*1024;
+  // When no named column is required, ask SCAN to return a DEFAULT column.
+  // If such column does not exist, it will be returned as a nullable-int column.
+  private static final List<SchemaPath> DEFAULT_NAMED_TEXT_COLS_TO_READ =
+      ImmutableList.of(SchemaPath.getSimplePath("_DEFAULT_COL_TO_READ_"));
 
   // settings to be used while parsing
   private TextParsingSettings settings;
@@ -89,8 +94,19 @@ public class CompliantTextRecordReader extends AbstractRecordReader {
     return super.isStarQuery();
   }
 
+  /**
+   * Returns list of default columns to read to replace empty list of columns.
+   * For text files without headers returns "columns[0]".
+   * Text files with headers do not support columns syntax,
+   * so when header extraction is enabled, returns fake named column "_DEFAULT_COL_TO_READ_".
+   *
+   * @return list of default columns to read
+   */
   @Override
   protected List<SchemaPath> getDefaultColumnsToRead() {
+    if (settings.isHeaderExtractionEnabled()) {
+      return DEFAULT_NAMED_TEXT_COLS_TO_READ;
+    }
     return DEFAULT_TEXT_COLS_TO_READ;
   }
 

http://git-wip-us.apache.org/repos/asf/drill/blob/34969583/exec/java-exec/src/test/java/org/apache/drill/exec/store/text/TestCsvHeader.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/text/TestCsvHeader.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/text/TestCsvHeader.java
index a2e548b..cf54bb0 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/text/TestCsvHeader.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/text/TestCsvHeader.java
@@ -17,6 +17,7 @@
  */
 package org.apache.drill.exec.store.text;
 
+import com.google.common.collect.Lists;
 import org.apache.drill.BaseTestQuery;
 import org.apache.drill.TestBuilder;
 import org.apache.drill.common.util.FileUtils;
@@ -24,14 +25,14 @@ import org.apache.drill.common.util.FileUtils;
 import java.io.BufferedOutputStream;
 import java.io.File;
 import java.io.FileOutputStream;
+import java.util.List;
 
 import org.junit.Before;
 import org.junit.Test;
 
 public class TestCsvHeader extends BaseTestQuery{
 
-  static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(TestCsvHeader.class);
-  String root;
+  private String root;
 
   @Before
   public void initialize() throws Exception {
@@ -185,4 +186,21 @@ public class TestCsvHeader extends BaseTestQuery{
       }
       builder.go();
   }
+
+  @Test
+  public void testCountOnCsvWithHeader() throws Exception {
+    final String query = "select count(%s) as cnt from %s.`%s`";
+    final List<Object> options = Lists.<Object>newArrayList("*", 1, "'A'");
+
+    for (Object option : options) {
+      testBuilder()
+          .sqlQuery(query, option, TEMP_SCHEMA, root)
+          .unOrdered()
+          .baselineColumns("cnt")
+          .baselineValues(4L)
+          .build()
+          .run();
+    }
+  }
+
 }


Mime
View raw message