calcite-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jh...@apache.org
Subject calcite git commit: [CALCITE-1676] In file adapter, scan directory for .csv, .json and .gz files
Date Wed, 08 Mar 2017 17:45:52 GMT
Repository: calcite
Updated Branches:
  refs/heads/master f85469156 -> a56b84e3b


 [CALCITE-1676] In file adapter, scan directory for .csv, .json and .gz files


Project: http://git-wip-us.apache.org/repos/asf/calcite/repo
Commit: http://git-wip-us.apache.org/repos/asf/calcite/commit/a56b84e3
Tree: http://git-wip-us.apache.org/repos/asf/calcite/tree/a56b84e3
Diff: http://git-wip-us.apache.org/repos/asf/calcite/diff/a56b84e3

Branch: refs/heads/master
Commit: a56b84e3ba2dbcb47b60f040c86a43209acc154c
Parents: f854691
Author: Julian Hyde <jhyde@apache.org>
Authored: Tue Mar 7 18:08:14 2017 -0800
Committer: Julian Hyde <jhyde@apache.org>
Committed: Tue Mar 7 18:08:14 2017 -0800

----------------------------------------------------------------------
 .../calcite/adapter/csv/CsvEnumerator.java      |   5 +-
 .../apache/calcite/adapter/file/FileSchema.java |  77 ++++++++++++++++---
 .../calcite/adapter/file/FileSchemaFactory.java |  10 ++-
 .../apache/calcite/adapter/file/SqlTest.java    |  53 +++++++++++++
 file/src/test/resources/sales-csv.json          |  32 ++++++++
 file/src/test/resources/sales-csv/DEPTS.csv     |   4 +
 file/src/test/resources/sales-csv/EMPS.csv.gz   | Bin 0 -> 262 bytes
 file/src/test/resources/sales-csv/EMPTY.csv     |   0
 .../test/resources/sales-csv/HEADER_ONLY.csv    |   1 +
 file/src/test/resources/sales-csv/SDEPTS.csv    |   7 ++
 10 files changed, 178 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/calcite/blob/a56b84e3/example/csv/src/main/java/org/apache/calcite/adapter/csv/CsvEnumerator.java
----------------------------------------------------------------------
diff --git a/example/csv/src/main/java/org/apache/calcite/adapter/csv/CsvEnumerator.java b/example/csv/src/main/java/org/apache/calcite/adapter/csv/CsvEnumerator.java
index ba4ff10..262c283 100644
--- a/example/csv/src/main/java/org/apache/calcite/adapter/csv/CsvEnumerator.java
+++ b/example/csv/src/main/java/org/apache/calcite/adapter/csv/CsvEnumerator.java
@@ -118,7 +118,10 @@ class CsvEnumerator<E> implements Enumerator<E> {
     }
     try {
       reader = openCsv(source);
-      final String[] strings = reader.readNext();
+      String[] strings = reader.readNext();
+      if (strings == null) {
+        strings = new String[] {"EmptyFileHasNoColumns:boolean"};
+      }
       for (String string : strings) {
         final String name;
         final CsvFieldType fieldType;

http://git-wip-us.apache.org/repos/asf/calcite/blob/a56b84e3/file/src/main/java/org/apache/calcite/adapter/file/FileSchema.java
----------------------------------------------------------------------
diff --git a/file/src/main/java/org/apache/calcite/adapter/file/FileSchema.java b/file/src/main/java/org/apache/calcite/adapter/file/FileSchema.java
index 388f46b..97c5cb4 100644
--- a/file/src/main/java/org/apache/calcite/adapter/file/FileSchema.java
+++ b/file/src/main/java/org/apache/calcite/adapter/file/FileSchema.java
@@ -29,6 +29,7 @@ import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableMap;
 
 import java.io.File;
+import java.io.FilenameFilter;
 import java.net.MalformedURLException;
 import java.util.List;
 import java.util.Map;
@@ -55,6 +56,23 @@ class FileSchema extends AbstractSchema {
     this.baseDirectory = baseDirectory;
   }
 
+  /** Looks for a suffix on a string and returns
+   * either the string with the suffix removed
+   * or the original string. */
+  private static String trim(String s, String suffix) {
+    String trimmed = trimOrNull(s, suffix);
+    return trimmed != null ? trimmed : s;
+  }
+
+  /** Looks for a suffix on a string and returns
+   * either the string with the suffix removed
+   * or null. */
+  private static String trimOrNull(String s, String suffix) {
+    return s.endsWith(suffix)
+        ? s.substring(0, s.length() - suffix.length())
+        : null;
+  }
+
   @Override protected Map<String, Table> getTableMap() {
     final ImmutableMap.Builder<String, Table> builder = ImmutableMap.builder();
 
@@ -68,10 +86,42 @@ class FileSchema extends AbstractSchema {
       }
     }
 
+    // Look for files in the directory ending in ".csv", ".csv.gz", ".json",
+    // ".json.gz".
+    final Source baseSource = Sources.of(baseDirectory);
+    File[] files = baseDirectory.listFiles(
+        new FilenameFilter() {
+          public boolean accept(File dir, String name) {
+            final String nameSansGz = trim(name, ".gz");
+            return nameSansGz.endsWith(".csv")
+                || nameSansGz.endsWith(".json");
+          }
+        });
+    if (files == null) {
+      System.out.println("directory " + baseDirectory + " not found");
+      files = new File[0];
+    }
+    // Build a map from table name to table; each file becomes a table.
+    for (File file : files) {
+      Source source = Sources.of(file);
+      Source sourceSansGz = source.trim(".gz");
+      final Source sourceSansJson = sourceSansGz.trimOrNull(".json");
+      if (sourceSansJson != null) {
+        JsonTable table = new JsonTable(source);
+        builder.put(sourceSansJson.relative(baseSource).path(), table);
+        continue;
+      }
+      final Source sourceSansCsv = sourceSansGz.trimOrNull(".csv");
+      if (sourceSansCsv != null) {
+        addTable(builder, source, sourceSansCsv.relative(baseSource).path(),
+            null);
+      }
+    }
+
     return builder.build();
   }
 
-  private void addTable(ImmutableMap.Builder<String, Table> builder,
+  private boolean addTable(ImmutableMap.Builder<String, Table> builder,
       Map<String, Object> tableDef) throws MalformedURLException {
     final String tableName = (String) tableDef.get("name");
     final String url = (String) tableDef.get("url");
@@ -82,28 +132,37 @@ class FileSchema extends AbstractSchema {
     } else {
       source = Sources.of(baseDirectory).append(source0);
     }
+    return addTable(builder, source, tableName, tableDef);
+  }
 
+  private boolean addTable(ImmutableMap.Builder<String, Table> builder,
+      Source source, String tableName, Map<String, Object> tableDef)  {
     final Source sourceSansGz = source.trim(".gz");
     final Source sourceSansJson = sourceSansGz.trimOrNull(".json");
     if (sourceSansJson != null) {
       JsonTable table = new JsonTable(source);
       builder.put(Util.first(tableName, sourceSansJson.path()), table);
-      return;
+      return true;
     }
     final Source sourceSansCsv = sourceSansGz.trimOrNull(".csv");
     if (sourceSansCsv != null) {
       final Table table = new CsvFilterableTable(source, null);
       builder.put(Util.first(tableName, sourceSansCsv.path()), table);
-      return;
+      return true;
     }
 
-    try {
-      FileTable table = FileTable.create(source, tableDef);
-      builder.put(Util.first(tableName, source.path()), table);
-    } catch (Exception e) {
-      throw new RuntimeException("Unable to instantiate table for: "
-          + tableName);
+    if (tableDef != null) {
+      try {
+        FileTable table = FileTable.create(source, tableDef);
+        builder.put(Util.first(tableName, source.path()), table);
+        return true;
+      } catch (Exception e) {
+        throw new RuntimeException("Unable to instantiate table for: "
+            + tableName);
+      }
     }
+
+    return false;
   }
 }
 

http://git-wip-us.apache.org/repos/asf/calcite/blob/a56b84e3/file/src/main/java/org/apache/calcite/adapter/file/FileSchemaFactory.java
----------------------------------------------------------------------
diff --git a/file/src/main/java/org/apache/calcite/adapter/file/FileSchemaFactory.java b/file/src/main/java/org/apache/calcite/adapter/file/FileSchemaFactory.java
index 6aa0788..c92a729 100644
--- a/file/src/main/java/org/apache/calcite/adapter/file/FileSchemaFactory.java
+++ b/file/src/main/java/org/apache/calcite/adapter/file/FileSchemaFactory.java
@@ -43,7 +43,15 @@ public class FileSchemaFactory implements SchemaFactory {
         (List) operand.get("tables");
     final File baseDirectory =
         (File) operand.get(ModelHandler.ExtraOperand.BASE_DIRECTORY.camelName);
-    return new FileSchema(parentSchema, name, baseDirectory, tables);
+    File directoryFile = baseDirectory;
+    final String directory = (String) operand.get("directory");
+    if (baseDirectory != null && directory != null) {
+      directoryFile = new File(directory);
+      if (!directoryFile.isAbsolute()) {
+        directoryFile = new File(baseDirectory, directory);
+      }
+    }
+    return new FileSchema(parentSchema, name, directoryFile, tables);
   }
 }
 

http://git-wip-us.apache.org/repos/asf/calcite/blob/a56b84e3/file/src/test/java/org/apache/calcite/adapter/file/SqlTest.java
----------------------------------------------------------------------
diff --git a/file/src/test/java/org/apache/calcite/adapter/file/SqlTest.java b/file/src/test/java/org/apache/calcite/adapter/file/SqlTest.java
index 86360f1..37d3bfe 100644
--- a/file/src/test/java/org/apache/calcite/adapter/file/SqlTest.java
+++ b/file/src/test/java/org/apache/calcite/adapter/file/SqlTest.java
@@ -26,9 +26,12 @@ import java.sql.DriverManager;
 import java.sql.ResultSet;
 import java.sql.SQLException;
 import java.sql.Statement;
+import java.sql.Types;
 import java.util.Properties;
 
+import static org.hamcrest.CoreMatchers.is;
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertThat;
 
 /**
  * System test of the Calcite file adapter, which can also read and parse
@@ -164,6 +167,56 @@ public class SqlTest {
         "DEPTNO=30; NAME=Accounts");
   }
 
+  /** Reads the DEPTS table from the CSV schema. */
+  @Test public void testCsvSalesDepts() throws SQLException {
+    final String sql = "select * from sales.depts";
+    checkSql("sales-csv", sql,
+        "DEPTNO=10; NAME=Sales",
+        "DEPTNO=20; NAME=Marketing",
+        "DEPTNO=30; NAME=Accounts");
+  }
+
+  /** Reads the EMPS table from the CSV schema. */
+  @Test public void testCsvSalesEmps() throws SQLException {
+    final String sql = "select * from sales.emps";
+    checkSql("sales-csv", sql,
+        "EMPNO=100; NAME=Fred; DEPTNO=10; GENDER=; CITY=; EMPID=30; AGE=25; SLACKER=true; MANAGER=false; JOINEDAT=1996-08-03",
+        "EMPNO=110; NAME=Eric; DEPTNO=20; GENDER=M; CITY=San Francisco; EMPID=3; AGE=80; SLACKER=null; MANAGER=false; JOINEDAT=2001-01-01",
+        "EMPNO=110; NAME=John; DEPTNO=40; GENDER=M; CITY=Vancouver; EMPID=2; AGE=null; SLACKER=false; MANAGER=true; JOINEDAT=2002-05-03",
+        "EMPNO=120; NAME=Wilma; DEPTNO=20; GENDER=F; CITY=; EMPID=1; AGE=5; SLACKER=null; MANAGER=true; JOINEDAT=2005-09-07",
+        "EMPNO=130; NAME=Alice; DEPTNO=40; GENDER=F; CITY=Vancouver; EMPID=2; AGE=null; SLACKER=false; MANAGER=true; JOINEDAT=2007-01-01");
+  }
+
+  /** Reads the HEADER_ONLY table from the CSV schema. The CSV file has one
+   * line - the column headers - but no rows of data. */
+  @Test public void testCsvSalesHeaderOnly() throws SQLException {
+    final String sql = "select * from sales.header_only";
+    checkSql("sales-csv", sql);
+  }
+
+  /** Reads the EMPTY table from the CSV schema. The CSV file has no lines,
+   * therefore the table has a system-generated column called
+   * "EmptyFileHasNoColumns". */
+  @Test public void testCsvSalesEmpty() throws SQLException {
+    final String sql = "select * from sales.empty";
+    checkSql(sql, "sales-csv", new Function<ResultSet, Void>() {
+      public Void apply(ResultSet resultSet) {
+        try {
+          assertThat(resultSet.getMetaData().getColumnCount(), is(1));
+          assertThat(resultSet.getMetaData().getColumnName(1),
+              is("EmptyFileHasNoColumns"));
+          assertThat(resultSet.getMetaData().getColumnType(1),
+              is(Types.BOOLEAN));
+          String actual = SqlTest.toString(resultSet);
+          assertThat(actual, is(""));
+        } catch (SQLException e) {
+          throw new RuntimeException(e);
+        }
+        return null;
+      }
+    });
+  }
+
 }
 
 // End SqlTest.java

http://git-wip-us.apache.org/repos/asf/calcite/blob/a56b84e3/file/src/test/resources/sales-csv.json
----------------------------------------------------------------------
diff --git a/file/src/test/resources/sales-csv.json b/file/src/test/resources/sales-csv.json
new file mode 100644
index 0000000..a647222
--- /dev/null
+++ b/file/src/test/resources/sales-csv.json
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * A JSON model of a Calcite schema based on CSV files.
+ */
+{
+  "version": "1.0",
+  "defaultSchema": "SALES",
+  "schemas": [
+    {
+      "name": "SALES",
+      "type": "custom",
+      "factory": "org.apache.calcite.adapter.file.FileSchemaFactory",
+      "operand": {
+        "directory": "sales-csv"
+      }
+    }
+  ]
+}

http://git-wip-us.apache.org/repos/asf/calcite/blob/a56b84e3/file/src/test/resources/sales-csv/DEPTS.csv
----------------------------------------------------------------------
diff --git a/file/src/test/resources/sales-csv/DEPTS.csv b/file/src/test/resources/sales-csv/DEPTS.csv
new file mode 100644
index 0000000..b240228
--- /dev/null
+++ b/file/src/test/resources/sales-csv/DEPTS.csv
@@ -0,0 +1,4 @@
+DEPTNO:int,NAME:string
+10,"Sales"
+20,"Marketing"
+30,"Accounts"
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/calcite/blob/a56b84e3/file/src/test/resources/sales-csv/EMPS.csv.gz
----------------------------------------------------------------------
diff --git a/file/src/test/resources/sales-csv/EMPS.csv.gz b/file/src/test/resources/sales-csv/EMPS.csv.gz
new file mode 100644
index 0000000..294bad4
Binary files /dev/null and b/file/src/test/resources/sales-csv/EMPS.csv.gz differ

http://git-wip-us.apache.org/repos/asf/calcite/blob/a56b84e3/file/src/test/resources/sales-csv/EMPTY.csv
----------------------------------------------------------------------
diff --git a/file/src/test/resources/sales-csv/EMPTY.csv b/file/src/test/resources/sales-csv/EMPTY.csv
new file mode 100644
index 0000000..e69de29

http://git-wip-us.apache.org/repos/asf/calcite/blob/a56b84e3/file/src/test/resources/sales-csv/HEADER_ONLY.csv
----------------------------------------------------------------------
diff --git a/file/src/test/resources/sales-csv/HEADER_ONLY.csv b/file/src/test/resources/sales-csv/HEADER_ONLY.csv
new file mode 100644
index 0000000..9c71637
--- /dev/null
+++ b/file/src/test/resources/sales-csv/HEADER_ONLY.csv
@@ -0,0 +1 @@
+DEPTNO:int,NAME:string

http://git-wip-us.apache.org/repos/asf/calcite/blob/a56b84e3/file/src/test/resources/sales-csv/SDEPTS.csv
----------------------------------------------------------------------
diff --git a/file/src/test/resources/sales-csv/SDEPTS.csv b/file/src/test/resources/sales-csv/SDEPTS.csv
new file mode 100644
index 0000000..b555c42
--- /dev/null
+++ b/file/src/test/resources/sales-csv/SDEPTS.csv
@@ -0,0 +1,7 @@
+DEPTNO:int,NAME:string
+10,"Sales"
+20,"Marketing"
+30,"Accounts"
+40,"40"
+50,"50"
+60,"60"


Mime
View raw message