tajo-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From hyun...@apache.org
Subject [06/50] [abbrv] git commit: TAJO-849: Add Parquet storage to HCatalogStore. (jaehwa)
Date Wed, 09 Jul 2014 04:10:23 GMT
TAJO-849: Add Parquet storage to HCatalogStore. (jaehwa)


Project: http://git-wip-us.apache.org/repos/asf/tajo/repo
Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/64106a32
Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/64106a32
Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/64106a32

Branch: refs/heads/window_function
Commit: 64106a3223c882c260ba39edd53c85d1900b83a0
Parents: f781007
Author: blrunner <jhjung@gruter.com>
Authored: Mon Jun 2 16:14:37 2014 -0700
Committer: blrunner <jhjung@gruter.com>
Committed: Mon Jun 2 16:14:37 2014 -0700

----------------------------------------------------------------------
 CHANGES                                         |  2 ++
 .../tajo-catalog-drivers/tajo-hcatalog/pom.xml  |  7 +++++
 .../tajo/catalog/store/HCatalogStore.java       |  9 +++++-
 .../apache/tajo/catalog/store/HCatalogUtil.java |  5 +++-
 .../tajo/catalog/store/TestHCatalogStore.java   | 30 ++++++++++++++++++++
 tajo-dist/pom.xml                               |  8 ++++++
 6 files changed, 59 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tajo/blob/64106a32/CHANGES
----------------------------------------------------------------------
diff --git a/CHANGES b/CHANGES
index 84a83e8..d2436e9 100644
--- a/CHANGES
+++ b/CHANGES
@@ -4,6 +4,8 @@ Release 0.9.0 - unreleased
 
   NEW FEATURES
 
+    TAJO-849: Add Parquet storage to HCatalogStore. (jaehwa)
+
     TAJO-494: Extend TajoClient to run a query with a plan context serialized 
     as the JSON form. (jihoon)
 

http://git-wip-us.apache.org/repos/asf/tajo/blob/64106a32/tajo-catalog/tajo-catalog-drivers/tajo-hcatalog/pom.xml
----------------------------------------------------------------------
diff --git a/tajo-catalog/tajo-catalog-drivers/tajo-hcatalog/pom.xml b/tajo-catalog/tajo-catalog-drivers/tajo-hcatalog/pom.xml
index a3cb99b..2c939d4 100644
--- a/tajo-catalog/tajo-catalog-drivers/tajo-hcatalog/pom.xml
+++ b/tajo-catalog/tajo-catalog-drivers/tajo-hcatalog/pom.xml
@@ -138,6 +138,8 @@
       </activation>
       <properties>
         <hive.version>0.12.0</hive.version>
+        <parquet.version>1.4.2</parquet.version>
+        <parquet.format.version>2.0.0</parquet.format.version>
       </properties>
       <dependencies>
         <dependency>
@@ -304,6 +306,11 @@
             </exclusion>
           </exclusions>
         </dependency>
+        <dependency>
+          <groupId>com.twitter</groupId>
+          <artifactId>parquet-hive-bundle</artifactId>
+          <version>${parquet.version}</version>
+        </dependency>
       </dependencies>
     </profile>
     <profile>

http://git-wip-us.apache.org/repos/asf/tajo/blob/64106a32/tajo-catalog/tajo-catalog-drivers/tajo-hcatalog/src/main/java/org/apache/tajo/catalog/store/HCatalogStore.java
----------------------------------------------------------------------
diff --git a/tajo-catalog/tajo-catalog-drivers/tajo-hcatalog/src/main/java/org/apache/tajo/catalog/store/HCatalogStore.java b/tajo-catalog/tajo-catalog-drivers/tajo-hcatalog/src/main/java/org/apache/tajo/catalog/store/HCatalogStore.java
index 7924af1..3008ed9 100644
--- a/tajo-catalog/tajo-catalog-drivers/tajo-hcatalog/src/main/java/org/apache/tajo/catalog/store/HCatalogStore.java
+++ b/tajo-catalog/tajo-catalog-drivers/tajo-hcatalog/src/main/java/org/apache/tajo/catalog/store/HCatalogStore.java
@@ -537,7 +537,14 @@ public class HCatalogStore extends CatalogConstants implements CatalogStore {
           table.getParameters().remove(StorageConstants.SEQUENCEFILE_NULL);
         }
       } else {
-        throw new CatalogException(new NotImplementedException(tableDesc.getMeta().getStoreType().name()));
+        if (tableDesc.getMeta().getStoreType().equals(CatalogProtos.StoreType.PARQUET)) {
+          sd.setInputFormat(parquet.hive.DeprecatedParquetInputFormat.class.getName());
+          sd.setOutputFormat(parquet.hive.DeprecatedParquetOutputFormat.class.getName());
+          sd.getSerdeInfo().setSerializationLib(parquet.hive.serde.ParquetHiveSerDe.class.getName());
+        } else {
+          throw new CatalogException(new NotImplementedException(tableDesc.getMeta().getStoreType
+              ().name()));
+        }
       }
 
       sd.setSortCols(new ArrayList<Order>());

http://git-wip-us.apache.org/repos/asf/tajo/blob/64106a32/tajo-catalog/tajo-catalog-drivers/tajo-hcatalog/src/main/java/org/apache/tajo/catalog/store/HCatalogUtil.java
----------------------------------------------------------------------
diff --git a/tajo-catalog/tajo-catalog-drivers/tajo-hcatalog/src/main/java/org/apache/tajo/catalog/store/HCatalogUtil.java b/tajo-catalog/tajo-catalog-drivers/tajo-hcatalog/src/main/java/org/apache/tajo/catalog/store/HCatalogUtil.java
index 98aa7c5..9e60768 100644
--- a/tajo-catalog/tajo-catalog-drivers/tajo-hcatalog/src/main/java/org/apache/tajo/catalog/store/HCatalogUtil.java
+++ b/tajo-catalog/tajo-catalog-drivers/tajo-hcatalog/src/main/java/org/apache/tajo/catalog/store/HCatalogUtil.java
@@ -30,6 +30,7 @@ import org.apache.hcatalog.data.schema.HCatSchema;
 import org.apache.tajo.catalog.exception.CatalogException;
 import org.apache.tajo.catalog.proto.CatalogProtos;
 import org.apache.tajo.common.TajoDataTypes;
+import parquet.hadoop.mapred.DeprecatedParquetOutputFormat;
 
 public class HCatalogUtil {
   protected final Log LOG = LogFactory.getLog(getClass());
@@ -128,7 +129,9 @@ public class HCatalogUtil {
     } else if(outputFormatClass.equals(HiveSequenceFileOutputFormat.class.getSimpleName())) {
       return CatalogProtos.StoreType.SEQUENCEFILE.name();
     } else if(outputFormatClass.equals(RCFileOutputFormat.class.getSimpleName())) {
-        return CatalogProtos.StoreType.RCFILE.name();
+      return CatalogProtos.StoreType.RCFILE.name();
+    } else if(outputFormatClass.equals(DeprecatedParquetOutputFormat.class.getSimpleName())) {
+      return CatalogProtos.StoreType.PARQUET.name();
     } else {
       throw new CatalogException("Not supported file output format. - file output format:" + fileFormat);
     }

http://git-wip-us.apache.org/repos/asf/tajo/blob/64106a32/tajo-catalog/tajo-catalog-drivers/tajo-hcatalog/src/test/java/org/apache/tajo/catalog/store/TestHCatalogStore.java
----------------------------------------------------------------------
diff --git a/tajo-catalog/tajo-catalog-drivers/tajo-hcatalog/src/test/java/org/apache/tajo/catalog/store/TestHCatalogStore.java b/tajo-catalog/tajo-catalog-drivers/tajo-hcatalog/src/test/java/org/apache/tajo/catalog/store/TestHCatalogStore.java
index 729184a..a507b08 100644
--- a/tajo-catalog/tajo-catalog-drivers/tajo-hcatalog/src/test/java/org/apache/tajo/catalog/store/TestHCatalogStore.java
+++ b/tajo-catalog/tajo-catalog-drivers/tajo-hcatalog/src/test/java/org/apache/tajo/catalog/store/TestHCatalogStore.java
@@ -369,4 +369,34 @@ public class TestHCatalogStore {
     store.dropTable(DB_NAME, REGION);
   }
 
+
+  @Test
+  public void testTableUsingParquet() throws Exception {
+    TableMeta meta = new TableMeta(CatalogProtos.StoreType.PARQUET, new KeyValueSet());
+
+    org.apache.tajo.catalog.Schema schema = new org.apache.tajo.catalog.Schema();
+    schema.addColumn("c_custkey", TajoDataTypes.Type.INT4);
+    schema.addColumn("c_name", TajoDataTypes.Type.TEXT);
+    schema.addColumn("c_address", TajoDataTypes.Type.TEXT);
+    schema.addColumn("c_nationkey", TajoDataTypes.Type.INT4);
+    schema.addColumn("c_phone", TajoDataTypes.Type.TEXT);
+    schema.addColumn("c_acctbal", TajoDataTypes.Type.FLOAT8);
+    schema.addColumn("c_mktsegment", TajoDataTypes.Type.TEXT);
+    schema.addColumn("c_comment", TajoDataTypes.Type.TEXT);
+
+    TableDesc table = new TableDesc(CatalogUtil.buildFQName(DB_NAME, CUSTOMER), schema, meta,
+        new Path(warehousePath, new Path(DB_NAME, CUSTOMER)));
+    store.createTable(table.getProto());
+    assertTrue(store.existTable(DB_NAME, CUSTOMER));
+
+    TableDesc table1 = new TableDesc(store.getTable(DB_NAME, CUSTOMER));
+    assertEquals(table.getName(), table1.getName());
+    assertEquals(table.getPath(), table1.getPath());
+    assertEquals(table.getSchema().size(), table1.getSchema().size());
+    for (int i = 0; i < table.getSchema().size(); i++) {
+      assertEquals(table.getSchema().getColumn(i).getSimpleName(), table1.getSchema().getColumn(i).getSimpleName());
+    }
+
+    store.dropTable(DB_NAME, CUSTOMER);
+  }
 }

http://git-wip-us.apache.org/repos/asf/tajo/blob/64106a32/tajo-dist/pom.xml
----------------------------------------------------------------------
diff --git a/tajo-dist/pom.xml b/tajo-dist/pom.xml
index c826c57..103bbac 100644
--- a/tajo-dist/pom.xml
+++ b/tajo-dist/pom.xml
@@ -120,6 +120,14 @@
                       run cp -r $ROOT/tajo-jdbc/target/tajo-jdbc-${project.version}.jar ./share/jdbc-dist
                       run cp -r $ROOT/tajo-jdbc/target/lib/* ./share/jdbc-dist
 
+                      if [ -f $ROOT/tajo-catalog/tajo-catalog-drivers/tajo-hcatalog/target/lib/parquet-hive-bundle-*.jar ]
+                      then
+                      run cp -r $ROOT/tajo-catalog/tajo-catalog-drivers/tajo-hcatalog/target/lib/parquet-hive-bundle-*.jar lib/
+                      echo
+                      echo "Tajo installed parquet-hive-bundle library at: ${project.build.directory}/tajo-${project.version}"
+                      echo
+                      fi
+
                       echo
                       echo "Tajo dist layout available at: ${project.build.directory}/tajo-${project.version}"
                       echo


Mime
View raw message