parquet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tians...@apache.org
Subject [1/3] incubator-parquet-mr git commit: PARQUET-117: implement the new page format for Parquet 2.0
Date Thu, 04 Dec 2014 21:16:24 GMT
Repository: incubator-parquet-mr
Updated Branches:
  refs/heads/master b5f6a3bd8 -> ccc29e4dd


http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/blob/ccc29e4d/parquet-pig/src/test/java/parquet/pig/TupleConsumerPerfTest.java
----------------------------------------------------------------------
diff --git a/parquet-pig/src/test/java/parquet/pig/TupleConsumerPerfTest.java b/parquet-pig/src/test/java/parquet/pig/TupleConsumerPerfTest.java
index 66a51ae..68ad1fe 100644
--- a/parquet-pig/src/test/java/parquet/pig/TupleConsumerPerfTest.java
+++ b/parquet-pig/src/test/java/parquet/pig/TupleConsumerPerfTest.java
@@ -29,7 +29,7 @@ import org.apache.pig.parser.ParserException;
 
 import parquet.Log;
 import parquet.column.ParquetProperties.WriterVersion;
-import parquet.column.impl.ColumnWriteStoreImpl;
+import parquet.column.impl.ColumnWriteStoreV1;
 import parquet.column.page.PageReadStore;
 import parquet.column.page.mem.MemPageStore;
 import parquet.hadoop.api.ReadSupport.ReadContext;
@@ -56,11 +56,11 @@ public class TupleConsumerPerfTest {
     MessageType schema = new PigSchemaConverter().convert(Utils.getSchemaFromString(pigSchema));
 
     MemPageStore memPageStore = new MemPageStore(0);
-    ColumnWriteStoreImpl columns = new ColumnWriteStoreImpl(memPageStore, 50*1024*1024, 50*1024*1024, 50*1024*1024, false, WriterVersion.PARQUET_1_0);
+    ColumnWriteStoreV1 columns = new ColumnWriteStoreV1(memPageStore, 50*1024*1024, 50*1024*1024, 50*1024*1024, false, WriterVersion.PARQUET_1_0);
     write(memPageStore, columns, schema, pigSchema);
     columns.flush();
     read(memPageStore, pigSchema, pigSchemaProjected, pigSchemaNoString);
-    System.out.println(columns.memSize()+" bytes used total");
+    System.out.println(columns.getBufferedSize()+" bytes used total");
     System.out.println("max col size: "+columns.maxColMemSize()+" bytes");
   }
 
@@ -153,7 +153,7 @@ public class TupleConsumerPerfTest {
     return map;
   }
 
-  private static void write(MemPageStore memPageStore, ColumnWriteStoreImpl columns, MessageType schema, String pigSchemaString) throws ExecException, ParserException {
+  private static void write(MemPageStore memPageStore, ColumnWriteStoreV1 columns, MessageType schema, String pigSchemaString) throws ExecException, ParserException {
     MessageColumnIO columnIO = newColumnFactory(pigSchemaString);
     TupleWriteSupport tupleWriter = TupleWriteSupport.fromPigSchema(pigSchemaString);
     tupleWriter.init(null);

http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/blob/ccc29e4d/parquet-thrift/src/test/java/parquet/thrift/TestParquetReadProtocol.java
----------------------------------------------------------------------
diff --git a/parquet-thrift/src/test/java/parquet/thrift/TestParquetReadProtocol.java b/parquet-thrift/src/test/java/parquet/thrift/TestParquetReadProtocol.java
index f14b7da..eb20412 100644
--- a/parquet-thrift/src/test/java/parquet/thrift/TestParquetReadProtocol.java
+++ b/parquet-thrift/src/test/java/parquet/thrift/TestParquetReadProtocol.java
@@ -36,7 +36,7 @@ import org.junit.Test;
 
 import parquet.Log;
 import parquet.column.ParquetProperties.WriterVersion;
-import parquet.column.impl.ColumnWriteStoreImpl;
+import parquet.column.impl.ColumnWriteStoreV1;
 import parquet.column.page.mem.MemPageStore;
 import parquet.io.ColumnIOFactory;
 import parquet.io.MessageColumnIO;
@@ -145,7 +145,7 @@ public class TestParquetReadProtocol {
     final MessageType schema = schemaConverter.convert(thriftClass);
     LOG.info(schema);
     final MessageColumnIO columnIO = new ColumnIOFactory(true).getColumnIO(schema);
-    final ColumnWriteStoreImpl columns = new ColumnWriteStoreImpl(memPageStore, 10000, 10000, 10000, false, WriterVersion.PARQUET_1_0);
+    final ColumnWriteStoreV1 columns = new ColumnWriteStoreV1(memPageStore, 10000, 10000, 10000, false, WriterVersion.PARQUET_1_0);
     final RecordConsumer recordWriter = columnIO.getRecordWriter(columns);
     final StructType thriftType = schemaConverter.toStructType(thriftClass);
     ParquetWriteProtocol parquetWriteProtocol = new ParquetWriteProtocol(recordWriter, columnIO, thriftType);

http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/blob/ccc29e4d/parquet-tools/src/main/java/parquet/tools/command/DumpCommand.java
----------------------------------------------------------------------
diff --git a/parquet-tools/src/main/java/parquet/tools/command/DumpCommand.java b/parquet-tools/src/main/java/parquet/tools/command/DumpCommand.java
index 387c6bb..eb32057 100644
--- a/parquet-tools/src/main/java/parquet/tools/command/DumpCommand.java
+++ b/parquet-tools/src/main/java/parquet/tools/command/DumpCommand.java
@@ -20,7 +20,6 @@ import java.math.BigInteger;
 import java.nio.CharBuffer;
 import java.nio.charset.Charset;
 import java.nio.charset.CharsetDecoder;
-
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
@@ -35,13 +34,14 @@ import org.apache.commons.cli.Options;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 
-import com.google.common.base.Joiner;
-
 import parquet.column.ColumnDescriptor;
 import parquet.column.ColumnReader;
 import parquet.column.impl.ColumnReadStoreImpl;
+import parquet.column.page.DataPage;
+import parquet.column.page.DataPage.Visitor;
+import parquet.column.page.DataPageV1;
+import parquet.column.page.DataPageV2;
 import parquet.column.page.DictionaryPage;
-import parquet.column.page.Page;
 import parquet.column.page.PageReadStore;
 import parquet.column.page.PageReader;
 import parquet.hadoop.ParquetFileReader;
@@ -57,6 +57,8 @@ import parquet.tools.util.MetadataUtils;
 import parquet.tools.util.PrettyPrintWriter;
 import parquet.tools.util.PrettyPrintWriter.WhiteSpaceHandler;
 
+import com.google.common.base.Joiner;
+
 public class DumpCommand extends ArgsOnlyCommand {
     private static final Charset UTF8 = Charset.forName("UTF-8");
     private static final CharsetDecoder UTF8_DECODER = UTF8.newDecoder();
@@ -227,7 +229,7 @@ public class DumpCommand extends ArgsOnlyCommand {
         }
     }
 
-    public static void dump(PrettyPrintWriter out, PageReadStore store, ColumnDescriptor column) throws IOException {
+    public static void dump(final PrettyPrintWriter out, PageReadStore store, ColumnDescriptor column) throws IOException {
         PageReader reader = store.getPageReader(column);
 
         long vc = reader.getTotalValueCount();
@@ -244,12 +246,26 @@ public class DumpCommand extends ArgsOnlyCommand {
         out.println();
         out.rule('-');
 
-        Page page = reader.readPage();
+        DataPage page = reader.readPage();
         for (long count = 0; page != null; count++) {
             out.format("page %d:", count);
-            out.format(" DLE:%s", page.getDlEncoding());
-            out.format(" RLE:%s", page.getRlEncoding());
-            out.format(" VLE:%s", page.getValueEncoding());
+            page.accept(new Visitor<Void>() {
+              @Override
+              public Void visit(DataPageV1 pageV1) {
+                out.format(" DLE:%s", pageV1.getDlEncoding());
+                out.format(" RLE:%s", pageV1.getRlEncoding());
+                out.format(" VLE:%s", pageV1.getValueEncoding());
+                return null;
+              }
+
+              @Override
+              public Void visit(DataPageV2 pageV2) {
+                out.format(" DLE:RLE");
+                out.format(" RLE:RLE");
+                out.format(" VLE:%s", pageV2.getDataEncoding());
+                return null;
+              }
+            });
             out.format(" SZ:%d", page.getUncompressedSize());
             out.format(" VC:%d", page.getValueCount());
             out.println();

http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/blob/ccc29e4d/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 5b419fb..0a04668 100644
--- a/pom.xml
+++ b/pom.xml
@@ -215,7 +215,7 @@
                    <previousVersion>${previous.version}</previousVersion>
                    <excludes>
                      <exclude>parquet/org/**</exclude>
-                     <exclude>parquet/column/values/**</exclude>
+                     <exclude>parquet/column/**</exclude>
                      <exclude>parquet/hadoop/ParquetInputSplit</exclude>
                    </excludes>
                  </requireBackwardCompatibility>


Mime
View raw message