parquet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ziva...@apache.org
Subject [parquet-mr] branch master updated: Experiment.
Date Wed, 21 Nov 2018 17:20:43 GMT
This is an automated email from the ASF dual-hosted git repository.

zivanfi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-mr.git


The following commit(s) were added to refs/heads/master by this push:
     new 97a880c  Experiment.
97a880c is described below

commit 97a880cfc4fc3c2c74ff1302bc6e4aab1582b6df
Author: Zoltan Ivanfi <zi@cloudera.com>
AuthorDate: Fri Oct 26 15:08:18 2018 +0200

    Experiment.
---
 parquet-benchmarks/run.sh                          |  4 ++-
 .../apache/parquet/benchmarks/WriteBenchmarks.java | 16 +++++-----
 ...enchmarks.java => WriteBenchmarksParquet1.java} | 36 +++++++++++-----------
 .../hadoop/InternalParquetRecordWriter.java        |  2 +-
 4 files changed, 30 insertions(+), 28 deletions(-)

diff --git a/parquet-benchmarks/run.sh b/parquet-benchmarks/run.sh
index cfaddae..dfb0297 100755
--- a/parquet-benchmarks/run.sh
+++ b/parquet-benchmarks/run.sh
@@ -22,7 +22,9 @@
 SCRIPT_PATH=$( cd "$(dirname "$0")" ; pwd -P )
 
 echo "Starting WRITE benchmarks"
-java -jar ${SCRIPT_PATH}/target/parquet-benchmarks.jar p*Write* "$@"
+java -XX:+PreserveFramePointer -jar ${SCRIPT_PATH}/target/parquet-benchmarks.jar p*Write* -wi 0 -i 1 -f 3 -tu s -bm ss -rf json
+exit 0
+
 echo "Generating test data"
 java -cp ${SCRIPT_PATH}/target/parquet-benchmarks.jar org.apache.parquet.benchmarks.DataGenerator generate
 echo "Data generated, starting READ benchmarks"
diff --git a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarks.java b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarks.java
index 5c26a84..265c5ec 100644
--- a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarks.java
+++ b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarks.java
@@ -55,7 +55,7 @@ public class WriteBenchmarks {
                                PAGE_SIZE_DEFAULT,
                                FIXED_LEN_BYTEARRAY_SIZE,
                                UNCOMPRESSED,
-                               ONE_MILLION);
+                               50 * ONE_MILLION);
   }
 
   @Benchmark
@@ -69,7 +69,7 @@ public class WriteBenchmarks {
                                PAGE_SIZE_4M,
                                FIXED_LEN_BYTEARRAY_SIZE,
                                UNCOMPRESSED,
-                               ONE_MILLION);
+                               50 * ONE_MILLION);
   }
 
   @Benchmark
@@ -83,7 +83,7 @@ public class WriteBenchmarks {
                                PAGE_SIZE_8M,
                                FIXED_LEN_BYTEARRAY_SIZE,
                                UNCOMPRESSED,
-                               ONE_MILLION);
+                               50 * ONE_MILLION);
   }
 
   @Benchmark
@@ -97,7 +97,7 @@ public class WriteBenchmarks {
                                PAGE_SIZE_4M,
                                FIXED_LEN_BYTEARRAY_SIZE,
                                UNCOMPRESSED,
-                               ONE_MILLION);
+                               50 * ONE_MILLION);
   }
 
   @Benchmark
@@ -111,7 +111,7 @@ public class WriteBenchmarks {
                                PAGE_SIZE_8M,
                                FIXED_LEN_BYTEARRAY_SIZE,
                                UNCOMPRESSED,
-                               ONE_MILLION);
+                               50 * ONE_MILLION);
   }
 
   //TODO how to handle lzo jar?
@@ -126,7 +126,7 @@ public class WriteBenchmarks {
 //            PAGE_SIZE_DEFAULT,
 //            FIXED_LEN_BYTEARRAY_SIZE,
 //            LZO,
-//            ONE_MILLION);
+//            50 * ONE_MILLION);
 //  }
 
   @Benchmark
@@ -140,7 +140,7 @@ public class WriteBenchmarks {
                                PAGE_SIZE_DEFAULT,
                                FIXED_LEN_BYTEARRAY_SIZE,
                                SNAPPY,
-                               ONE_MILLION);
+                               50 * ONE_MILLION);
   }
 
   @Benchmark
@@ -154,6 +154,6 @@ public class WriteBenchmarks {
                                PAGE_SIZE_DEFAULT,
                                FIXED_LEN_BYTEARRAY_SIZE,
                                GZIP,
-                               ONE_MILLION);
+                               50 * ONE_MILLION);
   }
 }
diff --git a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarks.java b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarksParquet1.java
similarity index 86%
copy from parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarks.java
copy to parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarksParquet1.java
index 5c26a84..edd87ba 100644
--- a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarks.java
+++ b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarksParquet1.java
@@ -29,13 +29,13 @@ import static org.apache.parquet.benchmarks.BenchmarkFiles.*;
 
 import java.io.IOException;
 
-import static org.apache.parquet.column.ParquetProperties.WriterVersion.PARQUET_2_0;
+import static org.apache.parquet.column.ParquetProperties.WriterVersion.PARQUET_1_0;
 import static org.apache.parquet.hadoop.metadata.CompressionCodecName.GZIP;
 import static org.apache.parquet.hadoop.metadata.CompressionCodecName.SNAPPY;
 import static org.apache.parquet.hadoop.metadata.CompressionCodecName.UNCOMPRESSED;
 
 @State(Thread)
-public class WriteBenchmarks {
+public class WriteBenchmarksParquet1 {
   private DataGenerator dataGenerator = new DataGenerator();
 
   @Setup(Level.Iteration)
@@ -50,12 +50,12 @@ public class WriteBenchmarks {
   {
     dataGenerator.generateData(file_1M,
                                configuration,
-                               PARQUET_2_0,
+                               PARQUET_1_0,
                                BLOCK_SIZE_DEFAULT,
                                PAGE_SIZE_DEFAULT,
                                FIXED_LEN_BYTEARRAY_SIZE,
                                UNCOMPRESSED,
-                               ONE_MILLION);
+                               50 * ONE_MILLION);
   }
 
   @Benchmark
@@ -64,12 +64,12 @@ public class WriteBenchmarks {
   {
     dataGenerator.generateData(file_1M_BS256M_PS4M,
                                configuration,
-                               PARQUET_2_0,
+                               PARQUET_1_0,
                                BLOCK_SIZE_256M,
                                PAGE_SIZE_4M,
                                FIXED_LEN_BYTEARRAY_SIZE,
                                UNCOMPRESSED,
-                               ONE_MILLION);
+                               50 * ONE_MILLION);
   }
 
   @Benchmark
@@ -78,12 +78,12 @@ public class WriteBenchmarks {
   {
     dataGenerator.generateData(file_1M_BS256M_PS8M,
                                configuration,
-                               PARQUET_2_0,
+                               PARQUET_1_0,
                                BLOCK_SIZE_256M,
                                PAGE_SIZE_8M,
                                FIXED_LEN_BYTEARRAY_SIZE,
                                UNCOMPRESSED,
-                               ONE_MILLION);
+                               50 * ONE_MILLION);
   }
 
   @Benchmark
@@ -92,12 +92,12 @@ public class WriteBenchmarks {
   {
     dataGenerator.generateData(file_1M_BS512M_PS4M,
                                configuration,
-                               PARQUET_2_0,
+                               PARQUET_1_0,
                                BLOCK_SIZE_512M,
                                PAGE_SIZE_4M,
                                FIXED_LEN_BYTEARRAY_SIZE,
                                UNCOMPRESSED,
-                               ONE_MILLION);
+                               50 * ONE_MILLION);
   }
 
   @Benchmark
@@ -106,12 +106,12 @@ public class WriteBenchmarks {
   {
     dataGenerator.generateData(file_1M_BS512M_PS8M,
                                configuration,
-                               PARQUET_2_0,
+                               PARQUET_1_0,
                                BLOCK_SIZE_512M,
                                PAGE_SIZE_8M,
                                FIXED_LEN_BYTEARRAY_SIZE,
                                UNCOMPRESSED,
-                               ONE_MILLION);
+                               50 * ONE_MILLION);
   }
 
   //TODO how to handle lzo jar?
@@ -121,12 +121,12 @@ public class WriteBenchmarks {
 //  {
 //    dataGenerator.generateData(parquetFile_1M_LZO,
 //            configuration,
-//            WriterVersion.PARQUET_2_0,
+//            WriterVersion.PARQUET_1_0,
 //            BLOCK_SIZE_DEFAULT,
 //            PAGE_SIZE_DEFAULT,
 //            FIXED_LEN_BYTEARRAY_SIZE,
 //            LZO,
-//            ONE_MILLION);
+//            50 * ONE_MILLION);
 //  }
 
   @Benchmark
@@ -135,12 +135,12 @@ public class WriteBenchmarks {
   {
     dataGenerator.generateData(file_1M_SNAPPY,
                                configuration,
-                               PARQUET_2_0,
+                               PARQUET_1_0,
                                BLOCK_SIZE_DEFAULT,
                                PAGE_SIZE_DEFAULT,
                                FIXED_LEN_BYTEARRAY_SIZE,
                                SNAPPY,
-                               ONE_MILLION);
+                               50 * ONE_MILLION);
   }
 
   @Benchmark
@@ -149,11 +149,11 @@ public class WriteBenchmarks {
   {
     dataGenerator.generateData(file_1M_GZIP,
                                configuration,
-                               PARQUET_2_0,
+                               PARQUET_1_0,
                                BLOCK_SIZE_DEFAULT,
                                PAGE_SIZE_DEFAULT,
                                FIXED_LEN_BYTEARRAY_SIZE,
                                GZIP,
-                               ONE_MILLION);
+                               50 * ONE_MILLION);
   }
 }
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordWriter.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordWriter.java
index d8af379..8b79dca 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordWriter.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordWriter.java
@@ -43,7 +43,7 @@ class InternalParquetRecordWriter<T> {
   private static final Logger LOG = LoggerFactory.getLogger(InternalParquetRecordWriter.class);
 
   private static final int MINIMUM_RECORD_COUNT_FOR_CHECK = 100;
-  private static final int MAXIMUM_RECORD_COUNT_FOR_CHECK = 10000;
+  private static final int MAXIMUM_RECORD_COUNT_FOR_CHECK = 100;
 
   private final ParquetFileWriter parquetFileWriter;
   private final WriteSupport<T> writeSupport;


Mime
View raw message