parquet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ziva...@apache.org
Subject [parquet-mr] branch master updated: Revert "Experiment."
Date Wed, 21 Nov 2018 17:28:00 GMT
This is an automated email from the ASF dual-hosted git repository.

zivanfi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-mr.git


The following commit(s) were added to refs/heads/master by this push:
     new 4d9a2fd  Revert "Experiment."
4d9a2fd is described below

commit 4d9a2fd01f33858bd5eb392a5f7bd0967fbec3f8
Author: Zoltan Ivanfi <zi@cloudera.com>
AuthorDate: Wed Nov 21 18:27:42 2018 +0100

    Revert "Experiment."
    
    This reverts commit 97a880cfc4fc3c2c74ff1302bc6e4aab1582b6df.
---
 parquet-benchmarks/run.sh                          |   4 +-
 .../apache/parquet/benchmarks/WriteBenchmarks.java |  16 +--
 .../benchmarks/WriteBenchmarksParquet1.java        | 159 ---------------------
 .../hadoop/InternalParquetRecordWriter.java        |   2 +-
 4 files changed, 10 insertions(+), 171 deletions(-)

diff --git a/parquet-benchmarks/run.sh b/parquet-benchmarks/run.sh
index dfb0297..cfaddae 100755
--- a/parquet-benchmarks/run.sh
+++ b/parquet-benchmarks/run.sh
@@ -22,9 +22,7 @@
 SCRIPT_PATH=$( cd "$(dirname "$0")" ; pwd -P )
 
 echo "Starting WRITE benchmarks"
-java -XX:+PreserveFramePointer -jar ${SCRIPT_PATH}/target/parquet-benchmarks.jar p*Write* -wi 0 -i 1 -f 3 -tu s -bm ss -rf json
-exit 0
-
+java -jar ${SCRIPT_PATH}/target/parquet-benchmarks.jar p*Write* "$@"
 echo "Generating test data"
 java -cp ${SCRIPT_PATH}/target/parquet-benchmarks.jar org.apache.parquet.benchmarks.DataGenerator generate
 echo "Data generated, starting READ benchmarks"
diff --git a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarks.java b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarks.java
index 265c5ec..5c26a84 100644
--- a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarks.java
+++ b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarks.java
@@ -55,7 +55,7 @@ public class WriteBenchmarks {
                                PAGE_SIZE_DEFAULT,
                                FIXED_LEN_BYTEARRAY_SIZE,
                                UNCOMPRESSED,
-                               50 * ONE_MILLION);
+                               ONE_MILLION);
   }
 
   @Benchmark
@@ -69,7 +69,7 @@ public class WriteBenchmarks {
                                PAGE_SIZE_4M,
                                FIXED_LEN_BYTEARRAY_SIZE,
                                UNCOMPRESSED,
-                               50 * ONE_MILLION);
+                               ONE_MILLION);
   }
 
   @Benchmark
@@ -83,7 +83,7 @@ public class WriteBenchmarks {
                                PAGE_SIZE_8M,
                                FIXED_LEN_BYTEARRAY_SIZE,
                                UNCOMPRESSED,
-                               50 * ONE_MILLION);
+                               ONE_MILLION);
   }
 
   @Benchmark
@@ -97,7 +97,7 @@ public class WriteBenchmarks {
                                PAGE_SIZE_4M,
                                FIXED_LEN_BYTEARRAY_SIZE,
                                UNCOMPRESSED,
-                               50 * ONE_MILLION);
+                               ONE_MILLION);
   }
 
   @Benchmark
@@ -111,7 +111,7 @@ public class WriteBenchmarks {
                                PAGE_SIZE_8M,
                                FIXED_LEN_BYTEARRAY_SIZE,
                                UNCOMPRESSED,
-                               50 * ONE_MILLION);
+                               ONE_MILLION);
   }
 
   //TODO how to handle lzo jar?
@@ -126,7 +126,7 @@ public class WriteBenchmarks {
 //            PAGE_SIZE_DEFAULT,
 //            FIXED_LEN_BYTEARRAY_SIZE,
 //            LZO,
-//            50 * ONE_MILLION);
+//            ONE_MILLION);
 //  }
 
   @Benchmark
@@ -140,7 +140,7 @@ public class WriteBenchmarks {
                                PAGE_SIZE_DEFAULT,
                                FIXED_LEN_BYTEARRAY_SIZE,
                                SNAPPY,
-                               50 * ONE_MILLION);
+                               ONE_MILLION);
   }
 
   @Benchmark
@@ -154,6 +154,6 @@ public class WriteBenchmarks {
                                PAGE_SIZE_DEFAULT,
                                FIXED_LEN_BYTEARRAY_SIZE,
                                GZIP,
-                               50 * ONE_MILLION);
+                               ONE_MILLION);
   }
 }
diff --git a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarksParquet1.java b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarksParquet1.java
deleted file mode 100644
index edd87ba..0000000
--- a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarksParquet1.java
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.parquet.benchmarks;
-
-import org.openjdk.jmh.annotations.Benchmark;
-import org.openjdk.jmh.annotations.Level;
-import org.openjdk.jmh.annotations.Setup;
-import org.openjdk.jmh.annotations.State;
-
-import static org.openjdk.jmh.annotations.Scope.Thread;
-import static org.apache.parquet.benchmarks.BenchmarkConstants.*;
-import static org.apache.parquet.benchmarks.BenchmarkFiles.*;
-
-import java.io.IOException;
-
-import static org.apache.parquet.column.ParquetProperties.WriterVersion.PARQUET_1_0;
-import static org.apache.parquet.hadoop.metadata.CompressionCodecName.GZIP;
-import static org.apache.parquet.hadoop.metadata.CompressionCodecName.SNAPPY;
-import static org.apache.parquet.hadoop.metadata.CompressionCodecName.UNCOMPRESSED;
-
-@State(Thread)
-public class WriteBenchmarksParquet1 {
-  private DataGenerator dataGenerator = new DataGenerator();
-
-  @Setup(Level.Iteration)
-  public void cleanup() {
-    //clean existing test data at the beginning of each iteration
-    dataGenerator.cleanup();
-  }
-
-  @Benchmark
-  public void write1MRowsDefaultBlockAndPageSizeUncompressed()
-          throws IOException
-  {
-    dataGenerator.generateData(file_1M,
-                               configuration,
-                               PARQUET_1_0,
-                               BLOCK_SIZE_DEFAULT,
-                               PAGE_SIZE_DEFAULT,
-                               FIXED_LEN_BYTEARRAY_SIZE,
-                               UNCOMPRESSED,
-                               50 * ONE_MILLION);
-  }
-
-  @Benchmark
-  public void write1MRowsBS256MPS4MUncompressed()
-          throws IOException
-  {
-    dataGenerator.generateData(file_1M_BS256M_PS4M,
-                               configuration,
-                               PARQUET_1_0,
-                               BLOCK_SIZE_256M,
-                               PAGE_SIZE_4M,
-                               FIXED_LEN_BYTEARRAY_SIZE,
-                               UNCOMPRESSED,
-                               50 * ONE_MILLION);
-  }
-
-  @Benchmark
-  public void write1MRowsBS256MPS8MUncompressed()
-          throws IOException
-  {
-    dataGenerator.generateData(file_1M_BS256M_PS8M,
-                               configuration,
-                               PARQUET_1_0,
-                               BLOCK_SIZE_256M,
-                               PAGE_SIZE_8M,
-                               FIXED_LEN_BYTEARRAY_SIZE,
-                               UNCOMPRESSED,
-                               50 * ONE_MILLION);
-  }
-
-  @Benchmark
-  public void write1MRowsBS512MPS4MUncompressed()
-          throws IOException
-  {
-    dataGenerator.generateData(file_1M_BS512M_PS4M,
-                               configuration,
-                               PARQUET_1_0,
-                               BLOCK_SIZE_512M,
-                               PAGE_SIZE_4M,
-                               FIXED_LEN_BYTEARRAY_SIZE,
-                               UNCOMPRESSED,
-                               50 * ONE_MILLION);
-  }
-
-  @Benchmark
-  public void write1MRowsBS512MPS8MUncompressed()
-          throws IOException
-  {
-    dataGenerator.generateData(file_1M_BS512M_PS8M,
-                               configuration,
-                               PARQUET_1_0,
-                               BLOCK_SIZE_512M,
-                               PAGE_SIZE_8M,
-                               FIXED_LEN_BYTEARRAY_SIZE,
-                               UNCOMPRESSED,
-                               50 * ONE_MILLION);
-  }
-
-  //TODO how to handle lzo jar?
-//  @Benchmark
-//  public void write1MRowsDefaultBlockAndPageSizeLZO()
-//          throws IOException
-//  {
-//    dataGenerator.generateData(parquetFile_1M_LZO,
-//            configuration,
-//            WriterVersion.PARQUET_1_0,
-//            BLOCK_SIZE_DEFAULT,
-//            PAGE_SIZE_DEFAULT,
-//            FIXED_LEN_BYTEARRAY_SIZE,
-//            LZO,
-//            50 * ONE_MILLION);
-//  }
-
-  @Benchmark
-  public void write1MRowsDefaultBlockAndPageSizeSNAPPY()
-          throws IOException
-  {
-    dataGenerator.generateData(file_1M_SNAPPY,
-                               configuration,
-                               PARQUET_1_0,
-                               BLOCK_SIZE_DEFAULT,
-                               PAGE_SIZE_DEFAULT,
-                               FIXED_LEN_BYTEARRAY_SIZE,
-                               SNAPPY,
-                               50 * ONE_MILLION);
-  }
-
-  @Benchmark
-  public void write1MRowsDefaultBlockAndPageSizeGZIP()
-          throws IOException
-  {
-    dataGenerator.generateData(file_1M_GZIP,
-                               configuration,
-                               PARQUET_1_0,
-                               BLOCK_SIZE_DEFAULT,
-                               PAGE_SIZE_DEFAULT,
-                               FIXED_LEN_BYTEARRAY_SIZE,
-                               GZIP,
-                               50 * ONE_MILLION);
-  }
-}
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordWriter.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordWriter.java
index 8b79dca..d8af379 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordWriter.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordWriter.java
@@ -43,7 +43,7 @@ class InternalParquetRecordWriter<T> {
   private static final Logger LOG = LoggerFactory.getLogger(InternalParquetRecordWriter.class);
 
   private static final int MINIMUM_RECORD_COUNT_FOR_CHECK = 100;
-  private static final int MAXIMUM_RECORD_COUNT_FOR_CHECK = 100;
+  private static final int MAXIMUM_RECORD_COUNT_FOR_CHECK = 10000;
 
   private final ParquetFileWriter parquetFileWriter;
   private final WriteSupport<T> writeSupport;


Mime
View raw message