From commits-return-1460-archive-asf-public=cust-asf.ponee.io@parquet.apache.org Wed Nov 21 18:20:45 2018 Return-Path: X-Original-To: archive-asf-public@cust-asf.ponee.io Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by mx-eu-01.ponee.io (Postfix) with SMTP id C1274180668 for ; Wed, 21 Nov 2018 18:20:44 +0100 (CET) Received: (qmail 17888 invoked by uid 500); 21 Nov 2018 17:20:43 -0000 Mailing-List: contact commits-help@parquet.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@parquet.apache.org Delivered-To: mailing list commits@parquet.apache.org Received: (qmail 17875 invoked by uid 99); 21 Nov 2018 17:20:43 -0000 Received: from ec2-52-202-80-70.compute-1.amazonaws.com (HELO gitbox.apache.org) (52.202.80.70) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 21 Nov 2018 17:20:43 +0000 Received: by gitbox.apache.org (ASF Mail Server at gitbox.apache.org, from userid 33) id 4C3ED854E4; Wed, 21 Nov 2018 17:20:43 +0000 (UTC) Date: Wed, 21 Nov 2018 17:20:43 +0000 To: "commits@parquet.apache.org" Subject: [parquet-mr] branch master updated: Experiment. MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8bit Message-ID: <154282084320.17667.10981342395204889605@gitbox.apache.org> From: zivanfi@apache.org X-Git-Host: gitbox.apache.org X-Git-Repo: parquet-mr X-Git-Refname: refs/heads/master X-Git-Reftype: branch X-Git-Oldrev: a7b5a4b24a3e17edce9273a0654e799075c86dbe X-Git-Newrev: 97a880cfc4fc3c2c74ff1302bc6e4aab1582b6df X-Git-Rev: 97a880cfc4fc3c2c74ff1302bc6e4aab1582b6df X-Git-NotificationType: ref_changed_plus_diff X-Git-Multimail-Version: 1.5.dev Auto-Submitted: auto-generated This is an automated email from the ASF dual-hosted git repository. zivanfi pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/parquet-mr.git The following commit(s) were added to refs/heads/master by this push: new 97a880c Experiment. 97a880c is described below commit 97a880cfc4fc3c2c74ff1302bc6e4aab1582b6df Author: Zoltan Ivanfi AuthorDate: Fri Oct 26 15:08:18 2018 +0200 Experiment. --- parquet-benchmarks/run.sh | 4 ++- .../apache/parquet/benchmarks/WriteBenchmarks.java | 16 +++++----- ...enchmarks.java => WriteBenchmarksParquet1.java} | 36 +++++++++++----------- .../hadoop/InternalParquetRecordWriter.java | 2 +- 4 files changed, 30 insertions(+), 28 deletions(-) diff --git a/parquet-benchmarks/run.sh b/parquet-benchmarks/run.sh index cfaddae..dfb0297 100755 --- a/parquet-benchmarks/run.sh +++ b/parquet-benchmarks/run.sh @@ -22,7 +22,9 @@ SCRIPT_PATH=$( cd "$(dirname "$0")" ; pwd -P ) echo "Starting WRITE benchmarks" -java -jar ${SCRIPT_PATH}/target/parquet-benchmarks.jar p*Write* "$@" +java -XX:+PreserveFramePointer -jar ${SCRIPT_PATH}/target/parquet-benchmarks.jar p*Write* -wi 0 -i 1 -f 3 -tu s -bm ss -rf json +exit 0 + echo "Generating test data" java -cp ${SCRIPT_PATH}/target/parquet-benchmarks.jar org.apache.parquet.benchmarks.DataGenerator generate echo "Data generated, starting READ benchmarks" diff --git a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarks.java b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarks.java index 5c26a84..265c5ec 100644 --- a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarks.java +++ b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarks.java @@ -55,7 +55,7 @@ public class WriteBenchmarks { PAGE_SIZE_DEFAULT, FIXED_LEN_BYTEARRAY_SIZE, UNCOMPRESSED, - ONE_MILLION); + 50 * ONE_MILLION); } @Benchmark @@ -69,7 +69,7 @@ public class WriteBenchmarks { PAGE_SIZE_4M, FIXED_LEN_BYTEARRAY_SIZE, UNCOMPRESSED, - ONE_MILLION); + 50 * ONE_MILLION); } @Benchmark @@ -83,7 +83,7 @@ public class WriteBenchmarks { PAGE_SIZE_8M, FIXED_LEN_BYTEARRAY_SIZE, UNCOMPRESSED, - ONE_MILLION); + 50 * ONE_MILLION); } @Benchmark @@ -97,7 +97,7 @@ public class WriteBenchmarks { PAGE_SIZE_4M, FIXED_LEN_BYTEARRAY_SIZE, UNCOMPRESSED, - ONE_MILLION); + 50 * ONE_MILLION); } @Benchmark @@ -111,7 +111,7 @@ public class WriteBenchmarks { PAGE_SIZE_8M, FIXED_LEN_BYTEARRAY_SIZE, UNCOMPRESSED, - ONE_MILLION); + 50 * ONE_MILLION); } //TODO how to handle lzo jar? @@ -126,7 +126,7 @@ public class WriteBenchmarks { // PAGE_SIZE_DEFAULT, // FIXED_LEN_BYTEARRAY_SIZE, // LZO, -// ONE_MILLION); +// 50 * ONE_MILLION); // } @Benchmark @@ -140,7 +140,7 @@ public class WriteBenchmarks { PAGE_SIZE_DEFAULT, FIXED_LEN_BYTEARRAY_SIZE, SNAPPY, - ONE_MILLION); + 50 * ONE_MILLION); } @Benchmark @@ -154,6 +154,6 @@ public class WriteBenchmarks { PAGE_SIZE_DEFAULT, FIXED_LEN_BYTEARRAY_SIZE, GZIP, - ONE_MILLION); + 50 * ONE_MILLION); } } diff --git a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarks.java b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarksParquet1.java similarity index 86% copy from parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarks.java copy to parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarksParquet1.java index 5c26a84..edd87ba 100644 --- a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarks.java +++ b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarksParquet1.java @@ -29,13 +29,13 @@ import static org.apache.parquet.benchmarks.BenchmarkFiles.*; import java.io.IOException; -import static org.apache.parquet.column.ParquetProperties.WriterVersion.PARQUET_2_0; +import static org.apache.parquet.column.ParquetProperties.WriterVersion.PARQUET_1_0; import static org.apache.parquet.hadoop.metadata.CompressionCodecName.GZIP; import static org.apache.parquet.hadoop.metadata.CompressionCodecName.SNAPPY; import static org.apache.parquet.hadoop.metadata.CompressionCodecName.UNCOMPRESSED; @State(Thread) -public class WriteBenchmarks { +public class WriteBenchmarksParquet1 { private DataGenerator dataGenerator = new DataGenerator(); @Setup(Level.Iteration) @@ -50,12 +50,12 @@ public class WriteBenchmarks { { dataGenerator.generateData(file_1M, configuration, - PARQUET_2_0, + PARQUET_1_0, BLOCK_SIZE_DEFAULT, PAGE_SIZE_DEFAULT, FIXED_LEN_BYTEARRAY_SIZE, UNCOMPRESSED, - ONE_MILLION); + 50 * ONE_MILLION); } @Benchmark @@ -64,12 +64,12 @@ public class WriteBenchmarks { { dataGenerator.generateData(file_1M_BS256M_PS4M, configuration, - PARQUET_2_0, + PARQUET_1_0, BLOCK_SIZE_256M, PAGE_SIZE_4M, FIXED_LEN_BYTEARRAY_SIZE, UNCOMPRESSED, - ONE_MILLION); + 50 * ONE_MILLION); } @Benchmark @@ -78,12 +78,12 @@ public class WriteBenchmarks { { dataGenerator.generateData(file_1M_BS256M_PS8M, configuration, - PARQUET_2_0, + PARQUET_1_0, BLOCK_SIZE_256M, PAGE_SIZE_8M, FIXED_LEN_BYTEARRAY_SIZE, UNCOMPRESSED, - ONE_MILLION); + 50 * ONE_MILLION); } @Benchmark @@ -92,12 +92,12 @@ public class WriteBenchmarks { { dataGenerator.generateData(file_1M_BS512M_PS4M, configuration, - PARQUET_2_0, + PARQUET_1_0, BLOCK_SIZE_512M, PAGE_SIZE_4M, FIXED_LEN_BYTEARRAY_SIZE, UNCOMPRESSED, - ONE_MILLION); + 50 * ONE_MILLION); } @Benchmark @@ -106,12 +106,12 @@ public class WriteBenchmarks { { dataGenerator.generateData(file_1M_BS512M_PS8M, configuration, - PARQUET_2_0, + PARQUET_1_0, BLOCK_SIZE_512M, PAGE_SIZE_8M, FIXED_LEN_BYTEARRAY_SIZE, UNCOMPRESSED, - ONE_MILLION); + 50 * ONE_MILLION); } //TODO how to handle lzo jar? @@ -121,12 +121,12 @@ public class WriteBenchmarks { // { // dataGenerator.generateData(parquetFile_1M_LZO, // configuration, -// WriterVersion.PARQUET_2_0, +// WriterVersion.PARQUET_1_0, // BLOCK_SIZE_DEFAULT, // PAGE_SIZE_DEFAULT, // FIXED_LEN_BYTEARRAY_SIZE, // LZO, -// ONE_MILLION); +// 50 * ONE_MILLION); // } @Benchmark @@ -135,12 +135,12 @@ public class WriteBenchmarks { { dataGenerator.generateData(file_1M_SNAPPY, configuration, - PARQUET_2_0, + PARQUET_1_0, BLOCK_SIZE_DEFAULT, PAGE_SIZE_DEFAULT, FIXED_LEN_BYTEARRAY_SIZE, SNAPPY, - ONE_MILLION); + 50 * ONE_MILLION); } @Benchmark @@ -149,11 +149,11 @@ public class WriteBenchmarks { { dataGenerator.generateData(file_1M_GZIP, configuration, - PARQUET_2_0, + PARQUET_1_0, BLOCK_SIZE_DEFAULT, PAGE_SIZE_DEFAULT, FIXED_LEN_BYTEARRAY_SIZE, GZIP, - ONE_MILLION); + 50 * ONE_MILLION); } } diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordWriter.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordWriter.java index d8af379..8b79dca 100644 --- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordWriter.java +++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordWriter.java @@ -43,7 +43,7 @@ class InternalParquetRecordWriter { private static final Logger LOG = LoggerFactory.getLogger(InternalParquetRecordWriter.class); private static final int MINIMUM_RECORD_COUNT_FOR_CHECK = 100; - private static final int MAXIMUM_RECORD_COUNT_FOR_CHECK = 10000; + private static final int MAXIMUM_RECORD_COUNT_FOR_CHECK = 100; private final ParquetFileWriter parquetFileWriter; private final WriteSupport writeSupport;