From commits-return-7679-archive-asf-public=cust-asf.ponee.io@kudu.apache.org Wed Jul 10 04:27:14 2019 Return-Path: X-Original-To: archive-asf-public@cust-asf.ponee.io Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [207.244.88.153]) by mx-eu-01.ponee.io (Postfix) with SMTP id C117F18062B for ; Wed, 10 Jul 2019 06:27:13 +0200 (CEST) Received: (qmail 54069 invoked by uid 500); 10 Jul 2019 04:27:13 -0000 Mailing-List: contact commits-help@kudu.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@kudu.apache.org Delivered-To: mailing list commits@kudu.apache.org Received: (qmail 54058 invoked by uid 99); 10 Jul 2019 04:27:12 -0000 Received: from ec2-52-202-80-70.compute-1.amazonaws.com (HELO gitbox.apache.org) (52.202.80.70) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 10 Jul 2019 04:27:12 +0000 Received: by gitbox.apache.org (ASF Mail Server at gitbox.apache.org, from userid 33) id BA4AF87AD7; Wed, 10 Jul 2019 04:27:07 +0000 (UTC) Date: Wed, 10 Jul 2019 04:27:07 +0000 To: "commits@kudu.apache.org" Subject: [kudu] branch master updated: Extend benchmark for ColumnarRowBlockToPB. MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8bit Message-ID: <156273282771.27140.15844676436721468267@gitbox.apache.org> From: todd@apache.org X-Git-Host: gitbox.apache.org X-Git-Repo: kudu X-Git-Refname: refs/heads/master X-Git-Reftype: branch X-Git-Oldrev: 0892c19425bd1a9e3eb293d93a43e1ce38dc3d2d X-Git-Newrev: 3cbc0d4fbe295748d6ffdf1e5e7edeaf94ef0911 X-Git-Rev: 3cbc0d4fbe295748d6ffdf1e5e7edeaf94ef0911 X-Git-NotificationType: ref_changed_plus_diff X-Git-Multimail-Version: 1.5.dev Auto-Submitted: auto-generated This is an automated email from the ASF dual-hosted git repository. 
todd pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/kudu.git The following commit(s) were added to refs/heads/master by this push: new 3cbc0d4 Extend benchmark for ColumnarRowBlockToPB. 3cbc0d4 is described below commit 3cbc0d4fbe295748d6ffdf1e5e7edeaf94ef0911 Author: triplesheep AuthorDate: Thu Jun 27 07:40:09 2019 +0000 Extend benchmark for ColumnarRowBlockToPB. Extend the origin ColumnarRowBlockToPB benchmark in two aspects. Both column scale and selected row count are configurable. Change-Id: Ie29937c316be624151e6c51e54545c4f023e603d Reviewed-on: http://gerrit.cloudera.org:8080/13749 Reviewed-by: Todd Lipcon Tested-by: Kudu Jenkins --- src/kudu/common/wire_protocol-test.cc | 103 +++++++++++++++++++++++++++++----- 1 file changed, 90 insertions(+), 13 deletions(-) diff --git a/src/kudu/common/wire_protocol-test.cc b/src/kudu/common/wire_protocol-test.cc index 4885b96..f09fa5d 100644 --- a/src/kudu/common/wire_protocol-test.cc +++ b/src/kudu/common/wire_protocol-test.cc @@ -17,9 +17,12 @@ #include "kudu/common/wire_protocol.h" -#include +#include #include +#include #include +#include +#include #include #include @@ -32,8 +35,10 @@ #include "kudu/common/row.h" #include "kudu/common/rowblock.h" #include "kudu/common/schema.h" +#include "kudu/common/types.h" #include "kudu/common/wire_protocol.pb.h" #include "kudu/gutil/port.h" +#include "kudu/gutil/strings/substitute.h" #include "kudu/util/bitmap.h" #include "kudu/util/bloom_filter.h" #include "kudu/util/faststring.h" @@ -50,6 +55,7 @@ using std::string; using std::unique_ptr; using std::vector; +using strings::Substitute; namespace kudu { @@ -83,8 +89,84 @@ class WireProtocolTest : public KuduTest { row.cell(2).set_null(false); } } + + void ResetBenchmarkSchema(int num_columns) { + vector column_schemas; + column_schemas.reserve(num_columns); + for (int i = 0; i < num_columns; i++) { + column_schemas.emplace_back(Substitute("col$0", i), i % 2 ? 
STRING : INT32); + } + benchmark_schema_.Reset(column_schemas, 1); + } + + void FillRowBlockForBenchmark(RowBlock* block) { + test_data_arena_.Reset(); + for (int i = 0; i < block->nrows(); i++) { + RowBlockRow row = block->row(i); + for (int j = 0; j < benchmark_schema_.num_columns(); j++) { + const ColumnSchema& column_schema = benchmark_schema_.column(j); + DataType type = column_schema.type_info()->type(); + if (type == STRING) { + Slice col; + CHECK(test_data_arena_.RelocateSlice(Substitute("hello world $0", + column_schema.name()), &col)); + memcpy(row.mutable_cell_ptr(j), &col, sizeof(Slice)); + } else if (type == INT32) { + memcpy(row.mutable_cell_ptr(j), &i, sizeof(int32_t)); + } else { + LOG(FATAL) << "Unexpected type."; + } + } + } + } + + void SelectRandomRowsWithRate(RowBlock* block, double rate) { + CHECK_LE(rate, 1.0); + CHECK_GE(rate, 0.0); + int select_count = block->nrows() * rate; + SelectionVector* select_vector = block->selection_vector(); + if (rate == 1.0) { + select_vector->SetAllTrue(); + } else if (rate == 0.0) { + select_vector->SetAllFalse(); + } else { + vector<int> indexes(block->nrows()); + std::iota(indexes.begin(), indexes.end(), 0); + std::random_shuffle(indexes.begin(), indexes.end()); + indexes.resize(select_count); + select_vector->SetAllFalse(); + for (auto index : indexes) { + select_vector->SetRowSelected(index); + } + } + CHECK_EQ(select_vector->CountSelected(), select_count); + } + + // Use column_count to control the schema scale. + // Use select_rate to control the number of selected rows. + void RunBenchmark(int column_count, double select_rate) { + ResetBenchmarkSchema(column_count); + Arena arena(1024); + const int kNumTrials = AllowSlowTests() ? 
100 : 10; + RowBlock block(&benchmark_schema_, 10000, &arena); + FillRowBlockForBenchmark(&block); + SelectRandomRowsWithRate(&block, select_rate); + + RowwiseRowBlockPB pb; + faststring direct, indirect; + LOG_TIMING(INFO, Substitute("Converting to PB with column count $0 and row select rate $1 ", + column_count, select_rate)) { + for (int i = 0; i < kNumTrials; ++i) { + pb.Clear(); + direct.clear(); + indirect.clear(); + SerializeRowBlock(block, &pb, nullptr, &direct, &indirect); + } + } + } protected: Schema schema_; + Schema benchmark_schema_; Arena test_data_arena_; }; @@ -340,18 +422,13 @@ TEST_F(WireProtocolTest, TestColumnarRowBlockToPBWithPadding) { #ifdef NDEBUG TEST_F(WireProtocolTest, TestColumnarRowBlockToPBBenchmark) { - Arena arena(1024); - const int kNumTrials = AllowSlowTests() ? 100 : 10; - RowBlock block(&schema_, 10000 * kNumTrials, &arena); - FillRowBlockWithTestRows(&block); - - RowwiseRowBlockPB pb; - - LOG_TIMING(INFO, "Converting to PB") { - for (int i = 0; i < kNumTrials; i++) { - pb.Clear(); - faststring direct, indirect; - SerializeRowBlock(block, &pb, NULL, &direct, &indirect); + // Can set column_counts = {3, 30, 300} together with + // select_rates = {1.0, 0.8, 0.5, 0.2} for benchmark. + vector<int> column_counts = {3}; + vector<double> select_rates = {1.0}; + for (auto column_count : column_counts) { + for (auto select_rate : select_rates) { + RunBenchmark(column_count, select_rate); } } }