Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 50438200B6F for ; Wed, 10 Aug 2016 04:53:14 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id 4EB07160AB0; Wed, 10 Aug 2016 02:53:14 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 713DB160AA5 for ; Wed, 10 Aug 2016 04:53:13 +0200 (CEST) Received: (qmail 72501 invoked by uid 500); 10 Aug 2016 02:53:12 -0000 Mailing-List: contact commits-help@kudu.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@kudu.apache.org Delivered-To: mailing list commits@kudu.apache.org Received: (qmail 72492 invoked by uid 99); 10 Aug 2016 02:53:12 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 10 Aug 2016 02:53:12 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 7E409E03E8; Wed, 10 Aug 2016 02:53:12 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: danburkert@apache.org To: commits@kudu.apache.org Date: Wed, 10 Aug 2016 02:53:12 -0000 Message-Id: X-Mailer: ASF-Git Admin Mailer Subject: [1/2] kudu git commit: cfile-test: some test micro-optimization to avoid timeouts archived-at: Wed, 10 Aug 2016 02:53:14 -0000 Repository: kudu Updated Branches: refs/heads/master 0348499e3 -> acf093c18 cfile-test: some test micro-optimization to avoid timeouts It turns out that SCOPED_TRACE is relatively slow. Removing some SCOPED_TRACE in hot parts of the test speeds some of the test cases up by almost 2x in ASAN builds on my box. Given that these tests are fairly deterministic given a seed, and have been stable for a long time, leaving the SCOPED_TRACEs in has limited value. I only removed those that are in hot paths rather than removing all SCOPED_TRACEs. I also optimized the "large strings" test to avoid using a large amount of padding via the StringPrintf argument. The new implementation seems to run almost twice as fast on my machine. Comparing the 'CacheTypes/TestCFileBothCacheTypes.TestReadWriteLargeStrings/0' in fast-test ASAN mode before and after: Before: real 0m39.710s user 0m28.808s sys 0m3.828s After: real 0m21.274s user 0m9.824s sys 0m3.832s Change-Id: I951f4ef47c5275ea9743b1be0ff74374498192b1 Reviewed-on: http://gerrit.cloudera.org:8080/3875 Reviewed-by: Adar Dembo Tested-by: Kudu Jenkins Project: http://git-wip-us.apache.org/repos/asf/kudu/repo Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/cee7bddf Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/cee7bddf Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/cee7bddf Branch: refs/heads/master Commit: cee7bddf03d6287002d21ffcf4e8330cb08c9c6d Parents: 0348499 Author: Todd Lipcon Authored: Tue Aug 9 10:49:04 2016 -0700 Committer: Todd Lipcon Committed: Tue Aug 9 22:03:58 2016 +0000 ---------------------------------------------------------------------- src/kudu/cfile/cfile-test-base.h | 12 ++++++--- src/kudu/cfile/cfile-test.cc | 47 +++++++++++++++++++++-------------- 2 files changed, 37 insertions(+), 22 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kudu/blob/cee7bddf/src/kudu/cfile/cfile-test-base.h ---------------------------------------------------------------------- diff --git a/src/kudu/cfile/cfile-test-base.h b/src/kudu/cfile/cfile-test-base.h index 2aa408f..a5e1b9b 100644 --- a/src/kudu/cfile/cfile-test-base.h +++ b/src/kudu/cfile/cfile-test-base.h @@ -20,6 +20,7 @@ #include #include +#include #include #include #include @@ -229,11 +230,16 @@ template class StringDataGenerator : public DataGenerator { public: explicit StringDataGenerator(const char* format) - : format_(format) { + : StringDataGenerator( + [=](size_t x) { return StringPrintf(format, x); }) { + } + + explicit StringDataGenerator(std::function formatter) + : formatter_(std::move(formatter)) { } Slice BuildTestValue(size_t block_index, size_t value) OVERRIDE { - data_buffers_[block_index] = StringPrintf(format_, value); + data_buffers_[block_index] = formatter_(value); return Slice(data_buffers_[block_index]); } @@ -244,7 +250,7 @@ class StringDataGenerator : public DataGenerator { private: std::vector data_buffers_; - const char* format_; + std::function formatter_; }; // Class for generating strings that contain duplicate http://git-wip-us.apache.org/repos/asf/kudu/blob/cee7bddf/src/kudu/cfile/cfile-test.cc ---------------------------------------------------------------------- diff --git a/src/kudu/cfile/cfile-test.cc b/src/kudu/cfile/cfile-test.cc index 7a54046..0e33283 100644 --- a/src/kudu/cfile/cfile-test.cc +++ b/src/kudu/cfile/cfile-test.cc @@ -169,7 +169,6 @@ class TestCFile : public CFileTestBase { // Read and verify several ColumnBlocks from this point in the file. int read_offset = target; for (int block = 0; block < 3 && iter->HasNext(); block++) { - SCOPED_TRACE(block); size_t n = cb.nrows(); ASSERT_OK_FAST(iter->CopyNextValues(&n, &cb)); ASSERT_EQ(n, std::min(num_entries - read_offset, cb.nrows())); @@ -177,7 +176,6 @@ class TestCFile : public CFileTestBase { // Verify that the block data is correct. generator->Build(read_offset, n); for (size_t j = 0; j < n; ++j) { - SCOPED_TRACE(j); bool expected_null = generator->TestValueShouldBeNull(read_offset + j); ASSERT_EQ(expected_null, cb.is_null(j)); if (!expected_null) { @@ -255,7 +253,14 @@ class TestCFile : public CFileTestBase { ASSERT_EQ(num_entries, count); } - void TestReadWriteStrings(EncodingType encoding, const char* format); + void TestReadWriteStrings(EncodingType encoding) { + TestReadWriteStrings(encoding, [](size_t val) { + return StringPrintf("hello %04zd", val); + }); + } + + void TestReadWriteStrings(EncodingType encoding, + std::function formatter); #ifdef NDEBUG void TestWrite100MFileStrings(EncodingType encoding) { @@ -474,12 +479,12 @@ void EncodeStringKey(const Schema &schema, const Slice& key, } void TestCFile::TestReadWriteStrings(EncodingType encoding, - const char* str_format = "hello %04d") { + std::function formatter) { Schema schema({ ColumnSchema("key", STRING) }, 1); const int nrows = 10000; BlockId block_id; - StringDataGenerator generator(str_format); + StringDataGenerator generator(formatter); WriteTestFile(&generator, encoding, NO_COMPRESSION, nrows, SMALL_BLOCKSIZE | WRITE_VALIDX, &block_id); @@ -504,7 +509,7 @@ void TestCFile::TestReadWriteStrings(EncodingType encoding, Slice s; CopyOne(iter.get(), &s, &arena); - ASSERT_EQ(StringPrintf(str_format, 5000), s.ToString()); + ASSERT_EQ(formatter(5000), s.ToString()); // Seek to last key exactly, should succeed ASSERT_OK(iter->SeekToOrdinal(9999)); @@ -526,8 +531,7 @@ void TestCFile::TestReadWriteStrings(EncodingType encoding, string buf; for (int i = 1; i < 10000; i++) { arena.Reset(); - SCOPED_TRACE(i); - SStringPrintf(&buf, str_format, i - 1); + buf = formatter(i - 1); buf.append(".5"); s = Slice(buf); EncodeStringKey(schema, s, &encoded_key); @@ -535,15 +539,14 @@ void TestCFile::TestReadWriteStrings(EncodingType encoding, ASSERT_FALSE(exact); ASSERT_EQ(i, iter->GetCurrentOrdinal()); CopyOne(iter.get(), &s, &arena); - ASSERT_EQ(StringPrintf(str_format, i), s.ToString()); + ASSERT_EQ(formatter(i), s.ToString()); } // Seek exactly to each key // (seek to "hello 0000" through "hello 9999") for (int i = 0; i < 9999; i++) { arena.Reset(); - SCOPED_TRACE(i); - SStringPrintf(&buf, str_format, i); + buf = formatter(i); s = Slice(buf); EncodeStringKey(schema, s, &encoded_key); ASSERT_OK(iter->SeekAtOrAfter(*encoded_key, &exact)); @@ -556,14 +559,14 @@ void TestCFile::TestReadWriteStrings(EncodingType encoding, // after last entry // (seek to "hello 9999.x") - buf = StringPrintf(str_format, 9999) + "x"; + buf = formatter(9999) + ".x"; s = Slice(buf); EncodeStringKey(schema, s, &encoded_key); EXPECT_TRUE(iter->SeekAtOrAfter(*encoded_key, &exact).IsNotFound()); // before first entry // (seek to "hello 000", which falls before "hello 0000") - buf = StringPrintf(str_format, 0); + buf = formatter(0); buf.resize(buf.size() - 1); s = Slice(buf); EncodeStringKey(schema, s, &encoded_key); @@ -571,13 +574,13 @@ void TestCFile::TestReadWriteStrings(EncodingType encoding, EXPECT_FALSE(exact); EXPECT_EQ(0u, iter->GetCurrentOrdinal()); CopyOne(iter.get(), &s, &arena); - EXPECT_EQ(StringPrintf(str_format, 0), s.ToString()); + EXPECT_EQ(formatter(0), s.ToString()); // Seek to start of file by ordinal ASSERT_OK(iter->SeekToFirst()); ASSERT_EQ(0u, iter->GetCurrentOrdinal()); CopyOne(iter.get(), &s, &arena); - ASSERT_EQ(StringPrintf(str_format, 0), s.ToString()); + ASSERT_EQ(formatter(0), s.ToString()); // Reseek to start and fetch all data. // We fetch in 10 smaller chunks to avoid using too much RAM for the @@ -609,11 +612,17 @@ TEST_P(TestCFileBothCacheTypes, TestReadWriteStringsDictEncoding) { #ifndef THREAD_SANITIZER TEST_P(TestCFileBothCacheTypes, TestReadWriteLargeStrings) { // Pad the values out to a length of ~65KB. - const char* kFormat = "%066000d"; - TestReadWriteStrings(PLAIN_ENCODING, kFormat); + // We use this method instead of just a longer sprintf format since + // this is much more CPU-efficient (speeds up the test). + auto formatter = [](size_t val) { + string ret(66000, '0'); + StringAppendF(&ret, "%010zd", val); + return ret; + }; + TestReadWriteStrings(PLAIN_ENCODING, formatter); if (AllowSlowTests()) { - TestReadWriteStrings(DICT_ENCODING, kFormat); - TestReadWriteStrings(PREFIX_ENCODING, kFormat); + TestReadWriteStrings(DICT_ENCODING, formatter); + TestReadWriteStrings(PREFIX_ENCODING, formatter); } } #endif