Mailing-List: contact commits-help@kudu.apache.org; run by ezmlm
Precedence: bulk
Reply-To: dev@kudu.apache.org
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
From: danburkert@apache.org
To: commits@kudu.apache.org
Date: Wed, 10 Aug 2016 02:53:12 -0000
Message-Id: <ed2ba1da5762426e85d6ecc296183673@git.apache.org>
Subject: [1/2] kudu git commit: cfile-test: some test micro-optimization to
 avoid timeouts
archived-at: Wed, 10 Aug 2016 02:53:14 -0000

Repository: kudu
Updated Branches:
  refs/heads/master 0348499e3 -> acf093c18


cfile-test: some test micro-optimization to avoid timeouts

It turns out that SCOPED_TRACE is relatively slow. Removing some SCOPED_TRACE
in hot parts of the test speeds some of the test cases up by almost 2x in ASAN
builds on my box. Given that these tests are fairly deterministic given a seed,
and have been stable for a long time, leaving the SCOPED_TRACEs in has limited
value.

I only removed those that are in hot paths rather than removing all SCOPED_TRACEs.

I also optimized the "large strings" test to avoid using a large amount of
padding via the StringPrintf argument. The new implementation seems to run
almost twice as fast on my machine. Comparing the
'CacheTypes/TestCFileBothCacheTypes.TestReadWriteLargeStrings/0' in fast-test
ASAN mode before and after:

Before:

real    0m39.710s
user    0m28.808s
sys     0m3.828s

After:

real    0m21.274s
user    0m9.824s
sys     0m3.832s

Change-Id: I951f4ef47c5275ea9743b1be0ff74374498192b1
Reviewed-on: http://gerrit.cloudera.org:8080/3875
Reviewed-by: Adar Dembo <adar@cloudera.com>
Tested-by: Kudu Jenkins


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/cee7bddf
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/cee7bddf
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/cee7bddf

Branch: refs/heads/master
Commit: cee7bddf03d6287002d21ffcf4e8330cb08c9c6d
Parents: 0348499
Author: Todd Lipcon <todd@apache.org>
Authored: Tue Aug 9 10:49:04 2016 -0700
Committer: Todd Lipcon <todd@apache.org>
Committed: Tue Aug 9 22:03:58 2016 +0000

----------------------------------------------------------------------
 src/kudu/cfile/cfile-test-base.h | 12 ++++++---
 src/kudu/cfile/cfile-test.cc     | 47 +++++++++++++++++++++--------------
 2 files changed, 37 insertions(+), 22 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/cee7bddf/src/kudu/cfile/cfile-test-base.h
----------------------------------------------------------------------
diff --git a/src/kudu/cfile/cfile-test-base.h b/src/kudu/cfile/cfile-test-base.h
index 2aa408f..a5e1b9b 100644
--- a/src/kudu/cfile/cfile-test-base.h
+++ b/src/kudu/cfile/cfile-test-base.h
@@ -20,6 +20,7 @@
 
 #include <glog/logging.h>
 #include <algorithm>
+#include <functional>
 #include <stdlib.h>
 #include <string>
 #include <vector>
@@ -229,11 +230,16 @@ template<bool HAS_NULLS>
 class StringDataGenerator : public DataGenerator<STRING, HAS_NULLS> {
  public:
   explicit StringDataGenerator(const char* format)
-  : format_(format) {
+      : StringDataGenerator(
+          [=](size_t x) { return StringPrintf(format, x); }) {
+  }
+
+  explicit StringDataGenerator(std::function<std::string(size_t)> formatter)
+      : formatter_(std::move(formatter)) {
   }
 
   Slice BuildTestValue(size_t block_index, size_t value) OVERRIDE {
-    data_buffers_[block_index] = StringPrintf(format_, value);
+    data_buffers_[block_index] = formatter_(value);
     return Slice(data_buffers_[block_index]);
   }
 
@@ -244,7 +250,7 @@ class StringDataGenerator : public DataGenerator<STRING, HAS_NULLS> {
 
  private:
   std::vector<std::string> data_buffers_;
-  const char* format_;
+  std::function<std::string(size_t)> formatter_;
 };
 
 // Class for generating strings that contain duplicate

http://git-wip-us.apache.org/repos/asf/kudu/blob/cee7bddf/src/kudu/cfile/cfile-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/cfile/cfile-test.cc b/src/kudu/cfile/cfile-test.cc
index 7a54046..0e33283 100644
--- a/src/kudu/cfile/cfile-test.cc
+++ b/src/kudu/cfile/cfile-test.cc
@@ -169,7 +169,6 @@ class TestCFile : public CFileTestBase {
       // Read and verify several ColumnBlocks from this point in the file.
       int read_offset = target;
       for (int block = 0; block < 3 && iter->HasNext(); block++) {
-        SCOPED_TRACE(block);
         size_t n = cb.nrows();
         ASSERT_OK_FAST(iter->CopyNextValues(&n, &cb));
         ASSERT_EQ(n, std::min(num_entries - read_offset, cb.nrows()));
@@ -177,7 +176,6 @@ class TestCFile : public CFileTestBase {
         // Verify that the block data is correct.
         generator->Build(read_offset, n);
         for (size_t j = 0; j < n; ++j) {
-          SCOPED_TRACE(j);
           bool expected_null = generator->TestValueShouldBeNull(read_offset + j);
           ASSERT_EQ(expected_null, cb.is_null(j));
           if (!expected_null) {
@@ -255,7 +253,14 @@ class TestCFile : public CFileTestBase {
     ASSERT_EQ(num_entries, count);
   }
 
-  void TestReadWriteStrings(EncodingType encoding, const char* format);
+  void TestReadWriteStrings(EncodingType encoding) {
+    TestReadWriteStrings(encoding, [](size_t val) {
+        return StringPrintf("hello %04zd", val);
+      });
+  }
+
+  void TestReadWriteStrings(EncodingType encoding,
+                            std::function<string(size_t)> formatter);
 
 #ifdef NDEBUG
   void TestWrite100MFileStrings(EncodingType encoding) {
@@ -474,12 +479,12 @@ void EncodeStringKey(const Schema &schema, const Slice& key,
 }
 
 void TestCFile::TestReadWriteStrings(EncodingType encoding,
-                                     const char* str_format = "hello %04d") {
+                                     std::function<string(size_t)> formatter) {
   Schema schema({ ColumnSchema("key", STRING) }, 1);
 
   const int nrows = 10000;
   BlockId block_id;
-  StringDataGenerator<false> generator(str_format);
+  StringDataGenerator<false> generator(formatter);
   WriteTestFile(&generator, encoding, NO_COMPRESSION, nrows,
                 SMALL_BLOCKSIZE | WRITE_VALIDX, &block_id);
 
@@ -504,7 +509,7 @@ void TestCFile::TestReadWriteStrings(EncodingType encoding,
   Slice s;
 
   CopyOne<STRING>(iter.get(), &s, &arena);
-  ASSERT_EQ(StringPrintf(str_format, 5000), s.ToString());
+  ASSERT_EQ(formatter(5000), s.ToString());
 
   // Seek to last key exactly, should succeed
   ASSERT_OK(iter->SeekToOrdinal(9999));
@@ -526,8 +531,7 @@ void TestCFile::TestReadWriteStrings(EncodingType encoding,
   string buf;
   for (int i = 1; i < 10000; i++) {
     arena.Reset();
-    SCOPED_TRACE(i);
-    SStringPrintf(&buf, str_format, i - 1);
+    buf = formatter(i - 1);
     buf.append(".5");
     s = Slice(buf);
     EncodeStringKey(schema, s, &encoded_key);
@@ -535,15 +539,14 @@ void TestCFile::TestReadWriteStrings(EncodingType encoding,
     ASSERT_FALSE(exact);
     ASSERT_EQ(i, iter->GetCurrentOrdinal());
     CopyOne<STRING>(iter.get(), &s, &arena);
-    ASSERT_EQ(StringPrintf(str_format, i), s.ToString());
+    ASSERT_EQ(formatter(i), s.ToString());
   }
 
   // Seek exactly to each key
   // (seek to "hello 0000" through "hello 9999")
   for (int i = 0; i < 9999; i++) {
     arena.Reset();
-    SCOPED_TRACE(i);
-    SStringPrintf(&buf, str_format, i);
+    buf = formatter(i);
     s = Slice(buf);
     EncodeStringKey(schema, s, &encoded_key);
     ASSERT_OK(iter->SeekAtOrAfter(*encoded_key, &exact));
@@ -556,14 +559,14 @@ void TestCFile::TestReadWriteStrings(EncodingType encoding,
 
   // after last entry
   // (seek to "hello 9999.x")
-  buf = StringPrintf(str_format, 9999) + "x";
+  buf = formatter(9999) + ".x";
   s = Slice(buf);
   EncodeStringKey(schema, s, &encoded_key);
   EXPECT_TRUE(iter->SeekAtOrAfter(*encoded_key, &exact).IsNotFound());
 
   // before first entry
   // (seek to "hello 000", which falls before "hello 0000")
-  buf = StringPrintf(str_format, 0);
+  buf = formatter(0);
   buf.resize(buf.size() - 1);
   s = Slice(buf);
   EncodeStringKey(schema, s, &encoded_key);
@@ -571,13 +574,13 @@ void TestCFile::TestReadWriteStrings(EncodingType encoding,
   EXPECT_FALSE(exact);
   EXPECT_EQ(0u, iter->GetCurrentOrdinal());
   CopyOne<STRING>(iter.get(), &s, &arena);
-  EXPECT_EQ(StringPrintf(str_format, 0), s.ToString());
+  EXPECT_EQ(formatter(0), s.ToString());
 
   // Seek to start of file by ordinal
   ASSERT_OK(iter->SeekToFirst());
   ASSERT_EQ(0u, iter->GetCurrentOrdinal());
   CopyOne<STRING>(iter.get(), &s, &arena);
-  ASSERT_EQ(StringPrintf(str_format, 0), s.ToString());
+  ASSERT_EQ(formatter(0), s.ToString());
 
   // Reseek to start and fetch all data.
   // We fetch in 10 smaller chunks to avoid using too much RAM for the
@@ -609,11 +612,17 @@ TEST_P(TestCFileBothCacheTypes, TestReadWriteStringsDictEncoding) {
 #ifndef THREAD_SANITIZER
 TEST_P(TestCFileBothCacheTypes, TestReadWriteLargeStrings) {
   // Pad the values out to a length of ~65KB.
-  const char* kFormat = "%066000d";
-  TestReadWriteStrings(PLAIN_ENCODING, kFormat);
+  // We use this method instead of just a longer sprintf format since
+  // this is much more CPU-efficient (speeds up the test).
+  auto formatter = [](size_t val) {
+    string ret(66000, '0');
+    StringAppendF(&ret, "%010zd", val);
+    return ret;
+  };
+  TestReadWriteStrings(PLAIN_ENCODING, formatter);
   if (AllowSlowTests()) {
-    TestReadWriteStrings(DICT_ENCODING, kFormat);
-    TestReadWriteStrings(PREFIX_ENCODING, kFormat);
+    TestReadWriteStrings(DICT_ENCODING, formatter);
+    TestReadWriteStrings(PREFIX_ENCODING, formatter);
   }
 }
 #endif