kudu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From aw...@apache.org
Subject [kudu] 01/02: [compression] Refactor unit tests and add simple benchmark test
Date Tue, 20 Aug 2019 04:32:54 GMT
This is an automated email from the ASF dual-hosted git repository.

awong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 8c977a8a5a660b6ddd92b67776bc9698ce9fa947
Author: Yingchun Lai <405403881@qq.com>
AuthorDate: Sat Aug 17 09:08:24 2019 +0800

    [compression] Refactor unit tests and add simple benchmark test
    
    Refactor unit tests of compression-test to reduce redundant code,
    and also add simple benchmark tests for compression and uncompression
    to check whether an upgrade of a thirdparty compression library
    influences efficiency.
    
    Change-Id: If09817d223b98c825d0c8276c8f663b5c5b9eb12
    Reviewed-on: http://gerrit.cloudera.org:8080/14086
    Reviewed-by: Alexey Serbin <aserbin@cloudera.com>
    Tested-by: Alexey Serbin <aserbin@cloudera.com>
---
 src/kudu/util/compression/compression-test.cc | 97 +++++++++++++++++++++++----
 1 file changed, 84 insertions(+), 13 deletions(-)

diff --git a/src/kudu/util/compression/compression-test.cc b/src/kudu/util/compression/compression-test.cc
index 6b46a4f..9aa8635 100644
--- a/src/kudu/util/compression/compression-test.cc
+++ b/src/kudu/util/compression/compression-test.cc
@@ -15,28 +15,36 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#include <algorithm>
 #include <cstdint>
 #include <cstdlib>
 #include <cstring>
+#include <ostream>
+#include <string>
 #include <vector>
 
+#include <glog/logging.h>
 #include <gtest/gtest.h>
 
 #include "kudu/gutil/gscoped_ptr.h"
 #include "kudu/util/compression/compression.pb.h"
 #include "kudu/util/compression/compression_codec.h"
+#include "kudu/util/random.h"
+#include "kudu/util/random_util.h"
 #include "kudu/util/slice.h"
+#include "kudu/util/stopwatch.h"
 #include "kudu/util/test_macros.h"
 #include "kudu/util/test_util.h"
 
 namespace kudu {
 
+using std::string;
 using std::vector;
 
 class TestCompression : public KuduTest {};
 
 static void TestCompressionCodec(CompressionType compression) {
-  const int kInputSize = 64;
+  constexpr int kInputSize = 64;
 
   const CompressionCodec* codec;
   uint8_t ibuffer[kInputSize];
@@ -60,15 +68,77 @@ static void TestCompressionCodec(CompressionType compression) {
   ASSERT_EQ(0, memcmp(ibuffer, ubuffer, kInputSize));
 
   // Compress slices and uncompress
-  vector<Slice> v;
-  v.emplace_back(ibuffer, 1);
-  for (int i = 1; i <= kInputSize; i += 7)
-    v.emplace_back(ibuffer + i, 7);
-  ASSERT_OK(codec->Compress(Slice(ibuffer, kInputSize), cbuffer.get(), &compressed));
+  vector<Slice> islices;
+  constexpr int kStep = 7;
+  for (int i = 0; i < kInputSize; i += kStep)
+    islices.emplace_back(ibuffer + i, std::min(kStep, kInputSize - i));
+  ASSERT_OK(codec->Compress(islices, cbuffer.get(), &compressed));
   ASSERT_OK(codec->Uncompress(Slice(cbuffer.get(), compressed), ubuffer, kInputSize));
   ASSERT_EQ(0, memcmp(ibuffer, ubuffer, kInputSize));
 }
 
+static void Benchmark(Random random, CompressionType compression) {
+  constexpr int kMaterialCount = 16;
+  constexpr int kInputSize = 8;
+  constexpr int kSliceCount = 1024;
+
+  // Prepare materials.
+  vector<string> materials;
+  materials.reserve(kMaterialCount);
+  for (int i = 0; i < kMaterialCount; ++i) {
+    materials.emplace_back(RandomString(kInputSize, &random));
+  }
+
+  // Prepare input slices.
+  vector<Slice> islices;
+  islices.reserve(kSliceCount);
+  for (int i = 0; i < kSliceCount; ++i) {
+    islices.emplace_back(Slice(materials[random.Uniform(kMaterialCount)]));
+  }
+
+  // Get the specified compression codec.
+  const CompressionCodec* codec;
+  GetCompressionCodec(compression, &codec);
+
+  // Allocate the compression buffer.
+  size_t max_compressed = codec->MaxCompressedLength(kSliceCount * kInputSize);
+  gscoped_array<uint8_t> cbuffer(new uint8_t[max_compressed]);
+
+  // Execute Compress.
+  size_t compressed;
+  {
+    uint64_t total_len = 0;
+    uint64_t compressed_len = 0;
+    Stopwatch sw;
+    sw.start();
+    while (sw.elapsed().wall_seconds() < 3) {
+      codec->Compress(islices, cbuffer.get(), &compressed);
+      total_len += kSliceCount * kInputSize;
+      compressed_len += compressed;
+    }
+    sw.stop();
+    double mbps = (total_len >> 20) / sw.elapsed().user_cpu_seconds();
+    LOG(INFO) << CompressionType_Name(compression) << " compress throughput: "
+              << mbps << " MB/sec, ratio: " << static_cast<double>(compressed_len) / total_len;
+  }
+
+  // Execute Uncompress.
+  {
+    uint8_t ubuffer[kSliceCount * kInputSize];
+    uint64_t total_len = 0;
+    Stopwatch sw;
+    sw.start();
+    while (sw.elapsed().wall_seconds() < 3) {
+      codec->Uncompress(Slice(cbuffer.get(), compressed), ubuffer, kSliceCount * kInputSize);
+      total_len += kSliceCount * kInputSize;
+    }
+    sw.stop();
+    double mbps = (total_len >> 20) / sw.elapsed().user_cpu_seconds();
+    LOG(INFO) << CompressionType_Name(compression) << " uncompress throughput: "
+              << mbps << " MB/sec";
+  }
+}
+
 TEST_F(TestCompression, TestNoCompressionCodec) {
   const CompressionCodec* codec;
   ASSERT_OK(GetCompressionCodec(NO_COMPRESSION, &codec));
@@ -76,15 +146,16 @@ TEST_F(TestCompression, TestNoCompressionCodec) {
 }
 
 TEST_F(TestCompression, TestSnappyCompressionCodec) {
-  TestCompressionCodec(SNAPPY);
-}
-
-TEST_F(TestCompression, TestLz4CompressionCodec) {
-  TestCompressionCodec(LZ4);
+  for (auto type : { SNAPPY, LZ4, ZLIB }) {
+    NO_FATALS(TestCompressionCodec(type));
+  }
 }
 
-TEST_F(TestCompression, TestZlibCompressionCodec) {
-  TestCompressionCodec(ZLIB);
+TEST_F(TestCompression, TestSimpleBenchmark) {
+  Random r(SeedRandom());
+  for (auto type : { SNAPPY, LZ4, ZLIB }) {
+    NO_FATALS(Benchmark(r, type));
+  }
 }
 
 } // namespace kudu


Mime
View raw message