kudu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From danburk...@apache.org
Subject [2/2] kudu git commit: Fix bug in partition key debug printing
Date Wed, 10 Aug 2016 06:06:26 GMT
Fix bug in partition key debug printing

This fixes a bug in compound-column range partition key debug printing. Also
adds a test for range partition encoding with compound columns.

Change-Id: Idac701428a9af03605cd1a156926bdbd1a5f5fac
Reviewed-on: http://gerrit.cloudera.org:8080/3879
Reviewed-by: Adar Dembo <adar@cloudera.com>
Tested-by: Kudu Jenkins


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/1dbfb755
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/1dbfb755
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/1dbfb755

Branch: refs/heads/master
Commit: 1dbfb75508040824cc261b1c9127c86727d42087
Parents: 0f899eb
Author: Dan Burkert <dan@cloudera.com>
Authored: Tue Aug 9 19:03:02 2016 -0700
Committer: Dan Burkert <dan@cloudera.com>
Committed: Wed Aug 10 06:06:12 2016 +0000

----------------------------------------------------------------------
 src/kudu/common/partition-test.cc | 106 ++++++++++++++++++++++++++-------
 src/kudu/common/partition.cc      |  51 +++++++---------
 src/kudu/common/partition.h       |  11 +---
 3 files changed, 110 insertions(+), 58 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/1dbfb755/src/kudu/common/partition-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/common/partition-test.cc b/src/kudu/common/partition-test.cc
index 11c93e7..1600e3f 100644
--- a/src/kudu/common/partition-test.cc
+++ b/src/kudu/common/partition-test.cc
@@ -122,6 +122,76 @@ void CheckCreateRangePartitions(const vector<pair<optional<string>,
optional<str
 
 } // namespace
 
+// Tests that missing values are correctly filled in with minimums when creating
+// range partition keys, and that completely missing keys are encoded as the
+// logical minimum and logical maximum for lower and upper bounds, respectively.
+TEST(PartitionTest, TestCompoundRangeKeyEncoding) {
+
+  // CREATE TABLE t (c1 STRING, c2 STRING, c3 STRING),
+  // PRIMARY KEY (c1, c2, c3)
+  // PARITITION BY RANGE (c1, c2, c3);
+  Schema schema({ ColumnSchema("c1", STRING),
+                  ColumnSchema("c2", STRING),
+                  ColumnSchema("c3", STRING) },
+                { ColumnId(0), ColumnId(1), ColumnId(2) }, 3);
+
+  PartitionSchemaPB schema_builder;
+  PartitionSchema partition_schema;
+  ASSERT_OK(PartitionSchema::FromPB(schema_builder, schema, &partition_schema));
+
+  ASSERT_EQ("range columns: [c1, c2, c3]", partition_schema.DebugString(schema));
+
+  vector<pair<KuduPartialRow, KuduPartialRow>> bounds;
+  vector<KuduPartialRow> splits;
+
+  { // [(_, _, _), (_, _, b))
+    KuduPartialRow lower(&schema);
+    KuduPartialRow upper(&schema);
+    ASSERT_OK(upper.SetStringCopy("c3", "b"));
+    bounds.emplace_back(std::move(lower), std::move(upper));
+  }
+
+  { // [(_, b, c), (d, _, f))
+    KuduPartialRow lower(&schema);
+    KuduPartialRow upper(&schema);
+    ASSERT_OK(lower.SetStringCopy("c2", "b"));
+    ASSERT_OK(lower.SetStringCopy("c3", "c"));
+    ASSERT_OK(upper.SetStringCopy("c1", "d"));
+    ASSERT_OK(upper.SetStringCopy("c3", "f"));
+    bounds.emplace_back(std::move(lower), std::move(upper));
+  }
+
+  { // [(e, _, _), (_, _, _))
+    KuduPartialRow lower(&schema);
+    KuduPartialRow upper(&schema);
+    ASSERT_OK(lower.SetStringCopy("c1", "e"));
+    bounds.emplace_back(std::move(lower), std::move(upper));
+  }
+
+  { // split: (_, _, a)
+    KuduPartialRow split(&schema);
+    ASSERT_OK(split.SetStringCopy("c3", "a"));
+    splits.emplace_back(std::move(split));
+  }
+
+  vector<Partition> partitions;
+  ASSERT_OK(partition_schema.CreatePartitions(splits, bounds, schema, &partitions));
+  ASSERT_EQ(4, partitions.size());
+
+  EXPECT_TRUE(partitions[0].hash_buckets().empty());
+
+  EXPECT_EQ("range: [(<start>), (string c1=, string c2=, string c3=a))",
+            partition_schema.PartitionDebugString(partitions[0], schema));
+  EXPECT_EQ("range: [(string c1=, string c2=, string c3=a), "
+                    "(string c1=, string c2=, string c3=b))",
+            partition_schema.PartitionDebugString(partitions[1], schema));
+  EXPECT_EQ("range: [(string c1=, string c2=b, string c3=c), "
+            "(string c1=d, string c2=, string c3=f))",
+            partition_schema.PartitionDebugString(partitions[2], schema));
+  EXPECT_EQ("range: [(string c1=e, string c2=, string c3=), (<end>))",
+            partition_schema.PartitionDebugString(partitions[3], schema));
+}
+
 TEST(PartitionTest, TestPartitionKeyEncoding) {
   // CREATE TABLE t (a INT32, b VARCHAR, c VARCHAR, PRIMARY KEY (a, b, c))
   // PARITITION BY [HASH BUCKET (a, b), HASH BUCKET (c), RANGE (a, b, c)];
@@ -440,8 +510,7 @@ TEST(PartitionTest, TestCreatePartitions) {
   EXPECT_EQ(string("a1\0\0b1\0\0c1", 10), partitions[0].range_key_end());
   EXPECT_EQ(string("", 0), partitions[0].partition_key_start());
   EXPECT_EQ(string("\0\0\0\0" "\0\0\0\0" "a1\0\0b1\0\0c1", 18), partitions[0].partition_key_end());
-  EXPECT_EQ("hash buckets: (0, 0), "
-            "range: [(<start>), (string a=a1, string b=b1, string c=c1))",
+  EXPECT_EQ("hash buckets: (0, 0), range: [(<start>), (string a=a1, string b=b1, string
c=c1))",
             partition_schema.PartitionDebugString(partitions[0], schema));
 
   EXPECT_EQ(0, partitions[1].hash_buckets()[0]);
@@ -451,8 +520,8 @@ TEST(PartitionTest, TestCreatePartitions) {
   EXPECT_EQ(string("\0\0\0\0" "\0\0\0\0" "a1\0\0b1\0\0c1", 18),
             partitions[1].partition_key_start());
   EXPECT_EQ(string("\0\0\0\0" "\0\0\0\0" "a2\0\0b2\0\0", 16), partitions[1].partition_key_end());
-  EXPECT_EQ("hash buckets: (0, 0), "
-            "range: [(string a=a1, string b=b1, string c=c1), (string a=a2, string b=b2,
<end>))",
+  EXPECT_EQ("hash buckets: (0, 0), range: [(string a=a1, string b=b1, string c=c1), "
+                                          "(string a=a2, string b=b2, string c=))",
             partition_schema.PartitionDebugString(partitions[1], schema));
 
   EXPECT_EQ(0, partitions[2].hash_buckets()[0]);
@@ -462,7 +531,7 @@ TEST(PartitionTest, TestCreatePartitions) {
   EXPECT_EQ(string("\0\0\0\0" "\0\0\0\0" "a2\0\0b2\0\0", 16), partitions[2].partition_key_start());
   EXPECT_EQ(string("\0\0\0\0" "\0\0\0\1", 8), partitions[2].partition_key_end());
   EXPECT_EQ("hash buckets: (0, 0), "
-            "range: [(string a=a2, string b=b2, <start>), (<end>))",
+            "range: [(string a=a2, string b=b2, string c=), (<end>))",
             partition_schema.PartitionDebugString(partitions[2], schema));
 
   EXPECT_EQ(0, partitions[3].hash_buckets()[0]);
@@ -482,8 +551,8 @@ TEST(PartitionTest, TestCreatePartitions) {
   EXPECT_EQ(string("\0\0\0\0" "\0\0\0\1" "a1\0\0b1\0\0c1", 18),
             partitions[4].partition_key_start());
   EXPECT_EQ(string("\0\0\0\0" "\0\0\0\1" "a2\0\0b2\0\0", 16), partitions[4].partition_key_end());
-  EXPECT_EQ("hash buckets: (0, 1), "
-            "range: [(string a=a1, string b=b1, string c=c1), (string a=a2, string b=b2,
<end>))",
+  EXPECT_EQ("hash buckets: (0, 1), range: [(string a=a1, string b=b1, string c=c1), "
+                                          "(string a=a2, string b=b2, string c=))",
             partition_schema.PartitionDebugString(partitions[4], schema));
 
   EXPECT_EQ(0, partitions[5].hash_buckets()[0]);
@@ -492,8 +561,7 @@ TEST(PartitionTest, TestCreatePartitions) {
   EXPECT_EQ(string("", 0), partitions[5].range_key_end());
   EXPECT_EQ(string("\0\0\0\0" "\0\0\0\1" "a2\0\0b2\0\0", 16), partitions[5].partition_key_start());
   EXPECT_EQ(string("\0\0\0\1", 4), partitions[5].partition_key_end());
-  EXPECT_EQ("hash buckets: (0, 1), "
-            "range: [(string a=a2, string b=b2, <start>), (<end>))",
+  EXPECT_EQ("hash buckets: (0, 1), range: [(string a=a2, string b=b2, string c=), (<end>))",
             partition_schema.PartitionDebugString(partitions[5], schema));
 
   EXPECT_EQ(1, partitions[6].hash_buckets()[0]);
@@ -502,8 +570,7 @@ TEST(PartitionTest, TestCreatePartitions) {
   EXPECT_EQ(string("a1\0\0b1\0\0c1", 10), partitions[6].range_key_end());
   EXPECT_EQ(string("\0\0\0\1", 4), partitions[6].partition_key_start());
   EXPECT_EQ(string("\0\0\0\1" "\0\0\0\0" "a1\0\0b1\0\0c1", 18), partitions[6].partition_key_end());
-  EXPECT_EQ("hash buckets: (1, 0), "
-            "range: [(<start>), (string a=a1, string b=b1, string c=c1))",
+  EXPECT_EQ("hash buckets: (1, 0), range: [(<start>), (string a=a1, string b=b1, string
c=c1))",
             partition_schema.PartitionDebugString(partitions[6], schema));
 
   EXPECT_EQ(1, partitions[7].hash_buckets()[0]);
@@ -513,8 +580,8 @@ TEST(PartitionTest, TestCreatePartitions) {
   EXPECT_EQ(string("\0\0\0\1" "\0\0\0\0" "a1\0\0b1\0\0c1", 18),
             partitions[7].partition_key_start());
   EXPECT_EQ(string("\0\0\0\1" "\0\0\0\0" "a2\0\0b2\0\0", 16), partitions[7].partition_key_end());
-  EXPECT_EQ("hash buckets: (1, 0), "
-            "range: [(string a=a1, string b=b1, string c=c1), (string a=a2, string b=b2,
<end>))",
+  EXPECT_EQ("hash buckets: (1, 0), range: [(string a=a1, string b=b1, string c=c1), "
+                                          "(string a=a2, string b=b2, string c=))",
             partition_schema.PartitionDebugString(partitions[7], schema));
 
   EXPECT_EQ(1, partitions[8].hash_buckets()[0]);
@@ -523,8 +590,7 @@ TEST(PartitionTest, TestCreatePartitions) {
   EXPECT_EQ(string("", 0), partitions[8].range_key_end());
   EXPECT_EQ(string("\0\0\0\1" "\0\0\0\0" "a2\0\0b2\0\0", 16), partitions[8].partition_key_start());
   EXPECT_EQ(string("\0\0\0\1" "\0\0\0\1", 8), partitions[8].partition_key_end());
-  EXPECT_EQ("hash buckets: (1, 0), "
-            "range: [(string a=a2, string b=b2, <start>), (<end>))",
+  EXPECT_EQ("hash buckets: (1, 0), range: [(string a=a2, string b=b2, string c=), (<end>))",
             partition_schema.PartitionDebugString(partitions[8], schema));
 
   EXPECT_EQ(1, partitions[9].hash_buckets()[0]);
@@ -533,8 +599,7 @@ TEST(PartitionTest, TestCreatePartitions) {
   EXPECT_EQ(string("a1\0\0b1\0\0c1", 10), partitions[9].range_key_end());
   EXPECT_EQ(string("\0\0\0\1" "\0\0\0\1", 8), partitions[9].partition_key_start());
   EXPECT_EQ(string("\0\0\0\1" "\0\0\0\1" "a1\0\0b1\0\0c1", 18), partitions[9].partition_key_end());
-  EXPECT_EQ("hash buckets: (1, 1), "
-            "range: [(<start>), (string a=a1, string b=b1, string c=c1))",
+  EXPECT_EQ("hash buckets: (1, 1), range: [(<start>), (string a=a1, string b=b1, string
c=c1))",
             partition_schema.PartitionDebugString(partitions[9], schema));
 
   EXPECT_EQ(1, partitions[10].hash_buckets()[0]);
@@ -544,8 +609,8 @@ TEST(PartitionTest, TestCreatePartitions) {
   EXPECT_EQ(string("\0\0\0\1" "\0\0\0\1" "a1\0\0b1\0\0c1", 18),
             partitions[10].partition_key_start());
   EXPECT_EQ(string("\0\0\0\1" "\0\0\0\1" "a2\0\0b2\0\0", 16), partitions[10].partition_key_end());
-  EXPECT_EQ("hash buckets: (1, 1), "
-            "range: [(string a=a1, string b=b1, string c=c1), (string a=a2, string b=b2,
<end>))",
+  EXPECT_EQ("hash buckets: (1, 1), range: [(string a=a1, string b=b1, string c=c1), "
+                                          "(string a=a2, string b=b2, string c=))",
             partition_schema.PartitionDebugString(partitions[10], schema));
 
   EXPECT_EQ(1, partitions[11].hash_buckets()[0]);
@@ -554,8 +619,7 @@ TEST(PartitionTest, TestCreatePartitions) {
   EXPECT_EQ(string("", 0), partitions[11].range_key_end());
   EXPECT_EQ(string("\0\0\0\1" "\0\0\0\1" "a2\0\0b2\0\0", 16), partitions[11].partition_key_start());
   EXPECT_EQ(string("", 0), partitions[11].partition_key_end());
-  EXPECT_EQ("hash buckets: (1, 1), "
-            "range: [(string a=a2, string b=b2, <start>), (<end>))",
+  EXPECT_EQ("hash buckets: (1, 1), range: [(string a=a2, string b=b2, string c=), (<end>))",
             partition_schema.PartitionDebugString(partitions[11], schema));
 }
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/1dbfb755/src/kudu/common/partition.cc
----------------------------------------------------------------------
diff --git a/src/kudu/common/partition.cc b/src/kudu/common/partition.cc
index d1a7733..e89000f 100644
--- a/src/kudu/common/partition.cc
+++ b/src/kudu/common/partition.cc
@@ -279,9 +279,11 @@ Status PartitionSchema::EncodeRangeBounds(const vector<pair<KuduPartialRow,
     if (first_upper.empty() || second_lower.empty() || first_upper > second_lower) {
       return Status::InvalidArgument(
           "overlapping range bounds",
-          strings::Substitute("first upper bound: ($0), second lower bound: ($1)",
-                              RangeKeyDebugString(first_upper, schema),
-                              RangeKeyDebugString(second_lower, schema)));
+          strings::Substitute("first bound: [($0), ($1)), second bound: [($2), ($3))",
+                              RangeKeyDebugString(range_partitions->at(i).first, schema),
+                              RangeKeyDebugString(range_partitions->at(i).second, schema),
+                              RangeKeyDebugString(range_partitions->at(i + 1).first, schema),
+                              RangeKeyDebugString(range_partitions->at(i + 1).second,
schema)));
     }
   }
 
@@ -549,24 +551,32 @@ string PartitionSchema::PartitionDebugString(const Partition& partition,
 
     s.append("range: [(");
 
-    vector<string> start_components;
     Slice encoded_range_key_start = partition.range_key_start();
     Status status;
     status = DecodeRangeKey(&encoded_range_key_start, &start_row, &arena);
     if (status.ok()) {
-      AppendRangeDebugStringComponentsOrString(start_row, "<start>", &start_components);
-      s.append(JoinStrings(start_components, ", "));
+      if (IsRangePartitionKeyEmpty(start_row)) {
+        s.append("<start>");
+      } else {
+        vector<string> start_components;
+        AppendRangeDebugStringComponentsOrMin(start_row, &start_components);
+        s.append(JoinStrings(start_components, ", "));
+      }
     } else {
       s.append(Substitute("<decode-error: $0>", status.ToString()));
     }
     s.append("), (");
 
-    vector<string> end_components;
     Slice encoded_range_key_end = partition.range_key_end();
     status = DecodeRangeKey(&encoded_range_key_end, &end_row, &arena);
     if (status.ok()) {
-      AppendRangeDebugStringComponentsOrString(end_row, "<end>", &end_components);
-      s.append(JoinStrings(end_components, ", "));
+      if (IsRangePartitionKeyEmpty(end_row)) {
+        s.append("<end>");
+      } else {
+        vector<string> end_components;
+        AppendRangeDebugStringComponentsOrMin(end_row, &end_components);
+        s.append(JoinStrings(end_components, ", "));
+      }
     } else {
       s.append(Substitute("<decode-error: $0>", status.ToString()));
     }
@@ -576,29 +586,12 @@ string PartitionSchema::PartitionDebugString(const Partition& partition,
   return s;
 }
 
-void PartitionSchema::AppendRangeDebugStringComponentsOrString(const KuduPartialRow&
row,
-                                                               const StringPiece default_string,
-                                                               vector<string>* components)
const {
+bool PartitionSchema::IsRangePartitionKeyEmpty(const KuduPartialRow& row) const {
   ConstContiguousRow const_row(row.schema(), row.row_data_);
-
   for (ColumnId column_id : range_schema_.column_ids) {
-    string column;
-    int32_t column_idx = row.schema()->find_column_by_id(column_id);
-    if (column_idx == Schema::kColumnNotFound) {
-      components->push_back("<unknown-column>");
-      continue;
-    }
-    const ColumnSchema& column_schema = row.schema()->column(column_idx);
-
-    if (!row.IsColumnSet(column_idx)) {
-      components->push_back(default_string.as_string());
-      break;
-    } else {
-      column_schema.DebugCellAppend(const_row.cell(column_idx), &column);
-    }
-
-    components->push_back(column);
+    if (row.IsColumnSet(row.schema()->find_column_by_id(column_id))) return false;
   }
+  return true;
 }
 
 void PartitionSchema::AppendRangeDebugStringComponentsOrMin(const KuduPartialRow& row,

http://git-wip-us.apache.org/repos/asf/kudu/blob/1dbfb755/src/kudu/common/partition.h
----------------------------------------------------------------------
diff --git a/src/kudu/common/partition.h b/src/kudu/common/partition.h
index 1466934..347d809 100644
--- a/src/kudu/common/partition.h
+++ b/src/kudu/common/partition.h
@@ -237,14 +237,9 @@ class PartitionSchema {
   template<typename Row>
   Status EncodeKeyImpl(const Row& row, string* buf) const;
 
-  // Appends the stringified range partition components of a partial row to a
-  // vector.
-  //
-  // If any columns of the range partition do not exist in the partial row,
-  // processing stops and the provided default string piece is appended to the vector.
-  void AppendRangeDebugStringComponentsOrString(const KuduPartialRow& row,
-                                                StringPiece default_string,
-                                                std::vector<std::string>* components)
const;
+  // Returns true if all of the columns in the range partition key are unset in
+  // the row.
+  bool IsRangePartitionKeyEmpty(const KuduPartialRow& row) const;
 
   // Appends the stringified range partition components of a partial row to a
   // vector.


Mime
View raw message