kudu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From danburk...@apache.org
Subject kudu git commit: KUDU-2055 [part 2]: Add util to construct sorted disjoint interval list
Date Thu, 28 Sep 2017 23:49:53 GMT
Repository: kudu
Updated Branches:
  refs/heads/master 54aef924a -> aeb1d5389


KUDU-2055 [part 2]: Add util to construct sorted disjoint interval list

This patch adds a utility to construct a sorted disjoint interval list
in-place given a list of intervals. The operation to construct such one
is O(nlg n + n) where 'n' is the number of intervals. This util can be
used when log block manager coalesces block deletions belonging to the
same container.

For example, given the input interval list:
   [------2-------)         [-----1-----)
       [--3--)    [---5--)    [----4----)

The output sorted disjoint interval list is:
   [----------1----------)  [-----2-----)

It also adds unit test to verify given overlapped, duplicate, invalid
intervals, the implementation works as expected.

Change-Id: I61a813c047be4882f246eaf404598e7e18fcac87
Reviewed-on: http://gerrit.cloudera.org:8080/8041
Tested-by: Kudu Jenkins
Reviewed-by: Dan Burkert <danburkert@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/aeb1d538
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/aeb1d538
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/aeb1d538

Branch: refs/heads/master
Commit: aeb1d53892fee01b93cff304cc91a95268eb7a74
Parents: 54aef92
Author: hahao <hao.hao@cloudera.com>
Authored: Thu Sep 21 14:12:16 2017 -0700
Committer: Dan Burkert <danburkert@apache.org>
Committed: Thu Sep 28 23:49:32 2017 +0000

----------------------------------------------------------------------
 src/kudu/util/CMakeLists.txt                    |  1 +
 .../util/sorted_disjoint_interval_list-test.cc  | 98 ++++++++++++++++++++
 src/kudu/util/sorted_disjoint_interval_list.h   | 95 +++++++++++++++++++
 3 files changed, 194 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/aeb1d538/src/kudu/util/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/kudu/util/CMakeLists.txt b/src/kudu/util/CMakeLists.txt
index 8f58a26..e862aea 100644
--- a/src/kudu/util/CMakeLists.txt
+++ b/src/kudu/util/CMakeLists.txt
@@ -381,6 +381,7 @@ ADD_KUDU_TEST(rwc_lock-test)
 ADD_KUDU_TEST(safe_math-test)
 ADD_KUDU_TEST(scoped_cleanup-test)
 ADD_KUDU_TEST(slice-test)
+ADD_KUDU_TEST(sorted_disjoint_interval_list-test)
 ADD_KUDU_TEST(spinlock_profiling-test)
 ADD_KUDU_TEST(stack_watchdog-test)
 ADD_KUDU_TEST(status-test)

http://git-wip-us.apache.org/repos/asf/kudu/blob/aeb1d538/src/kudu/util/sorted_disjoint_interval_list-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/util/sorted_disjoint_interval_list-test.cc b/src/kudu/util/sorted_disjoint_interval_list-test.cc
new file mode 100644
index 0000000..8e0fe70
--- /dev/null
+++ b/src/kudu/util/sorted_disjoint_interval_list-test.cc
@@ -0,0 +1,98 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <utility>
+#include <vector>
+
+#include <gtest/gtest.h>
+
+#include "kudu/util/sorted_disjoint_interval_list.h"
+#include "kudu/util/status.h"
+#include "kudu/util/test_macros.h"
+#include "kudu/util/test_util.h"
+
+using std::vector;
+
+namespace kudu {
+
+class TestSortedDisjointIntervalList : public KuduTest {
+};
+
+typedef int PointType;
+typedef std::pair<PointType, PointType> ClosedInterval;
+
+TEST_F(TestSortedDisjointIntervalList, TestBasic) {
+  // Coalesce an empty interval list.
+  vector<ClosedInterval> intervals = {};
+  ASSERT_OK(CoalesceIntervals<PointType>(&intervals));
+  vector<ClosedInterval> expected = {};
+  ASSERT_EQ(expected, intervals);
+
+  // Coalesce an interval list with length 0 interval.
+  intervals = {{26, 26}};
+  ASSERT_OK(CoalesceIntervals<PointType>(&intervals));
+  expected = {{26, 26}};
+  ASSERT_EQ(expected, intervals);
+
+  // Coalesce an interval list with a single interval.
+  intervals = {{33, 69}};
+  ASSERT_OK(CoalesceIntervals<PointType>(&intervals));
+  expected = {{33, 69}};
+  ASSERT_EQ(expected, intervals);
+
+  // Coalesce an interval list with adjacent ranges.
+  intervals = {{4, 7}, {3, 4}, {1, 2}, {-23, 1}};
+  ASSERT_OK(CoalesceIntervals<PointType>(&intervals));
+  expected = {{-23, 2}, {3, 7}};
+  ASSERT_EQ(expected, intervals);
+}
+
+TEST_F(TestSortedDisjointIntervalList, TestOverlappedIntervals) {
+  vector<ClosedInterval> intervals = {{4, 7}, {3, 9}};
+  ASSERT_OK(CoalesceIntervals<PointType>(&intervals));
+  vector<ClosedInterval> expected = {{3, 9}};
+  ASSERT_EQ(expected, intervals);
+
+  intervals = {{4, 7}, {3, 9}, {-23, 1},
+               {4, 350}, {369, 400}};
+  ASSERT_OK(CoalesceIntervals<PointType>(&intervals));
+  expected = {{-23, 1}, {3, 350}, {369, 400}};
+  ASSERT_EQ(expected, intervals);
+}
+
+TEST_F(TestSortedDisjointIntervalList, TestDuplicateIntervals) {
+  vector<ClosedInterval> intervals = {{1, 2}, {4, 7},
+                                      {1, 2}, {1, 2}};
+  ASSERT_OK(CoalesceIntervals<PointType>(&intervals));
+  const vector<ClosedInterval> expected = {{1, 2}, {4, 7}};
+  ASSERT_EQ(expected, intervals);
+}
+
+TEST_F(TestSortedDisjointIntervalList, TestInvalidIntervals) {
+  vector<ClosedInterval> intervals = {{1, 2}, {10, 2},
+                                      {4, 7}, {40, 7}};
+  ASSERT_TRUE(CoalesceIntervals<PointType>(&intervals).IsInvalidArgument());
+}
+
+TEST_F(TestSortedDisjointIntervalList, TestSingleElementIntervals) {
+  vector<ClosedInterval> intervals = {{0, 0}, {0, 1}, {1, 2}};
+  ASSERT_OK(CoalesceIntervals<PointType>(&intervals));
+  const vector<ClosedInterval> expected = {{0, 2}};
+  ASSERT_EQ(expected, intervals);
+}
+
+} // namespace kudu

http://git-wip-us.apache.org/repos/asf/kudu/blob/aeb1d538/src/kudu/util/sorted_disjoint_interval_list.h
----------------------------------------------------------------------
diff --git a/src/kudu/util/sorted_disjoint_interval_list.h b/src/kudu/util/sorted_disjoint_interval_list.h
new file mode 100644
index 0000000..d3180a9
--- /dev/null
+++ b/src/kudu/util/sorted_disjoint_interval_list.h
@@ -0,0 +1,95 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <algorithm>
+#include <cstdint>
+#include <utility>
+#include <vector>
+
+#include <glog/logging.h>
+
+#include "kudu/gutil/strings/substitute.h"
+#include "kudu/util/status.h"
+
+namespace kudu {
+
+// Constructs a sorted disjoint interval list in-place given a list of intervals.
+// The result is written back to the given 'intervals'.
+//
+// Returns an error if the list contains any invalid intervals.
+//
+// Sorted disjoint interval list is a data structure holding a group of sorted
+// non-overlapping ranges. The operation to construct such one is O(nlg n + n)
+// where 'n' is the number of intervals.
+//
+// For example, given the input interval list:
+//
+//   [------2-------)         [-----1-----)
+//       [--3--)    [---5--)    [----4----)
+//
+// The output sorted disjoint interval list:
+//
+//   [----------1----------)  [-----2-----)
+//
+//
+// This method assumes that all intervals are "half-open" intervals -- the
+// intervals are inclusive of their start point and exclusive of end point,
+// e.g., [3, 6). Note that interval with the same start and end point is
+// considered to be valid in this implementation.
+// It also assumes 'PointType' has a proper defined comparator.
+template<typename PointType>
+Status CoalesceIntervals(std::vector<std::pair<PointType, PointType>>* intervals)
{
+  if (intervals->empty()) return Status::OK();
+
+  // Sort the intervals to prepare for coalescing overlapped ranges.
+  for (const auto& interval : *intervals) {
+    if (interval.first > interval.second) {
+      return Status::InvalidArgument(strings::Substitute("invalid interval: [$0, $1)",
+                                                         interval.first,
+                                                         interval.second));
+    }
+  }
+  std::sort(intervals->begin(), intervals->end());
+
+  // Traverse the intervals to coalesce overlapped intervals. During the process,
+  // uses 'head', 'tail' to track the start and end point of the current disjoint
+  // interval.
+  auto head = intervals->begin();
+  auto tail = head;
+  while (++tail != intervals->end()) {
+    // If interval 'head' and 'tail' overlap with each other, coalesce them and move
+    // to next. Otherwise, the two intervals are disjoint.
+    if (head->second >= tail->first) {
+      if (tail->second > head->second) head->second = std::move(tail->second);
+    } else {
+      // The two intervals are disjoint. If the 'head' previously already coalesced
+      // some intervals, 'head' and 'tail' will not be adjacent. If so, move 'tail'
+      // to the next 'head' to make sure we do not include any of the previously-coalesced
+      // intervals.
+      ++head;
+      if (head != tail) *head = std::move(*tail);
+    }
+  }
+
+  // Truncate the rest useless elements, if any.
+  intervals->erase(++head, tail);
+  return Status::OK();
+}
+
+} // namespace kudu


Mime
View raw message