This is an automated email from the ASF dual-hosted git repository.
elserj pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/master by this push:
new 2bae04f HBASE-22144 Correct MultiRowRangeFilter to work with reverse scans
2bae04f is described below
commit 2bae04fb2be7db30fdf4b974fa95fff137245eef
Author: Josh Elser <elserj@apache.org>
AuthorDate: Mon Apr 1 18:21:12 2019 -0400
HBASE-22144 Correct MultiRowRangeFilter to work with reverse scans
Signed-off-by: Toshihiro Suzuki <brfrn169@gmail.com>
---
.../hadoop/hbase/filter/MultiRowRangeFilter.java | 346 ++++++++++++++++-----
.../hbase/filter/TestMultiRowRangeFilter.java | 136 +++++++-
2 files changed, 408 insertions(+), 74 deletions(-)
diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/MultiRowRangeFilter.java
b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/MultiRowRangeFilter.java
index 7692be0..32e22d8 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/MultiRowRangeFilter.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/MultiRowRangeFilter.java
@@ -51,21 +51,29 @@ import org.apache.hadoop.hbase.util.Bytes;
@InterfaceAudience.Public
public class MultiRowRangeFilter extends FilterBase {
- private List<RowRange> rangeList;
-
private static final int ROW_BEFORE_FIRST_RANGE = -1;
- private boolean EXCLUSIVE = false;
+
+ private final List<RowRange> rangeList;
+ private final RangeIteration ranges;
+
private boolean done = false;
- private boolean initialized = false;
private int index;
- private RowRange range;
+ private BasicRowRange range;
private ReturnCode currentReturnCode;
/**
* @param list A list of <code>RowRange</code>
*/
public MultiRowRangeFilter(List<RowRange> list) {
- this.rangeList = sortAndMerge(list);
+ // We don't use rangeList anywhere else, but keeping it lets us pay a little
+ // memory to avoid touching the serialization logic.
+ this.rangeList = Collections.unmodifiableList(sortAndMerge(list));
+ this.ranges = new RangeIteration(rangeList);
+ }
+
+ public List<RowRange> getRowRanges() {
+ // Used by hbase-rest
+ return this.rangeList;
}
@Override
@@ -73,13 +81,17 @@ public class MultiRowRangeFilter extends FilterBase {
return done;
}
- public List<RowRange> getRowRanges() {
- return this.rangeList;
- }
-
@Override
public boolean filterRowKey(Cell firstRowCell) {
if (filterAllRemaining()) return true;
+
+ // N.b. We can only do this after we're iterating over records. If we try to do
+ // it before, the Scan (and this filter) may not yet be fully initialized. This is a
+ // wart on Filter and something that'd be nice to clean up (like CP's in HBase2.0)
+ if (!ranges.isInitialized()) {
+ ranges.initialize(isReversed());
+ }
+
// If it is the first time of running, calculate the current range index for
// the row key. If index is out of bound which happens when the start row
// user sets is after the largest stop row of the ranges, stop the scan.
@@ -87,32 +99,31 @@ public class MultiRowRangeFilter extends FilterBase {
byte[] rowArr = firstRowCell.getRowArray();
int length = firstRowCell.getRowLength();
int offset = firstRowCell.getRowOffset();
- if (!initialized
- || !range.contains(rowArr, offset, length)) {
+ if (!ranges.hasFoundFirstRange() || !range.contains(rowArr, offset, length)) {
byte[] rowkey = CellUtil.cloneRow(firstRowCell);
- index = getNextRangeIndex(rowkey);
- if (index >= rangeList.size()) {
+ index = ranges.getNextRangeIndex(rowkey);
+ if (ranges.isIterationComplete(index)) {
done = true;
currentReturnCode = ReturnCode.NEXT_ROW;
return false;
}
if(index != ROW_BEFORE_FIRST_RANGE) {
- range = rangeList.get(index);
+ range = ranges.get(index);
} else {
- range = rangeList.get(0);
+ range = ranges.get(0);
}
- if (EXCLUSIVE) {
- EXCLUSIVE = false;
+ if (ranges.isExclusive()) {
+ ranges.resetExclusive();
currentReturnCode = ReturnCode.NEXT_ROW;
return false;
}
- if (!initialized) {
+ if (!ranges.hasFoundFirstRange()) {
if(index != ROW_BEFORE_FIRST_RANGE) {
currentReturnCode = ReturnCode.INCLUDE;
} else {
currentReturnCode = ReturnCode.SEEK_NEXT_USING_HINT;
}
- initialized = true;
+ ranges.setFoundFirstRange();
} else {
if (range.contains(rowArr, offset, length)) {
currentReturnCode = ReturnCode.INCLUDE;
@@ -140,8 +151,9 @@ public class MultiRowRangeFilter extends FilterBase {
@Override
public Cell getNextCellHint(Cell currentKV) {
// skip to the next range's start row
- return PrivateCellUtil.createFirstOnRow(range.startRow, 0,
- (short) range.startRow.length);
+ // #getComparisonData lets us avoid the `if (reversed)` branch
+ byte[] comparisonData = range.getComparisonData();
+ return PrivateCellUtil.createFirstOnRow(comparisonData, 0, (short) comparisonData.length);
}
/**
@@ -220,37 +232,6 @@ public class MultiRowRangeFilter extends FilterBase {
}
/**
- * calculate the position where the row key in the ranges list.
- *
- * @param rowKey the row key to calculate
- * @return index the position of the row key
- */
- private int getNextRangeIndex(byte[] rowKey) {
- RowRange temp = new RowRange(rowKey, true, null, true);
- int index = Collections.binarySearch(rangeList, temp);
- if (index < 0) {
- int insertionPosition = -index - 1;
- // check if the row key in the range before the insertion position
- if (insertionPosition != 0 && rangeList.get(insertionPosition - 1).contains(rowKey))
{
- return insertionPosition - 1;
- }
- // check if the row key is before the first range
- if (insertionPosition == 0 && !rangeList.get(insertionPosition).contains(rowKey))
{
- return ROW_BEFORE_FIRST_RANGE;
- }
- if (!initialized) {
- initialized = true;
- }
- return insertionPosition;
- }
- // the row key equals one of the start keys, and the the range exclude the start key
- if(rangeList.get(index).startRowInclusive == false) {
- EXCLUSIVE = true;
- }
- return index;
- }
-
- /**
* sort the ranges and if the ranges with overlap, then merge them.
*
* @param ranges the list of ranges to sort and merge.
@@ -420,21 +401,20 @@ public class MultiRowRangeFilter extends FilterBase {
throw new IllegalArgumentException(sb.toString());
}
- @InterfaceAudience.Public
- public static class RowRange implements Comparable<RowRange> {
- private byte[] startRow;
- private boolean startRowInclusive = true;
- private byte[] stopRow;
- private boolean stopRowInclusive = false;
+ private static abstract class BasicRowRange implements Comparable<BasicRowRange>
{
+ protected byte[] startRow;
+ protected boolean startRowInclusive = true;
+ protected byte[] stopRow;
+ protected boolean stopRowInclusive = false;
- public RowRange() {
+ public BasicRowRange() {
}
/**
* If the startRow is empty or null, set it to HConstants.EMPTY_BYTE_ARRAY, means begin
at the
* start row of the table. If the stopRow is empty or null, set it to
* HConstants.EMPTY_BYTE_ARRAY, means end of the last row of table.
*/
- public RowRange(String startRow, boolean startRowInclusive, String stopRow,
+ public BasicRowRange(String startRow, boolean startRowInclusive, String stopRow,
boolean stopRowInclusive) {
this((startRow == null || startRow.isEmpty()) ? HConstants.EMPTY_BYTE_ARRAY :
Bytes.toBytes(startRow), startRowInclusive,
@@ -442,7 +422,7 @@ public class MultiRowRangeFilter extends FilterBase {
Bytes.toBytes(stopRow), stopRowInclusive);
}
- public RowRange(byte[] startRow, boolean startRowInclusive, byte[] stopRow,
+ public BasicRowRange(byte[] startRow, boolean startRowInclusive, byte[] stopRow,
boolean stopRowInclusive) {
this.startRow = (startRow == null) ? HConstants.EMPTY_BYTE_ARRAY : startRow;
this.startRowInclusive = startRowInclusive;
@@ -500,13 +480,6 @@ public class MultiRowRangeFilter extends FilterBase {
}
}
- @Override
- @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="EQ_COMPARETO_USE_OBJECT_EQUALS",
- justification="This compareTo is not of this Object, but of referenced RowRange")
- public int compareTo(RowRange other) {
- return Bytes.compareTo(this.startRow, other.startRow);
- }
-
public boolean isValid() {
return Bytes.equals(startRow, HConstants.EMPTY_BYTE_ARRAY)
|| Bytes.equals(stopRow, HConstants.EMPTY_BYTE_ARRAY)
@@ -516,13 +489,13 @@ public class MultiRowRangeFilter extends FilterBase {
@Override
public boolean equals(Object obj){
- if (!(obj instanceof RowRange)) {
+ if (!(obj instanceof BasicRowRange)) {
return false;
}
if (this == obj) {
return true;
}
- RowRange rr = (RowRange) obj;
+ BasicRowRange rr = (BasicRowRange) obj;
return Bytes.equals(this.stopRow, rr.getStopRow()) &&
Bytes.equals(this.startRow, this.getStartRow()) &&
this.startRowInclusive == rr.isStartRowInclusive() &&
@@ -536,6 +509,235 @@ public class MultiRowRangeFilter extends FilterBase {
this.startRowInclusive,
this.stopRowInclusive);
}
+
+ /**
+ * Returns the data to be used to compare {@code this} to another object.
+ */
+ public abstract byte[] getComparisonData();
+
+ /**
+ * Returns whether the bounding row used for binary-search is inclusive or not.
+ *
+ * For forward scans, we would check the starRow, but we would check the stopRow for
+ * the reverse scan case.
+ */
+ public abstract boolean isSearchRowInclusive();
+
+ @Override
+ public int compareTo(BasicRowRange other) {
+ byte[] left;
+ byte[] right;
+ if (isAscendingOrder()) {
+ left = this.getComparisonData();
+ right = other.getComparisonData();
+ } else {
+ left = other.getComparisonData();
+ right = this.getComparisonData();
+ }
+ return Bytes.compareTo(left, right);
+ }
+
+ public abstract boolean isAscendingOrder();
+ }
+
+ /**
+ * Internal RowRange that reverses the sort-order to handle reverse scans.
+ */
+ @InterfaceAudience.Private
+ private static class ReversedRowRange extends BasicRowRange {
+ public ReversedRowRange(byte[] startRow, boolean startRowInclusive, byte[] stopRow,
+ boolean stopRowInclusive) {
+ super(startRow, startRowInclusive, stopRow, stopRowInclusive);
+ }
+
+ @Override
+ public byte[] getComparisonData() {
+ return this.stopRow;
+ }
+
+ @Override
+ public boolean isSearchRowInclusive() {
+ return this.stopRowInclusive;
+ }
+
+ @Override
+ public boolean isAscendingOrder() {
+ return false;
+ }
+ }
+
+ @InterfaceAudience.Public
+ public static class RowRange extends BasicRowRange {
+ public RowRange() {
+ }
+ /**
+ * If the startRow is empty or null, set it to HConstants.EMPTY_BYTE_ARRAY, means begin
at the
+ * start row of the table. If the stopRow is empty or null, set it to
+ * HConstants.EMPTY_BYTE_ARRAY, means end of the last row of table.
+ */
+ public RowRange(String startRow, boolean startRowInclusive, String stopRow,
+ boolean stopRowInclusive) {
+ super(startRow, startRowInclusive, stopRow, stopRowInclusive);
+ }
+
+ public RowRange(byte[] startRow, boolean startRowInclusive, byte[] stopRow,
+ boolean stopRowInclusive) {
+ super(startRow, startRowInclusive, stopRow, stopRowInclusive);
+ }
+
+ @Override
+ public byte[] getComparisonData() {
+ return startRow;
+ }
+
+ @Override
+ public boolean isSearchRowInclusive() {
+ return startRowInclusive;
+ }
+
+ @Override
+ public boolean isAscendingOrder() {
+ return true;
+ }
+ }
+
+ /**
+ * Abstraction over the ranges of rows to return from this filter, regardless of forward
or
+ * reverse scans being used. This Filter can use this class, agnostic of iteration direction,
+ * as the same algorithm can be applied in both cases.
+ */
+ @InterfaceAudience.Private
+ private static class RangeIteration {
+ private boolean exclusive = false;
+ private boolean initialized = false;
+ private boolean foundFirstRange = false;
+ private boolean reversed = false;
+ private final List<RowRange> sortedAndMergedRanges;
+ private List<? extends BasicRowRange> ranges;
+
+ public RangeIteration(List<RowRange> sortedAndMergedRanges) {
+ this.sortedAndMergedRanges = sortedAndMergedRanges;
+ }
+
+ void initialize(boolean reversed) {
+ // Avoid double initialization
+ assert !this.initialized;
+ this.reversed = reversed;
+ if (reversed) {
+ // If we are doing a reverse scan, we can reverse the ranges (both the elements in
+ // the list as well as their start/stop key), and use the same algorithm.
+ this.ranges = flipAndReverseRanges(sortedAndMergedRanges);
+ } else {
+ this.ranges = sortedAndMergedRanges;
+ }
+ this.initialized = true;
+ }
+
+ /**
+ * Rebuilds the sorted ranges (by startKey) into an equivalent sorted list of ranges,
only by
+ * stopKey instead. Descending order and the ReversedRowRange compareTo implementation
make
+ * sure that we can use Collections.binarySearch().
+ */
+ static List<ReversedRowRange> flipAndReverseRanges(List<RowRange> ranges)
{
+ List<ReversedRowRange> flippedRanges = new ArrayList<>(ranges.size());
+ for (int i = ranges.size() - 1; i >= 0; i--) {
+ RowRange origRange = ranges.get(i);
+ ReversedRowRange newRowRange = new ReversedRowRange(
+ origRange.startRow, origRange.startRowInclusive, origRange.stopRow,
+ origRange.isStopRowInclusive());
+ flippedRanges.add(newRowRange);
+ }
+ return flippedRanges;
+ }
+
+ /**
+ * Calculates the position where the given rowkey fits in the ranges list.
+ *
+ * @param rowKey the row key to calculate
+ * @return index the position of the row key
+ */
+ public int getNextRangeIndex(byte[] rowKey) {
+ BasicRowRange temp;
+ if (reversed) {
+ temp = new ReversedRowRange(null, true, rowKey, true);
+ } else {
+ temp = new RowRange(rowKey, true, null, true);
+ }
+ // Because we make sure that `ranges` has the correct natural ordering (given it containing
+ // RowRange or ReverseRowRange objects). This keeps us from having to have two different
+ // implementations below.
+ final int index = Collections.binarySearch(ranges, temp);
+ if (index < 0) {
+ int insertionPosition = -index - 1;
+ // check if the row key in the range before the insertion position
+ if (insertionPosition != 0 && ranges.get(insertionPosition - 1).contains(rowKey))
{
+ return insertionPosition - 1;
+ }
+ // check if the row key is before the first range
+ if (insertionPosition == 0 && !ranges.get(insertionPosition).contains(rowKey))
{
+ return ROW_BEFORE_FIRST_RANGE;
+ }
+ if (!foundFirstRange) {
+ foundFirstRange = true;
+ }
+ return insertionPosition;
+ }
+ // the row key equals one of the start keys, and the the range exclude the start key
+ if(ranges.get(index).isSearchRowInclusive() == false) {
+ exclusive = true;
+ }
+ return index;
+ }
+
+ /**
+ * Sets {@link #foundFirstRange} to {@code true}, indicating that we found a matching
row range.
+ */
+ public void setFoundFirstRange() {
+ this.foundFirstRange = true;
+ }
+
+ /**
+ * Gets the RowRange at the given offset.
+ */
+ @SuppressWarnings("unchecked")
+ public <T extends BasicRowRange> T get(int i) {
+ return (T) ranges.get(i);
+ }
+
+ /**
+ * Returns true if the first matching row range was found.
+ */
+ public boolean hasFoundFirstRange() {
+ return foundFirstRange;
+ }
+
+ /**
+ * Returns true if the current range's key is exclusive
+ */
+ public boolean isExclusive() {
+ return exclusive;
+ }
+
+ /**
+ * Resets the exclusive flag.
+ */
+ public void resetExclusive() {
+ exclusive = false;
+ }
+
+ /**
+ * Returns true if this class has been initialized by calling {@link #initialize(boolean)}.
+ */
+ public boolean isInitialized() {
+ return initialized;
+ }
+
+ /**
+ * Returns true if we exhausted searching all row ranges.
+ */
+ public boolean isIterationComplete(int index) {
+ return index >= ranges.size();
+ }
}
@Override
@@ -545,6 +747,6 @@ public class MultiRowRangeFilter extends FilterBase {
@Override
public int hashCode() {
- return Objects.hash(this.rangeList);
+ return Objects.hash(this.ranges);
}
}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestMultiRowRangeFilter.java
b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestMultiRowRangeFilter.java
index 695b9f5c..4cfc02c 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestMultiRowRangeFilter.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestMultiRowRangeFilter.java
@@ -507,6 +507,134 @@ public class TestMultiRowRangeFilter {
ht.close();
}
+ @Test
+ public void testReverseMultiRowRangeFilterWithinTable() throws IOException {
+ tableName = TableName.valueOf(name.getMethodName());
+ Table ht = TEST_UTIL.createTable(tableName, family);
+ generateRows(numRows, ht, family, qf, value);
+
+ Scan scan = new Scan();
+ scan.setReversed(true);
+ List<RowRange> ranges = Arrays.asList(
+ new RowRange(Bytes.toBytes(20), true, Bytes.toBytes(30), true),
+ new RowRange(Bytes.toBytes(50), true, Bytes.toBytes(60), true)
+ );
+ MultiRowRangeFilter filter = new MultiRowRangeFilter(ranges);
+ scan.setFilter(filter);
+
+ List<Integer> expectedResults = new ArrayList<>();
+ for (int i = 60; i >= 50; i--) {
+ expectedResults.add(i);
+ }
+ for (int i = 30; i >= 20; i--) {
+ expectedResults.add(i);
+ }
+
+ List<Cell> results = getResults(ht, scan);
+ List<Integer> actualResults = new ArrayList<>();
+ StringBuilder sb = new StringBuilder();
+ for (Cell result : results) {
+ int observedValue = Bytes.toInt(
+ result.getRowArray(), result.getRowOffset(), result.getRowLength());
+ actualResults.add(observedValue);
+ if (sb.length() > 0) {
+ sb.append(", ");
+ }
+ sb.append(observedValue);
+ }
+ assertEquals("Saw results: " + sb.toString(), 22, results.size());
+ }
+
+ @Test
+ public void testReverseMultiRowRangeFilterIncludingMaxRow() throws IOException {
+ tableName = TableName.valueOf(name.getMethodName());
+ Table ht = TEST_UTIL.createTable(tableName, family);
+ for (String rowkey : Arrays.asList("a", "b", "c", "d", "e", "f", "g", "h")) {
+ byte[] row = Bytes.toBytes(rowkey);
+ Put p = new Put(row);
+ p.addColumn(family, qf, value);
+ ht.put(p);
+ }
+ TEST_UTIL.flush();
+
+ Scan scan = new Scan();
+ scan.setReversed(true);
+ List<RowRange> ranges = Arrays.asList(
+ new RowRange(Bytes.toBytes("b"), true, Bytes.toBytes("c"), true),
+ new RowRange(Bytes.toBytes("f"), true, Bytes.toBytes("h"), true)
+ );
+ MultiRowRangeFilter filter = new MultiRowRangeFilter(ranges);
+ scan.setFilter(filter);
+
+ List<String> expected = Arrays.asList("h", "g", "f", "c", "b");
+ List<String> actual = new ArrayList<>();
+ for (Cell cell : getResults(ht, scan)) {
+ actual.add(Bytes.toString(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength()));
+ }
+
+ assertEquals(expected, actual);
+ }
+
+ @Test
+ public void testReverseMultiRowRangeFilterIncludingMinRow() throws IOException {
+ tableName = TableName.valueOf(name.getMethodName());
+ Table ht = TEST_UTIL.createTable(tableName, family);
+ for (String rowkey : Arrays.asList("a", "b", "c", "d", "e", "f", "g", "h")) {
+ byte[] row = Bytes.toBytes(rowkey);
+ Put p = new Put(row);
+ p.addColumn(family, qf, value);
+ ht.put(p);
+ }
+ TEST_UTIL.flush();
+
+ Scan scan = new Scan();
+ scan.setReversed(true);
+ List<RowRange> ranges = Arrays.asList(
+ new RowRange(Bytes.toBytes("a"), true, Bytes.toBytes("c"), true),
+ new RowRange(Bytes.toBytes("f"), true, Bytes.toBytes("g"), true)
+ );
+ MultiRowRangeFilter filter = new MultiRowRangeFilter(ranges);
+ scan.setFilter(filter);
+
+ List<String> expected = Arrays.asList("g", "f", "c", "b", "a");
+ List<String> actual = new ArrayList<>();
+ for (Cell cell : getResults(ht, scan)) {
+ actual.add(Bytes.toString(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength()));
+ }
+
+ assertEquals(expected, actual);
+ }
+
+ @Test
+ public void testReverseMultiRowRangeFilterIncludingMinAndMaxRow() throws IOException {
+ tableName = TableName.valueOf(name.getMethodName());
+ Table ht = TEST_UTIL.createTable(tableName, family);
+ for (String rowkey : Arrays.asList("a", "b", "c", "d", "e", "f", "g", "h")) {
+ byte[] row = Bytes.toBytes(rowkey);
+ Put p = new Put(row);
+ p.addColumn(family, qf, value);
+ ht.put(p);
+ }
+ TEST_UTIL.flush();
+
+ Scan scan = new Scan();
+ scan.setReversed(true);
+ List<RowRange> ranges = Arrays.asList(
+ new RowRange(Bytes.toBytes("a"), true, Bytes.toBytes("c"), true),
+ new RowRange(Bytes.toBytes("f"), true, Bytes.toBytes("h"), true)
+ );
+ MultiRowRangeFilter filter = new MultiRowRangeFilter(ranges);
+ scan.setFilter(filter);
+
+ List<String> expected = Arrays.asList("h", "g", "f", "c", "b", "a");
+ List<String> actual = new ArrayList<>();
+ for (Cell cell : getResults(ht, scan)) {
+ actual.add(Bytes.toString(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength()));
+ }
+
+ assertEquals(expected, actual);
+ }
+
private void generateRows(int numberOfRows, Table ht, byte[] family, byte[] qf, byte[]
value)
throws IOException {
for (int i = 0; i < numberOfRows; i++) {
@@ -539,7 +667,7 @@ public class TestMultiRowRangeFilter {
return kvList;
}
- private int getResultsSize(Table ht, Scan scan) throws IOException {
+ private List<Cell> getResults(Table ht, Scan scan) throws IOException {
ResultScanner scanner = ht.getScanner(scan);
List<Cell> results = new ArrayList<>();
Result r;
@@ -549,6 +677,10 @@ public class TestMultiRowRangeFilter {
}
}
scanner.close();
- return results.size();
+ return results;
+ }
+
+ private int getResultsSize(Table ht, Scan scan) throws IOException {
+ return getResults(ht, scan).size();
}
}
|