hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From te...@apache.org
Subject hbase git commit: HBASE-19051 Add new split algorithm for num string
Date Sat, 21 Oct 2017 15:22:48 GMT
Repository: hbase
Updated Branches:
  refs/heads/branch-1 4bf71c3a1 -> ae6a95165


HBASE-19051 Add new split algorithm for num string

Signed-off-by: tedyu <yuzhihong@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/ae6a9516
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/ae6a9516
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/ae6a9516

Branch: refs/heads/branch-1
Commit: ae6a951658c2c12952c5f08d5cf3b021fd9a8d5f
Parents: 4bf71c3
Author: xiaowen147 <xiaowen147@gmail.com>
Authored: Sat Oct 21 09:46:07 2017 +0800
Committer: tedyu <yuzhihong@gmail.com>
Committed: Sat Oct 21 08:22:36 2017 -0700

----------------------------------------------------------------------
 .../hadoop/hbase/util/RegionSplitter.java       | 80 +++++++++++++++-----
 .../hadoop/hbase/util/TestRegionSplitter.java   | 58 +++++++++++++-
 2 files changed, 118 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/ae6a9516/hbase-server/src/main/java/org/apache/hadoop/hbase/util/RegionSplitter.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/RegionSplitter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/RegionSplitter.java
index a5dcaa5..d0f01f8 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/RegionSplitter.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/RegionSplitter.java
@@ -258,6 +258,12 @@ public class RegionSplitter {
    * <li>bin/hbase org.apache.hadoop.hbase.util.RegionSplitter -c 60 -f test:rs
    * myTable HexStringSplit
    * </ul>
+   * <li>create a table named 'myTable' with 50 pre-split regions,
+   * assuming the keys are decimal-encoded ASCII:
+   * <ul>
+   * <li>bin/hbase org.apache.hadoop.hbase.util.RegionSplitter -c 50
+   * myTable DecimalStringSplit
+   * </ul>
    * <li>perform a rolling split of 'myTable' (i.e. 60 =&gt; 120 regions), # 2
    * outstanding splits at a time, assuming keys are uniformly distributed
    * bytes:
@@ -267,9 +273,9 @@ public class RegionSplitter {
    * </ul>
    * </ul>
    *
-   * There are two SplitAlgorithms built into RegionSplitter, HexStringSplit
-   * and UniformSplit. These are different strategies for choosing region
-   * boundaries. See their source code for details.
+   * There are three SplitAlgorithms built into RegionSplitter, HexStringSplit,
+   * DecimalStringSplit, and UniformSplit. These are different strategies for
+   * choosing region boundaries. See their source code for details.
    *
    * @param args
    *          Usage: RegionSplitter &lt;TABLE&gt; &lt;SPLITALGORITHM&gt;
@@ -337,9 +343,10 @@ public class RegionSplitter {
     if (2 != cmd.getArgList().size() || !oneOperOnly || cmd.hasOption("h")) {
       new HelpFormatter().printHelp("RegionSplitter <TABLE> <SPLITALGORITHM>\n"+
           "SPLITALGORITHM is a java class name of a class implementing " +
-          "SplitAlgorithm, or one of the special strings HexStringSplit " +
-          "or UniformSplit, which are built-in split algorithms. " +
+          "SplitAlgorithm, or one of the special strings HexStringSplit or " +
+          "DecimalStringSplit or UniformSplit, which are built-in split algorithms. " +
           "HexStringSplit treats keys as hexadecimal ASCII, and " +
+          "DecimalStringSplit treats keys as decimal ASCII, and " +
           "UniformSplit treats keys as arbitrary bytes.", opt);
       return;
     }
@@ -644,6 +651,8 @@ public class RegionSplitter {
     // their simple class name instead of a fully qualified class name.
     if(splitClassName.equals(HexStringSplit.class.getSimpleName())) {
       splitClass = HexStringSplit.class;
+    } else if (splitClassName.equals(DecimalStringSplit.class.getSimpleName())) {
+      splitClass = DecimalStringSplit.class;
     } else if (splitClassName.equals(UniformSplit.class.getSimpleName())) {
       splitClass = UniformSplit.class;
     } else {
@@ -877,15 +886,52 @@ public class RegionSplitter {
    * Since this split algorithm uses hex strings as keys, it is easy to read &amp;
    * write in the shell but takes up more space and may be non-intuitive.
    */
-  public static class HexStringSplit implements SplitAlgorithm {
+  public static class HexStringSplit extends NumberStringSplit {
     final static String DEFAULT_MIN_HEX = "00000000";
     final static String DEFAULT_MAX_HEX = "FFFFFFFF";
+    final static int RADIX_HEX = 16;
+
+    public HexStringSplit() {
+      super(DEFAULT_MIN_HEX, DEFAULT_MAX_HEX, RADIX_HEX);
+    }
 
-    String firstRow = DEFAULT_MIN_HEX;
-    BigInteger firstRowInt = BigInteger.ZERO;
-    String lastRow = DEFAULT_MAX_HEX;
-    BigInteger lastRowInt = new BigInteger(lastRow, 16);
-    int rowComparisonLength = lastRow.length();
+  }
+
+  /**
+   * The format of a DecimalStringSplit region boundary is the ASCII representation of
+   * reversed sequential number, or any other uniformly distributed decimal value.
+   * Row are decimal-encoded long values in the range
+   * <b>"00000000" =&gt; "99999999"</b> and are left-padded with zeros to
keep the
+   * same order lexicographically as if they were binary.
+   */
+  public static class DecimalStringSplit extends NumberStringSplit {
+    final static String DEFAULT_MIN_DEC = "00000000";
+    final static String DEFAULT_MAX_DEC = "99999999";
+    final static int RADIX_DEC = 10;
+
+    public DecimalStringSplit() {
+      super(DEFAULT_MIN_DEC, DEFAULT_MAX_DEC, RADIX_DEC);
+    }
+
+  }
+
+  public abstract static class NumberStringSplit implements SplitAlgorithm {
+
+    String firstRow;
+    BigInteger firstRowInt;
+    String lastRow;
+    BigInteger lastRowInt;
+    int rowComparisonLength;
+    int radix;
+
+    NumberStringSplit(String minRow, String maxRow, int radix) {
+      this.firstRow = minRow;
+      this.lastRow = maxRow;
+      this.radix = radix;
+      this.firstRowInt = BigInteger.ZERO;
+      this.lastRowInt = new BigInteger(lastRow, this.radix);
+      this.rowComparisonLength = lastRow.length();
+    }
 
     public byte[] split(byte[] start, byte[] end) {
       BigInteger s = convertToBigInteger(start);
@@ -924,18 +970,18 @@ public class RegionSplitter {
 
     public void setFirstRow(String userInput) {
       firstRow = userInput;
-      firstRowInt = new BigInteger(firstRow, 16);
+      firstRowInt = new BigInteger(firstRow, radix);
     }
 
     public void setLastRow(String userInput) {
       lastRow = userInput;
-      lastRowInt = new BigInteger(lastRow, 16);
+      lastRowInt = new BigInteger(lastRow, radix);
       // Precondition: lastRow > firstRow, so last's length is the greater
       rowComparisonLength = lastRow.length();
     }
 
     public byte[] strToRow(String in) {
-      return convertToByte(new BigInteger(in, 16));
+      return convertToByte(new BigInteger(in, radix));
     }
 
     public String rowToStr(byte[] row) {
@@ -988,8 +1034,8 @@ public class RegionSplitter {
      * @param pad padding length
      * @return byte corresponding to input BigInteger
      */
-    public static byte[] convertToByte(BigInteger bigInteger, int pad) {
-      String bigIntegerString = bigInteger.toString(16);
+    public byte[] convertToByte(BigInteger bigInteger, int pad) {
+      String bigIntegerString = bigInteger.toString(radix);
       bigIntegerString = StringUtils.leftPad(bigIntegerString, pad, '0');
       return Bytes.toBytes(bigIntegerString);
     }
@@ -1011,7 +1057,7 @@ public class RegionSplitter {
      * @return the corresponding BigInteger
      */
     public BigInteger convertToBigInteger(byte[] row) {
-      return (row.length > 0) ? new BigInteger(Bytes.toString(row), 16)
+      return (row.length > 0) ? new BigInteger(Bytes.toString(row), radix)
           : BigInteger.ZERO;
     }
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/ae6a9516/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestRegionSplitter.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestRegionSplitter.java
b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestRegionSplitter.java
index e343588..0b2f216 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestRegionSplitter.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestRegionSplitter.java
@@ -39,6 +39,7 @@ import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.client.HTable;
 import org.apache.hadoop.hbase.util.RegionSplitter.HexStringSplit;
+import org.apache.hadoop.hbase.util.RegionSplitter.DecimalStringSplit;
 import org.apache.hadoop.hbase.util.RegionSplitter.SplitAlgorithm;
 import org.apache.hadoop.hbase.util.RegionSplitter.UniformSplit;
 import org.junit.AfterClass;
@@ -137,7 +138,7 @@ public class TestRegionSplitter {
 
         byte[][] twoRegionsSplits = splitter.split(2);
         assertEquals(1, twoRegionsSplits.length);
-    assertArrayEquals(twoRegionsSplits[0], "80000000".getBytes());
+        assertArrayEquals("80000000".getBytes(), twoRegionsSplits[0]);
 
         byte[][] threeRegionsSplits = splitter.split(3);
         assertEquals(2, threeRegionsSplits.length);
@@ -157,11 +158,53 @@ public class TestRegionSplitter {
 
         // Halfway between 00... and 20... should be 10...
         splitPoint = splitter.split(firstRow, "20000000".getBytes());
-        assertArrayEquals(splitPoint, "10000000".getBytes());
+        assertArrayEquals("10000000".getBytes(), splitPoint);
 
         // Halfway between df... and ff... should be ef....
         splitPoint = splitter.split("dfffffff".getBytes(), lastRow);
-        assertArrayEquals(splitPoint,"efffffff".getBytes());
+        assertArrayEquals("efffffff".getBytes(), splitPoint);
+    }
+
+    /**
+     * Unit tests for the DecimalStringSplit algorithm. Makes sure it divides up the
+     * space of keys in the way that we expect.
+     */
+    @Test
+    public void unitTestDecimalStringSplit() {
+        DecimalStringSplit splitter = new DecimalStringSplit();
+        // Check splitting while starting from scratch
+
+        byte[][] twoRegionsSplits = splitter.split(2);
+        assertEquals(1, twoRegionsSplits.length);
+        assertArrayEquals("50000000".getBytes(), twoRegionsSplits[0]);
+
+        byte[][] threeRegionsSplits = splitter.split(3);
+        assertEquals(2, threeRegionsSplits.length);
+        byte[] expectedSplit0 = "33333333".getBytes();
+        assertArrayEquals(expectedSplit0, threeRegionsSplits[0]);
+        byte[] expectedSplit1 = "66666666".getBytes();
+        assertArrayEquals(expectedSplit1, threeRegionsSplits[1]);
+
+        // Check splitting existing regions that have start and end points
+        byte[] splitPoint = splitter.split("10000000".getBytes(), "30000000".getBytes());
+        assertArrayEquals("20000000".getBytes(), splitPoint);
+
+        byte[] lastRow = "99999999".getBytes();
+        assertArrayEquals(lastRow, splitter.lastRow());
+        byte[] firstRow = "00000000".getBytes();
+        assertArrayEquals(firstRow, splitter.firstRow());
+
+        // Halfway between 00... and 20... should be 10...
+        splitPoint = splitter.split(firstRow, "20000000".getBytes());
+        assertArrayEquals("10000000".getBytes(), splitPoint);
+
+        // Halfway between 00... and 19... should be 09...
+        splitPoint = splitter.split(firstRow, "19999999".getBytes());
+        assertArrayEquals("09999999".getBytes(), splitPoint);
+
+        // Halfway between 79... and 99... should be 89....
+        splitPoint = splitter.split("79999999".getBytes(), lastRow);
+        assertArrayEquals("89999999".getBytes(), splitPoint);
     }
 
     /**
@@ -223,6 +266,15 @@ public class TestRegionSplitter {
     assertFalse(splitFailsPrecondition(algo, "0", "A", 11)); // should be fine
     assertTrue(splitFailsPrecondition(algo, "0", "A", 12)); // too granular
 
+    algo = new DecimalStringSplit();
+    assertFalse(splitFailsPrecondition(algo)); // default settings are fine
+    assertFalse(splitFailsPrecondition(algo, "00", "99")); // custom is fine
+    assertTrue(splitFailsPrecondition(algo, "99", "00")); // range error
+    assertTrue(splitFailsPrecondition(algo, "99", "99")); // range error
+    assertFalse(splitFailsPrecondition(algo, "0", "2", 3)); // should be fine
+    assertFalse(splitFailsPrecondition(algo, "0", "9", 10)); // should be fine
+    assertTrue(splitFailsPrecondition(algo, "0", "9", 11)); // too granular
+
     algo = new UniformSplit();
     assertFalse(splitFailsPrecondition(algo)); // default settings are fine
     assertFalse(splitFailsPrecondition(algo, "\\x00", "\\xAA")); // custom is fine


Mime
View raw message