hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From nspiegelb...@apache.org
Subject svn commit: r1186575 - in /hbase/trunk: CHANGES.txt src/main/java/org/apache/hadoop/hbase/util/RegionSplitter.java src/test/java/org/apache/hadoop/hbase/util/TestRegionSplitter.java
Date Thu, 20 Oct 2011 00:09:58 GMT
Author: nspiegelberg
Date: Thu Oct 20 00:09:57 2011
New Revision: 1186575

URL: http://svn.apache.org/viewvc?rev=1186575&view=rev
Log:
HBASE-4489 Better key splitting in RegionSplitter

Added:
    hbase/trunk/src/test/java/org/apache/hadoop/hbase/util/TestRegionSplitter.java
Modified:
    hbase/trunk/CHANGES.txt
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/util/RegionSplitter.java

Modified: hbase/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hbase/trunk/CHANGES.txt?rev=1186575&r1=1186574&r2=1186575&view=diff
==============================================================================
--- hbase/trunk/CHANGES.txt (original)
+++ hbase/trunk/CHANGES.txt Thu Oct 20 00:09:57 2011
@@ -17,6 +17,7 @@ Release 0.93.0 - Unreleased
    HBASE-4102  atomicAppend: A put that appends to the latest version of a cell (Lars H)
    HBASE-4469  Avoid top row seek by looking up bloomfilter (liyin via jgray)
    HBASE-4418  Show all the hbase configuration in the web ui
+   HBASE-4489  Better key splitting in RegionSplitter
 
   BUG FIXES
    HBASE-4488  Store could miss rows during flush (Lars H via jgray)

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/util/RegionSplitter.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/util/RegionSplitter.java?rev=1186575&r1=1186574&r2=1186575&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/util/RegionSplitter.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/util/RegionSplitter.java Thu Oct 20
00:09:57 2011
@@ -35,6 +35,7 @@ import org.apache.commons.cli.HelpFormat
 import org.apache.commons.cli.OptionBuilder;
 import org.apache.commons.cli.Options;
 import org.apache.commons.cli.ParseException;
+import org.apache.commons.lang.ArrayUtils;
 import org.apache.commons.lang.StringUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -138,10 +139,11 @@ public class RegionSplitter {
   /**
    * A generic interface for the RegionSplitter code to use for all it's
    * functionality. Note that the original authors of this code use
-   * {@link MD5StringSplit} to partition their table and set it as default, but
+   * {@link HexStringSplit} to partition their table and set it as default, but
    * provided this for your custom algorithm. To use, create a new derived class
-   * from this interface and call the RegionSplitter class with the argument: <br>
-   * <b>-D split.algorithm=<your_class_path></b>
+   * from this interface and call {@link RegionSplitter#createPresplitTable} or
+   * {@link RegionSplitter#rollingSplit(String, String, Configuration)} with the
+   * argument splitClassName giving the name of your class.
    */
   public static interface SplitAlgorithm {
     /**
@@ -158,12 +160,13 @@ public class RegionSplitter {
     /**
      * Split an entire table.
      *
-     * @param numberOfSplits
+     * @param numRegions
      *          number of regions to split the table into
      *
-     * @return array of split keys for the initial regions of the table
+     * @return array of split keys for the initial regions of the table. The length of the
+     * returned array should be numRegions-1.
      */
-    byte[][] split(int numberOfSplits);
+    byte[][] split(int numRegions);
 
     /**
      * In HBase, the first row is represented by an empty byte array. This might
@@ -208,21 +211,29 @@ public class RegionSplitter {
    * <p>
    * <ul>
    * <li>create a table named 'myTable' with 60 pre-split regions containing 2
-   * column families 'test' & 'rs' bin/hbase
+   * column families 'test' & 'rs', assuming the keys are hex-encoded ASCII:
    * <ul>
-   * <li>org.apache.hadoop.hbase.util.RegionSplitter -c 60 -f test:rs myTable
+   * <li>bin/hbase org.apache.hadoop.hbase.util.RegionSplitter -c 60 -f test:rs
+   * myTable HexStringSplit
    * </ul>
    * <li>perform a rolling split of 'myTable' (i.e. 60 => 120 regions), # 2
-   * outstanding splits at a time bin/hbase
+   * outstanding splits at a time, assuming keys are uniformly distributed
+   * bytes:
    * <ul>
-   * <li>org.apache.hadoop.hbase.util.RegionSplitter -r -o 2 myTable
+   * <li>bin/hbase org.apache.hadoop.hbase.util.RegionSplitter -r -o 2 myTable
+   * UniformSplit
    * </ul>
    * </ul>
    *
+   * There are two SplitAlgorithms built into RegionSplitter, HexStringSplit
+   * and UniformSplit. These are different strategies for choosing region
+   * boundaries. See their source code for details.
+   *
    * @param args
-   *          Usage: RegionSplitter &lt;TABLE&gt; &lt;-c &lt;# regions&gt;
-f
-   *          &lt;family:family:...&gt; | -r [-o &lt;# outstanding
-   *          splits&gt;]&gt; [-D &lt;conf.param=value&gt;]
+   *          Usage: RegionSplitter &lt;TABLE&gt; &lt;SPLITALGORITHM&gt;
+   *          &lt;-c &lt;# regions&gt; -f &lt;family:family:...&gt; |
-r
+   *          [-o &lt;# outstanding splits&gt;]&gt;
+   *          [-D &lt;conf.param=value&gt;]
    * @throws IOException
    *           HBase IO problem
    * @throws InterruptedException
@@ -277,35 +288,35 @@ public class RegionSplitter {
     boolean rollingSplit = cmd.hasOption("r");
     boolean oneOperOnly = createTable ^ rollingSplit;
 
-    if (1 != cmd.getArgList().size() || !oneOperOnly || cmd.hasOption("h")) {
-      new HelpFormatter().printHelp("RegionSplitter <TABLE>", opt);
+    if (2 != cmd.getArgList().size() || !oneOperOnly || cmd.hasOption("h")) {
+      new HelpFormatter().printHelp("RegionSplitter <TABLE> <SPLITALGORITHM>\n"+
+		  "SPLITALGORITHM is a java class name of a class implementing " +
+		  "SplitAlgorithm, or one of the special strings HexStringSplit " +
+		  "or UniformSplit, which are built-in split algorithms. " +
+		  "HexStringSplit treats keys as hexadecimal ASCII, and " +
+		  "UniformSplit treats keys as arbitrary bytes.", opt);
       return;
     }
     String tableName = cmd.getArgs()[0];
+    String splitClass = cmd.getArgs()[1];
 
     if (createTable) {
       conf.set("split.count", cmd.getOptionValue("c"));
-      createPresplitTable(tableName, cmd.getOptionValue("f").split(":"), conf);
+      createPresplitTable(tableName, splitClass, cmd.getOptionValue("f").split(":"), conf);
     }
 
     if (rollingSplit) {
       if (cmd.hasOption("o")) {
         conf.set("split.outstanding", cmd.getOptionValue("o"));
       }
-      rollingSplit(tableName, conf);
+      rollingSplit(tableName, splitClass, conf);
     }
   }
 
-  static void createPresplitTable(String tableName, String[] columnFamilies,
-      Configuration conf) throws IOException, InterruptedException {
-    Class<? extends SplitAlgorithm> splitClass = conf.getClass(
-        "split.algorithm", MD5StringSplit.class, SplitAlgorithm.class);
-    SplitAlgorithm splitAlgo;
-    try {
-      splitAlgo = splitClass.newInstance();
-    } catch (Exception e) {
-      throw new IOException("Problem loading split algorithm: ", e);
-    }
+  static void createPresplitTable(String tableName, String splitClassName,
+          String[] columnFamilies, Configuration conf) throws IOException,
+          InterruptedException {
+    SplitAlgorithm splitAlgo = newSplitAlgoInstance(conf, splitClassName);
     final int splitCount = conf.getInt("split.count", 0);
     Preconditions.checkArgument(splitCount > 1, "Split count must be > 1");
 
@@ -340,16 +351,9 @@ public class RegionSplitter {
     LOG.debug("Finished creating table with " + splitCount + " regions");
   }
 
-  static void rollingSplit(String tableName, Configuration conf)
-      throws IOException, InterruptedException {
-    Class<? extends SplitAlgorithm> splitClass = conf.getClass(
-        "split.algorithm", MD5StringSplit.class, SplitAlgorithm.class);
-    SplitAlgorithm splitAlgo;
-    try {
-      splitAlgo = splitClass.newInstance();
-    } catch (Exception e) {
-      throw new IOException("Problem loading split algorithm: ", e);
-    }
+  static void rollingSplit(String tableName, String splitClassName,
+          Configuration conf) throws IOException, InterruptedException {
+    SplitAlgorithm splitAlgo = newSplitAlgoInstance(conf, splitClassName);
     final int minOS = conf.getInt("split.outstanding", 2);
 
     HTable table = new HTable(conf, tableName);
@@ -541,6 +545,41 @@ public class RegionSplitter {
     fs.delete(splitFile, false);
   }
 
+  /**
+   * @throws IOException if the specified SplitAlgorithm class couldn't be
+   * instantiated
+   */
+  static SplitAlgorithm newSplitAlgoInstance(Configuration conf,
+          String splitClassName) throws IOException {
+    Class<?> splitClass;
+
+    // For split algorithms builtin to RegionSplitter, the user can specify
+    // their simple class name instead of a fully qualified class name.
+    if(splitClassName.equals(HexStringSplit.class.getSimpleName())) {
+      splitClass = HexStringSplit.class;
+    } else if (splitClassName.equals(UniformSplit.class.getSimpleName())) {
+      splitClass = UniformSplit.class;
+    } else {
+      try {
+        splitClass = conf.getClassByName(splitClassName);
+      } catch (ClassNotFoundException e) {
+        throw new IOException("Couldn't load split class " + splitClassName, e);
+      }
+      if(splitClass == null) {
+        throw new IOException("Failed loading split class " + splitClassName);
+      }
+      if(!SplitAlgorithm.class.isAssignableFrom(splitClass)) {
+        throw new IOException(
+                "Specified split class doesn't implement SplitAlgorithm");
+      }
+    }
+    try {
+      return splitClass.asSubclass(SplitAlgorithm.class).newInstance();
+    } catch (Exception e) {
+      throw new IOException("Problem loading split algorithm: ", e);
+    }
+  }
+
   static LinkedList<Pair<byte[], byte[]>> splitScan(
       LinkedList<Pair<byte[], byte[]>> regionList, HTable table,
       SplitAlgorithm splitAlgo)
@@ -714,16 +753,20 @@ public class RegionSplitter {
   }
 
   /**
-   * MD5StringSplit is the default {@link SplitAlgorithm} for creating pre-split
-   * tables. The format of MD5StringSplit is the ASCII representation of an MD5
-   * checksum. Row are long values in the range <b>"00000000" => "7FFFFFFF"</b>
-   * and are left-padded with zeros to keep the same order lexographically as if
-   * they were binary.
+   * HexStringSplit is one possible {@link SplitAlgorithm} for choosing region
+   * boundaries. The format of a HexStringSplit region boundary is the
+   * ASCII representation of an MD5 checksum, or any other uniformly distributed
+   * bytes. Row are hex-encoded long values in the range <b>"00000000" =>
+   * "FFFFFFFF"</b> and are left-padded with zeros to keep the same order
+   * lexicographically as if they were binary.
+   *
+   * This split algorithm is only appropriate if you will use hex strings as
+   * keys.
    */
-  public static class MD5StringSplit implements SplitAlgorithm {
-    final static String MAXMD5 = "7FFFFFFF";
-    final static BigInteger MAXMD5_INT = new BigInteger(MAXMD5, 16);
-    final static int rowComparisonLength = MAXMD5.length();
+  public static class HexStringSplit implements SplitAlgorithm {
+    final static String MAXHEX = "FFFFFFFF";
+    final static BigInteger MAXHEX_INT = new BigInteger(MAXHEX, 16);
+    final static int rowComparisonLength = MAXHEX.length();
 
     public byte[] split(byte[] start, byte[] end) {
       BigInteger s = convertToBigInteger(start);
@@ -734,10 +777,10 @@ public class RegionSplitter {
 
     public byte[][] split(int n) {
       BigInteger[] splits = new BigInteger[n - 1];
-      BigInteger sizeOfEachSplit = MAXMD5_INT.divide(BigInteger.valueOf(n));
+      BigInteger sizeOfEachSplit = MAXHEX_INT.divide(BigInteger.valueOf(n));
       for (int i = 1; i < n; i++) {
         // NOTE: this means the last region gets all the slop.
-        // This is not a big deal if we're assuming n << MAXMD5
+        // This is not a big deal if we're assuming n << MAXHEX
         splits[i - 1] = sizeOfEachSplit.multiply(BigInteger.valueOf(i));
       }
       return convertToBytes(splits);
@@ -748,7 +791,7 @@ public class RegionSplitter {
     }
 
     public byte[] lastRow() {
-      return convertToByte(MAXMD5_INT);
+      return convertToByte(MAXHEX_INT);
     }
 
     public byte[] strToRow(String in) {
@@ -806,4 +849,54 @@ public class RegionSplitter {
     }
   }
 
+  /**
+   * A SplitAlgorithm that divides the space of possible keys evenly. Useful
+   * when the keys are approximately uniform random bytes (e.g. hashes).
+   * You probably shouldn't use this if your keys are ASCII, or if your keys
+   * tend to have similar prefixes.
+   */
+  public static class UniformSplit implements SplitAlgorithm {
+	static final byte xFF = (byte)0xFF;
+    static final byte[] firstRowBytes = ArrayUtils.EMPTY_BYTE_ARRAY;
+    static final byte[] lastRowBytes =
+            new byte[] {xFF, xFF, xFF, xFF, xFF, xFF, xFF, xFF};
+    public byte[] split(byte[] start, byte[] end) {
+      return Bytes.split(start, end, 1)[1];
+    }
+
+    @Override
+    public byte[][] split(int numRegions) {
+      byte[][] splitKeysPlusEndpoints = Bytes.split(firstRowBytes, lastRowBytes,
+              numRegions-1);
+      byte[][] splitAtKeys = new byte[splitKeysPlusEndpoints.length-2][];
+      System.arraycopy(splitKeysPlusEndpoints, 1, splitAtKeys, 0,
+              splitKeysPlusEndpoints.length-2);
+      return splitAtKeys;
+    }
+
+    @Override
+    public byte[] firstRow() {
+      return firstRowBytes;
+    }
+
+    @Override
+    public byte[] lastRow() {
+      return lastRowBytes;
+    }
+
+    @Override
+    public byte[] strToRow(String input) {
+      return Bytes.toBytesBinary(input);
+    }
+
+    @Override
+    public String rowToStr(byte[] row) {
+      return Bytes.toStringBinary(row);
+    }
+
+    @Override
+    public String separator() {
+      return ",";
+    }
+  }
 }

Added: hbase/trunk/src/test/java/org/apache/hadoop/hbase/util/TestRegionSplitter.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/util/TestRegionSplitter.java?rev=1186575&view=auto
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/util/TestRegionSplitter.java (added)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/util/TestRegionSplitter.java Thu Oct
20 00:09:57 2011
@@ -0,0 +1,294 @@
+/**
+ * Copyright 2011 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.util;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotSame;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.lang.ArrayUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HServerAddress;
+import org.apache.hadoop.hbase.client.HTable;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.util.RegionSplitter.HexStringSplit;
+import org.apache.hadoop.hbase.util.RegionSplitter.UniformSplit;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ * Tests for {@link RegionSplitter}, which can create a pre-split table or do a
+ * rolling split of an existing table.
+ */
+public class TestRegionSplitter {
+    private final static HBaseTestingUtility UTIL = new HBaseTestingUtility();
+    private final static String HEX_SPLIT_CLASS_NAME =
+            "org.apache.hadoop.hbase.util.RegionSplitter$HexStringSplit";
+    private final static String UNIFORM_SPLIT_CLASS_NAME =
+            "org.apache.hadoop.hbase.util.RegionSplitter$UniformSplit";
+    private final static String CF_NAME = "SPLIT_TEST_CF";
+    private final static byte xFF = (byte)0xff;
+
+    @BeforeClass
+    public static void setup() throws Exception {
+        UTIL.startMiniCluster();
+    }
+
+    @AfterClass
+    public static void teardown() throws IOException {
+        UTIL.shutdownMiniCluster();
+    }
+
+    /**
+     * Test creating a pre-split table using the HexStringSplit algorithm.
+     */
+    @Test
+    public void testCreatePresplitTableHex() throws Exception {
+        final List<byte[]> expectedBounds = new ArrayList<byte[]>();
+        expectedBounds.add(ArrayUtils.EMPTY_BYTE_ARRAY);
+        expectedBounds.add("0fffffff".getBytes());
+        expectedBounds.add("1ffffffe".getBytes());
+        expectedBounds.add("2ffffffd".getBytes());
+        expectedBounds.add("3ffffffc".getBytes());
+        expectedBounds.add("4ffffffb".getBytes());
+        expectedBounds.add("5ffffffa".getBytes());
+        expectedBounds.add("6ffffff9".getBytes());
+        expectedBounds.add("7ffffff8".getBytes());
+        expectedBounds.add("8ffffff7".getBytes());
+        expectedBounds.add("9ffffff6".getBytes());
+        expectedBounds.add("affffff5".getBytes());
+        expectedBounds.add("bffffff4".getBytes());
+        expectedBounds.add("cffffff3".getBytes());
+        expectedBounds.add("dffffff2".getBytes());
+        expectedBounds.add("effffff1".getBytes());
+        expectedBounds.add(ArrayUtils.EMPTY_BYTE_ARRAY);
+
+        // Do table creation/pre-splitting and verification of region boundaries
+        preSplitTableAndVerify(expectedBounds, HEX_SPLIT_CLASS_NAME,
+                "NewHexPresplitTable");
+    }
+
+    /**
+     * Test creating a pre-split table using the UniformSplit algorithm.
+     */
+    @Test
+    public void testCreatePresplitTableUniform() throws Exception {
+        List<byte[]> expectedBounds = new ArrayList<byte[]>();
+        expectedBounds.add(ArrayUtils.EMPTY_BYTE_ARRAY);
+        expectedBounds.add(new byte[] {      0x0f, xFF, xFF, xFF, xFF, xFF, xFF,        xFF});
+        expectedBounds.add(new byte[] {      0x1f, xFF, xFF, xFF, xFF, xFF, xFF, (byte)0xfe});
+        expectedBounds.add(new byte[] {      0x2f, xFF, xFF, xFF, xFF, xFF, xFF, (byte)0xfd});
+        expectedBounds.add(new byte[] {      0x3f, xFF, xFF, xFF, xFF, xFF, xFF, (byte)0xfc});
+        expectedBounds.add(new byte[] {      0x4f, xFF, xFF, xFF, xFF, xFF, xFF, (byte)0xfb});
+        expectedBounds.add(new byte[] {      0x5f, xFF, xFF, xFF, xFF, xFF, xFF, (byte)0xfa});
+        expectedBounds.add(new byte[] {      0x6f, xFF, xFF, xFF, xFF, xFF, xFF, (byte)0xf9});
+        expectedBounds.add(new byte[] {      0x7f, xFF, xFF, xFF, xFF, xFF, xFF, (byte)0xf8});
+        expectedBounds.add(new byte[] {(byte)0x8f, xFF, xFF, xFF, xFF, xFF, xFF, (byte)0xf7});
+        expectedBounds.add(new byte[] {(byte)0x9f, xFF, xFF, xFF, xFF, xFF, xFF, (byte)0xf6});
+        expectedBounds.add(new byte[] {(byte)0xaf, xFF, xFF, xFF, xFF, xFF, xFF, (byte)0xf5});
+        expectedBounds.add(new byte[] {(byte)0xbf, xFF, xFF, xFF, xFF, xFF, xFF, (byte)0xf4});
+        expectedBounds.add(new byte[] {(byte)0xcf, xFF, xFF, xFF, xFF, xFF, xFF, (byte)0xf3});
+        expectedBounds.add(new byte[] {(byte)0xdf, xFF, xFF, xFF, xFF, xFF, xFF, (byte)0xf2});
+        expectedBounds.add(new byte[] {(byte)0xef, xFF, xFF, xFF, xFF, xFF, xFF, (byte)0xf1});
+        expectedBounds.add(ArrayUtils.EMPTY_BYTE_ARRAY);
+
+        // Do table creation/pre-splitting and verification of region boundaries
+        preSplitTableAndVerify(expectedBounds,
+                "org.apache.hadoop.hbase.util.RegionSplitter$UniformSplit",
+                "NewUniformPresplitTable");
+    }
+
+    /**
+     * Unit tests for the HexStringSplit algorithm. Makes sure it divides up the
+     * space of keys in the way that we expect.
+     */
+    @Test
+    public void unitTestHexStringSplit() {
+        HexStringSplit splitter = new HexStringSplit();
+        // Check splitting while starting from scratch
+
+        byte[][] twoRegionsSplits = splitter.split(2);
+        assertEquals(1, twoRegionsSplits.length);
+        assertArrayEquals(twoRegionsSplits[0], "7fffffff".getBytes());
+
+        byte[][] threeRegionsSplits = splitter.split(3);
+        assertEquals(2, threeRegionsSplits.length);
+        byte[] expectedSplit0 = "55555555".getBytes();
+        assertArrayEquals(expectedSplit0, threeRegionsSplits[0]);
+        byte[] expectedSplit1 = "aaaaaaaa".getBytes();
+        assertArrayEquals(expectedSplit1, threeRegionsSplits[1]);
+
+        // Check splitting existing regions that have start and end points
+        byte[] splitPoint = splitter.split("10000000".getBytes(), "30000000".getBytes());
+        assertArrayEquals("20000000".getBytes(), splitPoint);
+
+        byte[] lastRow = "ffffffff".getBytes();
+        assertArrayEquals(lastRow, splitter.lastRow());
+        byte[] firstRow = "00000000".getBytes();
+        assertArrayEquals(firstRow, splitter.firstRow());
+
+        // Halfway between 00... and 20... should be 10...
+        splitPoint = splitter.split(firstRow, "20000000".getBytes());
+        assertArrayEquals(splitPoint, "10000000".getBytes());
+
+        // Halfway between 5f... and 7f... should be 6f....
+        splitPoint = splitter.split("dfffffff".getBytes(), lastRow);
+        assertArrayEquals(splitPoint,"efffffff".getBytes());
+    }
+
+    /**
+     * Unit tests for the UniformSplit algorithm. Makes sure it divides up the space of
+     * keys in the way that we expect.
+     */
+    @Test
+    public void unitTestUniformSplit() {
+        UniformSplit splitter = new UniformSplit();
+
+        // Check splitting while starting from scratch
+        try {
+            splitter.split(1);
+            throw new AssertionError("Splitting into <2 regions should have thrown exception");
+        } catch (IllegalArgumentException e) { }
+
+        byte[][] twoRegionsSplits = splitter.split(2);
+        assertEquals(1, twoRegionsSplits.length);
+        assertArrayEquals(twoRegionsSplits[0],
+                new byte[] {0x7f, xFF, xFF, xFF, xFF, xFF, xFF, xFF});
+
+        byte[][] threeRegionsSplits = splitter.split(3);
+        assertEquals(2, threeRegionsSplits.length);
+        byte[] expectedSplit0 = new byte[] {0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55};
+        assertArrayEquals(expectedSplit0, threeRegionsSplits[0]);
+        byte[] expectedSplit1 = new byte[] {(byte)0xAA, (byte)0xAA, (byte)0xAA, (byte)0xAA,
+                (byte)0xAA, (byte)0xAA, (byte)0xAA, (byte)0xAA};
+        assertArrayEquals(expectedSplit1, threeRegionsSplits[1]);
+
+        // Check splitting existing regions that have start and end points
+        byte[] splitPoint = splitter.split(new byte[] {0x10}, new byte[] {0x30});
+        assertArrayEquals(new byte[] {0x20}, splitPoint);
+
+        byte[] lastRow = new byte[] {xFF, xFF, xFF, xFF, xFF, xFF, xFF, xFF};
+        assertArrayEquals(lastRow, splitter.lastRow());
+        byte[] firstRow = ArrayUtils.EMPTY_BYTE_ARRAY;
+        assertArrayEquals(firstRow, splitter.firstRow());
+
+        splitPoint = splitter.split(firstRow, new byte[] {0x20});
+        assertArrayEquals(splitPoint, new byte[] {0x10});
+
+        splitPoint = splitter.split(new byte[] {(byte)0xdf, xFF, xFF, xFF, xFF,
+                xFF, xFF, xFF}, lastRow);
+        assertArrayEquals(splitPoint,
+                new byte[] {(byte)0xef, xFF, xFF, xFF, xFF, xFF, xFF, xFF});
+    }
+
+    /**
+     * Creates a pre-split table with expectedBounds.size()+1 regions, then
+     * verifies that the region boundaries are the same as the expected
+     * region boundaries in expectedBounds.
+     * @throws Various junit assertions
+     */
+    private void preSplitTableAndVerify(List<byte[]> expectedBounds,
+            String splitAlgo, String tableName) throws Exception {
+        final int numRegions = expectedBounds.size()-1;
+        final Configuration conf = UTIL.getConfiguration();
+        conf.setInt("split.count", numRegions);
+        RegionSplitter.createPresplitTable(tableName, splitAlgo,
+                new String[] {CF_NAME}, conf);
+        verifyBounds(expectedBounds, tableName);
+    }
+
+    private void rollingSplitAndVerify(String tableName, String splitAlgo,
+            List<byte[]> expectedBounds)  throws Exception {
+        final Configuration conf = UTIL.getConfiguration();
+
+        // Set this larger than the number of splits so RegionSplitter won't block
+        conf.setInt("split.outstanding", 5);
+        RegionSplitter.rollingSplit(tableName, splitAlgo, conf);
+        verifyBounds(expectedBounds, tableName);
+    }
+
+    private void verifyBounds(List<byte[]> expectedBounds, String tableName)
+            throws Exception {
+        // Get region boundaries from the cluster and verify their endpoints
+        final Configuration conf = UTIL.getConfiguration();
+        final int numRegions = expectedBounds.size()-1;
+        final HTable hTable = new HTable(conf, tableName.getBytes());
+        final Map<HRegionInfo, HServerAddress> regionInfoMap =
+                hTable.getRegionsInfo();
+        assertEquals(numRegions, regionInfoMap.size());
+        for (Map.Entry<HRegionInfo, HServerAddress> entry:
+           regionInfoMap.entrySet()) {
+            final HRegionInfo regionInfo = entry.getKey();
+            byte[] regionStart = regionInfo.getStartKey();
+            byte[] regionEnd = regionInfo.getEndKey();
+
+            // This region's start key should be one of the region boundaries
+            int startBoundaryIndex = indexOfBytes(expectedBounds, regionStart);
+            assertNotSame(-1, startBoundaryIndex);
+
+            // This region's end key should be the region boundary that comes
+            // after the starting boundary.
+            byte[] expectedRegionEnd = expectedBounds.get(
+                    startBoundaryIndex+1);
+            assertEquals(0, Bytes.compareTo(regionEnd, expectedRegionEnd));
+        }
+    }
+
+    /**
+     * List.indexOf() doesn't really work for a List<byte[]>, because byte[]
+     * doesn't override equals(). This method checks whether a list contains
+     * a given element by checking each element using the byte array
+     * comparator.
+     * @return the index of the first element that equals compareTo, or -1
+     * if no elements are equal.
+     */
+    static private int indexOfBytes(List<byte[]> list,  byte[] compareTo) {
+        int listIndex = 0;
+        for(byte[] elem: list) {
+            if(Bytes.BYTES_COMPARATOR.compare(elem, compareTo) == 0) {
+                return listIndex;
+            }
+            listIndex++;
+        }
+        return -1;
+    }
+
+    /**
+     * Inserts some meaningless data into a CF so the regions can be split.
+     */
+    static void insertSomeData(String table) throws IOException {
+        HTable hTable = new HTable(table);
+        for(byte b=Byte.MIN_VALUE; b<Byte.MAX_VALUE; b++) {
+            byte[] whateverBytes = new byte[] {b};
+            Put p = new Put(whateverBytes);
+            p.add(CF_NAME.getBytes(), whateverBytes, whateverBytes);
+            hTable.put(p);
+        }
+    }
+}



Mime
View raw message