Return-Path: Delivered-To: apmail-hadoop-hbase-commits-archive@minotaur.apache.org Received: (qmail 76398 invoked from network); 27 Apr 2009 18:05:56 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.3) by minotaur.apache.org with SMTP; 27 Apr 2009 18:05:56 -0000 Received: (qmail 96806 invoked by uid 500); 27 Apr 2009 18:05:56 -0000 Delivered-To: apmail-hadoop-hbase-commits-archive@hadoop.apache.org Received: (qmail 96762 invoked by uid 500); 27 Apr 2009 18:05:56 -0000 Mailing-List: contact hbase-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: hbase-dev@hadoop.apache.org Delivered-To: mailing list hbase-commits@hadoop.apache.org Received: (qmail 96753 invoked by uid 99); 27 Apr 2009 18:05:56 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 27 Apr 2009 18:05:56 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=10.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 27 Apr 2009 18:05:55 +0000 Received: by eris.apache.org (Postfix, from userid 65534) id A5BA023888AF; Mon, 27 Apr 2009 18:05:35 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r769076 - in /hadoop/hbase/trunk: CHANGES.txt src/java/org/apache/hadoop/hbase/util/Bytes.java src/test/org/apache/hadoop/hbase/util/TestBytes.java Date: Mon, 27 Apr 2009 18:05:35 -0000 To: hbase-commits@hadoop.apache.org From: stack@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20090427180535.A5BA023888AF@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: stack Date: Mon Apr 27 18:05:34 2009 New Revision: 769076 URL: http://svn.apache.org/viewvc?rev=769076&view=rev Log: HBASE-1183 New MR splitting algorithm and other new features need a way to split a key range in N chunks Modified: hadoop/hbase/trunk/CHANGES.txt hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/util/Bytes.java hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/util/TestBytes.java Modified: hadoop/hbase/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/CHANGES.txt?rev=769076&r1=769075&r2=769076&view=diff ============================================================================== --- hadoop/hbase/trunk/CHANGES.txt (original) +++ hadoop/hbase/trunk/CHANGES.txt Mon Apr 27 18:05:34 2009 @@ -167,6 +167,8 @@ (Evgeny Ryabitskiy via Stack) HBASE-1260 Bytes utility class changes: remove usage of ByteBuffer and provide additional ByteBuffer primitives (Jon Gray via Stack) + HBASE-1183 New MR splitting algorithm and other new features need a way to + split a key range in N chunks (Jon Gray via Stack) Release 0.19.0 - 01/21/2009 INCOMPATIBLE CHANGES Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/util/Bytes.java URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/util/Bytes.java?rev=769076&r1=769075&r2=769076&view=diff ============================================================================== --- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/util/Bytes.java (original) +++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/util/Bytes.java Mon Apr 27 18:05:34 2009 @@ -25,6 +25,7 @@ import java.io.UnsupportedEncodingException; import java.nio.ByteBuffer; import java.util.Comparator; +import java.math.BigInteger; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.io.ImmutableBytesWritable; @@ -749,8 +750,108 @@ return result; } + /** + * @param a + * @param length + * @return First length bytes from a + */ + public static byte [] head(final byte [] a, final int length) { + if(a.length < length) return null; + byte [] result = new byte[length]; + System.arraycopy(a, 0, result, 0, length); + return result; + } /** + * @param a + * @param length + * @return Last length bytes from a + */ + public static byte [] tail(final byte [] a, final int length) { + if(a.length < length) return null; + byte [] result = new byte[length]; + System.arraycopy(a, a.length - length, result, 0, length); + return result; + } + + /** + * @param a + * @param length + * @return Value in a plus length prepended 0 bytes + */ + public static byte [] padHead(final byte [] a, final int length) { + byte [] padding = new byte[length]; + for(int i=0;ia plus length appended 0 bytes + */ + public static byte [] padTail(final byte [] a, final int length) { + byte [] padding = new byte[length]; + for(int i=0;i 1) { + throw new IllegalArgumentException("b > a"); + } + if (num <= 0) throw new IllegalArgumentException("num cannot be < 0"); + byte [] prependHeader = {1, 0}; + BigInteger startBI = new BigInteger(add(prependHeader, aPadded)); + BigInteger stopBI = new BigInteger(add(prependHeader, bPadded)); + BigInteger diffBI = stopBI.subtract(startBI); + BigInteger splitsBI = BigInteger.valueOf(num + 1); + if(diffBI.compareTo(splitsBI) <= 0) return null; + BigInteger intervalBI = null; + try { + intervalBI = diffBI.divide(splitsBI); + } catch(Exception e) { + return null; + } + + byte [][] result = new byte[num+2][]; + result[0] = a; + + for (int i = 1; i <= num; i++) { + BigInteger curBI = startBI.add(intervalBI.multiply(BigInteger.valueOf(i))); + byte [] padded = curBI.toByteArray(); + if (padded[1] == 0) + padded = tail(padded,padded.length-2); + else + padded = tail(padded,padded.length-1); + result[i] = padded; + } + result[num+1] = b; + return result; + } + + /** * @param t * @return Array of byte arrays made from passed array of Text */ Modified: hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/util/TestBytes.java URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/util/TestBytes.java?rev=769076&r1=769075&r2=769076&view=diff ============================================================================== --- hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/util/TestBytes.java (original) +++ hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/util/TestBytes.java Mon Apr 27 18:05:34 2009 @@ -24,6 +24,40 @@ import junit.framework.TestCase; public class TestBytes extends TestCase { + public void testSplit() throws Exception { + byte [] lowest = Bytes.toBytes("AAA"); + byte [] middle = Bytes.toBytes("CCC"); + byte [] highest = Bytes.toBytes("EEE"); + byte [][] parts = Bytes.split(lowest, highest, 1); + for (int i = 0; i < parts.length; i++) { + System.out.println(Bytes.toString(parts[i])); + } + assertEquals(3, parts.length); + assertTrue(Bytes.equals(parts[1], middle)); + // Now divide into three parts. Change highest so split is even. + highest = Bytes.toBytes("DDD"); + parts = Bytes.split(lowest, highest, 2); + for (int i = 0; i < parts.length; i++) { + System.out.println(Bytes.toString(parts[i])); + } + assertEquals(4, parts.length); + // Assert that 3rd part is 'CCC'. + assertTrue(Bytes.equals(parts[2], middle)); + } + + public void testSplit2() throws Exception { + // More split tests. + byte [] lowest = Bytes.toBytes("http://A"); + byte [] highest = Bytes.toBytes("http://z"); + byte [] middle = Bytes.toBytes("http://["); + byte [][] parts = Bytes.split(lowest, highest, 1); + for (int i = 0; i < parts.length; i++) { + System.out.println(Bytes.toString(parts[i])); + } + assertEquals(2, parts.length); + assertTrue(Bytes.equals(parts[1], middle)); + } + public void testToLong() throws Exception { long [] longs = {-1l, 123l, 122232323232l}; for (int i = 0; i < longs.length; i++) {