Return-Path: X-Original-To: apmail-hbase-commits-archive@www.apache.org Delivered-To: apmail-hbase-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 91055D791 for ; Tue, 26 Jun 2012 23:44:22 +0000 (UTC) Received: (qmail 80270 invoked by uid 500); 26 Jun 2012 23:44:22 -0000 Delivered-To: apmail-hbase-commits-archive@hbase.apache.org Received: (qmail 80233 invoked by uid 500); 26 Jun 2012 23:44:22 -0000 Mailing-List: contact commits-help@hbase.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@hbase.apache.org Delivered-To: mailing list commits@hbase.apache.org Received: (qmail 80226 invoked by uid 99); 26 Jun 2012 23:44:22 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 26 Jun 2012 23:44:22 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 26 Jun 2012 23:44:20 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id C8C032388A02 for ; Tue, 26 Jun 2012 23:44:00 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1354293 - in /hbase/branches/0.94/src: main/java/org/apache/hadoop/hbase/KeyValue.java test/java/org/apache/hadoop/hbase/TestKeyValue.java Date: Tue, 26 Jun 2012 23:44:00 -0000 To: commits@hbase.apache.org From: tedyu@apache.org X-Mailer: svnmailer-1.0.8-patched Message-Id: <20120626234400.C8C032388A02@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: tedyu Date: Tue Jun 26 23:43:59 2012 New Revision: 1354293 URL: http://svn.apache.org/viewvc?rev=1354293&view=rev Log: HBASE-6200 KeyComparator.compareWithoutRow can be wrong when families have the same prefix (Jieshan) Modified: hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/KeyValue.java hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/TestKeyValue.java Modified: hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/KeyValue.java URL: http://svn.apache.org/viewvc/hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/KeyValue.java?rev=1354293&r1=1354292&r2=1354293&view=diff ============================================================================== --- hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/KeyValue.java (original) +++ hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/KeyValue.java Tue Jun 26 23:43:59 2012 @@ -2102,33 +2102,29 @@ public class KeyValue implements Writabl } /** - * Compare column, timestamp, and key type (everything except the row). - * This method is used both in the normal comparator and the "same-prefix" - * comparator. Note that we are assuming that row portions of both KVs have - * already been parsed and found identical, and we don't validate that - * assumption here. - * @param commonPrefix the length of the common prefix of the two - * key-values being compared, including row length and row + * Compare columnFamily, qualifier, timestamp, and key type (everything + * except the row). This method is used both in the normal comparator and + * the "same-prefix" comparator. Note that we are assuming that row portions + * of both KVs have already been parsed and found identical, and we don't + * validate that assumption here. + * @param commonPrefix + * the length of the common prefix of the two key-values being + * compared, including row length and row */ private int compareWithoutRow(int commonPrefix, byte[] left, int loffset, int llength, byte[] right, int roffset, int rlength, short rowlength) { - // Compare column family. Start comparing past row and family length. - int lcolumnoffset = ROW_LENGTH_SIZE + FAMILY_LENGTH_SIZE + - rowlength + loffset; - int rcolumnoffset = ROW_LENGTH_SIZE + FAMILY_LENGTH_SIZE + - rowlength + roffset; - int lcolumnlength = llength - TIMESTAMP_TYPE_SIZE - - (lcolumnoffset - loffset); - int rcolumnlength = rlength - TIMESTAMP_TYPE_SIZE - - (rcolumnoffset - roffset); - - // If row matches, and no column in the 'left' AND put type is 'minimum', - // then return that left is larger than right. - - // This supports 'last key on a row' - the magic is if there is no column - // in the left operand, and the left operand has a type of '0' - magical - // value, then we say the left is bigger. This will let us seek to the - // last key in a row. + /*** + * KeyValue Format and commonLength: + * |_keyLen_|_valLen_|_rowLen_|_rowKey_|_famiLen_|_fami_|_Quali_|.... + * ------------------|-------commonLength--------|-------------- + */ + int commonLength = ROW_LENGTH_SIZE + FAMILY_LENGTH_SIZE + rowlength; + + // commonLength + TIMESTAMP_TYPE_SIZE + int commonLengthWithTSAndType = TIMESTAMP_TYPE_SIZE + commonLength; + // ColumnFamily + Qualifier length. + int lcolumnlength = llength - commonLengthWithTSAndType; + int rcolumnlength = rlength - commonLengthWithTSAndType; byte ltype = left[loffset + (llength - 1)]; byte rtype = right[roffset + (rlength - 1)]; @@ -2136,7 +2132,7 @@ public class KeyValue implements Writabl // If the column is not specified, the "minimum" key type appears the // latest in the sorted order, regardless of the timestamp. This is used // for specifying the last key/value in a given row, because there is no - // "lexicographically last column" (it would be infinitely long). The + // "lexicographically last column" (it would be infinitely long). The // "maximum" key type does not need this behavior. if (lcolumnlength == 0 && ltype == Type.Minimum.getCode()) { // left is "bigger", i.e. it appears later in the sorted order @@ -2146,20 +2142,38 @@ public class KeyValue implements Writabl return -1; } + int lfamilyoffset = commonLength + loffset; + int rfamilyoffset = commonLength + roffset; + + // Column family length. + int lfamilylength = left[lfamilyoffset - 1]; + int rfamilylength = right[rfamilyoffset - 1]; + // If left family size is not equal to right family size, we need not + // compare the qualifiers. + boolean sameFamilySize = (lfamilylength == rfamilylength); int common = 0; if (commonPrefix > 0) { - common = Math.max(0, commonPrefix - - rowlength - ROW_LENGTH_SIZE - FAMILY_LENGTH_SIZE); - common = Math.min(common, Math.min(lcolumnlength, rcolumnlength)); + common = Math.max(0, commonPrefix - commonLength); + if (!sameFamilySize) { + // Common should not be larger than Math.min(lfamilylength, + // rfamilylength). + common = Math.min(common, Math.min(lfamilylength, rfamilylength)); + } else { + common = Math.min(common, Math.min(lcolumnlength, rcolumnlength)); + } } - - final int comparisonResult = Bytes.compareTo( - left, lcolumnoffset + common, lcolumnlength - common, - right, rcolumnoffset + common, rcolumnlength - common); - if (comparisonResult != 0) { - return comparisonResult; + if (!sameFamilySize) { + // comparing column family is enough. + return Bytes.compareTo(left, lfamilyoffset + common, lfamilylength + - common, right, rfamilyoffset + common, rfamilylength - common); + } + // Compare family & qualifier together. + final int comparison = Bytes.compareTo(left, lfamilyoffset + common, + lcolumnlength - common, right, rfamilyoffset + common, + rcolumnlength - common); + if (comparison != 0) { + return comparison; } - return compareTimestampAndType(left, loffset, llength, right, roffset, rlength, ltype, rtype); } Modified: hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/TestKeyValue.java URL: http://svn.apache.org/viewvc/hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/TestKeyValue.java?rev=1354293&r1=1354292&r2=1354293&view=diff ============================================================================== --- hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/TestKeyValue.java (original) +++ hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/TestKeyValue.java Tue Jun 26 23:43:59 2012 @@ -341,6 +341,65 @@ public class TestKeyValue extends TestCa assertTrue(cmp > 0); } + private void assertKVLessWithoutRow(KeyValue.KeyComparator c, int common, KeyValue less, + KeyValue greater) { + int cmp = c.compareIgnoringPrefix(common, less.getBuffer(), less.getOffset() + + KeyValue.ROW_OFFSET, less.getKeyLength(), greater.getBuffer(), + greater.getOffset() + KeyValue.ROW_OFFSET, greater.getKeyLength()); + assertTrue(cmp < 0); + cmp = c.compareIgnoringPrefix(common, greater.getBuffer(), greater.getOffset() + + KeyValue.ROW_OFFSET, greater.getKeyLength(), less.getBuffer(), + less.getOffset() + KeyValue.ROW_OFFSET, less.getKeyLength()); + assertTrue(cmp > 0); + } + + public void testCompareWithoutRow() { + final KeyValue.KeyComparator c = KeyValue.KEY_COMPARATOR; + byte[] row = Bytes.toBytes("row"); + + byte[] fa = Bytes.toBytes("fa"); + byte[] fami = Bytes.toBytes("fami"); + byte[] fami1 = Bytes.toBytes("fami1"); + + byte[] qual0 = Bytes.toBytes(""); + byte[] qual1 = Bytes.toBytes("qf1"); + byte[] qual2 = Bytes.toBytes("qf2"); + long ts = 1; + + // 'fa:' + KeyValue kv_0 = new KeyValue(row, fa, qual0, ts, Type.Put); + // 'fami:' + KeyValue kv0_0 = new KeyValue(row, fami, qual0, ts, Type.Put); + // 'fami:qf1' + KeyValue kv0_1 = new KeyValue(row, fami, qual1, ts, Type.Put); + // 'fami:qf2' + KeyValue kv0_2 = new KeyValue(row, fami, qual2, ts, Type.Put); + // 'fami1:' + KeyValue kv1_0 = new KeyValue(row, fami1, qual0, ts, Type.Put); + + // 'fami:qf1' < 'fami:qf2' + assertKVLessWithoutRow(c, 0, kv0_1, kv0_2); + // 'fami:qf1' < 'fami1:' + assertKVLessWithoutRow(c, 0, kv0_1, kv1_0); + + // Test comparison by skipping the same prefix bytes. + /*** + * KeyValue Format and commonLength: + * |_keyLen_|_valLen_|_rowLen_|_rowKey_|_famiLen_|_fami_|_Quali_|.... + * ------------------|-------commonLength--------|-------------- + */ + int commonLength = KeyValue.ROW_LENGTH_SIZE + KeyValue.FAMILY_LENGTH_SIZE + + row.length; + // 'fa:' < 'fami:'. They have commonPrefix + 2 same prefix bytes. + assertKVLessWithoutRow(c, commonLength + 2, kv_0, kv0_0); + // 'fami:' < 'fami:qf1'. They have commonPrefix + 4 same prefix bytes. + assertKVLessWithoutRow(c, commonLength + 4, kv0_0, kv0_1); + // 'fami:qf1' < 'fami1:'. They have commonPrefix + 4 same prefix bytes. + assertKVLessWithoutRow(c, commonLength + 4, kv0_1, kv1_0); + // 'fami:qf1' < 'fami:qf2'. They have commonPrefix + 6 same prefix bytes. + assertKVLessWithoutRow(c, commonLength + 6, kv0_1, kv0_2); + } + public void testFirstLastOnRow() { final KVComparator c = KeyValue.COMPARATOR; long ts = 1;