hbase-commits mailing list archives

From: ecl...@apache.org
Subject: [01/50] [abbrv] hbase git commit: HBASE-15287 mapreduce.RowCounter returns incorrect result with binary row key inputs (Matt Warhaftig) [Forced Update!]
Date: Mon, 25 Apr 2016 21:12:45 GMT
Repository: hbase
Updated Branches:
  refs/heads/HBASE-14850 4fa416223 -> 8afef174d (forced update)


HBASE-15287 mapreduce.RowCounter returns incorrect result with binary row key inputs (Matt Warhaftig)


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/e9211e41
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/e9211e41
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/e9211e41

Branch: refs/heads/HBASE-14850
Commit: e9211e415a208a3f16b1d6a5d9fc730824e0df92
Parents: d815211
Author: tedyu <yuzhihong@gmail.com>
Authored: Sat Apr 16 12:46:21 2016 -0700
Committer: tedyu <yuzhihong@gmail.com>
Committed: Sat Apr 16 12:46:21 2016 -0700

----------------------------------------------------------------------
 .../hadoop/hbase/mapred/GroupingTableMap.java   |  2 +-
 .../hadoop/hbase/mapreduce/CellCounter.java     |  2 +-
 .../hadoop/hbase/mapreduce/CopyTable.java       |  4 +-
 .../apache/hadoop/hbase/mapreduce/Export.java   |  6 +--
 .../hbase/mapreduce/GroupingTableMapper.java    |  2 +-
 .../hadoop/hbase/mapreduce/RowCounter.java      |  4 +-
 .../mapreduce/SimpleTotalOrderPartitioner.java  |  2 +-
 .../hbase/mapreduce/TableInputFormat.java       |  4 +-
 .../hadoop/hbase/mapreduce/TestCellCounter.java | 42 +++++++++++++++++++-
 .../hadoop/hbase/mapreduce/TestCopyTable.java   | 18 ++++-----
 .../hbase/mapreduce/TestImportExport.java       | 16 +++++++-
 .../hadoop/hbase/mapreduce/TestRowCounter.java  |  5 ++-
 12 files changed, 79 insertions(+), 28 deletions(-)
----------------------------------------------------------------------
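
The fix replaces Bytes.toBytes, which encodes its argument string verbatim as UTF-8, with Bytes.toBytesBinary, which additionally decodes \xNN escape sequences into raw bytes, so binary row keys can be supplied as command-line strings. A minimal sketch of the difference follows; the class name and main method are illustrative only and not part of the commit:

    import org.apache.hadoop.hbase.util.Bytes;

    public class ToBytesBinaryExample {
      public static void main(String[] args) {
        // toBytes() keeps the escape as literal characters:
        // '\' 'x' '0' '1' 'r' 'o' 'w' '1'  -> 8 bytes
        byte[] literal = Bytes.toBytes("\\x01row1");

        // toBytesBinary() decodes \x01 into a single 0x01 byte:
        // 0x01 'r' 'o' 'w' '1'             -> 5 bytes
        byte[] binary = Bytes.toBytesBinary("\\x01row1");

        System.out.println(literal.length); // 8
        System.out.println(binary.length);  // 5
      }
    }

That distinction is what lets the start/stop rows, prefix filters, and test row keys in the hunks below carry non-printable bytes.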


http://git-wip-us.apache.org/repos/asf/hbase/blob/e9211e41/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/GroupingTableMap.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/GroupingTableMap.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/GroupingTableMap.java
index 6cd0602..ee6da75 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/GroupingTableMap.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapred/GroupingTableMap.java
@@ -154,6 +154,6 @@ implements TableMap<ImmutableBytesWritable,Result> {
       }
       sb.append(Bytes.toString(vals[i]));
     }
-    return new ImmutableBytesWritable(Bytes.toBytes(sb.toString()));
+    return new ImmutableBytesWritable(Bytes.toBytesBinary(sb.toString()));
   }
 }

http://git-wip-us.apache.org/repos/asf/hbase/blob/e9211e41/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/CellCounter.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/CellCounter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/CellCounter.java
index 00f197c..aaa32bd 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/CellCounter.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/CellCounter.java
@@ -241,7 +241,7 @@ public class CellCounter extends Configured implements Tool {
       String regexPattern = filterCriteria.substring(1, filterCriteria.length());
       rowFilter = new RowFilter(CompareFilter.CompareOp.EQUAL, new RegexStringComparator(regexPattern));
     } else {
-      rowFilter = new PrefixFilter(Bytes.toBytes(filterCriteria));
+      rowFilter = new PrefixFilter(Bytes.toBytesBinary(filterCriteria));
     }
     return rowFilter;
   }

http://git-wip-us.apache.org/repos/asf/hbase/blob/e9211e41/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/CopyTable.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/CopyTable.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/CopyTable.java
index dd8b891..c1e8a82 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/CopyTable.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/CopyTable.java
@@ -111,11 +111,11 @@ public class CopyTable extends Configured implements Tool {
     }
 
     if (startRow != null) {
-      scan.setStartRow(Bytes.toBytes(startRow));
+      scan.setStartRow(Bytes.toBytesBinary(startRow));
     }
 
     if (stopRow != null) {
-      scan.setStopRow(Bytes.toBytes(stopRow));
+      scan.setStopRow(Bytes.toBytesBinary(stopRow));
     }
 
     if(families != null) {

http://git-wip-us.apache.org/repos/asf/hbase/blob/e9211e41/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/Export.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/Export.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/Export.java
index 66c0057..56d229a 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/Export.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/Export.java
@@ -100,10 +100,10 @@ public class Export extends Configured implements Tool {
     s.setCacheBlocks(false);
     // set Start and Stop row
     if (conf.get(TableInputFormat.SCAN_ROW_START) != null) {
-      s.setStartRow(Bytes.toBytes(conf.get(TableInputFormat.SCAN_ROW_START)));
+      s.setStartRow(Bytes.toBytesBinary(conf.get(TableInputFormat.SCAN_ROW_START)));
     }
     if (conf.get(TableInputFormat.SCAN_ROW_STOP) != null) {
-      s.setStopRow(Bytes.toBytes(conf.get(TableInputFormat.SCAN_ROW_STOP)));
+      s.setStopRow(Bytes.toBytesBinary(conf.get(TableInputFormat.SCAN_ROW_STOP)));
     }
     // Set Scan Column Family
     boolean raw = Boolean.parseBoolean(conf.get(RAW_SCAN));
@@ -142,7 +142,7 @@ public class Export extends Configured implements Tool {
       String regexPattern = filterCriteria.substring(1, filterCriteria.length());
       exportFilter = new RowFilter(CompareOp.EQUAL, new RegexStringComparator(regexPattern));
     } else {
-      exportFilter = new PrefixFilter(Bytes.toBytes(filterCriteria));
+      exportFilter = new PrefixFilter(Bytes.toBytesBinary(filterCriteria));
     }
     return exportFilter;
   }

http://git-wip-us.apache.org/repos/asf/hbase/blob/e9211e41/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/GroupingTableMapper.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/GroupingTableMapper.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/GroupingTableMapper.java
index e9c8927..8a9fa49 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/GroupingTableMapper.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/GroupingTableMapper.java
@@ -145,7 +145,7 @@ extends TableMapper<ImmutableBytesWritable,Result> implements Configurable {
       }
       sb.append(Bytes.toString(vals[i]));
     }
-    return new ImmutableBytesWritable(Bytes.toBytes(sb.toString()));
+    return new ImmutableBytesWritable(Bytes.toBytesBinary(sb.toString()));
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/hbase/blob/e9211e41/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java
index 8522a61..720d4b1 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java
@@ -147,10 +147,10 @@ public class RowCounter extends Configured implements Tool {
     Scan scan = new Scan();
     scan.setCacheBlocks(false);
     if (startKey != null && !startKey.equals("")) {
-      scan.setStartRow(Bytes.toBytes(startKey));
+      scan.setStartRow(Bytes.toBytesBinary(startKey));
     }
     if (endKey != null && !endKey.equals("")) {
-      scan.setStopRow(Bytes.toBytes(endKey));
+      scan.setStopRow(Bytes.toBytesBinary(endKey));
     }
     if (sb.length() > 0) {
       for (String columnName : sb.toString().trim().split(" ")) {
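
With this hunk, the startKey/endKey strings taken from RowCounter's --range option are interpreted with the same \xNN escaping. The sketch below approximates the Scan that an invocation like --range=\x00rov,\x00rox (as used in TestRowCounter further down) now produces; the wrapper class and method are hypothetical and shown only to illustrate the parsing:

    import org.apache.hadoop.hbase.client.Scan;
    import org.apache.hadoop.hbase.util.Bytes;

    public class BinaryRangeScanSketch {
      // Approximates what RowCounter builds for: --range=\x00rov,\x00rox
      static Scan buildScan() {
        Scan scan = new Scan();
        scan.setCacheBlocks(false);
        scan.setStartRow(Bytes.toBytesBinary("\\x00rov")); // 0x00 'r' 'o' 'v'
        scan.setStopRow(Bytes.toBytesBinary("\\x00rox"));  // 0x00 'r' 'o' 'x'
        return scan;
      }
    }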

http://git-wip-us.apache.org/repos/asf/hbase/blob/e9211e41/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/SimpleTotalOrderPartitioner.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/SimpleTotalOrderPartitioner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/SimpleTotalOrderPartitioner.java
index fe967ff..2257054 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/SimpleTotalOrderPartitioner.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/SimpleTotalOrderPartitioner.java
@@ -94,7 +94,7 @@ implements Configurable {
     }
     LOG.warn("Using deprecated configuration " + deprecatedKey +
         " - please use static accessor methods instead.");
-    return Bytes.toBytes(oldStyleVal);
+    return Bytes.toBytesBinary(oldStyleVal);
   }
   
   @Override

http://git-wip-us.apache.org/repos/asf/hbase/blob/e9211e41/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormat.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormat.java
index 814d82c..ebeb158 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormat.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableInputFormat.java
@@ -129,11 +129,11 @@ implements Configurable {
         scan = new Scan();
 
         if (conf.get(SCAN_ROW_START) != null) {
-          scan.setStartRow(Bytes.toBytes(conf.get(SCAN_ROW_START)));
+          scan.setStartRow(Bytes.toBytesBinary(conf.get(SCAN_ROW_START)));
         }
 
         if (conf.get(SCAN_ROW_STOP) != null) {
-          scan.setStopRow(Bytes.toBytes(conf.get(SCAN_ROW_STOP)));
+          scan.setStopRow(Bytes.toBytesBinary(conf.get(SCAN_ROW_STOP)));
         }
 
         if (conf.get(SCAN_COLUMNS) != null) {

http://git-wip-us.apache.org/repos/asf/hbase/blob/e9211e41/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCellCounter.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCellCounter.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCellCounter.java
index bd2f82a..60e9b33 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCellCounter.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCellCounter.java
@@ -47,8 +47,8 @@ import static org.junit.Assert.fail;
 @Category({MapReduceTests.class, LargeTests.class})
 public class TestCellCounter {
   private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
-  private static final byte[] ROW1 = Bytes.toBytes("row1");
-  private static final byte[] ROW2 = Bytes.toBytes("row2");
+  private static final byte[] ROW1 = Bytes.toBytesBinary("\\x01row1");
+  private static final byte[] ROW2 = Bytes.toBytesBinary("\\x01row2");
   private static final String FAMILY_A_STRING = "a";
   private static final String FAMILY_B_STRING = "b";
   private static final byte[] FAMILY_A = Bytes.toBytes(FAMILY_A_STRING);
@@ -112,6 +112,44 @@ public class TestCellCounter {
   }
 
   /**
+   * Test CellCounter all data should print to output
+   */
+  @Test(timeout = 300000)
+  public void testCellCounterPrefix() throws Exception {
+    TableName sourceTable = TableName.valueOf("testCellCounterPrefix");
+    byte[][] families = { FAMILY_A, FAMILY_B };
+    Table t = UTIL.createTable(sourceTable, families);
+    try {
+      Put p = new Put(ROW1);
+      p.addColumn(FAMILY_A, QUALIFIER, now, Bytes.toBytes("Data11"));
+      p.addColumn(FAMILY_B, QUALIFIER, now + 1, Bytes.toBytes("Data12"));
+      p.addColumn(FAMILY_A, QUALIFIER, now + 2, Bytes.toBytes("Data13"));
+      t.put(p);
+      p = new Put(ROW2);
+      p.addColumn(FAMILY_B, QUALIFIER, now, Bytes.toBytes("Dat21"));
+      p.addColumn(FAMILY_A, QUALIFIER, now + 1, Bytes.toBytes("Data22"));
+      p.addColumn(FAMILY_B, QUALIFIER, now + 2, Bytes.toBytes("Data23"));
+      t.put(p);
+      String[] args = { sourceTable.getNameAsString(), FQ_OUTPUT_DIR.toString(), ";", "\\x01row1" };
+      runCount(args);
+      FileInputStream inputStream =
+          new FileInputStream(OUTPUT_DIR + File.separator + "part-r-00000");
+      String data = IOUtils.toString(inputStream);
+      inputStream.close();
+      assertTrue(data.contains("Total Families Across all Rows" + "\t" + "2"));
+      assertTrue(data.contains("Total Qualifiers across all Rows" + "\t" + "2"));
+      assertTrue(data.contains("Total ROWS" + "\t" + "1"));
+      assertTrue(data.contains("b;q" + "\t" + "1"));
+      assertTrue(data.contains("a;q" + "\t" + "1"));
+      assertTrue(data.contains("row1;a;q_Versions" + "\t" + "1"));
+      assertTrue(data.contains("row1;b;q_Versions" + "\t" + "1"));
+    } finally {
+      t.close();
+      FileUtil.fullyDelete(new File(OUTPUT_DIR));
+    }
+  }
+
+  /**
    * Test CellCounter with time range all data should print to output
    */
   @Test (timeout=300000)

http://git-wip-us.apache.org/repos/asf/hbase/blob/e9211e41/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCopyTable.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCopyTable.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCopyTable.java
index 628ca08..a57b394 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCopyTable.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestCopyTable.java
@@ -119,7 +119,7 @@ public class TestCopyTable {
   public void testCopyTable() throws Exception {
     doCopyTableTest(false);
   }
-  
+
   /**
    * Simple end-to-end test with bulkload.
    */
@@ -127,16 +127,16 @@ public class TestCopyTable {
   public void testCopyTableWithBulkload() throws Exception {
     doCopyTableTest(true);
   }
-  
+
   @Test
   public void testStartStopRow() throws Exception {
     final TableName TABLENAME1 = TableName.valueOf("testStartStopRow1");
     final TableName TABLENAME2 = TableName.valueOf("testStartStopRow2");
     final byte[] FAMILY = Bytes.toBytes("family");
     final byte[] COLUMN1 = Bytes.toBytes("c1");
-    final byte[] ROW0 = Bytes.toBytes("row0");
-    final byte[] ROW1 = Bytes.toBytes("row1");
-    final byte[] ROW2 = Bytes.toBytes("row2");
+    final byte[] ROW0 = Bytes.toBytesBinary("\\x01row0");
+    final byte[] ROW1 = Bytes.toBytesBinary("\\x01row1");
+    final byte[] ROW2 = Bytes.toBytesBinary("\\x01row2");
 
     Table t1 = TEST_UTIL.createTable(TABLENAME1, FAMILY);
     Table t2 = TEST_UTIL.createTable(TABLENAME2, FAMILY);
@@ -156,8 +156,8 @@ public class TestCopyTable {
     assertEquals(
       0,
       ToolRunner.run(new Configuration(TEST_UTIL.getConfiguration()),
-          copy, new String[] { "--new.name=" + TABLENAME2, "--startrow=row1",
-          "--stoprow=row2", TABLENAME1.getNameAsString() }));
+        copy, new String[] { "--new.name=" + TABLENAME2, "--startrow=\\x01row1",
+            "--stoprow=\\x01row2", TABLENAME1.getNameAsString() }));
 
     // verify the data was copied into table 2
     // row1 exist, row0, row2 do not exist
@@ -169,11 +169,11 @@ public class TestCopyTable {
     g = new Get(ROW0);
     r = t2.get(g);
     assertEquals(0, r.size());
-    
+
     g = new Get(ROW2);
     r = t2.get(g);
     assertEquals(0, r.size());
-    
+
     t1.close();
     t2.close();
     TEST_UTIL.deleteTable(TABLENAME1);

http://git-wip-us.apache.org/repos/asf/hbase/blob/e9211e41/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportExport.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportExport.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportExport.java
index 094fe1c..50146fd 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportExport.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportExport.java
@@ -93,8 +93,9 @@ import org.mockito.stubbing.Answer;
 public class TestImportExport {
   private static final Log LOG = LogFactory.getLog(TestImportExport.class);
   private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
-  private static final byte[] ROW1 = Bytes.toBytes("row1");
-  private static final byte[] ROW2 = Bytes.toBytes("row2");
+  private static final byte[] ROW1 = Bytes.toBytesBinary("\\x32row1");
+  private static final byte[] ROW2 = Bytes.toBytesBinary("\\x32row2");
+  private static final byte[] ROW3 = Bytes.toBytesBinary("\\x32row3");
   private static final String FAMILYA_STRING = "a";
   private static final String FAMILYB_STRING = "b";
   private static final byte[] FAMILYA = Bytes.toBytes(FAMILYA_STRING);
@@ -181,9 +182,17 @@ public class TestImportExport {
       p.addColumn(FAMILYA, QUAL, now + 1, QUAL);
       p.addColumn(FAMILYA, QUAL, now + 2, QUAL);
       t.put(p);
+      p = new Put(ROW3);
+      p.addColumn(FAMILYA, QUAL, now, QUAL);
+      p.addColumn(FAMILYA, QUAL, now + 1, QUAL);
+      p.addColumn(FAMILYA, QUAL, now + 2, QUAL);
+      t.put(p);
     }
 
       String[] args = new String[] {
+          // Only export row1 & row2.
+          "-D" + TableInputFormat.SCAN_ROW_START + "=\\x32row1",
+          "-D" + TableInputFormat.SCAN_ROW_STOP + "=\\x32row3",
           EXPORT_TABLE,
           FQ_OUTPUT_DIR,
           "1000", // max number of key versions per key to export
@@ -207,6 +216,9 @@ public class TestImportExport {
         g.setMaxVersions();
         r = t.get(g);
         assertEquals(3, r.size());
+        g = new Get(ROW3);
+        r = t.get(g);
+        assertEquals(0, r.size());
       }
   }
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/e9211e41/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java
index 6657d0f..4a719dc 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRowCounter.java
@@ -149,7 +149,7 @@ public class TestRowCounter {
   @Test
   public void testRowCounterColumnAndRowRange() throws Exception {
     String[] args = new String[] {
-            TABLE_NAME, "--range=rov,rox", COL_FAM + ":" + COL1
+            TABLE_NAME, "--range=\\x00rov,\\x00rox", COL_FAM + ":" + COL1
     };
     runRowCount(args, 8);
   }
@@ -245,7 +245,8 @@ public class TestRowCounter {
     // write few rows with two columns
     int i = 0;
     for (; i < TOTAL_ROWS - ROWS_WITH_ONE_COL; i++) {
-      byte[] row = Bytes.toBytes("row" + i);
+      // Use binary rows values to test for HBASE-15287.
+      byte[] row = Bytes.toBytesBinary("\\x00row" + i);
       Put put = new Put(row);
       put.addColumn(family, col1, value);
       put.addColumn(family, col2, value);

