hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From te...@apache.org
Subject hbase git commit: HBASE-16011 TableSnapshotScanner and TableSnapshotInputFormat can produce duplicate rows if split table.
Date Fri, 26 May 2017 14:34:45 GMT
Repository: hbase
Updated Branches:
  refs/heads/branch-1.2 cb136d8d2 -> 13efd4118


HBASE-16011 TableSnapshotScanner and TableSnapshotInputFormat can produce duplicate rows if
split table.

Signed-off-by: tedyu <yuzhihong@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/13efd411
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/13efd411
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/13efd411

Branch: refs/heads/branch-1.2
Commit: 13efd41188fb722f8ff0bcb9af637c9a4f99c1d3
Parents: cb136d8
Author: huzheng <openinx@gmail.com>
Authored: Wed May 24 20:31:57 2017 +0800
Committer: tedyu <yuzhihong@gmail.com>
Committed: Fri May 26 07:34:36 2017 -0700

----------------------------------------------------------------------
 .../hbase/client/TableSnapshotScanner.java      |  3 +
 .../mapreduce/TableSnapshotInputFormatImpl.java |  3 +
 .../hbase/client/TestTableSnapshotScanner.java  | 50 ++++++++++++++
 .../mapreduce/TestTableSnapshotInputFormat.java | 70 ++++++++++++++++++++
 .../hbase/snapshot/SnapshotTestingUtils.java    | 13 +++-
 5 files changed, 138 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/13efd411/hbase-server/src/main/java/org/apache/hadoop/hbase/client/TableSnapshotScanner.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/client/TableSnapshotScanner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/client/TableSnapshotScanner.java
index 4601ae4..7f8d6a7 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/client/TableSnapshotScanner.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/client/TableSnapshotScanner.java
@@ -129,6 +129,9 @@ public class TableSnapshotScanner extends AbstractClientScanner {
     htd = meta.getTableDescriptor();
     regions = new ArrayList<HRegionInfo>(restoredRegions.size());
     for (HRegionInfo hri: restoredRegions) {
+      if (hri.isOffline() && (hri.isSplit() || hri.isSplitParent())) {
+        continue;
+      }
       if (CellUtil.overlappingKeys(scan.getStartRow(), scan.getStopRow(),
           hri.getStartKey(), hri.getEndKey())) {
         regions.add(hri);

http://git-wip-us.apache.org/repos/asf/hbase/blob/13efd411/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormatImpl.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormatImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormatImpl.java
index 75c6fc5..c182556 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormatImpl.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/TableSnapshotInputFormatImpl.java
@@ -313,6 +313,9 @@ public class TableSnapshotInputFormatImpl {
     List<InputSplit> splits = new ArrayList<InputSplit>();
     for (HRegionInfo hri : regionManifests) {
       // load region descriptor
+      if (hri.isOffline() && (hri.isSplit() || hri.isSplitParent())) {
+        continue;
+      }
 
       if (CellUtil.overlappingKeys(scan.getStartRow(), scan.getStopRow(), hri.getStartKey(),
           hri.getEndKey())) {

http://git-wip-us.apache.org/repos/asf/hbase/blob/13efd411/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestTableSnapshotScanner.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestTableSnapshotScanner.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestTableSnapshotScanner.java
index 0e2b670..5b23a57 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestTableSnapshotScanner.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestTableSnapshotScanner.java
@@ -110,6 +110,56 @@ public class TestTableSnapshotScanner {
   }
 
   @Test
+  public void testNoDuplicateResultsWhenSplitting() throws Exception {
+    setupCluster();
+    TableName tableName = TableName.valueOf("testNoDuplicateResultsWhenSplitting");
+    String snapshotName = "testSnapshotBug";
+    try {
+      if (UTIL.getHBaseAdmin().tableExists(tableName)) {
+        UTIL.deleteTable(tableName);
+      }
+
+      UTIL.createTable(tableName, FAMILIES);
+      Admin admin = UTIL.getHBaseAdmin();
+
+      // put some stuff in the table
+      Table table = UTIL.getConnection().getTable(tableName);
+      UTIL.loadTable(table, FAMILIES);
+
+      // split to 2 regions
+      admin.split(tableName, Bytes.toBytes("eee"));
+      TestTableSnapshotInputFormat.blockUntilSplitFinished(UTIL, tableName, 2);
+
+      Path rootDir = FSUtils.getRootDir(UTIL.getConfiguration());
+      FileSystem fs = rootDir.getFileSystem(UTIL.getConfiguration());
+
+      SnapshotTestingUtils.createSnapshotAndValidate(admin, tableName,
+        Arrays.asList(FAMILIES), null, snapshotName, rootDir, fs, true);
+
+      // load different values
+      byte[] value = Bytes.toBytes("after_snapshot_value");
+      UTIL.loadTable(table, FAMILIES, value);
+
+      // cause flush to create new files in the region
+      admin.flush(tableName);
+      table.close();
+
+      Path restoreDir = UTIL.getDataTestDirOnTestFS(snapshotName);
+      Scan scan = new Scan().setStartRow(bbb).setStopRow(yyy); // limit the scan
+
+      TableSnapshotScanner scanner =
+          new TableSnapshotScanner(UTIL.getConfiguration(), restoreDir, snapshotName, scan);
+
+      verifyScanner(scanner, bbb, yyy);
+      scanner.close();
+    } finally {
+      UTIL.getHBaseAdmin().deleteSnapshot(snapshotName);
+      UTIL.deleteTable(tableName);
+      tearDownCluster();
+    }
+  }
+
+  @Test
   public void testWithSingleRegion() throws Exception {
     testScanner(UTIL, "testWithSingleRegion", 1, false);
   }

http://git-wip-us.apache.org/repos/asf/hbase/blob/13efd411/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSnapshotInputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSnapshotInputFormat.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSnapshotInputFormat.java
index 1b80590..b88e25d 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSnapshotInputFormat.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestTableSnapshotInputFormat.java
@@ -22,14 +22,20 @@ import static org.mockito.Mockito.mock;
 import static org.mockito.Mockito.when;
 
 import java.io.IOException;
+import java.util.Arrays;
 import java.util.List;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.CategoryBasedTimeout;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HDFSBlocksDistribution;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils;
 import org.apache.hadoop.hbase.testclassification.LargeTests;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.client.Result;
@@ -37,6 +43,7 @@ import org.apache.hadoop.hbase.client.Scan;
 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
 import org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat.TableSnapshotRegionSplit;
 import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.FSUtils;
 import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.mapreduce.InputSplit;
 import org.apache.hadoop.mapreduce.Job;
@@ -212,6 +219,69 @@ public class TestTableSnapshotInputFormat extends TableSnapshotInputFormatTestBa
     }
   }
 
+  public static void blockUntilSplitFinished(HBaseTestingUtility util, TableName tableName,
+      int expectedRegionSize) throws Exception {
+    for (int i = 0; i < 100; i++) {
+      List<HRegionInfo> hRegionInfoList = util.getHBaseAdmin().getTableRegions(tableName);
+      if (hRegionInfoList.size() >= expectedRegionSize) {
+        break;
+      }
+      Thread.sleep(1000);
+    }
+  }
+
+  @Test
+  public void testNoDuplicateResultsWhenSplitting() throws Exception {
+    setupCluster();
+    TableName tableName = TableName.valueOf("testNoDuplicateResultsWhenSplitting");
+    String snapshotName = "testSnapshotBug";
+    try {
+      if (UTIL.getHBaseAdmin().tableExists(tableName)) {
+        UTIL.deleteTable(tableName);
+      }
+
+      UTIL.createTable(tableName, FAMILIES);
+      HBaseAdmin admin = UTIL.getHBaseAdmin();
+
+      // put some stuff in the table
+      Table table = UTIL.getConnection().getTable(tableName);
+      UTIL.loadTable(table, FAMILIES);
+
+      // split to 2 regions
+      admin.split(tableName, Bytes.toBytes("eee"));
+      blockUntilSplitFinished(UTIL, tableName, 2);
+
+      Path rootDir = FSUtils.getRootDir(UTIL.getConfiguration());
+      FileSystem fs = rootDir.getFileSystem(UTIL.getConfiguration());
+
+      SnapshotTestingUtils.createSnapshotAndValidate(admin, tableName, Arrays.asList(FAMILIES),
+        null, snapshotName, rootDir, fs, true);
+
+      // load different values
+      byte[] value = Bytes.toBytes("after_snapshot_value");
+      UTIL.loadTable(table, FAMILIES, value);
+
+      // cause flush to create new files in the region
+      admin.flush(tableName);
+      table.close();
+
+      Job job = new Job(UTIL.getConfiguration());
+      Path tmpTableDir = UTIL.getRandomDir();
+      // limit the scan
+      Scan scan = new Scan().setStartRow(getStartRow()).setStopRow(getEndRow());
+
+      TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName, scan,
+        TestTableSnapshotMapper.class, ImmutableBytesWritable.class, NullWritable.class, job, false,
+        tmpTableDir);
+
+      verifyWithMockedMapReduce(job, 2, 2, getStartRow(), getEndRow());
+    } finally {
+      UTIL.getHBaseAdmin().deleteSnapshot(snapshotName);
+      UTIL.deleteTable(tableName);
+      tearDownCluster();
+    }
+  }
+
   private void verifyWithMockedMapReduce(Job job, int numRegions, int expectedNumSplits,
       byte[] startRow, byte[] stopRow)
       throws IOException, InterruptedException {

http://git-wip-us.apache.org/repos/asf/hbase/blob/13efd411/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/SnapshotTestingUtils.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/SnapshotTestingUtils.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/SnapshotTestingUtils.java
index 1e67718..e73ad12 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/SnapshotTestingUtils.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/SnapshotTestingUtils.java
@@ -245,7 +245,18 @@ public final class SnapshotTestingUtils {
     List<HRegionInfo> regions = admin.getTableRegions(tableName);
     // remove the non-default regions
     RegionReplicaUtil.removeNonDefaultRegions(regions);
-    assertEquals(regions.size(), regionManifests.size());
+
+    // If a snapshot is taken while the table is splitting, the split parent region is also
+    // included in the snapshot region manifest, so exclude parent regions from the count.
+    int regionCountExclusiveSplitParent = 0;
+    for (SnapshotRegionManifest snapshotRegionManifest : regionManifests.values()) {
+      HRegionInfo hri = HRegionInfo.convert(snapshotRegionManifest.getRegionInfo());
+      if (hri.isOffline() && (hri.isSplit() || hri.isSplitParent())) {
+        continue;
+      }
+      regionCountExclusiveSplitParent++;
+    }
+    assertEquals(regions.size(), regionCountExclusiveSplitParent);
 
     // Verify Regions (redundant check, see MasterSnapshotVerifier)
     for (HRegionInfo info : regions) {


Mime
View raw message