hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From nspiegelb...@apache.org
Subject svn commit: r1189285 - in /hbase/trunk: CHANGES.txt src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestWALReplay.java
Date Wed, 26 Oct 2011 16:11:11 GMT
Author: nspiegelberg
Date: Wed Oct 26 16:11:11 2011
New Revision: 1189285

URL: http://svn.apache.org/viewvc?rev=1189285&view=rev
Log:
HBASE-4645 Edits Log recovery losing data across column families

Modified:
    hbase/trunk/CHANGES.txt
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
    hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestWALReplay.java

Modified: hbase/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hbase/trunk/CHANGES.txt?rev=1189285&r1=1189284&r2=1189285&view=diff
==============================================================================
--- hbase/trunk/CHANGES.txt (original)
+++ hbase/trunk/CHANGES.txt Wed Oct 26 16:11:11 2011
@@ -403,6 +403,7 @@ Release 0.92.0 - Unreleased
    HBASE-4670  Fix javadoc warnings
    HBASE-4367  Deadlock in MemStore flusher due to JDK internally synchronizing
                on current thread
+   HBASE-4645  Edits Log recovery losing data across column families
 
   TESTS
    HBASE-4450  test for number of blocks read: to serve as baseline for expected

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java?rev=1189285&r1=1189284&r2=1189285&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java Wed Oct 26 16:11:11 2011
@@ -447,20 +447,39 @@ public class HRegion implements HeapSize
     status.setStatus("Cleaning up temporary data from old regions");
     cleanupTmpDir();
 
-    // Load in all the HStores.  Get maximum seqid.
+    // Load in all the HStores.
+    // Get minimum of the maxSeqId across all the store.
+    //
+    // Context: During replay we want to ensure that we do not lose any data. So, we
+    // have to be conservative in how we replay logs. For each store, we calculate
+    // the maxSeqId up to which the store was flushed. But, since different stores
+    // could have a different maxSeqId, we choose the
+    // minimum across all the stores.
+    // This could potentially result in duplication of data for stores that are ahead
+    // of others. ColumnTrackers in the ScanQueryMatchers do the de-duplication, so we
+    // do not have to worry.
+    // TODO: If there is a store that was never flushed in a long time, we could replay
+    // a lot of data. Currently, this is not a problem because we flush all the stores at
+    // the same time. If we move to per-cf flushing, we might want to revisit this and send
+    // in a vector of maxSeqIds instead of sending in a single number, which has to be the
+    // min across all the max.
+    long minSeqId = -1;
     long maxSeqId = -1;
     for (HColumnDescriptor c : this.htableDescriptor.getFamilies()) {
       status.setStatus("Instantiating store for column family " + c);
       Store store = instantiateHStore(this.tableDir, c);
       this.stores.put(c.getName(), store);
       long storeSeqId = store.getMaxSequenceId();
-      if (storeSeqId > maxSeqId) {
+      if (minSeqId == -1 || storeSeqId < minSeqId) {
+        minSeqId = storeSeqId;
+      }
+      if (maxSeqId == -1 || storeSeqId > maxSeqId) {
         maxSeqId = storeSeqId;
       }
     }
     // Recover any edits if available.
-    maxSeqId = replayRecoveredEditsIfAny(
-        this.regiondir, maxSeqId, reporter, status);
+    maxSeqId = Math.max(maxSeqId, replayRecoveredEditsIfAny(
+        this.regiondir, minSeqId, reporter, status));
 
     status.setStatus("Cleaning up detritus from prior splits");
     // Get rid of any splits or merges that were lost in-progress.  Clean out

Modified: hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestWALReplay.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestWALReplay.java?rev=1189285&r1=1189284&r2=1189285&view=diff
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestWALReplay.java (original)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestWALReplay.java Wed Oct 26 16:11:11 2011
@@ -276,24 +276,20 @@ public class TestWALReplay {
     Result result = region.get(g, null);
     assertEquals(countPerFamily * htd.getFamilies().size(),
       result.size());
-    // Now close the region, split the log, reopen the region and assert that
-    // replay of log has no effect, that our seqids are calculated correctly so
+    // Now close the region (without flush), split the log, reopen the region and assert that
+    // replay of log has the correct effect, that our seqids are calculated correctly so
     // all edits in logs are seen as 'stale'/old.
-    region.close();
+    region.close(true);
     wal.close();
     runWALSplit(this.conf);
     HLog wal2 = createWAL(this.conf);
-    HRegion region2 = new HRegion(basedir, wal2, this.fs, this.conf, hri, htd, null) {
-      @Override
-      protected boolean restoreEdit(Store s, KeyValue kv) {
-        super.restoreEdit(s, kv);
-        throw new RuntimeException("Called when it should not have been!");
-      }
-    };
+    HRegion region2 = new HRegion(basedir, wal2, this.fs, this.conf, hri, htd, null);
     long seqid2 = region2.initialize();
     // HRegionServer usually does this. It knows the largest seqid across all regions.
     wal2.setSequenceNumber(seqid2);
     assertTrue(seqid + result.size() < seqid2);
+    final Result result1b = region2.get(g, null);
+    assertEquals(result.size(), result1b.size());
 
     // Next test.  Add more edits, then 'crash' this region by stealing its wal
     // out from under it and assert that replay of the log adds the edits back
@@ -344,6 +340,88 @@ public class TestWALReplay {
   }
 
   /**
+   * Test that we recover correctly when there is a failure in between the
+   * flushes. i.e. Some stores got flushed but others did not.
+   *
+   * Unfortunately, there is no easy hook to flush at a store level. The way
+   * we get around this is by flushing at the region level, and then deleting
+   * the recently flushed store file for one of the Stores. This would put us
+   * back in the situation where all but that store got flushed and the region
+   * died.
+   *
+   * We restart Region again, and verify that the edits were replayed.
+   *
+   * @throws IOException
+   * @throws IllegalAccessException
+   * @throws NoSuchFieldException
+   * @throws IllegalArgumentException
+   * @throws SecurityException
+   */
+  @Test
+  public void testReplayEditsAfterPartialFlush()
+  throws IOException, SecurityException, IllegalArgumentException,
+      NoSuchFieldException, IllegalAccessException, InterruptedException {
+    final String tableNameStr = "testReplayEditsWrittenViaHRegion";
+    final HRegionInfo hri = createBasic3FamilyHRegionInfo(tableNameStr);
+    final Path basedir = new Path(this.hbaseRootDir, tableNameStr);
+    deleteDir(basedir);
+    final byte[] rowName = Bytes.toBytes(tableNameStr);
+    final int countPerFamily = 10;
+    final HTableDescriptor htd = createBasic3FamilyHTD(tableNameStr);
+    HRegion region3 = HRegion.createHRegion(hri,
+            hbaseRootDir, this.conf, htd);
+
+    // Write countPerFamily edits into the three families.  Do a flush on one
+    // of the families during the load of edits so its seqid is not same as
+    // others to test we do right thing when different seqids.
+    HLog wal = createWAL(this.conf);
+    HRegion region = new HRegion(basedir, wal, this.fs, this.conf, hri, htd, null);
+    long seqid = region.initialize();
+    // HRegionServer usually does this. It knows the largest seqid across all regions.
+    wal.setSequenceNumber(seqid);
+    for (HColumnDescriptor hcd: htd.getFamilies()) {
+      addRegionEdits(rowName, hcd.getName(), countPerFamily, this.ee, region, "x");
+    }
+
+    // Now assert edits made it in.
+    final Get g = new Get(rowName);
+    Result result = region.get(g, null);
+    assertEquals(countPerFamily * htd.getFamilies().size(),
+      result.size());
+
+    // Let us flush the region
+    region.flushcache();
+    region.close(true);
+    wal.close();
+
+    // delete the store files in the second column family to simulate a failure
+    // in between the flushcache();
+    // we have 3 families. killing the middle one ensures that taking the maximum
+    // will make us fail.
+    int cf_count = 0;
+    for (HColumnDescriptor hcd: htd.getFamilies()) {
+      cf_count++;
+      if (cf_count == 2) {
+        this.fs.delete(new Path(region.getRegionDir(), Bytes.toString(hcd.getName()))
+            , true);
+      }
+    }
+
+
+    // Let us try to split and recover
+    runWALSplit(this.conf);
+    HLog wal2 = createWAL(this.conf);
+    HRegion region2 = new HRegion(basedir, wal2, this.fs, this.conf, hri, htd, null);
+    long seqid2 = region2.initialize();
+    // HRegionServer usually does this. It knows the largest seqid across all regions.
+    wal2.setSequenceNumber(seqid2);
+    assertTrue(seqid + result.size() < seqid2);
+
+    final Result result1b = region2.get(g, null);
+    assertEquals(result.size(), result1b.size());
+  }
+
+  /**
    * Create an HRegion with the result of a HLog split and test we only see the
    * good edits
    * @throws Exception



Mime
View raw message