hbase-commits mailing list archives

From te...@apache.org
Subject hbase git commit: HBASE-17850 Backup system repair utility (Vladimir Rodionov)
Date Mon, 22 May 2017 23:26:55 GMT
Repository: hbase
Updated Branches:
  refs/heads/master f1a999032 -> 28d619b22


HBASE-17850 Backup system repair utility (Vladimir Rodionov)


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/28d619b2
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/28d619b2
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/28d619b2

Branch: refs/heads/master
Commit: 28d619b22bec1b93259a96bfaba3ec1aecb026a2
Parents: f1a9990
Author: tedyu <yuzhihong@gmail.com>
Authored: Mon May 22 16:25:59 2017 -0700
Committer: tedyu <yuzhihong@gmail.com>
Committed: Mon May 22 16:25:59 2017 -0700

----------------------------------------------------------------------
 .../hadoop/hbase/backup/BackupDriver.java       |   2 +
 .../apache/hadoop/hbase/backup/BackupInfo.java  |   2 +-
 .../hbase/backup/BackupRestoreConstants.java    |   2 +-
 .../hbase/backup/impl/BackupAdminImpl.java      |   4 +-
 .../hbase/backup/impl/BackupCommands.java       |  70 +++++++
 .../impl/IncrementalTableBackupClient.java      |   5 +-
 .../hbase/backup/impl/TableBackupClient.java    |  24 ++-
 .../hadoop/hbase/backup/TestBackupBase.java     | 184 +++++++++++++++++++
 .../hadoop/hbase/backup/TestBackupRepair.java   |  91 +++++++++
 .../backup/TestFullBackupWithFailures.java      | 141 +-------------
 .../TestIncrementalBackupWithFailures.java      | 161 ++++++++++++++++
 11 files changed, 544 insertions(+), 142 deletions(-)
----------------------------------------------------------------------
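
The patch adds a "repair" subcommand to the backup CLI. For context, a minimal
sketch of driving it programmatically, the same way the new tests below exercise
it (assumes an HBase client classpath and a reachable cluster; the wrapper class
is hypothetical):

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.hbase.HBaseConfiguration;
  import org.apache.hadoop.hbase.backup.BackupDriver;
  import org.apache.hadoop.util.ToolRunner;

  public class BackupRepairRunner {
    public static void main(String[] args) throws Exception {
      Configuration conf = HBaseConfiguration.create();
      // Equivalent to "hbase backup repair": rolls back the first backup
      // session left in RUNNING state, marking it FAILED.
      int ret = ToolRunner.run(conf, new BackupDriver(), new String[] { "repair" });
      System.exit(ret);
    }
  }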


http://git-wip-us.apache.org/repos/asf/hbase/blob/28d619b2/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/BackupDriver.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/BackupDriver.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/BackupDriver.java
index cc5cc95..e2cdb2f 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/BackupDriver.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/BackupDriver.java
@@ -109,6 +109,8 @@ public class BackupDriver extends AbstractHBaseTool {
       type = BackupCommand.PROGRESS;
     } else if (BackupCommand.SET.name().equalsIgnoreCase(cmd)) {
       type = BackupCommand.SET;
+    } else if (BackupCommand.REPAIR.name().equalsIgnoreCase(cmd)) {
+      type = BackupCommand.REPAIR;
     } else {
       System.out.println("Unsupported command for backup: " + cmd);
       printToolUsage();

http://git-wip-us.apache.org/repos/asf/hbase/blob/28d619b2/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/BackupInfo.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/BackupInfo.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/BackupInfo.java
index c7d2848..f6a1fe4 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/BackupInfo.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/BackupInfo.java
@@ -96,7 +96,7 @@ public class BackupInfo implements Comparable<BackupInfo> {
   /**
    * Backup phase
    */
-  private BackupPhase phase;
+  private BackupPhase phase = BackupPhase.REQUEST;
 
   /**
    * Backup failure message

http://git-wip-us.apache.org/repos/asf/hbase/blob/28d619b2/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/BackupRestoreConstants.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/BackupRestoreConstants.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/BackupRestoreConstants.java
index 80f022f..48e70a1 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/BackupRestoreConstants.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/BackupRestoreConstants.java
@@ -117,7 +117,7 @@ public interface BackupRestoreConstants {
 
   public static enum BackupCommand {
     CREATE, CANCEL, DELETE, DESCRIBE, HISTORY, STATUS, CONVERT, MERGE, STOP, SHOW, HELP, PROGRESS,
-    SET, SET_ADD, SET_REMOVE, SET_DELETE, SET_DESCRIBE, SET_LIST
+    SET, SET_ADD, SET_REMOVE, SET_DELETE, SET_DESCRIBE, SET_LIST, REPAIR
   }
 
 }

http://git-wip-us.apache.org/repos/asf/hbase/blob/28d619b2/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupAdminImpl.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupAdminImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupAdminImpl.java
index 3a54e20..30cabfd 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupAdminImpl.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupAdminImpl.java
@@ -93,9 +93,7 @@ public class BackupAdminImpl implements BackupAdmin {
 
   @Override
   public int deleteBackups(String[] backupIds) throws IOException {
-    // TODO: requires Fault tolerance support, failure will leave system
-    // in a non-consistent state
-    // see HBASE-15227
+
     int totalDeleted = 0;
     Map<String, HashSet<TableName>> allTablesMap = new HashMap<String, HashSet<TableName>>();
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/28d619b2/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupCommands.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupCommands.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupCommands.java
index 211a706..56ace8d 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupCommands.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupCommands.java
@@ -58,6 +58,7 @@ import org.apache.hadoop.hbase.backup.util.BackupUtils;
 import org.apache.hadoop.hbase.classification.InterfaceAudience;
 import org.apache.hadoop.hbase.client.Connection;
 import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
 
 import com.google.common.collect.Lists;
 
@@ -77,6 +78,7 @@ public final class BackupCommands  {
       + "  history    show history of all successful backups\n"
       + "  progress   show the progress of the latest backup request\n"
       + "  set        backup set management\n"
+      + "  repair     repair backup system table"
       + "Run \'hbase backup COMMAND -h\' to see help message for each command\n";
 
   public static final String CREATE_CMD_USAGE =
@@ -99,6 +101,8 @@ public final class BackupCommands  {
   public static final String DELETE_CMD_USAGE = "Usage: hbase backup delete <backup_id>\n"
       + "  backup_id       Backup image id\n";
 
+  public static final String REPAIR_CMD_USAGE = "Usage: hbase backup repair\n";
+
   public static final String CANCEL_CMD_USAGE = "Usage: hbase backup cancel <backup_id>\n"
       + "  backup_id       Backup image id\n";
 
@@ -191,6 +195,9 @@ public final class BackupCommands  {
     case SET:
       cmd = new BackupSetCommand(conf, cmdline);
       break;
+    case REPAIR:
+      cmd = new RepairCommand(conf, cmdline);
+      break;
     case HELP:
     default:
       cmd = new HelpCommand(conf, cmdline);
@@ -509,6 +516,9 @@ public final class BackupCommands  {
       try (BackupAdminImpl admin = new BackupAdminImpl(conn);) {
         int deleted = admin.deleteBackups(backupIds);
         System.out.println("Deleted " + deleted + " backups. Total requested: " + args.length);
+      } catch (IOException e) {
+        System.err.println("Delete command FAILED. Please run backup repair tool to restore
backup system integrity");
+        throw e;
       }
 
     }
@@ -519,6 +529,66 @@ public final class BackupCommands  {
     }
   }
 
+  private static class RepairCommand extends Command {
+
+    RepairCommand(Configuration conf, CommandLine cmdline) {
+      super(conf);
+      this.cmdline = cmdline;
+    }
+
+    @Override
+    public void execute() throws IOException {
+      super.execute();
+
+      String[] args = cmdline == null ? null : cmdline.getArgs();
+      if (args != null && args.length > 1) {
+        System.err.println("ERROR: wrong number of arguments: " + args.length);
+        printUsage();
+        throw new IOException(INCORRECT_USAGE);
+      }
+
+      Configuration conf = getConf() != null ? getConf() : HBaseConfiguration.create();
+      try (final Connection conn = ConnectionFactory.createConnection(conf);
+          final BackupSystemTable sysTable = new BackupSystemTable(conn);) {
+
+        // Failed backup
+        BackupInfo backupInfo;
+        List<BackupInfo> list = sysTable.getBackupInfos(BackupState.RUNNING);
+        if (list.size() == 0) {
+          // No failed sessions found
+          System.out.println("REPAIR status: no failed sessions found.");
+          return;
+        }
+        backupInfo = list.get(0);
+        // If this is a cancel exception, then we've already cleaned.
+        // set the failure timestamp of the overall backup
+        backupInfo.setCompleteTs(EnvironmentEdgeManager.currentTime());
+        // set failure message
+        backupInfo.setFailedMsg("REPAIR status: repaired after failure:\n" + backupInfo);
+        // set overall backup status: failed
+        backupInfo.setState(BackupState.FAILED);
+        // compose the backup failed data
+        String backupFailedData =
+            "BackupId=" + backupInfo.getBackupId() + ",startts=" + backupInfo.getStartTs()
+                + ",failedts=" + backupInfo.getCompleteTs() + ",failedphase="
+                + backupInfo.getPhase() + ",failedmessage=" + backupInfo.getFailedMsg();
+        System.out.println(backupFailedData);
+        TableBackupClient.cleanupAndRestoreBackupSystem(conn, backupInfo, conf);
+        // If backup session is updated to FAILED state - means we
+        // processed recovery already.
+        sysTable.updateBackupInfo(backupInfo);
+        sysTable.finishBackupSession();
+        System.out.println("REPAIR status: finished repair failed session:\n "+ backupInfo);
+
+      }
+    }
+
+    @Override
+    protected void printUsage() {
+      System.out.println(REPAIR_CMD_USAGE);
+    }
+  }
+
   // TODO Cancel command
 
   private static class CancelCommand extends Command {
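
In essence, the repair flow added above is (names from the diff; error handling
omitted — this condensation is a sketch, not the committed code):

  try (Connection conn = ConnectionFactory.createConnection(conf);
      BackupSystemTable sysTable = new BackupSystemTable(conn)) {
    List<BackupInfo> running = sysTable.getBackupInfos(BackupState.RUNNING);
    if (running.isEmpty()) {
      return; // nothing stuck, nothing to repair
    }
    BackupInfo info = running.get(0);
    info.setCompleteTs(EnvironmentEdgeManager.currentTime());
    info.setState(BackupState.FAILED);
    // Cleanup runs first; FAILED is persisted only afterwards. A session
    // already marked FAILED means recovery was processed, so it will not
    // be picked up again by a later repair run.
    TableBackupClient.cleanupAndRestoreBackupSystem(conn, info, conf);
    sysTable.updateBackupInfo(info);
    sysTable.finishBackupSession();
  }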

http://git-wip-us.apache.org/repos/asf/hbase/blob/28d619b2/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/IncrementalTableBackupClient.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/IncrementalTableBackupClient.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/IncrementalTableBackupClient.java
index eb8490a..6d48c32 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/IncrementalTableBackupClient.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/IncrementalTableBackupClient.java
@@ -39,7 +39,6 @@ import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.backup.BackupCopyJob;
 import org.apache.hadoop.hbase.backup.BackupInfo;
 import org.apache.hadoop.hbase.backup.BackupInfo.BackupPhase;
-import org.apache.hadoop.hbase.backup.BackupInfo.BackupState;
 import org.apache.hadoop.hbase.backup.BackupRequest;
 import org.apache.hadoop.hbase.backup.BackupRestoreFactory;
 import org.apache.hadoop.hbase.backup.BackupType;
@@ -64,6 +63,9 @@ import org.apache.hadoop.util.Tool;
 public class IncrementalTableBackupClient extends TableBackupClient {
   private static final Log LOG = LogFactory.getLog(IncrementalTableBackupClient.class);
 
+  protected IncrementalTableBackupClient() {
+  }
+
   public IncrementalTableBackupClient(final Connection conn, final String backupId,
       BackupRequest request) throws IOException {
     super(conn, backupId, request);
@@ -241,7 +243,6 @@ public class IncrementalTableBackupClient extends TableBackupClient {
     // set overall backup status: complete. Here we make sure to complete the backup.
     // After this checkpoint, even if entering cancel process, will let the backup finished
     try {
-      backupInfo.setState(BackupState.COMPLETE);
       // Set the previousTimestampMap which is before this current log roll to the manifest.
       HashMap<TableName, HashMap<String, Long>> previousTimestampMap =
           backupManager.readLogTimestampMap();

http://git-wip-us.apache.org/repos/asf/hbase/blob/28d619b2/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/TableBackupClient.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/TableBackupClient.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/TableBackupClient.java
index 1673e5e..4e1f277 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/TableBackupClient.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/impl/TableBackupClient.java
@@ -44,6 +44,8 @@ import org.apache.hadoop.hbase.client.SnapshotDescription;
 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
 import org.apache.hadoop.hbase.util.FSUtils;
 
+import com.google.common.annotations.VisibleForTesting;
+
 /**
  * Base class for backup operation. Concrete implementation for
  * full and incremental backup are delegated to corresponding sub-classes:
@@ -55,6 +57,9 @@ public abstract class TableBackupClient {
 
   public static final String BACKUP_CLIENT_IMPL_CLASS = "backup.client.impl.class";
 
+  @VisibleForTesting
+  public static final String BACKUP_TEST_MODE_STAGE = "backup.test.mode.stage";
+
   private static final Log LOG = LogFactory.getLog(TableBackupClient.class);
 
   protected Configuration conf;
@@ -441,7 +446,6 @@ public abstract class TableBackupClient {
     if (LOG.isDebugEnabled()) {
       LOG.debug("Backup " + backupInfo.getBackupId() + " finished: " + backupCompleteData);
     }
-    backupManager.updateBackupInfo(backupInfo);
 
     // when full backup is done:
     // - delete HBase snapshot
@@ -454,6 +458,8 @@ public abstract class TableBackupClient {
       cleanupDistCpLog(backupInfo, conf);
     }
     deleteBackupTableSnapshot(conn, conf);
+    backupManager.updateBackupInfo(backupInfo);
+
     // Finish active session
     backupManager.finishBackupSession();
 
@@ -466,4 +472,20 @@ public abstract class TableBackupClient {
    */
   public abstract void execute() throws IOException;
 
+  @VisibleForTesting
+  protected Stage getTestStage() {
+    return Stage.valueOf("stage_"+ conf.getInt(BACKUP_TEST_MODE_STAGE, 0));
+  }
+
+  @VisibleForTesting
+  protected void failStageIf(Stage stage) throws IOException {
+    Stage current = getTestStage();
+    if (current == stage) {
+      throw new IOException("Failed stage " + stage+" in testing");
+    }
+  }
+
+  public static enum Stage {
+    stage_0, stage_1, stage_2, stage_3, stage_4
+  }
 }
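
The Stage hook above gives tests deterministic failure injection: a subclass
calls failStageIf() at each checkpoint, and the stage to fail at is read from
the configuration. A minimal sketch of arming it (constant and enum names from
the diff; the example class is hypothetical):

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.hbase.HBaseConfiguration;
  import org.apache.hadoop.hbase.backup.impl.TableBackupClient;

  public class StageFailureExample {
    public static void main(String[] args) {
      Configuration conf = HBaseConfiguration.create();
      // A backup client built from this conf throws an IOException when
      // execution reaches failStageIf(Stage.stage_2), leaving a RUNNING
      // session behind for "hbase backup repair" to clean up.
      conf.setInt(TableBackupClient.BACKUP_TEST_MODE_STAGE, 2);
    }
  }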

http://git-wip-us.apache.org/repos/asf/hbase/blob/28d619b2/hbase-server/src/test/java/org/apache/hadoop/hbase/backup/TestBackupBase.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/backup/TestBackupBase.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/backup/TestBackupBase.java
index e6bd73e..b8006d7 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/backup/TestBackupBase.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/backup/TestBackupBase.java
@@ -20,8 +20,10 @@ package org.apache.hadoop.hbase.backup;
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
+import java.util.Map;
 import java.util.Map.Entry;
 
 import org.apache.commons.logging.Log;
@@ -38,10 +40,17 @@ import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.NamespaceDescriptor;
 import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.backup.BackupInfo.BackupPhase;
 import org.apache.hadoop.hbase.backup.BackupInfo.BackupState;
 import org.apache.hadoop.hbase.backup.impl.BackupAdminImpl;
 import org.apache.hadoop.hbase.backup.impl.BackupManager;
 import org.apache.hadoop.hbase.backup.impl.BackupSystemTable;
+import org.apache.hadoop.hbase.backup.impl.FullTableBackupClient;
+import org.apache.hadoop.hbase.backup.impl.IncrementalBackupManager;
+import org.apache.hadoop.hbase.backup.impl.IncrementalTableBackupClient;
+import org.apache.hadoop.hbase.backup.master.LogRollMasterProcedureManager;
+import org.apache.hadoop.hbase.backup.util.BackupUtils;
+import org.apache.hadoop.hbase.client.Admin;
 import org.apache.hadoop.hbase.client.Connection;
 import org.apache.hadoop.hbase.client.ConnectionFactory;
 import org.apache.hadoop.hbase.client.Durability;
@@ -55,6 +64,7 @@ import org.apache.hadoop.hbase.security.UserProvider;
 import org.apache.hadoop.hbase.security.access.SecureTestUtil;
 import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils;
 import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
 import org.apache.hadoop.hbase.wal.WALFactory;
 import org.apache.hadoop.hbase.zookeeper.MiniZooKeeperCluster;
 import org.junit.AfterClass;
@@ -94,6 +104,180 @@ public class TestBackupBase {
   protected static String provider = "defaultProvider";
   protected static boolean secure = false;
 
+  protected static boolean autoRestoreOnFailure = true;
+
+  static class IncrementalTableBackupClientForTest extends IncrementalTableBackupClient
+  {
+
+    public IncrementalTableBackupClientForTest() {
+    }
+
+    public IncrementalTableBackupClientForTest(Connection conn,
+        String backupId, BackupRequest request) throws IOException {
+      super(conn, backupId, request);
+    }
+
+    @Override
+    public void execute() throws IOException
+    {
+      // case INCREMENTAL_COPY:
+      try {
+        // case PREPARE_INCREMENTAL:
+        failStageIf(Stage.stage_0);
+        beginBackup(backupManager, backupInfo);
+
+        failStageIf(Stage.stage_1);
+        backupInfo.setPhase(BackupPhase.PREPARE_INCREMENTAL);
+        LOG.debug("For incremental backup, current table set is "
+            + backupManager.getIncrementalBackupTableSet());
+        newTimestamps = ((IncrementalBackupManager) backupManager).getIncrBackupLogFileMap();
+        // copy out the table and region info files for each table
+        BackupUtils.copyTableRegionInfo(conn, backupInfo, conf);
+        // convert WAL to HFiles and copy them to .tmp under BACKUP_ROOT
+        convertWALsToHFiles(backupInfo);
+        incrementalCopyHFiles(backupInfo);
+        failStageIf(Stage.stage_2);
+        // Save list of WAL files copied
+        backupManager.recordWALFiles(backupInfo.getIncrBackupFileList());
+
+        // case INCR_BACKUP_COMPLETE:
+        // set overall backup status: complete. Here we make sure to complete the backup.
+        // After this checkpoint, even if entering cancel process, will let the backup finished
+        // Set the previousTimestampMap which is before this current log roll to the manifest.
+        HashMap<TableName, HashMap<String, Long>> previousTimestampMap =
+            backupManager.readLogTimestampMap();
+        backupInfo.setIncrTimestampMap(previousTimestampMap);
+
+        // The table list in backupInfo is good for both full backup and incremental backup.
+        // For incremental backup, it contains the incremental backup table set.
+        backupManager.writeRegionServerLogTimestamp(backupInfo.getTables(), newTimestamps);
+        failStageIf(Stage.stage_3);
+
+        HashMap<TableName, HashMap<String, Long>> newTableSetTimestampMap =
+            backupManager.readLogTimestampMap();
+
+        Long newStartCode =
+            BackupUtils.getMinValue(BackupUtils.getRSLogTimestampMins(newTableSetTimestampMap));
+        backupManager.writeBackupStartCode(newStartCode);
+
+        handleBulkLoad(backupInfo.getTableNames());
+        failStageIf(Stage.stage_4);
+
+        // backup complete
+        completeBackup(conn, backupInfo, backupManager, BackupType.INCREMENTAL, conf);
+
+      } catch (Exception e) {
+        failBackup(conn, backupInfo, backupManager, e, "Unexpected Exception : ",
+          BackupType.INCREMENTAL, conf);
+        throw new IOException(e);
+      }
+
+    }
+  }
+
+  static class FullTableBackupClientForTest extends FullTableBackupClient
+  {
+
+
+    public FullTableBackupClientForTest() {
+    }
+
+    public FullTableBackupClientForTest(Connection conn, String backupId, BackupRequest request)
+        throws IOException {
+      super(conn, backupId, request);
+    }
+
+    @Override
+    public void execute() throws IOException
+    {
+      // Get the stage ID to fail on
+      try (Admin admin = conn.getAdmin();) {
+        // Begin BACKUP
+        beginBackup(backupManager, backupInfo);
+        failStageIf(Stage.stage_0);
+        String savedStartCode = null;
+        boolean firstBackup = false;
+        // do snapshot for full table backup
+        savedStartCode = backupManager.readBackupStartCode();
+        firstBackup = savedStartCode == null || Long.parseLong(savedStartCode) == 0L;
+        if (firstBackup) {
+          // This is our first backup. Let's put some marker to system table so that we can hold the logs
+          // while we do the backup.
+          backupManager.writeBackupStartCode(0L);
+        }
+        failStageIf(Stage.stage_1);
+        // We roll log here before we do the snapshot. It is possible there is duplicate data
+        // in the log that is already in the snapshot. But if we do it after the snapshot, we
+        // could have data loss.
+        // A better approach is to do the roll log on each RS in the same global procedure as
+        // the snapshot.
+        LOG.info("Execute roll log procedure for full backup ...");
+
+        Map<String, String> props = new HashMap<String, String>();
+        props.put("backupRoot", backupInfo.getBackupRootDir());
+        admin.execProcedure(LogRollMasterProcedureManager.ROLLLOG_PROCEDURE_SIGNATURE,
+          LogRollMasterProcedureManager.ROLLLOG_PROCEDURE_NAME, props);
+        failStageIf(Stage.stage_2);
+        newTimestamps = backupManager.readRegionServerLastLogRollResult();
+        if (firstBackup) {
+          // Updates registered log files
+          // We record ALL old WAL files as registered, because
+          // this is a first full backup in the system and these
+          // files are not needed for next incremental backup
+          List<String> logFiles = BackupUtils.getWALFilesOlderThan(conf, newTimestamps);
+          backupManager.recordWALFiles(logFiles);
+        }
+
+        // SNAPSHOT_TABLES:
+        backupInfo.setPhase(BackupPhase.SNAPSHOT);
+        for (TableName tableName : tableList) {
+          String snapshotName =
+              "snapshot_" + Long.toString(EnvironmentEdgeManager.currentTime()) + "_"
+                  + tableName.getNamespaceAsString() + "_" + tableName.getQualifierAsString();
+
+          snapshotTable(admin, tableName, snapshotName);
+          backupInfo.setSnapshotName(tableName, snapshotName);
+        }
+        failStageIf(Stage.stage_3);
+        // SNAPSHOT_COPY:
+        // do snapshot copy
+        LOG.debug("snapshot copy for " + backupId);
+        snapshotCopy(backupInfo);
+        // Updates incremental backup table set
+        backupManager.addIncrementalBackupTableSet(backupInfo.getTables());
+
+        // BACKUP_COMPLETE:
+        // set overall backup status: complete. Here we make sure to complete the backup.
+        // After this checkpoint, even if entering cancel process, will let the backup finished
+        backupInfo.setState(BackupState.COMPLETE);
+        // The table list in backupInfo is good for both full backup and incremental backup.
+        // For incremental backup, it contains the incremental backup table set.
+        backupManager.writeRegionServerLogTimestamp(backupInfo.getTables(), newTimestamps);
+
+        HashMap<TableName, HashMap<String, Long>> newTableSetTimestampMap =
+            backupManager.readLogTimestampMap();
+
+        Long newStartCode =
+            BackupUtils.getMinValue(BackupUtils
+                .getRSLogTimestampMins(newTableSetTimestampMap));
+        backupManager.writeBackupStartCode(newStartCode);
+        failStageIf(Stage.stage_4);
+        // backup complete
+        completeBackup(conn, backupInfo, backupManager, BackupType.FULL, conf);
+
+      } catch (Exception e) {
+
+        if(autoRestoreOnFailure) {
+          failBackup(conn, backupInfo, backupManager, e, "Unexpected BackupException : ",
+            BackupType.FULL, conf);
+        }
+        throw new IOException(e);
+      }
+    }
+
+  }
+
+
   /**
    * @throws java.lang.Exception
    */

http://git-wip-us.apache.org/repos/asf/hbase/blob/28d619b2/hbase-server/src/test/java/org/apache/hadoop/hbase/backup/TestBackupRepair.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/backup/TestBackupRepair.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/backup/TestBackupRepair.java
new file mode 100644
index 0000000..686d34b
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/backup/TestBackupRepair.java
@@ -0,0 +1,91 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.backup;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.util.List;
+import java.util.Set;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.backup.impl.BackupSystemTable;
+import org.apache.hadoop.hbase.backup.impl.TableBackupClient;
+import org.apache.hadoop.hbase.backup.impl.TableBackupClient.Stage;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.util.ToolRunner;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+
+@Category(LargeTests.class)
+public class TestBackupRepair extends TestBackupBase {
+
+  private static final Log LOG = LogFactory.getLog(TestBackupRepair.class);
+
+
+  @Test
+  public void testFullBackupWithFailuresAndRestore() throws Exception {
+
+    autoRestoreOnFailure = false;
+
+    conf1.set(TableBackupClient.BACKUP_CLIENT_IMPL_CLASS,
+      FullTableBackupClientForTest.class.getName());
+    int maxStage = Stage.values().length -1;
+    // Fail stage in loop between 0 and 4 inclusive
+    for (int stage = 0; stage < maxStage; stage++) {
+      LOG.info("Running stage " + stage);
+      runBackupAndFailAtStageWithRestore(stage);
+    }
+  }
+
+  public void runBackupAndFailAtStageWithRestore(int stage) throws Exception {
+
+    conf1.setInt(FullTableBackupClientForTest.BACKUP_TEST_MODE_STAGE, stage);
+    try (BackupSystemTable table = new BackupSystemTable(TEST_UTIL.getConnection())) {
+      int before = table.getBackupHistory().size();
+      String[] args =
+          new String[] { "create", "full", BACKUP_ROOT_DIR, "-t",
+              table1.getNameAsString() + "," + table2.getNameAsString() };
+      // Run backup
+      int ret = ToolRunner.run(conf1, new BackupDriver(), args);
+      assertFalse(ret == 0);
+
+      // Now run restore
+      args = new String[] {"repair"};
+
+      ret  = ToolRunner.run(conf1, new BackupDriver(), args);
+      assertTrue(ret == 0);
+
+      List<BackupInfo> backups = table.getBackupHistory();
+      int after = table.getBackupHistory().size();
+
+      assertTrue(after ==  before +1);
+      for (BackupInfo data : backups) {
+        String backupId = data.getBackupId();
+        assertFalse(checkSucceeded(backupId));
+      }
+      Set<TableName> tables = table.getIncrementalBackupTableSet(BACKUP_ROOT_DIR);
+      assertTrue(tables.size() == 0);
+    }
+  }
+
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hbase/blob/28d619b2/hbase-server/src/test/java/org/apache/hadoop/hbase/backup/TestFullBackupWithFailures.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/backup/TestFullBackupWithFailures.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/backup/TestFullBackupWithFailures.java
index 9555773..d18de88 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/backup/TestFullBackupWithFailures.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/backup/TestFullBackupWithFailures.java
@@ -20,162 +20,35 @@ package org.apache.hadoop.hbase.backup;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 
-import java.io.IOException;
-import java.util.HashMap;
 import java.util.List;
-import java.util.Map;
-import java.util.Random;
 import java.util.Set;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.backup.BackupInfo.BackupPhase;
-import org.apache.hadoop.hbase.backup.BackupInfo.BackupState;
 import org.apache.hadoop.hbase.backup.impl.BackupSystemTable;
-import org.apache.hadoop.hbase.backup.impl.FullTableBackupClient;
 import org.apache.hadoop.hbase.backup.impl.TableBackupClient;
-import org.apache.hadoop.hbase.backup.master.LogRollMasterProcedureManager;
-import org.apache.hadoop.hbase.backup.util.BackupUtils;
-import org.apache.hadoop.hbase.client.Admin;
-import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.backup.impl.TableBackupClient.Stage;
 import org.apache.hadoop.hbase.testclassification.LargeTests;
-import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
 import org.apache.hadoop.util.ToolRunner;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 
-import com.google.common.annotations.VisibleForTesting;
-
 @Category(LargeTests.class)
 public class TestFullBackupWithFailures extends TestBackupBase {
 
   private static final Log LOG = LogFactory.getLog(TestFullBackupWithFailures.class);
 
-  static class FullTableBackupClientForTest extends FullTableBackupClient
-  {
-    public static final String BACKUP_TEST_MODE_STAGE = "backup.test.mode.stage";
-
-    public FullTableBackupClientForTest() {
-    }
-
-    public FullTableBackupClientForTest(Connection conn, String backupId, BackupRequest request)
-        throws IOException {
-      super(conn, backupId, request);
-    }
-
-    @Override
-    public void execute() throws IOException
-    {
-      // Get the stage ID to fail on
-      try (Admin admin = conn.getAdmin();) {
-        // Begin BACKUP
-        beginBackup(backupManager, backupInfo);
-        failStageIf(0);
-        String savedStartCode = null;
-        boolean firstBackup = false;
-        // do snapshot for full table backup
-        savedStartCode = backupManager.readBackupStartCode();
-        firstBackup = savedStartCode == null || Long.parseLong(savedStartCode) == 0L;
-        if (firstBackup) {
-          // This is our first backup. Let's put some marker to system table so that we can hold the logs
-          // while we do the backup.
-          backupManager.writeBackupStartCode(0L);
-        }
-        failStageIf(1);
-        // We roll log here before we do the snapshot. It is possible there is duplicate data
-        // in the log that is already in the snapshot. But if we do it after the snapshot, we
-        // could have data loss.
-        // A better approach is to do the roll log on each RS in the same global procedure as
-        // the snapshot.
-        // the snapshot.
-        LOG.info("Execute roll log procedure for full backup ...");
-
-        Map<String, String> props = new HashMap<String, String>();
-        props.put("backupRoot", backupInfo.getBackupRootDir());
-        admin.execProcedure(LogRollMasterProcedureManager.ROLLLOG_PROCEDURE_SIGNATURE,
-          LogRollMasterProcedureManager.ROLLLOG_PROCEDURE_NAME, props);
-        failStageIf(2);
-        newTimestamps = backupManager.readRegionServerLastLogRollResult();
-        if (firstBackup) {
-          // Updates registered log files
-          // We record ALL old WAL files as registered, because
-          // this is a first full backup in the system and these
-          // files are not needed for next incremental backup
-          List<String> logFiles = BackupUtils.getWALFilesOlderThan(conf, newTimestamps);
-          backupManager.recordWALFiles(logFiles);
-        }
-
-        // SNAPSHOT_TABLES:
-        backupInfo.setPhase(BackupPhase.SNAPSHOT);
-        for (TableName tableName : tableList) {
-          String snapshotName =
-              "snapshot_" + Long.toString(EnvironmentEdgeManager.currentTime()) + "_"
-                  + tableName.getNamespaceAsString() + "_" + tableName.getQualifierAsString();
-
-          snapshotTable(admin, tableName, snapshotName);
-          backupInfo.setSnapshotName(tableName, snapshotName);
-        }
-        failStageIf(3);
-        // SNAPSHOT_COPY:
-        // do snapshot copy
-        LOG.debug("snapshot copy for " + backupId);
-        snapshotCopy(backupInfo);
-        // Updates incremental backup table set
-        backupManager.addIncrementalBackupTableSet(backupInfo.getTables());
-
-        // BACKUP_COMPLETE:
-        // set overall backup status: complete. Here we make sure to complete the backup.
-        // After this checkpoint, even if entering cancel process, will let the backup finished
-        backupInfo.setState(BackupState.COMPLETE);
-        // The table list in backupInfo is good for both full backup and incremental backup.
-        // For incremental backup, it contains the incremental backup table set.
-        backupManager.writeRegionServerLogTimestamp(backupInfo.getTables(), newTimestamps);
-
-        HashMap<TableName, HashMap<String, Long>> newTableSetTimestampMap =
-            backupManager.readLogTimestampMap();
-
-        Long newStartCode =
-            BackupUtils.getMinValue(BackupUtils
-                .getRSLogTimestampMins(newTableSetTimestampMap));
-        backupManager.writeBackupStartCode(newStartCode);
-        failStageIf(4);
-        // backup complete
-        completeBackup(conn, backupInfo, backupManager, BackupType.FULL, conf);
-
-      } catch (Exception e) {
-        failBackup(conn, backupInfo, backupManager, e, "Unexpected BackupException : ",
-          BackupType.FULL, conf);
-        throw new IOException(e);
-      }
-
-    }
-
-
-
-    @VisibleForTesting
-    protected int getTestStageId() {
-      return conf.getInt(BACKUP_TEST_MODE_STAGE, 0);
-    }
-
-    @VisibleForTesting
-
-    protected void failStageIf(int stage) throws IOException {
-      int current = getTestStageId();
-      if (current == stage) {
-        throw new IOException("Failed stage " + stage+" in testing");
-      }
-    }
-
-  }
-
   @Test
   public void testFullBackupWithFailures() throws Exception {
     conf1.set(TableBackupClient.BACKUP_CLIENT_IMPL_CLASS,
       FullTableBackupClientForTest.class.getName());
-    int stage = (new Random()).nextInt(5);
-    // Fail random stage between 0 and 4 inclusive
-    LOG.info("Running stage " + stage);
-    runBackupAndFailAtStage(stage);
+    int maxStage = Stage.values().length -1;
+    // Fail stages between 0 and 4 inclusive
+    for (int stage = 0; stage <= maxStage; stage++) {
+      LOG.info("Running stage " + stage);
+      runBackupAndFailAtStage(stage);
+    }
   }
 
   public void runBackupAndFailAtStage(int stage) throws Exception {

http://git-wip-us.apache.org/repos/asf/hbase/blob/28d619b2/hbase-server/src/test/java/org/apache/hadoop/hbase/backup/TestIncrementalBackupWithFailures.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/backup/TestIncrementalBackupWithFailures.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/backup/TestIncrementalBackupWithFailures.java
new file mode 100644
index 0000000..88bcc2a
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/backup/TestIncrementalBackupWithFailures.java
@@ -0,0 +1,161 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.backup;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.backup.BackupInfo.BackupState;
+import org.apache.hadoop.hbase.backup.impl.BackupAdminImpl;
+import org.apache.hadoop.hbase.backup.impl.BackupSystemTable;
+import org.apache.hadoop.hbase.backup.impl.TableBackupClient;
+import org.apache.hadoop.hbase.backup.impl.TableBackupClient.Stage;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
+import org.apache.hadoop.hbase.client.HTable;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.util.ToolRunner;
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import com.google.common.collect.Lists;
+
+@Category(LargeTests.class)
+@RunWith(Parameterized.class)
+public class TestIncrementalBackupWithFailures extends TestBackupBase {
+  private static final Log LOG = LogFactory.getLog(TestIncrementalBackupWithFailures.class);
+
+  @Parameterized.Parameters
+  public static Collection<Object[]> data() {
+    provider = "multiwal";
+    List<Object[]> params = new ArrayList<Object[]>();
+    params.add(new Object[] { Boolean.TRUE });
+    return params;
+  }
+
+  public TestIncrementalBackupWithFailures(Boolean b) {
+  }
+
+  // implement all test cases in 1 test since incremental backup/restore has dependencies
+  @Test
+  public void TestIncBackupRestore() throws Exception {
+
+    int ADD_ROWS = 99;
+    // #1 - create full backup for all tables
+    LOG.info("create full backup image for all tables");
+
+    List<TableName> tables = Lists.newArrayList(table1, table2);
+    final byte[] fam3Name = Bytes.toBytes("f3");
+    table1Desc.addFamily(new HColumnDescriptor(fam3Name));
+    HBaseTestingUtility.modifyTableSync(TEST_UTIL.getAdmin(), table1Desc);
+
+    Connection conn = ConnectionFactory.createConnection(conf1);
+    int NB_ROWS_FAM3 = 6;
+    insertIntoTable(conn, table1, fam3Name, 3, NB_ROWS_FAM3).close();
+
+    HBaseAdmin admin = null;
+    admin = (HBaseAdmin) conn.getAdmin();
+    BackupAdminImpl client = new BackupAdminImpl(conn);
+
+    BackupRequest request = createBackupRequest(BackupType.FULL, tables, BACKUP_ROOT_DIR);
+    String backupIdFull = client.backupTables(request);
+
+    assertTrue(checkSucceeded(backupIdFull));
+
+    // #2 - insert some data to table
+    HTable t1 = insertIntoTable(conn, table1, famName, 1, ADD_ROWS);
+    LOG.debug("writing " + ADD_ROWS + " rows to " + table1);
+
+    Assert.assertEquals(TEST_UTIL.countRows(t1), NB_ROWS_IN_BATCH + ADD_ROWS + NB_ROWS_FAM3);
+    t1.close();
+    LOG.debug("written " + ADD_ROWS + " rows to " + table1);
+
+    HTable t2 = (HTable) conn.getTable(table2);
+    Put p2;
+    for (int i = 0; i < 5; i++) {
+      p2 = new Put(Bytes.toBytes("row-t2" + i));
+      p2.addColumn(famName, qualName, Bytes.toBytes("val" + i));
+      t2.put(p2);
+    }
+
+    Assert.assertEquals(TEST_UTIL.countRows(t2), NB_ROWS_IN_BATCH + 5);
+    t2.close();
+    LOG.debug("written " + 5 + " rows to " + table2);
+
+    // #3 - incremental backup for multiple tables
+    incrementalBackupWithFailures();
+
+    admin.close();
+    conn.close();
+
+  }
+
+
+  private void incrementalBackupWithFailures() throws Exception {
+    conf1.set(TableBackupClient.BACKUP_CLIENT_IMPL_CLASS,
+      IncrementalTableBackupClientForTest.class.getName());
+    int maxStage = Stage.values().length -1;
+    // Fail stages between 0 and 4 inclusive
+    for (int stage = 0; stage <= maxStage; stage++) {
+      LOG.info("Running stage " + stage);
+      runBackupAndFailAtStage(stage);
+    }
+  }
+
+  private void runBackupAndFailAtStage(int stage) throws Exception {
+
+    conf1.setInt(FullTableBackupClientForTest.BACKUP_TEST_MODE_STAGE, stage);
+    try (BackupSystemTable table = new BackupSystemTable(TEST_UTIL.getConnection())) {
+      int before = table.getBackupHistory().size();
+      String[] args =
+          new String[] { "create", "incremental", BACKUP_ROOT_DIR, "-t",
+              table1.getNameAsString() + "," + table2.getNameAsString() };
+      // Run backup
+      int ret = ToolRunner.run(conf1, new BackupDriver(), args);
+      assertFalse(ret == 0);
+      List<BackupInfo> backups = table.getBackupHistory();
+      int after = table.getBackupHistory().size();
+
+      assertTrue(after ==  before +1);
+      for (BackupInfo data : backups) {
+        if(data.getType() == BackupType.FULL) {
+          assertTrue(data.getState() == BackupState.COMPLETE);
+        } else {
+          assertTrue(data.getState() == BackupState.FAILED);
+        }
+      }
+    }
+  }
+
+}

