hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From zg...@apache.org
Subject [hbase] branch branch-2.1 updated: HBASE-22709 Add a chore thread in master to do hbck checking (#404)
Date Sat, 27 Jul 2019 11:00:02 GMT
This is an automated email from the ASF dual-hosted git repository.

zghao pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new e54c401  HBASE-22709 Add a chore thread in master to do hbck checking (#404)
e54c401 is described below

commit e54c4018e70552a38e8213297d766e04d2bec722
Author: Guanghao Zhang <zghao@apache.org>
AuthorDate: Sat Jul 27 18:23:37 2019 +0800

    HBASE-22709 Add a chore thread in master to do hbck checking (#404)
    
    Signed-off-by: stack <stack@apache.org>
---
 .../tmpl/master/AssignmentManagerStatusTmpl.jamon  |  76 ------
 .../hbase/tmpl/master/MasterStatusTmpl.jamon       |   3 +-
 .../org/apache/hadoop/hbase/master/HMaster.java    |   8 +
 .../apache/hadoop/hbase/master/HbckChecker.java    | 282 +++++++++++++++++++++
 .../hbase/master/assignment/AssignmentManager.java |  54 +---
 .../main/resources/hbase-webapps/master/hbck.jsp   | 153 +++++++++++
 ...roblematicRegions.java => TestHbckChecker.java} |  65 +++--
 7 files changed, 497 insertions(+), 144 deletions(-)

diff --git a/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/AssignmentManagerStatusTmpl.jamon
b/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/AssignmentManagerStatusTmpl.jamon
index 1d8fa70..76377e2 100644
--- a/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/AssignmentManagerStatusTmpl.jamon
+++ b/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/AssignmentManagerStatusTmpl.jamon
@@ -42,84 +42,8 @@ int limit = 100;
 <%java>
 SortedSet<RegionState> rit = assignmentManager.getRegionStates()
     .getRegionsInTransitionOrderedByTimestamp();
-Map<String, Pair<ServerName, Set<ServerName>>> problematicRegions = assignmentManager
-    .getProblematicRegions();
 </%java>
 
-<%if !problematicRegions.isEmpty() %>
-<%java>
-int totalSize = problematicRegions.size();
-int sizePerPage = Math.min(10, totalSize);
-int numOfPages = (int) Math.ceil(totalSize * 1.0 / sizePerPage);
-</%java>
-    <section>
-    <h2><a name="problem-regions">Problematic Regions</a></h2>
-    <p>
-        <span>
-            <% problematicRegions.size() %> problematic region(s). There are three
case: 1. Master
-             thought this region opened, but no regionserver reported it. 2. Master thought
this
-              region opened on Server1, but regionserver reported Server2. 3. More than one
-               regionservers reported opened this region. Notice: the reported online regionservers
-                may be not right when there are regions in transition. Please check them
in
-                 regionserver's web UI.
-        </span>
-    </p>
-    <div class="tabbable">
-        <div class="tab-content">
-        <%java int recordItr = 0; %>
-        <%for Map.Entry<String, Pair<ServerName, Set<ServerName>>> entry
: problematicRegions.entrySet() %>
-            <%if (recordItr % sizePerPage) == 0 %>
-                <%if recordItr == 0 %>
-                    <div class="tab-pane active" id="tab_prs<% (recordItr / sizePerPage)
+ 1 %>">
-                <%else>
-                    <div class="tab-pane" id="tab_prs<% (recordItr / sizePerPage) +
1 %>">
-                </%if>
-                <table class="table table-striped" style="margin-bottom:0px;">
-                    <tr>
-                        <th>Region</th>
-                        <th>Location in META</th>
-                        <th>Reported Online Region Servers</th>
-                    </tr>
-            </%if>
-
-            <tr>
-                <td><% entry.getKey() %></td>
-                <td><% entry.getValue().getFirst() %></td>
-                <td><% entry.getValue().getSecond().stream().map(ServerName::getServerName)
-                    .collect(Collectors.joining(", ")) %></td>
-            </tr>
-            <%java recordItr++; %>
-            <%if (recordItr % sizePerPage) == 0 %>
-                </table>
-                </div>
-            </%if>
-        </%for>
-
-        <%if (recordItr % sizePerPage) != 0 %>
-         <%for ; (recordItr % sizePerPage) != 0 ; recordItr++ %>
-            <tr><td colspan="3" style="height:61px"></td></tr>
-         </%for>
-         </table>
-         </div>
-        </%if>
-
-        </div>
-        <nav>
-         <ul class="nav nav-pills pagination">
-         <%for int i = 1 ; i <= numOfPages; i++ %>
-             <%if i == 1 %>
-             <li class="active">
-             <%else>
-             <li>
-             </%if>
-             <a href="#tab_prs<% i %>"><% i %></a></li>
-         </%for>
-         </ul>
-        </nav>
-    </div>
-    </section>
-</%if>
-
 <%if !rit.isEmpty() %>
 <%java>
 long currentTime = System.currentTimeMillis();
diff --git a/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/MasterStatusTmpl.jamon
b/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/MasterStatusTmpl.jamon
index 9dd5dda..41c35c6 100644
--- a/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/MasterStatusTmpl.jamon
+++ b/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/MasterStatusTmpl.jamon
@@ -149,7 +149,8 @@ AssignmentManager assignmentManager = master.getAssignmentManager();
                 <li class="active"><a href="/master-status">Home</a></li>
                 <li><a href="/tablesDetailed.jsp">Table Details</a></li>
                 <%if master.isActiveMaster() %>
-                <li><a href="/procedures.jsp">Procedures &amp; Locks</a></li>
+                    <li><a href="/procedures.jsp">Procedures &amp; Locks</a></li>
+                    <li><a href="/hbck.jsp">HBCK Report</a></li>
                 </%if>
                 <li><a href="/processMaster.jsp">Process Metrics</a></li>
                 <li><a href="/logs/">Local Logs</a></li>
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
index 63a7374..6d70453 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
@@ -371,6 +371,7 @@ public class HMaster extends HRegionServer implements MasterServices {
   private ClusterStatusChore clusterStatusChore;
   private ClusterStatusPublisher clusterStatusPublisherChore = null;
 
+  private HbckChecker hbckChecker;
   CatalogJanitor catalogJanitorChore;
   private LogCleaner logCleaner;
   private HFileCleaner hfileCleaner;
@@ -1030,6 +1031,8 @@ public class HMaster extends HRegionServer implements MasterServices
{
     getChoreService().scheduleChore(normalizerChore);
     this.catalogJanitorChore = new CatalogJanitor(this);
     getChoreService().scheduleChore(catalogJanitorChore);
+    this.hbckChecker = new HbckChecker(this);
+    getChoreService().scheduleChore(hbckChecker);
 
     // NAMESPACE READ!!!!
     // Here we expect hbase:namespace to be online. See inside initClusterSchemaService.
@@ -1495,6 +1498,7 @@ public class HMaster extends HRegionServer implements MasterServices
{
       choreService.cancelChore(this.logCleaner);
       choreService.cancelChore(this.hfileCleaner);
       choreService.cancelChore(this.replicationBarrierCleaner);
+      choreService.cancelChore(this.hbckChecker);
     }
   }
 
@@ -3861,4 +3865,8 @@ public class HMaster extends HRegionServer implements MasterServices
{
     }
     return super.getWalGroupsReplicationStatus();
   }
+
+  public HbckChecker getHbckChecker() {
+    return this.hbckChecker;
+  }
 }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HbckChecker.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HbckChecker.java
new file mode 100644
index 0000000..fbc2c70
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HbckChecker.java
@@ -0,0 +1,282 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.locks.ReentrantReadWriteLock;
+
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.ScheduledChore;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
+import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.hbase.util.HbckRegionInfo;
+import org.apache.hadoop.hbase.util.Pair;
+import org.apache.yetus.audience.InterfaceAudience;
+import org.apache.yetus.audience.InterfaceStability;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
+
+/**
+ * Used to do the hbck checking job at master side.
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Evolving
+public class HbckChecker extends ScheduledChore {
+  private static final Logger LOG = LoggerFactory.getLogger(HbckChecker.class.getName());
+
+  private static final String HBCK_CHECKER_INTERVAL = "hbase.master.hbck.checker.interval";
+  private static final int DEFAULT_HBCK_CHECKER_INTERVAL = 60 * 60 * 1000;
+
+  private final MasterServices master;
+
+  /**
+   * This map contains the state of all hbck items.  It maps from encoded region
+   * name to HbckRegionInfo structure.  The information contained in HbckRegionInfo is used
+   * to detect and correct consistency (hdfs/meta/deployment) problems.
+   */
+  private final Map<String, HbckRegionInfo> regionInfoMap = new HashMap<>();
+
+  /**
+   * The regions only opened on RegionServers, but no region info in meta.
+   */
+  private final Map<String, ServerName> orphanRegionsOnRS = new HashMap<>();
+  /**
+   * The regions have directory on FileSystem, but no region info in meta.
+   */
+  private final List<String> orphanRegionsOnFS = new LinkedList<>();
+  /**
+   * The inconsistent regions. There are three case:
+   * case 1. Master thought this region opened, but no regionserver reported it.
+   * case 2. Master thought this region opened on Server1, but regionserver reported Server2
+   * case 3. More than one regionservers reported opened this region
+   */
+  private final Map<String, Pair<ServerName, List<ServerName>>> inconsistentRegions
=
+      new HashMap<>();
+
+  /**
+   * The "snapshot" is used to save the last round's HBCK checking report.
+   */
+  private final Map<String, ServerName> orphanRegionsOnRSSnapshot = new HashMap<>();
+  private final List<String> orphanRegionsOnFSSnapshot = new LinkedList<>();
+  private final Map<String, Pair<ServerName, List<ServerName>>> inconsistentRegionsSnapshot
=
+      new HashMap<>();
+
+  /**
+   * The "snapshot" may be changed after checking. And this checking report "snapshot" may
be
+   * accessed by web ui. Use this rwLock to synchronize.
+   */
+  ReentrantReadWriteLock rwLock = new ReentrantReadWriteLock();
+
+  /**
+   * When running, the "snapshot" may be changed when this round's checking finish.
+   */
+  private volatile boolean running = false;
+  private volatile long checkingStartTimestamp = 0;
+  private volatile long checkingEndTimestamp = 0;
+
+  public HbckChecker(MasterServices master) {
+    super("HbckChecker-", master,
+        master.getConfiguration().getInt(HBCK_CHECKER_INTERVAL, DEFAULT_HBCK_CHECKER_INTERVAL));
+    this.master = master;
+  }
+
+  @Override
+  protected void chore() {
+    running = true;
+    regionInfoMap.clear();
+    orphanRegionsOnRS.clear();
+    orphanRegionsOnFS.clear();
+    inconsistentRegions.clear();
+    checkingStartTimestamp = EnvironmentEdgeManager.currentTime();
+    loadRegionsFromInMemoryState();
+    loadRegionsFromRSReport();
+    try {
+      loadRegionsFromFS();
+    } catch (IOException e) {
+      LOG.warn("Failed to load the regions from filesystem", e);
+    }
+    saveCheckResultToSnapshot();
+    running = false;
+  }
+
+  private void saveCheckResultToSnapshot() {
+    // Need synchronized here, as this "snapshot" may be access by web ui.
+    rwLock.writeLock().lock();
+    try {
+      orphanRegionsOnRSSnapshot.clear();
+      orphanRegionsOnRS.entrySet()
+          .forEach(e -> orphanRegionsOnRSSnapshot.put(e.getKey(), e.getValue()));
+      orphanRegionsOnFSSnapshot.clear();
+      orphanRegionsOnFSSnapshot.addAll(orphanRegionsOnFS);
+      inconsistentRegionsSnapshot.clear();
+      inconsistentRegions.entrySet()
+          .forEach(e -> inconsistentRegionsSnapshot.put(e.getKey(), e.getValue()));
+      checkingEndTimestamp = EnvironmentEdgeManager.currentTime();
+    } finally {
+      rwLock.writeLock().unlock();
+    }
+  }
+
+  private void loadRegionsFromInMemoryState() {
+    List<RegionState> regionStates =
+        master.getAssignmentManager().getRegionStates().getRegionStates();
+    for (RegionState regionState : regionStates) {
+      RegionInfo regionInfo = regionState.getRegion();
+      HbckRegionInfo.MetaEntry metaEntry =
+          new HbckRegionInfo.MetaEntry(regionInfo, regionState.getServerName(),
+              regionState.getStamp());
+      regionInfoMap.put(regionInfo.getEncodedName(), new HbckRegionInfo(metaEntry));
+    }
+  }
+
+  private void loadRegionsFromRSReport() {
+    Map<ServerName, Set<byte[]>> rsReports = master.getAssignmentManager().getRSReports();
+    for (Map.Entry<ServerName, Set<byte[]>> entry : rsReports.entrySet()) {
+      ServerName serverName = entry.getKey();
+      for (byte[] regionName : entry.getValue()) {
+        String encodedRegionName = RegionInfo.encodeRegionName(regionName);
+        HbckRegionInfo hri = regionInfoMap.get(encodedRegionName);
+        if (hri == null) {
+          orphanRegionsOnRS.put(encodedRegionName, serverName);
+          continue;
+        }
+        hri.addServer(hri.getMetaEntry(), serverName);
+      }
+    }
+
+    for (Map.Entry<String, HbckRegionInfo> entry : regionInfoMap.entrySet()) {
+      String encodedRegionName = entry.getKey();
+      HbckRegionInfo hri = entry.getValue();
+      ServerName locationInMeta = hri.getMetaEntry().getRegionServer();
+      if (hri.getDeployedOn().size() == 0) {
+        // Master thought this region opened, but no regionserver reported it.
+        inconsistentRegions.put(encodedRegionName, new Pair<>(locationInMeta, new LinkedList<>()));
+      } else if (hri.getDeployedOn().size() > 1) {
+        // More than one regionserver reported opened this region
+        inconsistentRegions.put(encodedRegionName, new Pair<>(locationInMeta, hri.getDeployedOn()));
+      } else if (!hri.getDeployedOn().get(0).equals(locationInMeta)) {
+        // Master thought this region opened on Server1, but regionserver reported Server2
+        inconsistentRegions.put(encodedRegionName, new Pair<>(locationInMeta, hri.getDeployedOn()));
+      }
+    }
+  }
+
+  private void loadRegionsFromFS() throws IOException {
+    Path rootDir = master.getMasterFileSystem().getRootDir();
+    FileSystem fs = master.getMasterFileSystem().getFileSystem();
+
+    // list all tables from HDFS
+    List<FileStatus> tableDirs = Lists.newArrayList();
+    List<Path> paths = FSUtils.getTableDirs(fs, rootDir);
+    for (Path path : paths) {
+      tableDirs.add(fs.getFileStatus(path));
+    }
+
+    for (FileStatus tableDir : tableDirs) {
+      FileStatus[] regionDirs = fs.listStatus(tableDir.getPath());
+      for (FileStatus regionDir : regionDirs) {
+        String encodedRegionName = regionDir.getPath().getName();
+        HbckRegionInfo hri = regionInfoMap.get(encodedRegionName);
+        if (hri == null) {
+          orphanRegionsOnFS.add(encodedRegionName);
+          continue;
+        }
+        HbckRegionInfo.HdfsEntry hdfsEntry =
+            new HbckRegionInfo.HdfsEntry(regionDir.getPath(), regionDir.getModificationTime());
+        hri.setHdfsEntry(hdfsEntry);
+      }
+    }
+  }
+
+  /**
+   * When running, the HBCK report may be changed later.
+   */
+  public boolean isRunning() {
+    return running;
+  }
+
+  /**
+   * @return a copy of the regions only opened on RegionServers, but no region info in meta.
+   */
+  public Map<String, ServerName> getOrphanRegionsOnRS() {
+    // Copy under the read lock: returning the live map would let callers read it unlocked.
+    rwLock.readLock().lock();
+    try {
+      return new HashMap<>(this.orphanRegionsOnRSSnapshot);
+    } finally {
+      rwLock.readLock().unlock();
+    }
+  }
+
+  /**
+   * @return a copy of the regions have directory on FileSystem, but no region info in meta.
+   */
+  public List<String> getOrphanRegionsOnFS() {
+    // Copy under the read lock: returning the live list would let callers read it unlocked.
+    rwLock.readLock().lock();
+    try {
+      return new LinkedList<>(this.orphanRegionsOnFSSnapshot);
+    } finally {
+      rwLock.readLock().unlock();
+    }
+  }
+
+  /**
+   * Found the inconsistent regions. There are three case:
+   * case 1. Master thought this region opened, but no regionserver reported it.
+   * case 2. Master thought this region opened on Server1, but regionserver reported Server2
+   * case 3. More than one regionservers reported opened this region
+   *
+   * @return the map of inconsistent regions. Key is the region name. Value is a pair of
location in
+   *         meta and the regionservers which reported opened this region.
+   */
+  public Map<String, Pair<ServerName, List<ServerName>>> getInconsistentRegions()
{
+    // Return a copy made under the read lock: handing out the live map would let callers
+    rwLock.readLock().lock();
+    try {
+      return new HashMap<>(this.inconsistentRegionsSnapshot); // iterate it while it is rebuilt.
+    } finally {
+      rwLock.readLock().unlock();
+    }
+  }
+
+  /**
+   * Used for web ui to show when the HBCK checking started.
+   */
+  public long getCheckingStartTimestamp() {
+    return this.checkingStartTimestamp;
+  }
+
+  /**
+   * Used for web ui to show when the HBCK checking report was generated (end of the last round).
+   */
+  public long getCheckingEndTimestamp() {
+    return this.checkingEndTimestamp;
+  }
+}
\ No newline at end of file
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
index e680454..b28a79d 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
@@ -1339,6 +1339,12 @@ public class AssignmentManager implements ServerListener {
 
   public long submitServerCrash(final ServerName serverName, final boolean shouldSplitWal)
{
     boolean carryingMeta = isCarryingMeta(serverName);
+
+    // Remove the in-memory rsReports result
+    synchronized (rsReports) {
+      rsReports.remove(serverName);
+    }
+
     ProcedureExecutor<MasterProcedureEnv> procExec = this.master.getMasterProcedureExecutor();
     long pid = procExec.submitProcedure(new ServerCrashProcedure(procExec.getEnvironment(),
         serverName, shouldSplitWal, carryingMeta));
@@ -1892,51 +1898,13 @@ public class AssignmentManager implements ServerListener {
   }
 
   /**
-   * Found the potentially problematic opened regions. There are three case:
-   * case 1. Master thought this region opened, but no regionserver reported it.
-   * case 2. Master thought this region opened on Server1, but regionserver reported Server2
-   * case 3. More than one regionservers reported opened this region
-   *
-   * @return the map of potentially problematic opened regions. Key is the region name. Value
is
-   *         a pair of location in meta and the regionservers which reported opened this
region.
+   * @return a snapshot of rsReports
    */
-  public Map<String, Pair<ServerName, Set<ServerName>>> getProblematicRegions()
{
-    Map<String, Set<ServerName>> reportedOnlineRegions = new HashMap<>();
+  public Map<ServerName, Set<byte[]>> getRSReports() {
+    Map<ServerName, Set<byte[]>> rsReportsSnapshot = new HashMap<>();
     synchronized (rsReports) {
-      for (Map.Entry<ServerName, Set<byte[]>> entry : rsReports.entrySet()) {
-        for (byte[] regionName : entry.getValue()) {
-          reportedOnlineRegions
-              .computeIfAbsent(RegionInfo.getRegionNameAsString(regionName), r -> new
HashSet<>())
-              .add(entry.getKey());
-        }
-      }
+      rsReports.entrySet().forEach(e -> rsReportsSnapshot.put(e.getKey(), e.getValue()));
     }
-
-    Map<String, Pair<ServerName, Set<ServerName>>> problematicRegions =
new HashMap<>();
-    List<RegionState> rits = regionStates.getRegionsStateInTransition();
-    for (RegionState regionState : regionStates.getRegionStates()) {
-      // Only consider the opened region and not in transition
-      if (!rits.contains(regionState) && regionState.isOpened()) {
-        String regionName = regionState.getRegion().getRegionNameAsString();
-        ServerName serverName = regionState.getServerName();
-        if (reportedOnlineRegions.containsKey(regionName)) {
-          Set<ServerName> reportedServers = reportedOnlineRegions.get(regionName);
-          if (reportedServers.contains(serverName)) {
-            if (reportedServers.size() > 1) {
-              // More than one regionserver reported opened this region
-              problematicRegions.put(regionName, new Pair<>(serverName, reportedServers));
-            }
-          } else {
-            // Master thought this region opened on Server1, but regionserver reported Server2
-            problematicRegions.put(regionName, new Pair<>(serverName, reportedServers));
-          }
-        } else {
-          // Master thought this region opened, but no regionserver reported it.
-          problematicRegions.put(regionName, new Pair<>(serverName, new HashSet<>()));
-        }
-      }
-    }
-
-    return problematicRegions;
+    return rsReportsSnapshot;
   }
 }
diff --git a/hbase-server/src/main/resources/hbase-webapps/master/hbck.jsp b/hbase-server/src/main/resources/hbase-webapps/master/hbck.jsp
new file mode 100644
index 0000000..0245d47
--- /dev/null
+++ b/hbase-server/src/main/resources/hbase-webapps/master/hbck.jsp
@@ -0,0 +1,153 @@
+<%--
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+--%>
+<%@ page contentType="text/html;charset=UTF-8"
+         import="java.util.Date"
+         import="java.util.List"
+         import="java.util.Map"
+         import="java.util.stream.Collectors"
+%>
+<%@ page import="org.apache.hadoop.hbase.master.HbckChecker" %>
+<%@ page import="org.apache.hadoop.hbase.master.HMaster" %>
+<%@ page import="org.apache.hadoop.hbase.ServerName" %>
+<%@ page import="org.apache.hadoop.hbase.util.Pair" %>
+<%
+  HMaster master = (HMaster) getServletContext().getAttribute(HMaster.MASTER);
+  pageContext.setAttribute("pageTitle", "HBase Master HBCK Report: " + master.getServerName());
+  HbckChecker hbckChecker = master.getHbckChecker();
+  Map<String, Pair<ServerName, List<ServerName>>> inconsistentRegions =
null;
+  Map<String, ServerName> orphanRegionsOnRS = null;
+  List<String> orphanRegionsOnFS = null;
+  long startTimestamp = 0;
+  long endTimestamp = 0;
+  if (hbckChecker != null) {
+    inconsistentRegions = hbckChecker.getInconsistentRegions();
+    orphanRegionsOnRS = hbckChecker.getOrphanRegionsOnRS();
+    orphanRegionsOnFS = hbckChecker.getOrphanRegionsOnFS();
+    startTimestamp = hbckChecker.getCheckingStartTimestamp();
+    endTimestamp = hbckChecker.getCheckingEndTimestamp();
+  }
+%>
+<jsp:include page="header.jsp">
+  <jsp:param name="pageTitle" value="${pageTitle}"/>
+</jsp:include>
+
+<div class="container-fluid content">
+
+  <% if (!master.isInitialized()) { %>
+  <div class="row">
+    <div class="page-header">
+      <h1>Master is not initialized</h1>
+    </div>
+  </div>
+  <jsp:include page="redirect.jsp" />
+  <% } else { %>
+
+  <div class="row">
+    <div class="page-header">
+      <h1>HBCK Report</h1>
+      <p>
+        <span>Checking started at <%= new Date(startTimestamp) %> and generated
report at <%= new Date(endTimestamp) %></span>
+      </p>
+    </div>
+  </div>
+
+  <div class="row">
+    <div class="page-header">
+      <h2>Inconsistent Regions</h2>
+      <p>
+        <span>
+        There are three case: 1. Master thought this region opened, but no regionserver reported
it.
+        2. Master thought this region opened on Server1, but regionserver reported Server2.
+        3. More than one regionservers reported opened this region.
+        Notice: the reported online regionservers may be not right when there are regions
in transition.
+        Please check them in regionserver's web UI.
+        </span>
+      </p>
+    </div>
+  </div>
+
+  <% if (inconsistentRegions != null && inconsistentRegions.size() > 0) { %>
+  <table class="table table-striped">
+    <tr>
+      <th>Region</th>
+      <th>Location in META</th>
+      <th>Reported Online RegionServers</th>
+    </tr>
+    <% for (Map.Entry<String, Pair<ServerName, List<ServerName>>> entry
: inconsistentRegions.entrySet()) { %>
+    <tr>
+      <td><%= entry.getKey() %></td>
+      <td><%= entry.getValue().getFirst() %></td>
+      <td><%= entry.getValue().getSecond().stream().map(ServerName::getServerName)
+                        .collect(Collectors.joining(", ")) %></td>
+    </tr>
+    <% } %>
+
+    <p><%= inconsistentRegions.size() %> region(s) in set.</p>
+  </table>
+  <% } %>
+
+  <div class="row">
+    <div class="page-header">
+      <h2>Orphan Regions on RegionServer</h2>
+    </div>
+  </div>
+
+  <% if (orphanRegionsOnRS != null && orphanRegionsOnRS.size() > 0) { %>
+  <table class="table table-striped">
+    <tr>
+      <th>Region</th>
+      <th>Reported Online RegionServer</th>
+    </tr>
+    <% for (Map.Entry<String, ServerName> entry : orphanRegionsOnRS.entrySet())
{ %>
+    <tr>
+      <td><%= entry.getKey() %></td>
+      <td><%= entry.getValue() %></td>
+    </tr>
+    <% } %>
+
+    <p><%= orphanRegionsOnRS.size() %> region(s) in set.</p>
+  </table>
+  <% } %>
+
+  <div class="row">
+    <div class="page-header">
+      <h2>Orphan Regions on FileSystem</h2>
+    </div>
+  </div>
+
+  <% if (orphanRegionsOnFS != null && orphanRegionsOnFS.size() > 0) { %>
+  <table class="table table-striped">
+    <tr>
+      <th>Region</th>
+    </tr>
+    <% for (String region : orphanRegionsOnFS) { %>
+    <tr>
+      <td><%= region %></td>
+    </tr>
+    <% } %>
+
+    <p><%= orphanRegionsOnFS.size() %> region(s) in set.</p>
+  </table>
+  <% } %>
+
+  <% } %>
+</div>
+
+<jsp:include page="footer.jsp"/>
\ No newline at end of file
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAMProblematicRegions.java
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestHbckChecker.java
similarity index 70%
rename from hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAMProblematicRegions.java
rename to hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestHbckChecker.java
index d07e129..f4bba6c 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAMProblematicRegions.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestHbckChecker.java
@@ -24,7 +24,6 @@ import static org.junit.Assert.assertTrue;
 import java.util.Collections;
 import java.util.List;
 import java.util.Map;
-import java.util.Set;
 import java.util.concurrent.Future;
 
 import org.apache.hadoop.hbase.HBaseClassTestRule;
@@ -32,9 +31,11 @@ import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.client.RegionInfo;
 import org.apache.hadoop.hbase.client.RegionInfoBuilder;
+import org.apache.hadoop.hbase.master.HbckChecker;
 import org.apache.hadoop.hbase.testclassification.MasterTests;
 import org.apache.hadoop.hbase.testclassification.MediumTests;
 import org.apache.hadoop.hbase.util.Pair;
+import org.junit.Before;
 import org.junit.ClassRule;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
@@ -42,41 +43,52 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 @Category({ MasterTests.class, MediumTests.class })
-public class TestAMProblematicRegions extends TestAssignmentManagerBase {
-  private static final Logger LOG = LoggerFactory.getLogger(TestAMProblematicRegions.class);
+public class TestHbckChecker extends TestAssignmentManagerBase {
+  private static final Logger LOG = LoggerFactory.getLogger(TestHbckChecker.class);
 
   @ClassRule
   public static final HBaseClassTestRule CLASS_RULE =
-      HBaseClassTestRule.forClass(TestAMProblematicRegions.class);
+      HBaseClassTestRule.forClass(TestHbckChecker.class);
+
+  private HbckChecker hbckChecker;
+
+  @Before
+  public void setUp() throws Exception {
+    super.setUp();
+    hbckChecker = new HbckChecker(master);
+  }
 
   @Test
   public void testForMeta() throws Exception {
     byte[] metaRegionNameAsBytes = RegionInfoBuilder.FIRST_META_REGIONINFO.getRegionName();
-    String metaRegionName = RegionInfoBuilder.FIRST_META_REGIONINFO.getRegionNameAsString();
+    String metaRegionName = RegionInfoBuilder.FIRST_META_REGIONINFO.getEncodedName();
     List<ServerName> serverNames = master.getServerManager().getOnlineServersList();
     assertEquals(NSERVERS, serverNames.size());
 
-    Map<String, Pair<ServerName, Set<ServerName>>> problematicRegions =
am.getProblematicRegions();
+    hbckChecker.choreForTesting();
+    Map<String, Pair<ServerName, List<ServerName>>> inconsistentRegions
=
+        hbckChecker.getInconsistentRegions();
 
     // Test for case1: Master thought this region opened, but no regionserver reported it.
-    assertTrue(problematicRegions.containsKey(metaRegionName));
-    Pair<ServerName, Set<ServerName>> pair = problematicRegions.get(metaRegionName);
+    assertTrue(inconsistentRegions.containsKey(metaRegionName));
+    Pair<ServerName, List<ServerName>> pair = inconsistentRegions.get(metaRegionName);
     ServerName locationInMeta = pair.getFirst();
-    Set<ServerName> reportedRegionServers = pair.getSecond();
+    List<ServerName> reportedRegionServers = pair.getSecond();
     assertTrue(serverNames.contains(locationInMeta));
     assertEquals(0, reportedRegionServers.size());
 
     // Reported right region location. Then not in problematic regions.
     am.reportOnlineRegions(locationInMeta, Collections.singleton(metaRegionNameAsBytes));
-    problematicRegions = am.getProblematicRegions();
-    assertFalse(problematicRegions.containsKey(metaRegionName));
+    hbckChecker.choreForTesting();
+    inconsistentRegions = hbckChecker.getInconsistentRegions();
+    assertFalse(inconsistentRegions.containsKey(metaRegionName));
   }
 
   @Test
   public void testForUserTable() throws Exception {
     TableName tableName = TableName.valueOf("testForUserTable");
     RegionInfo hri = createRegionInfo(tableName, 1);
-    String regionName = hri.getRegionNameAsString();
+    String regionName = hri.getEncodedName();
     rsDispatcher.setMockRsExecutor(new GoodRsExecutor());
     Future<byte[]> future = submitProcedure(am.createAssignProcedure(hri));
     waitOnFuture(future);
@@ -85,11 +97,13 @@ public class TestAMProblematicRegions extends TestAssignmentManagerBase {
     assertEquals(NSERVERS, serverNames.size());
 
     // Test for case1: Master thought this region opened, but no regionserver reported it.
-    Map<String, Pair<ServerName, Set<ServerName>>> problematicRegions = am.getProblematicRegions();
-    assertTrue(problematicRegions.containsKey(regionName));
-    Pair<ServerName, Set<ServerName>> pair = problematicRegions.get(regionName);
+    hbckChecker.choreForTesting();
+    Map<String, Pair<ServerName, List<ServerName>>> inconsistentRegions =
+        hbckChecker.getInconsistentRegions();
+    assertTrue(inconsistentRegions.containsKey(regionName));
+    Pair<ServerName, List<ServerName>> pair = inconsistentRegions.get(regionName);
     ServerName locationInMeta = pair.getFirst();
-    Set<ServerName> reportedRegionServers = pair.getSecond();
+    List<ServerName> reportedRegionServers = pair.getSecond();
     assertTrue(serverNames.contains(locationInMeta));
     assertEquals(0, reportedRegionServers.size());
 
@@ -99,9 +113,10 @@ public class TestAMProblematicRegions extends TestAssignmentManagerBase {
     final ServerName anotherServer =
         serverNames.stream().filter(s -> !s.equals(tempLocationInMeta)).findFirst().get();
     am.reportOnlineRegions(anotherServer, Collections.singleton(hri.getRegionName()));
-    problematicRegions = am.getProblematicRegions();
-    assertTrue(problematicRegions.containsKey(regionName));
-    pair = problematicRegions.get(regionName);
+    hbckChecker.choreForTesting();
+    inconsistentRegions = hbckChecker.getInconsistentRegions();
+    assertTrue(inconsistentRegions.containsKey(regionName));
+    pair = inconsistentRegions.get(regionName);
     locationInMeta = pair.getFirst();
     reportedRegionServers = pair.getSecond();
     assertEquals(1, reportedRegionServers.size());
@@ -110,9 +125,10 @@ public class TestAMProblematicRegions extends TestAssignmentManagerBase {
 
     // Test for case3: More than one regionservers reported opened this region.
     am.reportOnlineRegions(locationInMeta, Collections.singleton(hri.getRegionName()));
-    problematicRegions = am.getProblematicRegions();
-    assertTrue(problematicRegions.containsKey(regionName));
-    pair = problematicRegions.get(regionName);
+    hbckChecker.choreForTesting();
+    inconsistentRegions = hbckChecker.getInconsistentRegions();
+    assertTrue(inconsistentRegions.containsKey(regionName));
+    pair = inconsistentRegions.get(regionName);
     locationInMeta = pair.getFirst();
     reportedRegionServers = pair.getSecond();
     assertEquals(2, reportedRegionServers.size());
@@ -121,7 +137,8 @@ public class TestAMProblematicRegions extends TestAssignmentManagerBase {
 
     // Reported right region location. Then not in problematic regions.
     am.reportOnlineRegions(anotherServer, Collections.EMPTY_SET);
-    problematicRegions = am.getProblematicRegions();
-    assertFalse(problematicRegions.containsKey(regionName));
+    hbckChecker.choreForTesting();
+    inconsistentRegions = hbckChecker.getInconsistentRegions();
+    assertFalse(inconsistentRegions.containsKey(regionName));
   }
 }
\ No newline at end of file


Mime
View raw message