From commits-return-79039-archive-asf-public=cust-asf.ponee.io@hbase.apache.org Fri Oct 12 16:53:08 2018 Return-Path: X-Original-To: archive-asf-public@cust-asf.ponee.io Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by mx-eu-01.ponee.io (Postfix) with SMTP id 016B618072F for ; Fri, 12 Oct 2018 16:53:03 +0200 (CEST) Received: (qmail 12207 invoked by uid 500); 12 Oct 2018 14:52:58 -0000 Mailing-List: contact commits-help@hbase.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@hbase.apache.org Delivered-To: mailing list commits@hbase.apache.org Received: (qmail 10012 invoked by uid 99); 12 Oct 2018 14:52:57 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 12 Oct 2018 14:52:57 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 286D8E09F3; Fri, 12 Oct 2018 14:52:57 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: git-site-role@apache.org To: commits@hbase.apache.org Date: Fri, 12 Oct 2018 14:53:30 -0000 Message-Id: <2ec3f1334a294a928fa05d7a69ce572d@git.apache.org> In-Reply-To: <6ee5f467033443ac8a925f8fc60e2ccd@git.apache.org> References: <6ee5f467033443ac8a925f8fc60e2ccd@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [35/51] [partial] hbase-site git commit: Published site at fa5fa6ecdd071b72b58971058ff3ab9d28c3e709. http://git-wip-us.apache.org/repos/asf/hbase-site/blob/d1341859/devapidocs/src-html/org/apache/hadoop/hbase/master/HMaster.RedirectServlet.html ---------------------------------------------------------------------- diff --git a/devapidocs/src-html/org/apache/hadoop/hbase/master/HMaster.RedirectServlet.html b/devapidocs/src-html/org/apache/hadoop/hbase/master/HMaster.RedirectServlet.html index 721035e..f808b16 100644 --- a/devapidocs/src-html/org/apache/hadoop/hbase/master/HMaster.RedirectServlet.html +++ b/devapidocs/src-html/org/apache/hadoop/hbase/master/HMaster.RedirectServlet.html @@ -169,3778 +169,3785 @@ 161import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; 162import org.apache.hadoop.hbase.procedure2.RemoteProcedureDispatcher.RemoteProcedure; 163import org.apache.hadoop.hbase.procedure2.RemoteProcedureException; -164import org.apache.hadoop.hbase.procedure2.store.wal.WALProcedureStore; -165import org.apache.hadoop.hbase.quotas.MasterQuotaManager; -166import org.apache.hadoop.hbase.quotas.MasterQuotasObserver; -167import org.apache.hadoop.hbase.quotas.QuotaObserverChore; -168import org.apache.hadoop.hbase.quotas.QuotaUtil; -169import org.apache.hadoop.hbase.quotas.SnapshotQuotaObserverChore; -170import org.apache.hadoop.hbase.quotas.SpaceQuotaSnapshotNotifier; -171import org.apache.hadoop.hbase.quotas.SpaceQuotaSnapshotNotifierFactory; -172import org.apache.hadoop.hbase.regionserver.DefaultStoreEngine; -173import org.apache.hadoop.hbase.regionserver.HRegionServer; -174import org.apache.hadoop.hbase.regionserver.HStore; -175import org.apache.hadoop.hbase.regionserver.RSRpcServices; -176import org.apache.hadoop.hbase.regionserver.RegionCoprocessorHost; -177import org.apache.hadoop.hbase.regionserver.RegionSplitPolicy; -178import org.apache.hadoop.hbase.regionserver.compactions.ExploringCompactionPolicy; -179import org.apache.hadoop.hbase.regionserver.compactions.FIFOCompactionPolicy; -180import org.apache.hadoop.hbase.replication.ReplicationException; -181import org.apache.hadoop.hbase.replication.ReplicationLoadSource; -182import org.apache.hadoop.hbase.replication.ReplicationPeerConfig; -183import org.apache.hadoop.hbase.replication.ReplicationPeerDescription; -184import org.apache.hadoop.hbase.replication.ReplicationUtils; -185import org.apache.hadoop.hbase.replication.SyncReplicationState; -186import org.apache.hadoop.hbase.replication.master.ReplicationHFileCleaner; -187import org.apache.hadoop.hbase.replication.master.ReplicationLogCleaner; -188import org.apache.hadoop.hbase.replication.master.ReplicationPeerConfigUpgrader; -189import org.apache.hadoop.hbase.security.AccessDeniedException; -190import org.apache.hadoop.hbase.security.UserProvider; -191import org.apache.hadoop.hbase.trace.TraceUtil; -192import org.apache.hadoop.hbase.util.Addressing; -193import org.apache.hadoop.hbase.util.BloomFilterUtil; -194import org.apache.hadoop.hbase.util.Bytes; -195import org.apache.hadoop.hbase.util.CompressionTest; -196import org.apache.hadoop.hbase.util.EncryptionTest; -197import org.apache.hadoop.hbase.util.HBaseFsck; -198import org.apache.hadoop.hbase.util.HFileArchiveUtil; -199import org.apache.hadoop.hbase.util.HasThread; -200import org.apache.hadoop.hbase.util.IdLock; -201import org.apache.hadoop.hbase.util.ModifyRegionUtils; -202import org.apache.hadoop.hbase.util.Pair; -203import org.apache.hadoop.hbase.util.RetryCounter; -204import org.apache.hadoop.hbase.util.RetryCounterFactory; -205import org.apache.hadoop.hbase.util.Threads; -206import org.apache.hadoop.hbase.util.VersionInfo; -207import org.apache.hadoop.hbase.zookeeper.LoadBalancerTracker; -208import org.apache.hadoop.hbase.zookeeper.MasterAddressTracker; -209import org.apache.hadoop.hbase.zookeeper.MasterMaintenanceModeTracker; -210import org.apache.hadoop.hbase.zookeeper.RegionNormalizerTracker; -211import org.apache.hadoop.hbase.zookeeper.ZKClusterId; -212import org.apache.hadoop.hbase.zookeeper.ZKUtil; -213import org.apache.hadoop.hbase.zookeeper.ZKWatcher; -214import org.apache.hadoop.hbase.zookeeper.ZNodePaths; -215import org.apache.yetus.audience.InterfaceAudience; -216import org.apache.zookeeper.KeeperException; -217import org.eclipse.jetty.server.Server; -218import org.eclipse.jetty.server.ServerConnector; -219import org.eclipse.jetty.servlet.ServletHolder; -220import org.eclipse.jetty.webapp.WebAppContext; -221import org.slf4j.Logger; -222import org.slf4j.LoggerFactory; -223 -224import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting; -225import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableSet; -226import org.apache.hbase.thirdparty.com.google.common.collect.Lists; -227import org.apache.hbase.thirdparty.com.google.common.collect.Maps; -228 -229import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; -230import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetRegionInfoResponse.CompactionState; -231import org.apache.hadoop.hbase.shaded.protobuf.generated.QuotaProtos.Quotas; -232import org.apache.hadoop.hbase.shaded.protobuf.generated.QuotaProtos.SpaceViolationPolicy; -233import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription; -234import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos; -235 -236/** -237 * HMaster is the "master server" for HBase. An HBase cluster has one active -238 * master. If many masters are started, all compete. Whichever wins goes on to -239 * run the cluster. All others park themselves in their constructor until -240 * master or cluster shutdown or until the active master loses its lease in -241 * zookeeper. Thereafter, all running master jostle to take over master role. -242 * -243 * <p>The Master can be asked shutdown the cluster. See {@link #shutdown()}. In -244 * this case it will tell all regionservers to go down and then wait on them -245 * all reporting in that they are down. This master will then shut itself down. -246 * -247 * <p>You can also shutdown just this master. Call {@link #stopMaster()}. -248 * -249 * @see org.apache.zookeeper.Watcher -250 */ -251@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS) -252@SuppressWarnings("deprecation") -253public class HMaster extends HRegionServer implements MasterServices { -254 private static Logger LOG = LoggerFactory.getLogger(HMaster.class); -255 -256 /** -257 * Protection against zombie master. Started once Master accepts active responsibility and -258 * starts taking over responsibilities. Allows a finite time window before giving up ownership. -259 */ -260 private static class InitializationMonitor extends HasThread { -261 /** The amount of time in milliseconds to sleep before checking initialization status. */ -262 public static final String TIMEOUT_KEY = "hbase.master.initializationmonitor.timeout"; -263 public static final long TIMEOUT_DEFAULT = TimeUnit.MILLISECONDS.convert(15, TimeUnit.MINUTES); -264 -265 /** -266 * When timeout expired and initialization has not complete, call {@link System#exit(int)} when -267 * true, do nothing otherwise. -268 */ -269 public static final String HALT_KEY = "hbase.master.initializationmonitor.haltontimeout"; -270 public static final boolean HALT_DEFAULT = false; -271 -272 private final HMaster master; -273 private final long timeout; -274 private final boolean haltOnTimeout; -275 -276 /** Creates a Thread that monitors the {@link #isInitialized()} state. */ -277 InitializationMonitor(HMaster master) { -278 super("MasterInitializationMonitor"); -279 this.master = master; -280 this.timeout = master.getConfiguration().getLong(TIMEOUT_KEY, TIMEOUT_DEFAULT); -281 this.haltOnTimeout = master.getConfiguration().getBoolean(HALT_KEY, HALT_DEFAULT); -282 this.setDaemon(true); -283 } -284 -285 @Override -286 public void run() { -287 try { -288 while (!master.isStopped() && master.isActiveMaster()) { -289 Thread.sleep(timeout); -290 if (master.isInitialized()) { -291 LOG.debug("Initialization completed within allotted tolerance. Monitor exiting."); -292 } else { -293 LOG.error("Master failed to complete initialization after " + timeout + "ms. Please" -294 + " consider submitting a bug report including a thread dump of this process."); -295 if (haltOnTimeout) { -296 LOG.error("Zombie Master exiting. Thread dump to stdout"); -297 Threads.printThreadInfo(System.out, "Zombie HMaster"); -298 System.exit(-1); -299 } -300 } -301 } -302 } catch (InterruptedException ie) { -303 LOG.trace("InitMonitor thread interrupted. Existing."); -304 } -305 } -306 } -307 -308 // MASTER is name of the webapp and the attribute name used stuffing this -309 //instance into web context. -310 public static final String MASTER = "master"; -311 -312 // Manager and zk listener for master election -313 private final ActiveMasterManager activeMasterManager; -314 // Region server tracker -315 private RegionServerTracker regionServerTracker; -316 // Draining region server tracker -317 private DrainingServerTracker drainingServerTracker; -318 // Tracker for load balancer state -319 LoadBalancerTracker loadBalancerTracker; -320 // Tracker for meta location, if any client ZK quorum specified -321 MetaLocationSyncer metaLocationSyncer; -322 // Tracker for active master location, if any client ZK quorum specified -323 MasterAddressSyncer masterAddressSyncer; -324 -325 // Tracker for split and merge state -326 private SplitOrMergeTracker splitOrMergeTracker; -327 -328 // Tracker for region normalizer state -329 private RegionNormalizerTracker regionNormalizerTracker; -330 -331 //Tracker for master maintenance mode setting -332 private MasterMaintenanceModeTracker maintenanceModeTracker; -333 -334 private ClusterSchemaService clusterSchemaService; -335 -336 public static final String HBASE_MASTER_WAIT_ON_SERVICE_IN_SECONDS = -337 "hbase.master.wait.on.service.seconds"; -338 public static final int DEFAULT_HBASE_MASTER_WAIT_ON_SERVICE_IN_SECONDS = 5 * 60; -339 -340 // Metrics for the HMaster -341 final MetricsMaster metricsMaster; -342 // file system manager for the master FS operations -343 private MasterFileSystem fileSystemManager; -344 private MasterWalManager walManager; -345 -346 // server manager to deal with region server info -347 private volatile ServerManager serverManager; -348 -349 // manager of assignment nodes in zookeeper -350 private AssignmentManager assignmentManager; -351 -352 // manager of replication -353 private ReplicationPeerManager replicationPeerManager; -354 -355 private SyncReplicationReplayWALManager syncReplicationReplayWALManager; -356 -357 // buffer for "fatal error" notices from region servers -358 // in the cluster. This is only used for assisting -359 // operations/debugging. -360 MemoryBoundedLogMessageBuffer rsFatals; -361 -362 // flag set after we become the active master (used for testing) -363 private volatile boolean activeMaster = false; -364 -365 // flag set after we complete initialization once active -366 private final ProcedureEvent<?> initialized = new ProcedureEvent<>("master initialized"); -367 -368 // flag set after master services are started, -369 // initialization may have not completed yet. -370 volatile boolean serviceStarted = false; -371 -372 // Maximum time we should run balancer for -373 private final int maxBlancingTime; -374 // Maximum percent of regions in transition when balancing -375 private final double maxRitPercent; -376 -377 private final LockManager lockManager = new LockManager(this); -378 -379 private LoadBalancer balancer; -380 private RegionNormalizer normalizer; -381 private BalancerChore balancerChore; -382 private RegionNormalizerChore normalizerChore; -383 private ClusterStatusChore clusterStatusChore; -384 private ClusterStatusPublisher clusterStatusPublisherChore = null; -385 -386 CatalogJanitor catalogJanitorChore; -387 private LogCleaner logCleaner; -388 private HFileCleaner hfileCleaner; -389 private ReplicationBarrierCleaner replicationBarrierCleaner; -390 private ExpiredMobFileCleanerChore expiredMobFileCleanerChore; -391 private MobCompactionChore mobCompactChore; -392 private MasterMobCompactionThread mobCompactThread; -393 // used to synchronize the mobCompactionStates -394 private final IdLock mobCompactionLock = new IdLock(); -395 // save the information of mob compactions in tables. -396 // the key is table name, the value is the number of compactions in that table. -397 private Map<TableName, AtomicInteger> mobCompactionStates = Maps.newConcurrentMap(); -398 -399 MasterCoprocessorHost cpHost; -400 -401 private final boolean preLoadTableDescriptors; -402 -403 // Time stamps for when a hmaster became active -404 private long masterActiveTime; -405 -406 // Time stamp for when HMaster finishes becoming Active Master -407 private long masterFinishedInitializationTime; -408 -409 //should we check the compression codec type at master side, default true, HBASE-6370 -410 private final boolean masterCheckCompression; -411 -412 //should we check encryption settings at master side, default true -413 private final boolean masterCheckEncryption; -414 -415 Map<String, Service> coprocessorServiceHandlers = Maps.newHashMap(); -416 -417 // monitor for snapshot of hbase tables -418 SnapshotManager snapshotManager; -419 // monitor for distributed procedures -420 private MasterProcedureManagerHost mpmHost; -421 -422 // it is assigned after 'initialized' guard set to true, so should be volatile -423 private volatile MasterQuotaManager quotaManager; -424 private SpaceQuotaSnapshotNotifier spaceQuotaSnapshotNotifier; -425 private QuotaObserverChore quotaObserverChore; -426 private SnapshotQuotaObserverChore snapshotQuotaChore; -427 -428 private ProcedureExecutor<MasterProcedureEnv> procedureExecutor; -429 private WALProcedureStore procedureStore; -430 -431 // handle table states -432 private TableStateManager tableStateManager; -433 -434 private long splitPlanCount; -435 private long mergePlanCount; -436 -437 /* Handle favored nodes information */ -438 private FavoredNodesManager favoredNodesManager; -439 -440 /** jetty server for master to redirect requests to regionserver infoServer */ -441 private Server masterJettyServer; -442 -443 public static class RedirectServlet extends HttpServlet { -444 private static final long serialVersionUID = 2894774810058302473L; -445 private final int regionServerInfoPort; -446 private final String regionServerHostname; -447 -448 /** -449 * @param infoServer that we're trying to send all requests to -450 * @param hostname may be null. if given, will be used for redirects instead of host from client. -451 */ -452 public RedirectServlet(InfoServer infoServer, String hostname) { -453 regionServerInfoPort = infoServer.getPort(); -454 regionServerHostname = hostname; -455 } -456 -457 @Override -458 public void doGet(HttpServletRequest request, -459 HttpServletResponse response) throws ServletException, IOException { -460 String redirectHost = regionServerHostname; -461 if(redirectHost == null) { -462 redirectHost = request.getServerName(); -463 if(!Addressing.isLocalAddress(InetAddress.getByName(redirectHost))) { -464 LOG.warn("Couldn't resolve '" + redirectHost + "' as an address local to this node and '" + -465 MASTER_HOSTNAME_KEY + "' is not set; client will get a HTTP 400 response. If " + -466 "your HBase deployment relies on client accessible names that the region server process " + -467 "can't resolve locally, then you should set the previously mentioned configuration variable " + -468 "to an appropriate hostname."); -469 // no sending client provided input back to the client, so the goal host is just in the logs. -470 response.sendError(400, "Request was to a host that I can't resolve for any of the network interfaces on " + -471 "this node. If this is due to an intermediary such as an HTTP load balancer or other proxy, your HBase " + -472 "administrator can set '" + MASTER_HOSTNAME_KEY + "' to point to the correct hostname."); -473 return; -474 } -475 } -476 // TODO this scheme should come from looking at the scheme registered in the infoserver's http server for the -477 // host and port we're using, but it's buried way too deep to do that ATM. -478 String redirectUrl = request.getScheme() + "://" -479 + redirectHost + ":" + regionServerInfoPort -480 + request.getRequestURI(); -481 response.sendRedirect(redirectUrl); -482 } -483 } -484 -485 /** -486 * Initializes the HMaster. The steps are as follows: -487 * <p> -488 * <ol> -489 * <li>Initialize the local HRegionServer -490 * <li>Start the ActiveMasterManager. -491 * </ol> -492 * <p> -493 * Remaining steps of initialization occur in -494 * #finishActiveMasterInitialization(MonitoredTask) after -495 * the master becomes the active one. -496 */ -497 public HMaster(final Configuration conf) -498 throws IOException, KeeperException { -499 super(conf); -500 TraceUtil.initTracer(conf); -501 try { -502 this.rsFatals = new MemoryBoundedLogMessageBuffer( -503 conf.getLong("hbase.master.buffer.for.rs.fatals", 1 * 1024 * 1024)); -504 LOG.info("hbase.rootdir=" + getRootDir() + -505 ", hbase.cluster.distributed=" + this.conf.getBoolean(HConstants.CLUSTER_DISTRIBUTED, false)); -506 -507 // Disable usage of meta replicas in the master -508 this.conf.setBoolean(HConstants.USE_META_REPLICAS, false); -509 -510 decorateMasterConfiguration(this.conf); -511 -512 // Hack! Maps DFSClient => Master for logs. HDFS made this -513 // config param for task trackers, but we can piggyback off of it. -514 if (this.conf.get("mapreduce.task.attempt.id") == null) { -515 this.conf.set("mapreduce.task.attempt.id", "hb_m_" + this.serverName.toString()); -516 } -517 -518 // should we check the compression codec type at master side, default true, HBASE-6370 -519 this.masterCheckCompression = conf.getBoolean("hbase.master.check.compression", true); -520 -521 // should we check encryption settings at master side, default true -522 this.masterCheckEncryption = conf.getBoolean("hbase.master.check.encryption", true); -523 -524 this.metricsMaster = new MetricsMaster(new MetricsMasterWrapperImpl(this)); -525 -526 // preload table descriptor at startup -527 this.preLoadTableDescriptors = conf.getBoolean("hbase.master.preload.tabledescriptors", true); -528 -529 this.maxBlancingTime = getMaxBalancingTime(); -530 this.maxRitPercent = conf.getDouble(HConstants.HBASE_MASTER_BALANCER_MAX_RIT_PERCENT, -531 HConstants.DEFAULT_HBASE_MASTER_BALANCER_MAX_RIT_PERCENT); -532 -533 // Do we publish the status? -534 -535 boolean shouldPublish = conf.getBoolean(HConstants.STATUS_PUBLISHED, -536 HConstants.STATUS_PUBLISHED_DEFAULT); -537 Class<? extends ClusterStatusPublisher.Publisher> publisherClass = -538 conf.getClass(ClusterStatusPublisher.STATUS_PUBLISHER_CLASS, -539 ClusterStatusPublisher.DEFAULT_STATUS_PUBLISHER_CLASS, -540 ClusterStatusPublisher.Publisher.class); -541 -542 if (shouldPublish) { -543 if (publisherClass == null) { -544 LOG.warn(HConstants.STATUS_PUBLISHED + " is true, but " + -545 ClusterStatusPublisher.DEFAULT_STATUS_PUBLISHER_CLASS + -546 " is not set - not publishing status"); -547 } else { -548 clusterStatusPublisherChore = new ClusterStatusPublisher(this, conf, publisherClass); -549 getChoreService().scheduleChore(clusterStatusPublisherChore); -550 } -551 } -552 -553 // Some unit tests don't need a cluster, so no zookeeper at all -554 if (!conf.getBoolean("hbase.testing.nocluster", false)) { -555 this.activeMasterManager = new ActiveMasterManager(zooKeeper, this.serverName, this); -556 } else { -557 this.activeMasterManager = null; -558 } -559 } catch (Throwable t) { -560 // Make sure we log the exception. HMaster is often started via reflection and the -561 // cause of failed startup is lost. -562 LOG.error("Failed construction of Master", t); -563 throw t; -564 } -565 } -566 -567 @Override -568 protected String getUseThisHostnameInstead(Configuration conf) { -569 return conf.get(MASTER_HOSTNAME_KEY); -570 } -571 -572 // Main run loop. Calls through to the regionserver run loop AFTER becoming active Master; will -573 // block in here until then. -574 @Override -575 public void run() { -576 try { -577 if (!conf.getBoolean("hbase.testing.nocluster", false)) { -578 Threads.setDaemonThreadRunning(new Thread(() -> { -579 try { -580 int infoPort = putUpJettyServer(); -581 startActiveMasterManager(infoPort); -582 } catch (Throwable t) { -583 // Make sure we log the exception. -584 String error = "Failed to become Active Master"; -585 LOG.error(error, t); -586 // Abort should have been called already. -587 if (!isAborted()) { -588 abort(error, t); -589 } -590 } -591 }), getName() + ":becomeActiveMaster"); -592 } -593 // Fall in here even if we have been aborted. Need to run the shutdown services and -594 // the super run call will do this for us. -595 super.run(); -596 } finally { -597 if (this.clusterSchemaService != null) { -598 // If on way out, then we are no longer active master. -599 this.clusterSchemaService.stopAsync(); -600 try { -601 this.clusterSchemaService.awaitTerminated( -602 getConfiguration().getInt(HBASE_MASTER_WAIT_ON_SERVICE_IN_SECONDS, -603 DEFAULT_HBASE_MASTER_WAIT_ON_SERVICE_IN_SECONDS), TimeUnit.SECONDS); -604 } catch (TimeoutException te) { -605 LOG.warn("Failed shutdown of clusterSchemaService", te); -606 } -607 } -608 this.activeMaster = false; -609 } -610 } -611 -612 // return the actual infoPort, -1 means disable info server. -613 private int putUpJettyServer() throws IOException { -614 if (!conf.getBoolean("hbase.master.infoserver.redirect", true)) { -615 return -1; -616 } -617 final int infoPort = conf.getInt("hbase.master.info.port.orig", -618 HConstants.DEFAULT_MASTER_INFOPORT); -619 // -1 is for disabling info server, so no redirecting -620 if (infoPort < 0 || infoServer == null) { -621 return -1; -622 } -623 if(infoPort == infoServer.getPort()) { -624 return infoPort; -625 } -626 final String addr = conf.get("hbase.master.info.bindAddress", "0.0.0.0"); -627 if (!Addressing.isLocalAddress(InetAddress.getByName(addr))) { -628 String msg = -629 "Failed to start redirecting jetty server. Address " + addr -630 + " does not belong to this host. Correct configuration parameter: " -631 + "hbase.master.info.bindAddress"; -632 LOG.error(msg); -633 throw new IOException(msg); -634 } -635 -636 // TODO I'm pretty sure we could just add another binding to the InfoServer run by -637 // the RegionServer and have it run the RedirectServlet instead of standing up -638 // a second entire stack here. -639 masterJettyServer = new Server(); -640 final ServerConnector connector = new ServerConnector(masterJettyServer); -641 connector.setHost(addr); -642 connector.setPort(infoPort); -643 masterJettyServer.addConnector(connector); -644 masterJettyServer.setStopAtShutdown(true); -645 -646 final String redirectHostname = -647 StringUtils.isBlank(useThisHostnameInstead) ? null : useThisHostnameInstead; -648 -649 final RedirectServlet redirect = new RedirectServlet(infoServer, redirectHostname); -650 final WebAppContext context = new WebAppContext(null, "/", null, null, null, null, WebAppContext.NO_SESSIONS); -651 context.addServlet(new ServletHolder(redirect), "/*"); -652 context.setServer(masterJettyServer); -653 -654 try { -655 masterJettyServer.start(); -656 } catch (Exception e) { -657 throw new IOException("Failed to start redirecting jetty server", e); -658 } -659 return connector.getLocalPort(); -660 } -661 -662 @Override -663 protected Function<TableDescriptorBuilder, TableDescriptorBuilder> getMetaTableObserver() { -664 return builder -> builder.setRegionReplication(conf.getInt(HConstants.META_REPLICAS_NUM, HConstants.DEFAULT_META_REPLICA_NUM)); -665 } -666 /** -667 * For compatibility, if failed with regionserver credentials, try the master one -668 */ -669 @Override -670 protected void login(UserProvider user, String host) throws IOException { -671 try { -672 super.login(user, host); -673 } catch (IOException ie) { -674 user.login("hbase.master.keytab.file", -675 "hbase.master.kerberos.principal", host); -676 } -677 } -678 -679 /** -680 * If configured to put regions on active master, -681 * wait till a backup master becomes active. -682 * Otherwise, loop till the server is stopped or aborted. -683 */ -684 @Override -685 protected void waitForMasterActive(){ -686 boolean tablesOnMaster = LoadBalancer.isTablesOnMaster(conf); -687 while (!(tablesOnMaster && activeMaster) && !isStopped() && !isAborted()) { -688 sleeper.sleep(); -689 } -690 } -691 -692 @VisibleForTesting -693 public MasterRpcServices getMasterRpcServices() { -694 return (MasterRpcServices)rpcServices; -695 } -696 -697 public boolean balanceSwitch(final boolean b) throws IOException { -698 return getMasterRpcServices().switchBalancer(b, BalanceSwitchMode.ASYNC); -699 } -700 -701 @Override -702 protected String getProcessName() { -703 return MASTER; -704 } -705 -706 @Override -707 protected boolean canCreateBaseZNode() { -708 return true; -709 } -710 -711 @Override -712 protected boolean canUpdateTableDescriptor() { -713 return true; -714 } -715 -716 @Override -717 protected RSRpcServices createRpcServices() throws IOException { -718 return new MasterRpcServices(this); -719 } -720 -721 @Override -722 protected void configureInfoServer() { -723 infoServer.addServlet("master-status", "/master-status", MasterStatusServlet.class); -724 infoServer.setAttribute(MASTER, this); -725 if (LoadBalancer.isTablesOnMaster(conf)) { -726 super.configureInfoServer(); -727 } -728 } -729 -730 @Override -731 protected Class<? extends HttpServlet> getDumpServlet() { -732 return MasterDumpServlet.class; -733 } -734 -735 @Override -736 public MetricsMaster getMasterMetrics() { -737 return metricsMaster; -738 } -739 -740 /** -741 * <p> -742 * Initialize all ZK based system trackers. But do not include {@link RegionServerTracker}, it -743 * should have already been initialized along with {@link ServerManager}. -744 * </p> -745 * <p> -746 * Will be overridden in tests. -747 * </p> -748 */ -749 @VisibleForTesting -750 protected void initializeZKBasedSystemTrackers() -751 throws IOException, InterruptedException, KeeperException, ReplicationException { -752 this.balancer = LoadBalancerFactory.getLoadBalancer(conf); -753 this.normalizer = RegionNormalizerFactory.getRegionNormalizer(conf); -754 this.normalizer.setMasterServices(this); -755 this.normalizer.setMasterRpcServices((MasterRpcServices)rpcServices); -756 this.loadBalancerTracker = new LoadBalancerTracker(zooKeeper, this); -757 this.loadBalancerTracker.start(); -758 -759 this.regionNormalizerTracker = new RegionNormalizerTracker(zooKeeper, this); -760 this.regionNormalizerTracker.start(); -761 -762 this.splitOrMergeTracker = new SplitOrMergeTracker(zooKeeper, conf, this); -763 this.splitOrMergeTracker.start(); -764 -765 this.replicationPeerManager = ReplicationPeerManager.create(zooKeeper, conf); -766 this.syncReplicationReplayWALManager = new SyncReplicationReplayWALManager(this); -767 -768 this.drainingServerTracker = new DrainingServerTracker(zooKeeper, this, this.serverManager); -769 this.drainingServerTracker.start(); -770 -771 this.maintenanceModeTracker = new MasterMaintenanceModeTracker(zooKeeper); -772 this.maintenanceModeTracker.start(); -773 -774 String clientQuorumServers = conf.get(HConstants.CLIENT_ZOOKEEPER_QUORUM); -775 boolean clientZkObserverMode = conf.getBoolean(HConstants.CLIENT_ZOOKEEPER_OBSERVER_MODE, -776 HConstants.DEFAULT_CLIENT_ZOOKEEPER_OBSERVER_MODE); -777 if (clientQuorumServers != null && !clientZkObserverMode) { -778 // we need to take care of the ZK information synchronization -779 // if given client ZK are not observer nodes -780 ZKWatcher clientZkWatcher = new ZKWatcher(conf, -781 getProcessName() + ":" + rpcServices.getSocketAddress().getPort() + "-clientZK", this, -782 false, true); -783 this.metaLocationSyncer = new MetaLocationSyncer(zooKeeper, clientZkWatcher, this); -784 this.metaLocationSyncer.start(); -785 this.masterAddressSyncer = new MasterAddressSyncer(zooKeeper, clientZkWatcher, this); -786 this.masterAddressSyncer.start(); -787 // set cluster id is a one-go effort -788 ZKClusterId.setClusterId(clientZkWatcher, fileSystemManager.getClusterId()); -789 } -790 -791 // Set the cluster as up. If new RSs, they'll be waiting on this before -792 // going ahead with their startup. -793 boolean wasUp = this.clusterStatusTracker.isClusterUp(); -794 if (!wasUp) this.clusterStatusTracker.setClusterUp(); -795 -796 LOG.info("Active/primary master=" + this.serverName + -797 ", sessionid=0x" + -798 Long.toHexString(this.zooKeeper.getRecoverableZooKeeper().getSessionId()) + -799 ", setting cluster-up flag (Was=" + wasUp + ")"); -800 -801 // create/initialize the snapshot manager and other procedure managers -802 this.snapshotManager = new SnapshotManager(); -803 this.mpmHost = new MasterProcedureManagerHost(); -804 this.mpmHost.register(this.snapshotManager); -805 this.mpmHost.register(new MasterFlushTableProcedureManager()); -806 this.mpmHost.loadProcedures(conf); -807 this.mpmHost.initialize(this, this.metricsMaster); -808 } -809 -810 private static final ImmutableSet<Class<?>> UNSUPPORTED_PROCEDURES = -811 ImmutableSet.of(RecoverMetaProcedure.class, AssignProcedure.class, UnassignProcedure.class, -812 MoveRegionProcedure.class); -813 -814 /** -815 * In HBASE-20811, we have introduced a new TRSP to assign/unassign/move regions, and it is -816 * incompatible with the old AssignProcedure/UnassignProcedure/MoveRegionProcedure. So we need to -817 * make sure that there are none these procedures when upgrading. If there are, the master will -818 * quit, you need to go back to the old version to finish these procedures first before upgrading. -819 */ -820 private void checkUnsupportedProcedure( -821 Map<Class<? extends Procedure>, List<Procedure<MasterProcedureEnv>>> procsByType) -822 throws HBaseIOException { -823 // Confirm that we do not have unfinished assign/unassign related procedures. It is not easy to -824 // support both the old assign/unassign procedures and the new TransitRegionStateProcedure as -825 // there will be conflict in the code for AM. We should finish all these procedures before -826 // upgrading. -827 for (Class<?> clazz : UNSUPPORTED_PROCEDURES) { -828 List<Procedure<MasterProcedureEnv>> procs = procsByType.get(clazz); -829 if (procs != null) { -830 LOG.error( -831 "Unsupported procedure type {} found, please rollback your master to the old" + -832 " version to finish them, and then try to upgrade again. The full procedure list: {}", -833 clazz, procs); -834 throw new HBaseIOException("Unsupported procedure type " + clazz + " found"); -835 } -836 } -837 // A special check for SCP, as we do not support RecoverMetaProcedure any more so we need to -838 // make sure that no one will try to schedule it but SCP does have a state which will schedule -839 // it. -840 if (procsByType.getOrDefault(ServerCrashProcedure.class, Collections.emptyList()).stream() -841 .map(p -> (ServerCrashProcedure) p).anyMatch(ServerCrashProcedure::isInRecoverMetaState)) { -842 LOG.error("At least one ServerCrashProcedure is going to schedule a RecoverMetaProcedure," + -843 " which is not supported any more. Please rollback your master to the old version to" + -844 " finish them, and then try to upgrade again."); -845 throw new HBaseIOException("Unsupported procedure state found for ServerCrashProcedure"); -846 } -847 } -848 -849 // Will be overriden in test to inject customized AssignmentManager -850 @VisibleForTesting -851 protected AssignmentManager createAssignmentManager(MasterServices master) { -852 return new AssignmentManager(master); -853 } -854 -855 /** -856 * Finish initialization of HMaster after becoming the primary master. -857 * <p/> -858 * The startup order is a bit complicated but very important, do not change it unless you know -859 * what you are doing. -860 * <ol> -861 * <li>Initialize file system based components - file system manager, wal manager, table -862 * descriptors, etc</li> -863 * <li>Publish cluster id</li> -864 * <li>Here comes the most complicated part - initialize server manager, assignment manager and -865 * region server tracker -866 * <ol type='i'> -867 * <li>Create server manager</li> -868 * <li>Create procedure executor, load the procedures, but do not start workers. We will start it -869 * later after we finish scheduling SCPs to avoid scheduling duplicated SCPs for the same -870 * server</li> -871 * <li>Create assignment manager and start it, load the meta region state, but do not load data -872 * from meta region</li> -873 * <li>Start region server tracker, construct the online servers set and find out dead servers and -874 * schedule SCP for them. The online servers will be constructed by scanning zk, and we will also -875 * scan the wal directory to find out possible live region servers, and the differences between -876 * these two sets are the dead servers</li> -877 * </ol> -878 * </li> -879 * <li>If this is a new deploy, schedule a InitMetaProcedure to initialize meta</li> -880 * <li>Start necessary service threads - balancer, catalog janior, executor services, and also the -881 * procedure executor, etc. Notice that the balancer must be created first as assignment manager -882 * may use it when assigning regions.</li> -883 * <li>Wait for meta to be initialized if necesssary, start table state manager.</li> -884 * <li>Wait for enough region servers to check-in</li> -885 * <li>Let assignment manager load data from meta and construct region states</li> -886 * <li>Start all other things such as chore services, etc</li> -887 * </ol> -888 * <p/> -889 * Notice that now we will not schedule a special procedure to make meta online(unless the first -890 * time where meta has not been created yet), we will rely on SCP to bring meta online. -891 */ -892 private void finishActiveMasterInitialization(MonitoredTask status) throws IOException, -893 InterruptedException, KeeperException, ReplicationException { -894 Thread zombieDetector = new Thread(new InitializationMonitor(this), -895 "ActiveMasterInitializationMonitor-" + System.currentTimeMillis()); -896 zombieDetector.setDaemon(true); -897 zombieDetector.start(); -898 -899 /* -900 * We are active master now... go initialize components we need to run. -901 */ -902 status.setStatus("Initializing Master file system"); -903 -904 this.masterActiveTime = System.currentTimeMillis(); -905 // TODO: Do this using Dependency Injection, using PicoContainer, Guice or Spring. -906 // Initialize the chunkCreator -907 initializeMemStoreChunkCreator(); -908 this.fileSystemManager = new MasterFileSystem(conf); -909 this.walManager = new MasterWalManager(this); -910 -911 // enable table descriptors cache -912 this.tableDescriptors.setCacheOn(); -913 -914 // warm-up HTDs cache on master initialization -915 if (preLoadTableDescriptors) { -916 status.setStatus("Pre-loading table descriptors"); -917 this.tableDescriptors.getAll(); -918 } -919 -920 // Publish cluster ID; set it in Master too. The superclass RegionServer does this later but -921 // only after it has checked in with the Master. At least a few tests ask Master for clusterId -922 // before it has called its run method and before RegionServer has done the reportForDuty. -923 ClusterId clusterId = fileSystemManager.getClusterId(); -924 status.setStatus("Publishing Cluster ID " + clusterId + " in ZooKeeper"); -925 ZKClusterId.setClusterId(this.zooKeeper, fileSystemManager.getClusterId()); -926 this.clusterId = clusterId.toString(); -927 -928 // Precaution. Put in place the old hbck1 lock file to fence out old hbase1s running their -929 // hbck1s against an hbase2 cluster; it could do damage. To skip this behavior, set -930 // hbase.write.hbck1.lock.file to false. -931 if (this.conf.getBoolean("hbase.write.hbck1.lock.file", true)) { -932 HBaseFsck.checkAndMarkRunningHbck(this.conf, -933 HBaseFsck.createLockRetryCounterFactory(this.conf).create()); -934 } -935 -936 status.setStatus("Initialize ServerManager and schedule SCP for crash servers"); -937 this.serverManager = createServerManager(this); -938 createProcedureExecutor(); -939 @SuppressWarnings("rawtypes") -940 Map<Class<? extends Procedure>, List<Procedure<MasterProcedureEnv>>> procsByType = -941 procedureExecutor.getActiveProceduresNoCopy().stream() -942 .collect(Collectors.groupingBy(p -> p.getClass())); -943 -944 checkUnsupportedProcedure(procsByType); -945 -946 // Create Assignment Manager -947 this.assignmentManager = createAssignmentManager(this); -948 this.assignmentManager.start(); -949 // TODO: TRSP can perform as the sub procedure for other procedures, so even if it is marked as -950 // completed, it could still be in the procedure list. This is a bit strange but is another -951 // story, need to verify the implementation for ProcedureExecutor and ProcedureStore. -952 List<TransitRegionStateProcedure> ritList = -953 procsByType.getOrDefault(TransitRegionStateProcedure.class, Collections.emptyList()).stream() -954 .filter(p -> !p.isFinished()).map(p -> (TransitRegionStateProcedure) p) -955 .collect(Collectors.toList()); -956 this.assignmentManager.setupRIT(ritList); -957 -958 // Start RegionServerTracker with listing of servers found with exiting SCPs -- these should -959 // be registered in the deadServers set -- and with the list of servernames out on the -960 // filesystem that COULD BE 'alive' (we'll schedule SCPs for each and let SCP figure it out). -961 // We also pass dirs that are already 'splitting'... so we can do some checks down in tracker. -962 // TODO: Generate the splitting and live Set in one pass instead of two as we currently do. -963 this.regionServerTracker = new RegionServerTracker(zooKeeper, this, this.serverManager); -964 this.regionServerTracker.start( -965 procsByType.getOrDefault(ServerCrashProcedure.class, Collections.emptyList()).stream() -966 .map(p -> (ServerCrashProcedure) p).map(p -> p.getServerName()).collect(Collectors.toSet()), -967 walManager.getLiveServersFromWALDir(), walManager.getSplittingServersFromWALDir()); -968 // This manager will be started AFTER hbase:meta is confirmed on line. -969 // hbase.mirror.table.state.to.zookeeper is so hbase1 clients can connect. They read table -970 // state from zookeeper while hbase2 reads it from hbase:meta. Disable if no hbase1 clients. -971 this.tableStateManager = -972 this.conf.getBoolean(MirroringTableStateManager.MIRROR_TABLE_STATE_TO_ZK_KEY, true) -973 ? -974 new MirroringTableStateManager(this): -975 new TableStateManager(this); -976 -977 status.setStatus("Initializing ZK system trackers"); -978 initializeZKBasedSystemTrackers(); -979 status.setStatus("Loading last flushed sequence id of regions"); -980 try { -981 this.serverManager.loadLastFlushedSequenceIds(); -982 } catch (IOException e) { -983 LOG.debug("Failed to load last flushed sequence id of regions" -984 + " from file system", e); -985 } -986 // Set ourselves as active Master now our claim has succeeded up in zk. -987 this.activeMaster = true; -988 -989 // This is for backwards compatibility -990 // See HBASE-11393 -991 status.setStatus("Update TableCFs node in ZNode"); -992 ReplicationPeerConfigUpgrader tableCFsUpdater = -993 new ReplicationPeerConfigUpgrader(zooKeeper, conf);