From: misty@apache.org
To: commits@hbase.apache.org
Date: Fri, 05 Feb 2016 16:46:16 -0000
Subject: [19/51] [partial] hbase-site git commit: Published site at 6f6a8ed71fe98b83e8a8db974fc15b0d8597b174.

http://git-wip-us.apache.org/repos/asf/hbase-site/blob/1b9384b2/devapidocs/src-html/org/apache/hadoop/hbase/regionserver/HRegionServer.html
----------------------------------------------------------------------
diff --git a/devapidocs/src-html/org/apache/hadoop/hbase/regionserver/HRegionServer.html b/devapidocs/src-html/org/apache/hadoop/hbase/regionserver/HRegionServer.html
index 4078580..b9f3a92 100644
--- a/devapidocs/src-html/org/apache/hadoop/hbase/regionserver/HRegionServer.html
+++ b/devapidocs/src-html/org/apache/hadoop/hbase/regionserver/HRegionServer.html
@@ -1526,1902 +1526,1903 @@
1518 .setTotalStaticIndexSizeKB(totalStaticIndexSizeKB)
1519 .setTotalStaticBloomSizeKB(totalStaticBloomSizeKB)
1520 .setReadRequestsCount(r.getReadRequestsCount())
-1521 .setWriteRequestsCount(r.getWriteRequestsCount())
-1522 .setTotalCompactingKVs(totalCompactingKVs)
-1523 .setCurrentCompactedKVs(currentCompactedKVs)
-1524 .setDataLocality(dataLocality)
-1525 .setLastMajorCompactionTs(r.getOldestHfileTs(true));
-1526 ((HRegion)r).setCompleteSequenceId(regionLoadBldr);
-1527
-1528 return regionLoadBldr.build();
-1529 }
-1530
-1531 /**
-1532 * @param encodedRegionName
-1533 * @return An instance of RegionLoad.
-1534 */
-1535 public RegionLoad createRegionLoad(final String encodedRegionName) throws IOException {
-1536 Region r = onlineRegions.get(encodedRegionName);
-1537 return r != null ? createRegionLoad(r, null, null) : null;
-1538 }
-1539
-1540 /*
-1541 * Inner class that runs on a long period checking if regions need compaction.
-1542 */ -1543 private static class CompactionChecker extends ScheduledChore { -1544 private final HRegionServer instance; -1545 private final int majorCompactPriority; -1546 private final static int DEFAULT_PRIORITY = Integer.MAX_VALUE; -1547 private long iteration = 0; -1548 -1549 CompactionChecker(final HRegionServer h, final int sleepTime, -1550 final Stoppable stopper) { -1551 super("CompactionChecker", stopper, sleepTime); -1552 this.instance = h; -1553 LOG.info(this.getName() + " runs every " + StringUtils.formatTime(sleepTime)); -1554 -1555 /* MajorCompactPriority is configurable. -1556 * If not set, the compaction will use default priority. -1557 */ -1558 this.majorCompactPriority = this.instance.conf. -1559 getInt("hbase.regionserver.compactionChecker.majorCompactPriority", -1560 DEFAULT_PRIORITY); -1561 } -1562 -1563 @Override -1564 protected void chore() { -1565 for (Region r : this.instance.onlineRegions.values()) { -1566 if (r == null) -1567 continue; -1568 for (Store s : r.getStores()) { -1569 try { -1570 long multiplier = s.getCompactionCheckMultiplier(); -1571 assert multiplier > 0; -1572 if (iteration % multiplier != 0) continue; -1573 if (s.needsCompaction()) { -1574 // Queue a compaction. Will recognize if major is needed. -1575 this.instance.compactSplitThread.requestSystemCompaction(r, s, getName() -1576 + " requests compaction"); -1577 } else if (s.isMajorCompaction()) { -1578 if (majorCompactPriority == DEFAULT_PRIORITY -1579 || majorCompactPriority > ((HRegion)r).getCompactPriority()) { -1580 this.instance.compactSplitThread.requestCompaction(r, s, getName() -1581 + " requests major compaction; use default priority", null); -1582 } else { -1583 this.instance.compactSplitThread.requestCompaction(r, s, getName() -1584 + " requests major compaction; use configured priority", -1585 this.majorCompactPriority, null, null); -1586 } -1587 } -1588 } catch (IOException e) { -1589 LOG.warn("Failed major compaction check on " + r, e); -1590 } -1591 } -1592 } -1593 iteration = (iteration == Long.MAX_VALUE) ? 0 : (iteration + 1); -1594 } -1595 } -1596 -1597 static class PeriodicMemstoreFlusher extends ScheduledChore { -1598 final HRegionServer server; -1599 final static int RANGE_OF_DELAY = 5 * 60 * 1000; // 5 min in milliseconds -1600 final static int MIN_DELAY_TIME = 0; // millisec -1601 public PeriodicMemstoreFlusher(int cacheFlushInterval, final HRegionServer server) { -1602 super(server.getServerName() + "-MemstoreFlusherChore", server, cacheFlushInterval); -1603 this.server = server; -1604 } -1605 -1606 @Override -1607 protected void chore() { -1608 final StringBuffer whyFlush = new StringBuffer(); -1609 for (Region r : this.server.onlineRegions.values()) { -1610 if (r == null) continue; -1611 if (((HRegion)r).shouldFlush(whyFlush)) { -1612 FlushRequester requester = server.getFlushRequester(); -1613 if (requester != null) { -1614 long randomDelay = RandomUtils.nextInt(RANGE_OF_DELAY) + MIN_DELAY_TIME; -1615 LOG.info(getName() + " requesting flush of " + -1616 r.getRegionInfo().getRegionNameAsString() + " because " + -1617 whyFlush.toString() + -1618 " after random delay " + randomDelay + "ms"); -1619 //Throttle the flushes by putting a delay. If we don't throttle, and there -1620 //is a balanced write-load on the regions in a table, we might end up -1621 //overwhelming the filesystem with too many flushes at once. -1622 requester.requestDelayedFlush(r, randomDelay, false); -1623 } -1624 } -1625 } -1626 } -1627 } -1628 -1629 /** -1630 * Report the status of the server. 
A server is online once all the startup is -1631 * completed (setting up filesystem, starting service threads, etc.). This -1632 * method is designed mostly to be useful in tests. -1633 * -1634 * @return true if online, false if not. -1635 */ -1636 public boolean isOnline() { -1637 return online.get(); -1638 } -1639 -1640 /** -1641 * Setup WAL log and replication if enabled. -1642 * Replication setup is done in here because it wants to be hooked up to WAL. -1643 * @return A WAL instance. -1644 * @throws IOException -1645 */ -1646 private WALFactory setupWALAndReplication() throws IOException { -1647 // TODO Replication make assumptions here based on the default filesystem impl -1648 final Path oldLogDir = new Path(rootDir, HConstants.HREGION_OLDLOGDIR_NAME); -1649 final String logName = DefaultWALProvider.getWALDirectoryName(this.serverName.toString()); -1650 -1651 Path logdir = new Path(rootDir, logName); -1652 if (LOG.isDebugEnabled()) LOG.debug("logdir=" + logdir); -1653 if (this.fs.exists(logdir)) { -1654 throw new RegionServerRunningException("Region server has already " + -1655 "created directory at " + this.serverName.toString()); -1656 } -1657 -1658 // Instantiate replication manager if replication enabled. Pass it the -1659 // log directories. -1660 createNewReplicationInstance(conf, this, this.fs, logdir, oldLogDir); -1661 -1662 // listeners the wal factory will add to wals it creates. -1663 final List<WALActionsListener> listeners = new ArrayList<WALActionsListener>(); -1664 listeners.add(new MetricsWAL()); -1665 if (this.replicationSourceHandler != null && -1666 this.replicationSourceHandler.getWALActionsListener() != null) { -1667 // Replication handler is an implementation of WALActionsListener. -1668 listeners.add(this.replicationSourceHandler.getWALActionsListener()); -1669 } -1670 -1671 return new WALFactory(conf, listeners, serverName.toString()); -1672 } -1673 -1674 /** -1675 * We initialize the roller for the wal that handles meta lazily -1676 * since we don't know if this regionserver will handle it. All calls to -1677 * this method return a reference to the that same roller. As newly referenced -1678 * meta regions are brought online, they will be offered to the roller for maintenance. -1679 * As a part of that registration process, the roller will add itself as a -1680 * listener on the wal. -1681 */ -1682 protected LogRoller ensureMetaWALRoller() { -1683 // Using a tmp log roller to ensure metaLogRoller is alive once it is not -1684 // null -1685 LogRoller roller = metawalRoller.get(); -1686 if (null == roller) { -1687 LogRoller tmpLogRoller = new LogRoller(this, this); -1688 String n = Thread.currentThread().getName(); -1689 Threads.setDaemonThreadRunning(tmpLogRoller.getThread(), -1690 n + "-MetaLogRoller", uncaughtExceptionHandler); -1691 if (metawalRoller.compareAndSet(null, tmpLogRoller)) { -1692 roller = tmpLogRoller; -1693 } else { -1694 // Another thread won starting the roller -1695 Threads.shutdown(tmpLogRoller.getThread()); -1696 roller = metawalRoller.get(); -1697 } -1698 } -1699 return roller; -1700 } -1701 -1702 public MetricsRegionServer getRegionServerMetrics() { -1703 return this.metricsRegionServer; -1704 } -1705 -1706 /** -1707 * @return Master address tracker instance. -1708 */ -1709 public MasterAddressTracker getMasterAddressTracker() { -1710 return this.masterAddressTracker; -1711 } -1712 -1713 /* -1714 * Start maintenance Threads, Server, Worker and lease checker threads. 
-1715 * Install an UncaughtExceptionHandler that calls abort of RegionServer if we -1716 * get an unhandled exception. We cannot set the handler on all threads. -1717 * Server's internal Listener thread is off limits. For Server, if an OOME, it -1718 * waits a while then retries. Meantime, a flush or a compaction that tries to -1719 * run should trigger same critical condition and the shutdown will run. On -1720 * its way out, this server will shut down Server. Leases are sort of -1721 * inbetween. It has an internal thread that while it inherits from Chore, it -1722 * keeps its own internal stop mechanism so needs to be stopped by this -1723 * hosting server. Worker logs the exception and exits. -1724 */ -1725 private void startServiceThreads() throws IOException { -1726 // Start executor services -1727 this.service.startExecutorService(ExecutorType.RS_OPEN_REGION, -1728 conf.getInt("hbase.regionserver.executor.openregion.threads", 3)); -1729 this.service.startExecutorService(ExecutorType.RS_OPEN_META, -1730 conf.getInt("hbase.regionserver.executor.openmeta.threads", 1)); -1731 this.service.startExecutorService(ExecutorType.RS_CLOSE_REGION, -1732 conf.getInt("hbase.regionserver.executor.closeregion.threads", 3)); -1733 this.service.startExecutorService(ExecutorType.RS_CLOSE_META, -1734 conf.getInt("hbase.regionserver.executor.closemeta.threads", 1)); -1735 if (conf.getBoolean(StoreScanner.STORESCANNER_PARALLEL_SEEK_ENABLE, false)) { -1736 this.service.startExecutorService(ExecutorType.RS_PARALLEL_SEEK, -1737 conf.getInt("hbase.storescanner.parallel.seek.threads", 10)); -1738 } -1739 this.service.startExecutorService(ExecutorType.RS_LOG_REPLAY_OPS, conf.getInt( -1740 "hbase.regionserver.wal.max.splitters", SplitLogWorkerCoordination.DEFAULT_MAX_SPLITTERS)); -1741 // Start the threads for compacted files discharger -1742 this.service.startExecutorService(ExecutorType.RS_COMPACTED_FILES_DISCHARGER, -1743 conf.getInt(CompactionConfiguration.HBASE_HFILE_COMPACTION_DISCHARGER_THREAD_COUNT, 10)); -1744 if (ServerRegionReplicaUtil.isRegionReplicaWaitForPrimaryFlushEnabled(conf)) { -1745 this.service.startExecutorService(ExecutorType.RS_REGION_REPLICA_FLUSH_OPS, -1746 conf.getInt("hbase.regionserver.region.replica.flusher.threads", -1747 conf.getInt("hbase.regionserver.executor.openregion.threads", 3))); -1748 } -1749 -1750 Threads.setDaemonThreadRunning(this.walRoller.getThread(), getName() + ".logRoller", -1751 uncaughtExceptionHandler); -1752 this.cacheFlusher.start(uncaughtExceptionHandler); -1753 -1754 if (this.compactionChecker != null) choreService.scheduleChore(compactionChecker); -1755 if (this.periodicFlusher != null) choreService.scheduleChore(periodicFlusher); -1756 if (this.healthCheckChore != null) choreService.scheduleChore(healthCheckChore); -1757 if (this.nonceManagerChore != null) choreService.scheduleChore(nonceManagerChore); -1758 if (this.storefileRefresher != null) choreService.scheduleChore(storefileRefresher); -1759 if (this.movedRegionsCleaner != null) choreService.scheduleChore(movedRegionsCleaner); -1760 -1761 // Leases is not a Thread. Internally it runs a daemon thread. If it gets -1762 // an unhandled exception, it will just exit. 
-1763 Threads.setDaemonThreadRunning(this.leases.getThread(), getName() + ".leaseChecker", -1764 uncaughtExceptionHandler); -1765 -1766 if (this.replicationSourceHandler == this.replicationSinkHandler && -1767 this.replicationSourceHandler != null) { -1768 this.replicationSourceHandler.startReplicationService(); -1769 } else { -1770 if (this.replicationSourceHandler != null) { -1771 this.replicationSourceHandler.startReplicationService(); -1772 } -1773 if (this.replicationSinkHandler != null) { -1774 this.replicationSinkHandler.startReplicationService(); -1775 } -1776 } -1777 -1778 // Create the log splitting worker and start it -1779 // set a smaller retries to fast fail otherwise splitlogworker could be blocked for -1780 // quite a while inside HConnection layer. The worker won't be available for other -1781 // tasks even after current task is preempted after a split task times out. -1782 Configuration sinkConf = HBaseConfiguration.create(conf); -1783 sinkConf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, -1784 conf.getInt("hbase.log.replay.retries.number", 8)); // 8 retries take about 23 seconds -1785 sinkConf.setInt(HConstants.HBASE_RPC_TIMEOUT_KEY, -1786 conf.getInt("hbase.log.replay.rpc.timeout", 30000)); // default 30 seconds -1787 sinkConf.setInt("hbase.client.serverside.retries.multiplier", 1); -1788 this.splitLogWorker = new SplitLogWorker(this, sinkConf, this, this, walFactory); -1789 splitLogWorker.start(); -1790 } -1791 -1792 /** -1793 * Puts up the webui. -1794 * @return Returns final port -- maybe different from what we started with. -1795 * @throws IOException -1796 */ -1797 private int putUpWebUI() throws IOException { -1798 int port = this.conf.getInt(HConstants.REGIONSERVER_INFO_PORT, -1799 HConstants.DEFAULT_REGIONSERVER_INFOPORT); -1800 String addr = this.conf.get("hbase.regionserver.info.bindAddress", "0.0.0.0"); -1801 -1802 if(this instanceof HMaster) { -1803 port = conf.getInt(HConstants.MASTER_INFO_PORT, -1804 HConstants.DEFAULT_MASTER_INFOPORT); -1805 addr = this.conf.get("hbase.master.info.bindAddress", "0.0.0.0"); -1806 } -1807 // -1 is for disabling info server -1808 if (port < 0) return port; -1809 -1810 if (!Addressing.isLocalAddress(InetAddress.getByName(addr))) { -1811 String msg = -1812 "Failed to start http info server. Address " + addr -1813 + " does not belong to this host. 
Correct configuration parameter: " -1814 + "hbase.regionserver.info.bindAddress"; -1815 LOG.error(msg); -1816 throw new IOException(msg); -1817 } -1818 // check if auto port bind enabled -1819 boolean auto = this.conf.getBoolean(HConstants.REGIONSERVER_INFO_PORT_AUTO, -1820 false); -1821 while (true) { -1822 try { -1823 this.infoServer = new InfoServer(getProcessName(), addr, port, false, this.conf); -1824 infoServer.addServlet("dump", "/dump", getDumpServlet()); -1825 configureInfoServer(); -1826 this.infoServer.start(); -1827 break; -1828 } catch (BindException e) { -1829 if (!auto) { -1830 // auto bind disabled throw BindException -1831 LOG.error("Failed binding http info server to port: " + port); -1832 throw e; -1833 } -1834 // auto bind enabled, try to use another port -1835 LOG.info("Failed binding http info server to port: " + port); -1836 port++; -1837 } -1838 } -1839 port = this.infoServer.getPort(); -1840 conf.setInt(HConstants.REGIONSERVER_INFO_PORT, port); -1841 int masterInfoPort = conf.getInt(HConstants.MASTER_INFO_PORT, -1842 HConstants.DEFAULT_MASTER_INFOPORT); -1843 conf.setInt("hbase.master.info.port.orig", masterInfoPort); -1844 conf.setInt(HConstants.MASTER_INFO_PORT, port); -1845 return port; -1846 } -1847 -1848 /* -1849 * Verify that server is healthy -1850 */ -1851 private boolean isHealthy() { -1852 if (!fsOk) { -1853 // File system problem -1854 return false; -1855 } -1856 // Verify that all threads are alive -1857 if (!(leases.isAlive() -1858 && cacheFlusher.isAlive() && walRoller.isAlive() -1859 && this.compactionChecker.isScheduled() -1860 && this.periodicFlusher.isScheduled())) { -1861 stop("One or more threads are no longer alive -- stop"); -1862 return false; -1863 } -1864 final LogRoller metawalRoller = this.metawalRoller.get(); -1865 if (metawalRoller != null && !metawalRoller.isAlive()) { -1866 stop("Meta WAL roller thread is no longer alive -- stop"); -1867 return false; -1868 } -1869 return true; -1870 } -1871 -1872 private static final byte[] UNSPECIFIED_REGION = new byte[]{}; -1873 -1874 @Override -1875 public WAL getWAL(HRegionInfo regionInfo) throws IOException { -1876 WAL wal; -1877 LogRoller roller = walRoller; -1878 //_ROOT_ and hbase:meta regions have separate WAL. 
-1879 if (regionInfo != null && regionInfo.isMetaTable() && -1880 regionInfo.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) { -1881 roller = ensureMetaWALRoller(); -1882 wal = walFactory.getMetaWAL(regionInfo.getEncodedNameAsBytes()); -1883 } else if (regionInfo == null) { -1884 wal = walFactory.getWAL(UNSPECIFIED_REGION, null); -1885 } else { -1886 byte[] namespace = regionInfo.getTable().getNamespace(); -1887 wal = walFactory.getWAL(regionInfo.getEncodedNameAsBytes(), namespace); -1888 } -1889 roller.addWAL(wal); -1890 return wal; -1891 } -1892 -1893 @Override -1894 public Connection getConnection() { -1895 return getClusterConnection(); -1896 } -1897 -1898 @Override -1899 public ClusterConnection getClusterConnection() { -1900 return this.clusterConnection; -1901 } -1902 -1903 @Override -1904 public MetaTableLocator getMetaTableLocator() { -1905 return this.metaTableLocator; -1906 } -1907 -1908 @Override -1909 public void stop(final String msg) { -1910 if (!this.stopped) { -1911 try { -1912 if (this.rsHost != null) { -1913 this.rsHost.preStop(msg); -1914 } -1915 this.stopped = true; -1916 LOG.info("STOPPED: " + msg); -1917 // Wakes run() if it is sleeping -1918 sleeper.skipSleepCycle(); -1919 } catch (IOException exp) { -1920 LOG.warn("The region server did not stop", exp); -1921 } -1922 } -1923 } -1924 -1925 public void waitForServerOnline(){ -1926 while (!isStopped() && !isOnline()) { -1927 synchronized (online) { -1928 try { -1929 online.wait(msgInterval); -1930 } catch (InterruptedException ie) { -1931 Thread.currentThread().interrupt(); -1932 break; -1933 } -1934 } -1935 } -1936 } -1937 -1938 @Override -1939 public void postOpenDeployTasks(final Region r) throws KeeperException, IOException { -1940 postOpenDeployTasks(new PostOpenDeployContext(r, -1)); -1941 } -1942 -1943 @Override -1944 public void postOpenDeployTasks(final PostOpenDeployContext context) -1945 throws KeeperException, IOException { -1946 Region r = context.getRegion(); -1947 long masterSystemTime = context.getMasterSystemTime(); -1948 Preconditions.checkArgument(r instanceof HRegion, "r must be an HRegion"); -1949 rpcServices.checkOpen(); -1950 LOG.info("Post open deploy tasks for " + r.getRegionInfo().getRegionNameAsString()); -1951 // Do checks to see if we need to compact (references or too many files) -1952 for (Store s : r.getStores()) { -1953 if (s.hasReferences() || s.needsCompaction()) { -1954 this.compactSplitThread.requestSystemCompaction(r, s, "Opening Region"); -1955 } -1956 } -1957 long openSeqNum = r.getOpenSeqNum(); -1958 if (openSeqNum == HConstants.NO_SEQNUM) { -1959 // If we opened a region, we should have read some sequence number from it. -1960 LOG.error("No sequence number found when opening " + -1961 r.getRegionInfo().getRegionNameAsString()); -1962 openSeqNum = 0; -1963 } -1964 -1965 // Update flushed sequence id of a recovering region in ZK -1966 updateRecoveringRegionLastFlushedSequenceId(r); -1967 -1968 // Notify master -1969 if (!reportRegionStateTransition(new RegionStateTransitionContext( -1970 TransitionCode.OPENED, openSeqNum, masterSystemTime, r.getRegionInfo()))) { -1971 throw new IOException("Failed to report opened region to master: " -1972 + r.getRegionInfo().getRegionNameAsString()); -1973 } -1974 -1975 triggerFlushInPrimaryRegion((HRegion)r); -1976 -1977 LOG.debug("Finished post open deploy task for " + r.getRegionInfo().getRegionNameAsString()); -1978 } -1979 -1980 @Override -1981 public boolean reportRegionStateTransition(TransitionCode code, HRegionInfo... 
hris) { -1982 return reportRegionStateTransition(code, HConstants.NO_SEQNUM, hris); -1983 } -1984 -1985 @Override -1986 public boolean reportRegionStateTransition( -1987 TransitionCode code, long openSeqNum, HRegionInfo... hris) { -1988 return reportRegionStateTransition( -1989 new RegionStateTransitionContext(code, HConstants.NO_SEQNUM, -1, hris)); -1990 } -1991 -1992 @Override -1993 public boolean reportRegionStateTransition(final RegionStateTransitionContext context) { -1994 TransitionCode code = context.getCode(); -1995 long openSeqNum = context.getOpenSeqNum(); -1996 long masterSystemTime = context.getMasterSystemTime(); -1997 HRegionInfo[] hris = context.getHris(); -1998 -1999 if (TEST_SKIP_REPORTING_TRANSITION) { -2000 // This is for testing only in case there is no master -2001 // to handle the region transition report at all. -2002 if (code == TransitionCode.OPENED) { -2003 Preconditions.checkArgument(hris != null && hris.length == 1); -2004 if (hris[0].isMetaRegion()) { -2005 try { -2006 MetaTableLocator.setMetaLocation(getZooKeeper(), serverName, -2007 hris[0].getReplicaId(),State.OPEN); -2008 } catch (KeeperException e) { -2009 LOG.info("Failed to update meta location", e); -2010 return false; -2011 } -2012 } else { -2013 try { -2014 MetaTableAccessor.updateRegionLocation(clusterConnection, -2015 hris[0], serverName, openSeqNum, masterSystemTime); -2016 } catch (IOException e) { -2017 LOG.info("Failed to update meta", e); -2018 return false; -2019 } -2020 } -2021 } -2022 return true; -2023 } -2024 -2025 ReportRegionStateTransitionRequest.Builder builder = -2026 ReportRegionStateTransitionRequest.newBuilder(); -2027 builder.setServer(ProtobufUtil.toServerName(serverName)); -2028 RegionStateTransition.Builder transition = builder.addTransitionBuilder(); -2029 transition.setTransitionCode(code); -2030 if (code == TransitionCode.OPENED && openSeqNum >= 0) { -2031 transition.setOpenSeqNum(openSeqNum); -2032 } -2033 for (HRegionInfo hri: hris) { -2034 transition.addRegionInfo(HRegionInfo.convert(hri)); -2035 } -2036 ReportRegionStateTransitionRequest request = builder.build(); -2037 while (keepLooping()) { -2038 RegionServerStatusService.BlockingInterface rss = rssStub; -2039 try { -2040 if (rss == null) { -2041 createRegionServerStatusStub(); -2042 continue; -2043 } -2044 ReportRegionStateTransitionResponse response = -2045 rss.reportRegionStateTransition(null, request); -2046 if (response.hasErrorMessage()) { -2047 LOG.info("Failed to transition " + hris[0] -2048 + " to " + code + ": " + response.getErrorMessage()); -2049 return false; -2050 } -2051 return true; -2052 } catch (ServiceException se) { -2053 IOException ioe = ProtobufUtil.getRemoteException(se); -2054 LOG.info("Failed to report region transition, will retry", ioe); -2055 if (rssStub == rss) { -2056 rssStub = null; -2057 } -2058 } -2059 } -2060 return false; -2061 } -2062 -2063 /** -2064 * Trigger a flush in the primary region replica if this region is a secondary replica. Does not -2065 * block this thread. See RegionReplicaFlushHandler for details. 
-2066 */ -2067 void triggerFlushInPrimaryRegion(final HRegion region) { -2068 if (ServerRegionReplicaUtil.isDefaultReplica(region.getRegionInfo())) { -2069 return; -2070 } -2071 if (!ServerRegionReplicaUtil.isRegionReplicaReplicationEnabled(region.conf) || -2072 !ServerRegionReplicaUtil.isRegionReplicaWaitForPrimaryFlushEnabled( -2073 region.conf)) { -2074 region.setReadsEnabled(true); -2075 return; -2076 } -2077 -2078 region.setReadsEnabled(false); // disable reads before marking the region as opened. -2079 // RegionReplicaFlushHandler might reset this. -2080 -2081 // submit it to be handled by one of the handlers so that we do not block OpenRegionHandler -2082 this.service.submit( -2083 new RegionReplicaFlushHandler(this, clusterConnection, -2084 rpcRetryingCallerFactory, rpcControllerFactory, operationTimeout, region)); -2085 } -2086 -2087 @Override -2088 public RpcServerInterface getRpcServer() { -2089 return rpcServices.rpcServer; -2090 } -2091 -2092 @VisibleForTesting -2093 public RSRpcServices getRSRpcServices() { -2094 return rpcServices; -2095 } -2096 -2097 /** -2098 * Cause the server to exit without closing the regions it is serving, the log -2099 * it is using and without notifying the master. Used unit testing and on -2100 * catastrophic events such as HDFS is yanked out from under hbase or we OOME. -2101 * -2102 * @param reason -2103 * the reason we are aborting -2104 * @param cause -2105 * the exception that caused the abort, or null -2106 */ -2107 @Override -2108 public void abort(String reason, Throwable cause) { -2109 String msg = "ABORTING region server " + this + ": " + reason; -2110 if (cause != null) { -2111 LOG.fatal(msg, cause); -2112 } else { -2113 LOG.fatal(msg); -2114 } -2115 this.abortRequested = true; -2116 // HBASE-4014: show list of coprocessors that were loaded to help debug -2117 // regionserver crashes.Note that we're implicitly using -2118 // java.util.HashSet's toString() method to print the coprocessor names. -2119 LOG.fatal("RegionServer abort: loaded coprocessors are: " + -2120 CoprocessorHost.getLoadedCoprocessors()); -2121 // Try and dump metrics if abort -- might give clue as to how fatal came about.... -2122 try { -2123 LOG.info("Dump of metrics as JSON on abort: " + JSONBean.dumpRegionServerMetrics()); -2124 } catch (MalformedObjectNameException | IOException e) { -2125 LOG.warn("Failed dumping metrics", e); -2126 } -2127 -2128 // Do our best to report our abort to the master, but this may not work -2129 try { -2130 if (cause != null) { -2131 msg += "\nCause:\n" + StringUtils.stringifyException(cause); -2132 } -2133 // Report to the master but only if we have already registered with the master. -2134 if (rssStub != null && this.serverName != null) { -2135 ReportRSFatalErrorRequest.Builder builder = -2136 ReportRSFatalErrorRequest.newBuilder(); -2137 ServerName sn = -2138 ServerName.parseVersionedServerName(this.serverName.getVersionedBytes()); -2139 builder.setServer(ProtobufUtil.toServerName(sn)); -2140 builder.setErrorMessage(msg); -2141 rssStub.reportRSFatalError(null, builder.build()); -2142 } -2143 } catch (Throwable t) { -2144 LOG.warn("Unable to report fatal error to master", t); -2145 } -2146 stop(reason); -2147 } -2148 -2149 /** -2150 * @see HRegionServer#abort(String, Throwable) -2151 */ -2152 public void abort(String reason) { -2153 abort(reason, null); -2154 } -2155 -2156 @Override -2157 public boolean isAborted() { -2158 return this.abortRequested; -2159 } -2160 -2161 /* -2162 * Simulate a kill -9 of this server. 
Exits w/o closing regions or cleaninup -2163 * logs but it does close socket in case want to bring up server on old -2164 * hostname+port immediately. -2165 */ -2166 protected void kill() { -2167 this.killed = true; -2168 abort("Simulated kill"); -2169 } -2170 -2171 /** -2172 * Called on stop/abort before closing the cluster connection and meta locator. -2173 */ -2174 protected void sendShutdownInterrupt() { -2175 } -2176 -2177 /** -2178 * Wait on all threads to finish. Presumption is that all closes and stops -2179 * have already been called. -2180 */ -2181 protected void stopServiceThreads() { -2182 // clean up the scheduled chores -2183 if (this.choreService != null) choreService.shutdown(); -2184 if (this.nonceManagerChore != null) nonceManagerChore.cancel(true); -2185 if (this.compactionChecker != null) compactionChecker.cancel(true); -2186 if (this.periodicFlusher != null) periodicFlusher.cancel(true); -2187 if (this.healthCheckChore != null) healthCheckChore.cancel(true); -2188 if (this.storefileRefresher != null) storefileRefresher.cancel(true); -2189 if (this.movedRegionsCleaner != null) movedRegionsCleaner.cancel(true); -2190 -2191 if (this.cacheFlusher != null) { -2192 this.cacheFlusher.join(); -2193 } -2194 -2195 if (this.spanReceiverHost != null) { -2196 this.spanReceiverHost.closeReceivers(); -2197 } -2198 if (this.walRoller != null) { -2199 Threads.shutdown(this.walRoller.getThread()); -2200 } -2201 final LogRoller metawalRoller = this.metawalRoller.get(); -2202 if (metawalRoller != null) { -2203 Threads.shutdown(metawalRoller.getThread()); -2204 } -2205 if (this.compactSplitThread != null) { -2206 this.compactSplitThread.join(); -2207 } -2208 if (this.service != null) this.service.shutdown(); -2209 if (this.replicationSourceHandler != null && -2210 this.replicationSourceHandler == this.replicationSinkHandler) { -2211 this.replicationSourceHandler.stopReplicationService(); -2212 } else { -2213 if (this.replicationSourceHandler != null) { -2214 this.replicationSourceHandler.stopReplicationService(); -2215 } -2216 if (this.replicationSinkHandler != null) { -2217 this.replicationSinkHandler.stopReplicationService(); -2218 } -2219 } -2220 } -2221 -2222 /** -2223 * @return Return the object that implements the replication -2224 * source service. -2225 */ -2226 ReplicationSourceService getReplicationSourceService() { -2227 return replicationSourceHandler; -2228 } -2229 -2230 /** -2231 * @return Return the object that implements the replication -2232 * sink service. -2233 */ -2234 ReplicationSinkService getReplicationSinkService() { -2235 return replicationSinkHandler; -2236 } -2237 -2238 /** -2239 * Get the current master from ZooKeeper and open the RPC connection to it. -2240 * To get a fresh connection, the current rssStub must be null. -2241 * Method will block until a master is available. You can break from this -2242 * block by requesting the server stop. 
-2243 * -2244 * @return master + port, or null if server has been stopped -2245 */ -2246 @VisibleForTesting -2247 protected synchronized ServerName createRegionServerStatusStub() { -2248 if (rssStub != null) { -2249 return masterAddressTracker.getMasterAddress(); -2250 } -2251 ServerName sn = null; -2252 long previousLogTime = 0; -2253 boolean refresh = false; // for the first time, use cached data -2254 RegionServerStatusService.BlockingInterface intf = null; -2255 boolean interrupted = false; -2256 try { -2257 while (keepLooping()) { -2258 sn = this.masterAddressTracker.getMasterAddress(refresh); -2259 if (sn == null) { -2260 if (!keepLooping()) { -2261 // give up with no connection. -2262 LOG.debug("No master found and cluster is stopped; bailing out"); -2263 return null; -2264 } -2265 if (System.currentTimeMillis() > (previousLogTime + 1000)) { -2266 LOG.debug("No master found; retry"); -2267 previousLogTime = System.currentTimeMillis(); -2268 } -2269 refresh = true; // let's try pull it from ZK directly -2270 if (sleep(200)) { -2271 interrupted = true; -2272 } -2273 continue; -2274 } -2275 -2276 // If we are on the active master, use the shortcut -2277 if (this instanceof HMaster && sn.equals(getServerName())) { -2278 intf = ((HMaster)this).getMasterRpcServices(); -2279 break; -2280 } -2281 try { -2282 BlockingRpcChannel channel = -2283 this.rpcClient.createBlockingRpcChannel(sn, userProvider.getCurrent(), -2284 shortOperationTimeout); -2285 intf = RegionServerStatusService.newBlockingStub(channel); -2286 break; -2287 } catch (IOException e) { -2288 if (System.currentTimeMillis() > (previousLogTime + 1000)) { -2289 e = e instanceof RemoteException ? -2290 ((RemoteException)e).unwrapRemoteException() : e; -2291 if (e instanceof ServerNotRunningYetException) { -2292 LOG.info("Master isn't available yet, retrying"); -2293 } else { -2294 LOG.warn("Unable to connect to master. Retrying. Error was:", e); -2295 } -2296 previousLogTime = System.currentTimeMillis(); -2297 } -2298 if (sleep(200)) { -2299 interrupted = true; -2300 } -2301 } -2302 } -2303 } finally { -2304 if (interrupted) { -2305 Thread.currentThread().interrupt(); -2306 } -2307 } -2308 rssStub = intf; -2309 return sn; -2310 } -2311 -2312 /** -2313 * @return True if we should break loop because cluster is going down or -2314 * this server has been stopped or hdfs has gone bad. -2315 */ -2316 private boolean keepLooping() { -2317 return !this.stopped && isClusterUp(); -2318 } -2319 -2320 /* -2321 * Let the master know we're here Run initialization using parameters passed -2322 * us by the master. -2323 * @return A Map of key/value configurations we got from the Master else -2324 * null if we failed to register. 
-2325 * @throws IOException -2326 */ -2327 private RegionServerStartupResponse reportForDuty() throws IOException { -2328 ServerName masterServerName = createRegionServerStatusStub(); -2329 if (masterServerName == null) return null; -2330 RegionServerStartupResponse result = null; -2331 try { -2332 rpcServices.requestCount.set(0); -2333 LOG.info("reportForDuty to master=" + masterServerName + " with port=" -2334 + rpcServices.isa.getPort() + ", startcode=" + this.startcode); -2335 long now = EnvironmentEdgeManager.currentTime(); -2336 int port = rpcServices.isa.getPort(); -2337 RegionServerStartupRequest.Builder request = RegionServerStartupRequest.newBuilder(); -2338 if (shouldUseThisHostnameInstead()) { -2339 request.setUseThisHostnameInstead(useThisHostnameInstead); -2340 } -2341 request.setPort(port); -2342 request.setServerStartCode(this.startcode); -2343 request.setServerCurrentTime(now); -2344 result = this.rssStub.regionServerStartup(null, request.build()); -2345 } catch (ServiceException se) { -2346 IOException ioe = ProtobufUtil.getRemoteException(se); -2347 if (ioe instanceof ClockOutOfSyncException) { -2348 LOG.fatal("Master rejected startup because clock is out of sync", ioe); -2349 // Re-throw IOE will cause RS to abort -2350 throw ioe; -2351 } else if (ioe instanceof ServerNotRunningYetException) { -2352 LOG.debug("Master is not running yet"); -2353 } else { -2354 LOG.warn("error telling master we are up", se); -2355 } -2356 rssStub = null; -2357 } -2358 return result; -2359 } -2360 -2361 @Override -2362 public RegionStoreSequenceIds getLastSequenceId(byte[] encodedRegionName) { -2363 try { -2364 GetLastFlushedSequenceIdRequest req = -2365 RequestConverter.buildGetLastFlushedSequenceIdRequest(encodedRegionName); -2366 RegionServerStatusService.BlockingInterface rss = rssStub; -2367 if (rss == null) { // Try to connect one more time -2368 createRegionServerStatusStub(); -2369 rss = rssStub; -2370 if (rss == null) { -2371 // Still no luck, we tried -2372 LOG.warn("Unable to connect to the master to check " + "the last flushed sequence id"); -2373 return RegionStoreSequenceIds.newBuilder().setLastFlushedSequenceId(HConstants.NO_SEQNUM) -2374 .build(); -2375 } -2376 } -2377 GetLastFlushedSequenceIdResponse resp = rss.getLastFlushedSequenceId(null, req); -2378 return RegionStoreSequenceIds.newBuilder() -2379 .setLastFlushedSequenceId(resp.getLastFlushedSequenceId()) -2380 .addAllStoreSequenceId(resp.getStoreLastFlushedSequenceIdList()).build(); -2381 } catch (ServiceException e) { -2382 LOG.warn("Unable to connect to the master to check the last flushed sequence id", e); -2383 return RegionStoreSequenceIds.newBuilder().setLastFlushedSequenceId(HConstants.NO_SEQNUM) -2384 .build(); -2385 } -2386 } -2387 -2388 /** -2389 * Closes all regions. Called on our way out. -2390 * Assumes that