Reply-To: dev@hbase.apache.org
From: misty@apache.org
To: commits@hbase.apache.org
Date: Tue, 01 Dec 2015 21:20:23 -0000
Message-Id: <3c6c78d3dd4f460ea5d4ce6e733397d7@git.apache.org>
In-Reply-To: <7a2dcb5c2f344e96854cc0c9f6543f34@git.apache.org>
References: <7a2dcb5c2f344e96854cc0c9f6543f34@git.apache.org>
Subject: [07/51] [partial] hbase git commit: Published site at 7979ac46cce36f21033f8ed03c8d0dd5fddde005.

http://git-wip-us.apache.org/repos/asf/hbase/blob/c0e3c35a/devapidocs/src-html/org/apache/hadoop/hbase/regionserver/HRegionServer.PeriodicMemstoreFlusher.html ---------------------------------------------------------------------- diff --git a/devapidocs/src-html/org/apache/hadoop/hbase/regionserver/HRegionServer.PeriodicMemstoreFlusher.html b/devapidocs/src-html/org/apache/hadoop/hbase/regionserver/HRegionServer.PeriodicMemstoreFlusher.html index fdc4809..bb57874 100644 --- a/devapidocs/src-html/org/apache/hadoop/hbase/regionserver/HRegionServer.PeriodicMemstoreFlusher.html +++ b/devapidocs/src-html/org/apache/hadoop/hbase/regionserver/HRegionServer.PeriodicMemstoreFlusher.html @@ -1875,1491 +1875,1511 @@ 1867 } 1868 1869 @Override -1870 public ClusterConnection getConnection() { -1871 return this.clusterConnection; -1872 } +1870 public void releaseWAL(HRegionInfo regionInfo, WAL wal) throws IOException { +1871 if (regionInfo != null && regionInfo.isMetaTable() && +1872 regionInfo.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) { 1873 -1874 @Override -1875 public MetaTableLocator getMetaTableLocator() { -1876 return this.metaTableLocator; -1877 } -1878 -1879 @Override -1880 public void stop(final String msg) { -1881 if (!this.stopped) { -1882 try { -1883 if (this.rsHost != null) { -1884 this.rsHost.preStop(msg); -1885 } -1886 this.stopped = true; -1887 LOG.info("STOPPED: " + msg); -1888 // Wakes run() if it is sleeping -1889 sleeper.skipSleepCycle(); -1890 } catch (IOException exp) { -1891 LOG.warn("The region server did not stop", exp); -1892 } -1893 } -1894 } -1895 -1896 public void waitForServerOnline(){ -1897 while (!isStopped() && !isOnline()) { -1898 synchronized (online) { -1899 try { -1900 online.wait(msgInterval); -1901 } catch (InterruptedException ie) { -1902 Thread.currentThread().interrupt(); -1903 break; -1904 } -1905 } -1906 } -1907 } -1908 -1909 @Override -1910 public void postOpenDeployTasks(final Region
r) throws KeeperException, IOException { -1911 postOpenDeployTasks(new PostOpenDeployContext(r, -1)); -1912 } -1913 -1914 @Override -1915 public void postOpenDeployTasks(final PostOpenDeployContext context) -1916 throws KeeperException, IOException { -1917 Region r = context.getRegion(); -1918 long masterSystemTime = context.getMasterSystemTime(); -1919 Preconditions.checkArgument(r instanceof HRegion, "r must be an HRegion"); -1920 rpcServices.checkOpen(); -1921 LOG.info("Post open deploy tasks for " + r.getRegionInfo().getRegionNameAsString()); -1922 // Do checks to see if we need to compact (references or too many files) -1923 for (Store s : r.getStores()) { -1924 if (s.hasReferences() || s.needsCompaction()) { -1925 this.compactSplitThread.requestSystemCompaction(r, s, "Opening Region"); -1926 } -1927 } -1928 long openSeqNum = r.getOpenSeqNum(); -1929 if (openSeqNum == HConstants.NO_SEQNUM) { -1930 // If we opened a region, we should have read some sequence number from it. -1931 LOG.error("No sequence number found when opening " + -1932 r.getRegionInfo().getRegionNameAsString()); -1933 openSeqNum = 0; -1934 } -1935 -1936 // Update flushed sequence id of a recovering region in ZK -1937 updateRecoveringRegionLastFlushedSequenceId(r); -1938 -1939 // Notify master -1940 if (!reportRegionStateTransition(new RegionStateTransitionContext( -1941 TransitionCode.OPENED, openSeqNum, masterSystemTime, r.getRegionInfo()))) { -1942 throw new IOException("Failed to report opened region to master: " -1943 + r.getRegionInfo().getRegionNameAsString()); -1944 } -1945 -1946 triggerFlushInPrimaryRegion((HRegion)r); -1947 -1948 LOG.debug("Finished post open deploy task for " + r.getRegionInfo().getRegionNameAsString()); -1949 } -1950 -1951 @Override -1952 public boolean reportRegionStateTransition(TransitionCode code, HRegionInfo... hris) { -1953 return reportRegionStateTransition(code, HConstants.NO_SEQNUM, hris); -1954 } +1874 walFactory.closeMetaWAL(regionInfo.getEncodedNameAsBytes()); +1875 +1876 LogRoller roller; +1877 if (regionInfo != null && regionInfo.isMetaTable() && +1878 regionInfo.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) { +1879 roller = metawalRoller.get(); +1880 if (roller != null) { +1881 roller.removeWAL(wal); // only do this for meta WAL +1882 } +1883 +1884 // TODO: meta wal roller is left running. Should be fine. 
+1885 } +1886 } +1887 } +1888 +1889 @Override +1890 public ClusterConnection getConnection() { +1891 return this.clusterConnection; +1892 } +1893 +1894 @Override +1895 public MetaTableLocator getMetaTableLocator() { +1896 return this.metaTableLocator; +1897 } +1898 +1899 @Override +1900 public void stop(final String msg) { +1901 if (!this.stopped) { +1902 try { +1903 if (this.rsHost != null) { +1904 this.rsHost.preStop(msg); +1905 } +1906 this.stopped = true; +1907 LOG.info("STOPPED: " + msg); +1908 // Wakes run() if it is sleeping +1909 sleeper.skipSleepCycle(); +1910 } catch (IOException exp) { +1911 LOG.warn("The region server did not stop", exp); +1912 } +1913 } +1914 } +1915 +1916 public void waitForServerOnline(){ +1917 while (!isStopped() && !isOnline()) { +1918 synchronized (online) { +1919 try { +1920 online.wait(msgInterval); +1921 } catch (InterruptedException ie) { +1922 Thread.currentThread().interrupt(); +1923 break; +1924 } +1925 } +1926 } +1927 } +1928 +1929 @Override +1930 public void postOpenDeployTasks(final Region r) throws KeeperException, IOException { +1931 postOpenDeployTasks(new PostOpenDeployContext(r, -1)); +1932 } +1933 +1934 @Override +1935 public void postOpenDeployTasks(final PostOpenDeployContext context) +1936 throws KeeperException, IOException { +1937 Region r = context.getRegion(); +1938 long masterSystemTime = context.getMasterSystemTime(); +1939 Preconditions.checkArgument(r instanceof HRegion, "r must be an HRegion"); +1940 rpcServices.checkOpen(); +1941 LOG.info("Post open deploy tasks for " + r.getRegionInfo().getRegionNameAsString()); +1942 // Do checks to see if we need to compact (references or too many files) +1943 for (Store s : r.getStores()) { +1944 if (s.hasReferences() || s.needsCompaction()) { +1945 this.compactSplitThread.requestSystemCompaction(r, s, "Opening Region"); +1946 } +1947 } +1948 long openSeqNum = r.getOpenSeqNum(); +1949 if (openSeqNum == HConstants.NO_SEQNUM) { +1950 // If we opened a region, we should have read some sequence number from it. +1951 LOG.error("No sequence number found when opening " + +1952 r.getRegionInfo().getRegionNameAsString()); +1953 openSeqNum = 0; +1954 } 1955 -1956 @Override -1957 public boolean reportRegionStateTransition( -1958 TransitionCode code, long openSeqNum, HRegionInfo... hris) { -1959 return reportRegionStateTransition( -1960 new RegionStateTransitionContext(code, HConstants.NO_SEQNUM, -1, hris)); -1961 } -1962 -1963 @Override -1964 public boolean reportRegionStateTransition(final RegionStateTransitionContext context) { -1965 TransitionCode code = context.getCode(); -1966 long openSeqNum = context.getOpenSeqNum(); -1967 long masterSystemTime = context.getMasterSystemTime(); -1968 HRegionInfo[] hris = context.getHris(); -1969 -1970 if (TEST_SKIP_REPORTING_TRANSITION) { -1971 // This is for testing only in case there is no master -1972 // to handle the region transition report at all. 
-1973 if (code == TransitionCode.OPENED) { -1974 Preconditions.checkArgument(hris != null && hris.length == 1); -1975 if (hris[0].isMetaRegion()) { -1976 try { -1977 MetaTableLocator.setMetaLocation(getZooKeeper(), serverName, -1978 hris[0].getReplicaId(),State.OPEN); -1979 } catch (KeeperException e) { -1980 LOG.info("Failed to update meta location", e); -1981 return false; -1982 } -1983 } else { -1984 try { -1985 MetaTableAccessor.updateRegionLocation(clusterConnection, -1986 hris[0], serverName, openSeqNum, masterSystemTime); -1987 } catch (IOException e) { -1988 LOG.info("Failed to update meta", e); -1989 return false; -1990 } -1991 } -1992 } -1993 return true; -1994 } -1995 -1996 ReportRegionStateTransitionRequest.Builder builder = -1997 ReportRegionStateTransitionRequest.newBuilder(); -1998 builder.setServer(ProtobufUtil.toServerName(serverName)); -1999 RegionStateTransition.Builder transition = builder.addTransitionBuilder(); -2000 transition.setTransitionCode(code); -2001 if (code == TransitionCode.OPENED && openSeqNum >= 0) { -2002 transition.setOpenSeqNum(openSeqNum); -2003 } -2004 for (HRegionInfo hri: hris) { -2005 transition.addRegionInfo(HRegionInfo.convert(hri)); -2006 } -2007 ReportRegionStateTransitionRequest request = builder.build(); -2008 while (keepLooping()) { -2009 RegionServerStatusService.BlockingInterface rss = rssStub; -2010 try { -2011 if (rss == null) { -2012 createRegionServerStatusStub(); -2013 continue; -2014 } -2015 ReportRegionStateTransitionResponse response = -2016 rss.reportRegionStateTransition(null, request); -2017 if (response.hasErrorMessage()) { -2018 LOG.info("Failed to transition " + hris[0] -2019 + " to " + code + ": " + response.getErrorMessage()); -2020 return false; -2021 } -2022 return true; -2023 } catch (ServiceException se) { -2024 IOException ioe = ProtobufUtil.getRemoteException(se); -2025 LOG.info("Failed to report region transition, will retry", ioe); -2026 if (rssStub == rss) { -2027 rssStub = null; -2028 } -2029 } -2030 } -2031 return false; -2032 } -2033 -2034 /** -2035 * Trigger a flush in the primary region replica if this region is a secondary replica. Does not -2036 * block this thread. See RegionReplicaFlushHandler for details. -2037 */ -2038 void triggerFlushInPrimaryRegion(final HRegion region) { -2039 if (ServerRegionReplicaUtil.isDefaultReplica(region.getRegionInfo())) { -2040 return; -2041 } -2042 if (!ServerRegionReplicaUtil.isRegionReplicaReplicationEnabled(region.conf) || -2043 !ServerRegionReplicaUtil.isRegionReplicaWaitForPrimaryFlushEnabled( -2044 region.conf)) { -2045 region.setReadsEnabled(true); -2046 return; -2047 } -2048 -2049 region.setReadsEnabled(false); // disable reads before marking the region as opened. -2050 // RegionReplicaFlushHandler might reset this. -2051 -2052 // submit it to be handled by one of the handlers so that we do not block OpenRegionHandler -2053 this.service.submit( -2054 new RegionReplicaFlushHandler(this, clusterConnection, -2055 rpcRetryingCallerFactory, rpcControllerFactory, operationTimeout, region)); -2056 } -2057 -2058 @Override -2059 public RpcServerInterface getRpcServer() { -2060 return rpcServices.rpcServer; -2061 } -2062 -2063 @VisibleForTesting -2064 public RSRpcServices getRSRpcServices() { -2065 return rpcServices; -2066 } -2067 -2068 /** -2069 * Cause the server to exit without closing the regions it is serving, the log -2070 * it is using and without notifying the master. 
Used unit testing and on -2071 * catastrophic events such as HDFS is yanked out from under hbase or we OOME. -2072 * -2073 * @param reason -2074 * the reason we are aborting -2075 * @param cause -2076 * the exception that caused the abort, or null -2077 */ +1956 // Update flushed sequence id of a recovering region in ZK +1957 updateRecoveringRegionLastFlushedSequenceId(r); +1958 +1959 // Notify master +1960 if (!reportRegionStateTransition(new RegionStateTransitionContext( +1961 TransitionCode.OPENED, openSeqNum, masterSystemTime, r.getRegionInfo()))) { +1962 throw new IOException("Failed to report opened region to master: " +1963 + r.getRegionInfo().getRegionNameAsString()); +1964 } +1965 +1966 triggerFlushInPrimaryRegion((HRegion)r); +1967 +1968 LOG.debug("Finished post open deploy task for " + r.getRegionInfo().getRegionNameAsString()); +1969 } +1970 +1971 @Override +1972 public boolean reportRegionStateTransition(TransitionCode code, HRegionInfo... hris) { +1973 return reportRegionStateTransition(code, HConstants.NO_SEQNUM, hris); +1974 } +1975 +1976 @Override +1977 public boolean reportRegionStateTransition( +1978 TransitionCode code, long openSeqNum, HRegionInfo... hris) { +1979 return reportRegionStateTransition( +1980 new RegionStateTransitionContext(code, HConstants.NO_SEQNUM, -1, hris)); +1981 } +1982 +1983 @Override +1984 public boolean reportRegionStateTransition(final RegionStateTransitionContext context) { +1985 TransitionCode code = context.getCode(); +1986 long openSeqNum = context.getOpenSeqNum(); +1987 long masterSystemTime = context.getMasterSystemTime(); +1988 HRegionInfo[] hris = context.getHris(); +1989 +1990 if (TEST_SKIP_REPORTING_TRANSITION) { +1991 // This is for testing only in case there is no master +1992 // to handle the region transition report at all. 
+1993 if (code == TransitionCode.OPENED) { +1994 Preconditions.checkArgument(hris != null && hris.length == 1); +1995 if (hris[0].isMetaRegion()) { +1996 try { +1997 MetaTableLocator.setMetaLocation(getZooKeeper(), serverName, +1998 hris[0].getReplicaId(),State.OPEN); +1999 } catch (KeeperException e) { +2000 LOG.info("Failed to update meta location", e); +2001 return false; +2002 } +2003 } else { +2004 try { +2005 MetaTableAccessor.updateRegionLocation(clusterConnection, +2006 hris[0], serverName, openSeqNum, masterSystemTime); +2007 } catch (IOException e) { +2008 LOG.info("Failed to update meta", e); +2009 return false; +2010 } +2011 } +2012 } +2013 return true; +2014 } +2015 +2016 ReportRegionStateTransitionRequest.Builder builder = +2017 ReportRegionStateTransitionRequest.newBuilder(); +2018 builder.setServer(ProtobufUtil.toServerName(serverName)); +2019 RegionStateTransition.Builder transition = builder.addTransitionBuilder(); +2020 transition.setTransitionCode(code); +2021 if (code == TransitionCode.OPENED && openSeqNum >= 0) { +2022 transition.setOpenSeqNum(openSeqNum); +2023 } +2024 for (HRegionInfo hri: hris) { +2025 transition.addRegionInfo(HRegionInfo.convert(hri)); +2026 } +2027 ReportRegionStateTransitionRequest request = builder.build(); +2028 while (keepLooping()) { +2029 RegionServerStatusService.BlockingInterface rss = rssStub; +2030 try { +2031 if (rss == null) { +2032 createRegionServerStatusStub(); +2033 continue; +2034 } +2035 ReportRegionStateTransitionResponse response = +2036 rss.reportRegionStateTransition(null, request); +2037 if (response.hasErrorMessage()) { +2038 LOG.info("Failed to transition " + hris[0] +2039 + " to " + code + ": " + response.getErrorMessage()); +2040 return false; +2041 } +2042 return true; +2043 } catch (ServiceException se) { +2044 IOException ioe = ProtobufUtil.getRemoteException(se); +2045 LOG.info("Failed to report region transition, will retry", ioe); +2046 if (rssStub == rss) { +2047 rssStub = null; +2048 } +2049 } +2050 } +2051 return false; +2052 } +2053 +2054 /** +2055 * Trigger a flush in the primary region replica if this region is a secondary replica. Does not +2056 * block this thread. See RegionReplicaFlushHandler for details. +2057 */ +2058 void triggerFlushInPrimaryRegion(final HRegion region) { +2059 if (ServerRegionReplicaUtil.isDefaultReplica(region.getRegionInfo())) { +2060 return; +2061 } +2062 if (!ServerRegionReplicaUtil.isRegionReplicaReplicationEnabled(region.conf) || +2063 !ServerRegionReplicaUtil.isRegionReplicaWaitForPrimaryFlushEnabled( +2064 region.conf)) { +2065 region.setReadsEnabled(true); +2066 return; +2067 } +2068 +2069 region.setReadsEnabled(false); // disable reads before marking the region as opened. +2070 // RegionReplicaFlushHandler might reset this. 
+2071 +2072 // submit it to be handled by one of the handlers so that we do not block OpenRegionHandler +2073 this.service.submit( +2074 new RegionReplicaFlushHandler(this, clusterConnection, +2075 rpcRetryingCallerFactory, rpcControllerFactory, operationTimeout, region)); +2076 } +2077 2078 @Override -2079 public void abort(String reason, Throwable cause) { -2080 String msg = "ABORTING region server " + this + ": " + reason; -2081 if (cause != null) { -2082 LOG.fatal(msg, cause); -2083 } else { -2084 LOG.fatal(msg); -2085 } -2086 this.abortRequested = true; -2087 // HBASE-4014: show list of coprocessors that were loaded to help debug -2088 // regionserver crashes.Note that we're implicitly using -2089 // java.util.HashSet's toString() method to print the coprocessor names. -2090 LOG.fatal("RegionServer abort: loaded coprocessors are: " + -2091 CoprocessorHost.getLoadedCoprocessors()); -2092 // Try and dump metrics if abort -- might give clue as to how fatal came about.... -2093 try { -2094 LOG.info("Dump of metrics as JSON on abort: " + JSONBean.dumpRegionServerMetrics()); -2095 } catch (MalformedObjectNameException | IOException e) { -2096 LOG.warn("Failed dumping metrics", e); -2097 } -2098 -2099 // Do our best to report our abort to the master, but this may not work -2100 try { -2101 if (cause != null) { -2102 msg += "\nCause:\n" + StringUtils.stringifyException(cause); -2103 } -2104 // Report to the master but only if we have already registered with the master. -2105 if (rssStub != null && this.serverName != null) { -2106 ReportRSFatalErrorRequest.Builder builder = -2107 ReportRSFatalErrorRequest.newBuilder(); -2108 ServerName sn = -2109 ServerName.parseVersionedServerName(this.serverName.getVersionedBytes()); -2110 builder.setServer(ProtobufUtil.toServerName(sn)); -2111 builder.setErrorMessage(msg); -2112 rssStub.reportRSFatalError(null, builder.build()); -2113 } -2114 } catch (Throwable t) { -2115 LOG.warn("Unable to report fatal error to master", t); -2116 } -2117 stop(reason); -2118 } -2119 -2120 /** -2121 * @see HRegionServer#abort(String, Throwable) -2122 */ -2123 public void abort(String reason) { -2124 abort(reason, null); -2125 } -2126 -2127 @Override -2128 public boolean isAborted() { -2129 return this.abortRequested; -2130 } -2131 -2132 /* -2133 * Simulate a kill -9 of this server. Exits w/o closing regions or cleaninup -2134 * logs but it does close socket in case want to bring up server on old -2135 * hostname+port immediately. -2136 */ -2137 protected void kill() { -2138 this.killed = true; -2139 abort("Simulated kill"); -2140 } -2141 -2142 /** -2143 * Called on stop/abort before closing the cluster connection and meta locator. -2144 */ -2145 protected void sendShutdownInterrupt() { -2146 } -2147 -2148 /** -2149 * Wait on all threads to finish. Presumption is that all closes and stops -2150 * have already been called. 
-2151 */ -2152 protected void stopServiceThreads() { -2153 // clean up the scheduled chores -2154 if (this.choreService != null) choreService.shutdown(); -2155 if (this.nonceManagerChore != null) nonceManagerChore.cancel(true); -2156 if (this.compactionChecker != null) compactionChecker.cancel(true); -2157 if (this.periodicFlusher != null) periodicFlusher.cancel(true); -2158 if (this.healthCheckChore != null) healthCheckChore.cancel(true); -2159 if (this.storefileRefresher != null) storefileRefresher.cancel(true); -2160 if (this.movedRegionsCleaner != null) movedRegionsCleaner.cancel(true); +2079 public RpcServerInterface getRpcServer() { +2080 return rpcServices.rpcServer; +2081 } +2082 +2083 @VisibleForTesting +2084 public RSRpcServices getRSRpcServices() { +2085 return rpcServices; +2086 } +2087 +2088 /** +2089 * Cause the server to exit without closing the regions it is serving, the log +2090 * it is using and without notifying the master. Used unit testing and on +2091 * catastrophic events such as HDFS is yanked out from under hbase or we OOME. +2092 * +2093 * @param reason +2094 * the reason we are aborting +2095 * @param cause +2096 * the exception that caused the abort, or null +2097 */ +2098 @Override +2099 public void abort(String reason, Throwable cause) { +2100 String msg = "ABORTING region server " + this + ": " + reason; +2101 if (cause != null) { +2102 LOG.fatal(msg, cause); +2103 } else { +2104 LOG.fatal(msg); +2105 } +2106 this.abortRequested = true; +2107 // HBASE-4014: show list of coprocessors that were loaded to help debug +2108 // regionserver crashes.Note that we're implicitly using +2109 // java.util.HashSet's toString() method to print the coprocessor names. +2110 LOG.fatal("RegionServer abort: loaded coprocessors are: " + +2111 CoprocessorHost.getLoadedCoprocessors()); +2112 // Try and dump metrics if abort -- might give clue as to how fatal came about.... +2113 try { +2114 LOG.info("Dump of metrics as JSON on abort: " + JSONBean.dumpRegionServerMetrics()); +2115 } catch (MalformedObjectNameException | IOException e) { +2116 LOG.warn("Failed dumping metrics", e); +2117 } +2118 +2119 // Do our best to report our abort to the master, but this may not work +2120 try { +2121 if (cause != null) { +2122 msg += "\nCause:\n" + StringUtils.stringifyException(cause); +2123 } +2124 // Report to the master but only if we have already registered with the master. +2125 if (rssStub != null && this.serverName != null) { +2126 ReportRSFatalErrorRequest.Builder builder = +2127 ReportRSFatalErrorRequest.newBuilder(); +2128 ServerName sn = +2129 ServerName.parseVersionedServerName(this.serverName.getVersionedBytes()); +2130 builder.setServer(ProtobufUtil.toServerName(sn)); +2131 builder.setErrorMessage(msg); +2132 rssStub.reportRSFatalError(null, builder.build()); +2133 } +2134 } catch (Throwable t) { +2135 LOG.warn("Unable to report fatal error to master", t); +2136 } +2137 stop(reason); +2138 } +2139 +2140 /** +2141 * @see HRegionServer#abort(String, Throwable) +2142 */ +2143 public void abort(String reason) { +2144 abort(reason, null); +2145 } +2146 +2147 @Override +2148 public boolean isAborted() { +2149 return this.abortRequested; +2150 } +2151 +2152 /* +2153 * Simulate a kill -9 of this server. Exits w/o closing regions or cleaninup +2154 * logs but it does close socket in case want to bring up server on old +2155 * hostname+port immediately. 
+2156 */ +2157 protected void kill() { +2158 this.killed = true; +2159 abort("Simulated kill"); +2160 } 2161 -2162 if (this.cacheFlusher != null) { -2163 this.cacheFlusher.join(); -2164 } -2165 -2166 if (this.spanReceiverHost != null) { -2167 this.spanReceiverHost.closeReceivers(); -2168 } -2169 if (this.walRoller != null) { -2170 Threads.shutdown(this.walRoller.getThread()); -2171 } -2172 final LogRoller metawalRoller = this.metawalRoller.get(); -2173 if (metawalRoller != null) { -2174 Threads.shutdown(metawalRoller.getThread()); -2175 } -2176 if (this.compactSplitThread != null) { -2177 this.compactSplitThread.join(); -2178 } -2179 if (this.service != null) this.service.shutdown(); -2180 if (this.replicationSourceHandler != null && -2181 this.replicationSourceHandler == this.replicationSinkHandler) { -2182 this.replicationSourceHandler.stopReplicationService(); -2183 } else { -2184 if (this.replicationSourceHandler != null) { -2185 this.replicationSourceHandler.stopReplicationService(); -2186 } -2187 if (this.replicationSinkHandler != null) { -2188 this.replicationSinkHandler.stopReplicationService(); -2189 } -2190 } -2191 } -2192 -2193 /** -2194 * @return Return the object that implements the replication -2195 * source service. -2196 */ -2197 ReplicationSourceService getReplicationSourceService() { -2198 return replicationSourceHandler; -2199 } -2200 -2201 /** -2202 * @return Return the object that implements the replication -2203 * sink service. -2204 */ -2205 ReplicationSinkService getReplicationSinkService() { -2206 return replicationSinkHandler; -2207 } -2208 -2209 /** -2210 * Get the current master from ZooKeeper and open the RPC connection to it. -2211 * To get a fresh connection, the current rssStub must be null. -2212 * Method will block until a master is available. You can break from this -2213 * block by requesting the server stop. -2214 * -2215 * @return master + port, or null if server has been stopped +2162 /** +2163 * Called on stop/abort before closing the cluster connection and meta locator. +2164 */ +2165 protected void sendShutdownInterrupt() { +2166 } +2167 +2168 /** +2169 * Wait on all threads to finish. Presumption is that all closes and stops +2170 * have already been called. 
+2171 */ +2172 protected void stopServiceThreads() { +2173 // clean up the scheduled chores +2174 if (this.choreService != null) choreService.shutdown(); +2175 if (this.nonceManagerChore != null) nonceManagerChore.cancel(true); +2176 if (this.compactionChecker != null) compactionChecker.cancel(true); +2177 if (this.periodicFlusher != null) periodicFlusher.cancel(true); +2178 if (this.healthCheckChore != null) healthCheckChore.cancel(true); +2179 if (this.storefileRefresher != null) storefileRefresher.cancel(true); +2180 if (this.movedRegionsCleaner != null) movedRegionsCleaner.cancel(true); +2181 +2182 if (this.cacheFlusher != null) { +2183 this.cacheFlusher.join(); +2184 } +2185 +2186 if (this.spanReceiverHost != null) { +2187 this.spanReceiverHost.closeReceivers(); +2188 } +2189 if (this.walRoller != null) { +2190 Threads.shutdown(this.walRoller.getThread()); +2191 } +2192 final LogRoller metawalRoller = this.metawalRoller.get(); +2193 if (metawalRoller != null) { +2194 Threads.shutdown(metawalRoller.getThread()); +2195 } +2196 if (this.compactSplitThread != null) { +2197 this.compactSplitThread.join(); +2198 } +2199 if (this.service != null) this.service.shutdown(); +2200 if (this.replicationSourceHandler != null && +2201 this.replicationSourceHandler == this.replicationSinkHandler) { +2202 this.replicationSourceHandler.stopReplicationService(); +2203 } else { +2204 if (this.replicationSourceHandler != null) { +2205 this.replicationSourceHandler.stopReplicationService(); +2206 } +2207 if (this.replicationSinkHandler != null) { +2208 this.replicationSinkHandler.stopReplicationService(); +2209 } +2210 } +2211 } +2212 +2213 /** +2214 * @return Return the object that implements the replication +2215 * source service. 2216 */ -2217 @VisibleForTesting -2218 protected synchronized ServerName createRegionServerStatusStub() { -2219 if (rssStub != null) { -2220 return masterAddressTracker.getMasterAddress(); -2221 } -2222 ServerName sn = null; -2223 long previousLogTime = 0; -2224 boolean refresh = false; // for the first time, use cached data -2225 RegionServerStatusService.BlockingInterface intf = null; -2226 boolean interrupted = false; -2227 try { -2228 while (keepLooping()) { -2229 sn = this.masterAddressTracker.getMasterAddress(refresh); -2230 if (sn == null) { -2231 if (!keepLooping()) { -2232 // give up with no connection. -2233 LOG.debug("No master found and cluster is stopped; bailing out"); -2234 return null; -2235 } -2236 if (System.currentTimeMillis() > (previousLogTime + 1000)) { -2237 LOG.debug("No master found; retry"); -2238 previousLogTime = System.currentTimeMillis(); -2239 } -2240 refresh = true; // let's try pull it from ZK directly -2241 if (sleep(200)) { -2242 interrupted = true; -2243 } -2244 continue; -2245 } -2246 -2247 // If we are on the active master, use the shortcut -2248 if (this instanceof HMaster && sn.equals(getServerName())) { -2249 intf = ((HMaster)this).getMasterRpcServices(); -2250 break; -2251 } -2252 try { -2253 BlockingRpcChannel channel = -2254 this.rpcClient.createBlockingRpcChannel(sn, userProvider.getCurrent(), -2255 shortOperationTimeout); -2256 intf = RegionServerStatusService.newBlockingStub(channel); -2257 break; -2258 } catch (IOException e) { -2259 if (System.currentTimeMillis() > (previousLogTime + 1000)) { -2260 e = e instanceof RemoteException ? 
-2261 ((RemoteException)e).unwrapRemoteException() : e; -2262 if (e instanceof ServerNotRunningYetException) { -2263 LOG.info("Master isn't available yet, retrying"); -2264 } else { -2265 LOG.warn("Unable to connect to master. Retrying. Error was:", e); -2266 } -2267 previousLogTime = System.currentTimeMillis(); -2268 } -2269 if (sleep(200)) { -2270 interrupted = true; -2271 } -2272 } -2273 } -2274 } finally { -2275 if (interrupted) { -2276 Thread.currentThread().interrupt(); -2277 } -2278 } -2279 rssStub = intf; -2280 return sn; -2281 } -2282 -2283 /** -2284 * @return True if we should break loop because cluster is going down or -2285 * this server has been stopped or hdfs has gone bad. -2286 */ -2287 private boolean keepLooping() { -2288 return !this.stopped && isClusterUp(); -2289 } -2290 -2291 /* -2292 * Let the master know we're here Run initialization using parameters passed -2293 * us by the master. -2294 * @return A Map of key/value configurations we got from the Master else -2295 * null if we failed to register. -2296 * @throws IOException -2297 */ -2298 private RegionServerStartupResponse reportForDuty() throws IOException { -2299 ServerName masterServerName = createRegionServerStatusStub(); -2300 if (masterServerName == null) return null; -2301 RegionServerStartupResponse result = null; -2302 try { -2303 rpcServices.requestCount.set(0); -2304 LOG.info("reportForDuty to master=" + masterServerName + " with port=" -2305 + rpcServices.isa.getPort() + ", startcode=" + this.startcode); -2306 long now = EnvironmentEdgeManager.currentTime(); -2307 int port = rpcServices.isa.getPort(); -2308 RegionServerStartupRequest.Builder request = RegionServerStartupRequest.newBuilder(); -2309 if (shouldUseThisHostnameInstead()) { -2310 request.setUseThisHostnameInstead(useThisHostnameInstead); -2311 } -2312 request.setPort(port); -2313 request.setServerStartCode(this.startcode); -2314 request.setServerCurrentTime(now); -2315 result = this.rssStub.regionServerStartup(null, request.build()); -2316 } catch (ServiceException se) { -2317 IOException ioe = ProtobufUtil.getRemoteException(se); -2318 if (ioe instanceof ClockOutOfSyncException) { -2319 LOG.fatal("Master rejected startup because clock is out of sync", ioe); -2320 // Re-throw IOE will cause RS to abort -2321 throw ioe; -2322 } else if (ioe instanceof ServerNotRunningYetException) { -2323 LOG.debug("Master is not running yet"); -2324 } else { -2325 LOG.warn("error telling master we are up", se); -2326 } -2327 rssStub = null; -2328 } -2329 return result; -2330 } -2331 -2332 @Override -2333 public RegionStoreSequenceIds getLastSequenceId(byte[] encodedRegionName) { -2334 try { -2335 GetLastFlushedSequenceIdRequest req = -2336 RequestConverter.buildGetLastFlushedSequenceIdRequest(encodedRegionName); -2337 RegionServerStatusService.BlockingInterface rss = rssStub; -2338 if (rss == null) { // Try to connect one more time -2339 createRegionServerStatusStub(); -2340 rss = rssStub; -2341 if (rss == null) { -2342 // Still no luck, we tried -2343 LOG.warn("Unable to connect to the master to check " + "the last flushed sequence id"); -2344 return RegionStoreSequenceIds.newBuilder().setLastFlushedSequenceId(HConstants.NO_SEQNUM) -2345 .build(); -2346 } -2347 } -2348 GetLastFlushedSequenceIdResponse resp = rss.getLastFlushedSequenceId(null, req); -2349 return RegionStoreSequenceIds.newBuilder() -2350 .setLastFlushedSequenceId(resp.getLastFlushedSequenceId()) -2351 .addAllStoreSequenceId(resp.getStoreLastFlushedSequenceIdList()).build(); -2352 } catch 
(ServiceException e) { -2353 LOG.warn("Unable to connect to the master to check the last flushed sequence id", e); -2354 return RegionStoreSequenceIds.newBuilder().setLastFlushedSequenceId(HConstants.NO_SEQNUM) -2355 .build(); -2356 } -2357 } -2358 -2359 /** -2360 * Closes all regions. Called on our way out. -2361 * Assumes that its not possible for new regions to be added to onlineRegions -2362 * while this method runs. -2363 */ -2364 protected void closeAllRegions(final boolean abort) { -2365 closeUserRegions(abort); -2366 closeMetaTableRegions(abort); -2367 } -2368 -2369 /** -2370 * Close meta region if we carry it -2371 * @param abort Whether we're running an abort. -2372 */ -2373 void closeMetaTableRegions(final boolean abort) { -2374 Region meta = null; -2375 this.lock.writeLock().lock(); -2376 try { -2377 for (Map.Entry<String, Region> e: onlineRegions.entrySet()) { -2378 HRegionInfo hri = e.getValue().getRegionInfo(); -2379 if (hri.isMetaRegion()) { -2380 meta = e.getValue(); -2381 } -2382 if (meta != null) break; -2383 } -2384 } finally { -2385 this.lock.writeLock().unlock(); -2386 } -2387 if (meta != null) closeRegionIgnoreErrors(meta.getRegionInfo(), abort); -2388 } -2389 -2390 /** -2391 * Schedule closes on all user regions. -2392 * Should be safe calling multiple times because it wont' close regions -2393 * that are already closed or that are closing. -2394 * @param abort Whether we're running an abort. -2395 */ -2396 void closeUserRegions(final boolean abort) { -2397 this.lock.writeLock().lock(); -2398 try { -2399 for (Map.Entry<String, Region> e: this.onlineRegions.entrySet()) { -2400 Region r = e.getValue(); -2401 if (!r.getRegionInfo().isMetaTable() && r.isAvailable()) { -2402 // Don't update zk with this close transition; pass false. -2403 closeRegionIgnoreErrors(r.getRegionInfo(), abort); -2404 } -2405 } -2406 } finally { -2407 this.lock.writeLock().unlock(); -2408 } -2409 } -2410 -2411 /** @return the info server */ -2412 public InfoServer getInfoServer() { -2413 return infoServer; -2414 } -2415 -2416 /** -2417 * @return true if a stop has been requested. -2418 */ -2419 @Override -2420 public boolean isStopped() { -2421 return this.stopped; -2422 } -2423 -2424 @Override -2425 public boolean isStopping() { -2426 return this.stopping; -2427 } -2428 -2429 @Override -2430 public Map<String, Region> getRecoveringRegions() { -