From commits-return-68152-archive-asf-public=cust-asf.ponee.io@hbase.apache.org Sun Feb 18 16:13:45 2018 Return-Path: X-Original-To: archive-asf-public@cust-asf.ponee.io Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by mx-eu-01.ponee.io (Postfix) with SMTP id 2F4B81807A6 for ; Sun, 18 Feb 2018 16:13:43 +0100 (CET) Received: (qmail 167 invoked by uid 500); 18 Feb 2018 15:13:36 -0000 Mailing-List: contact commits-help@hbase.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@hbase.apache.org Delivered-To: mailing list commits@hbase.apache.org Received: (qmail 98744 invoked by uid 99); 18 Feb 2018 15:13:35 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Sun, 18 Feb 2018 15:13:35 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 99BA8F32CB; Sun, 18 Feb 2018 15:13:32 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: git-site-role@apache.org To: commits@hbase.apache.org Date: Sun, 18 Feb 2018 15:13:52 -0000 Message-Id: In-Reply-To: <75acd469b295403ba58b65c6bb8fdd62@git.apache.org> References: <75acd469b295403ba58b65c6bb8fdd62@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [21/51] [partial] hbase-site git commit: Published site at . http://git-wip-us.apache.org/repos/asf/hbase-site/blob/991224b9/devapidocs/src-html/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.html ---------------------------------------------------------------------- diff --git a/devapidocs/src-html/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.html b/devapidocs/src-html/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.html index 4f5b33a..4361237 100644 --- a/devapidocs/src-html/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.html +++ b/devapidocs/src-html/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.html @@ -278,567 +278,568 @@ 270 } else { 271 LOG.error(msg, e); 272 setFailure(e); -273 } -274 } -275 // if split fails, need to call ((HRegion)parent).clearSplit() when it is a force split -276 return Flow.HAS_MORE_STATE; -277 } -278 -279 /** -280 * To rollback {@link SplitTableRegionProcedure}, an AssignProcedure is asynchronously -281 * submitted for parent region to be split (rollback doesn't wait on the completion of the -282 * AssignProcedure) . This can be improved by changing rollback() to support sub-procedures. -283 * See HBASE-19851 for details. -284 */ -285 @Override -286 protected void rollbackState(final MasterProcedureEnv env, final SplitTableRegionState state) -287 throws IOException, InterruptedException { -288 if (isTraceEnabled()) { -289 LOG.trace(this + " rollback state=" + state); -290 } -291 -292 try { -293 switch (state) { -294 case SPLIT_TABLE_REGION_POST_OPERATION: -295 case SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS: -296 case SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META: -297 case SPLIT_TABLE_REGION_UPDATE_META: -298 // PONR -299 throw new UnsupportedOperationException(this + " unhandled state=" + state); -300 case SPLIT_TABLE_REGION_PRE_OPERATION_BEFORE_META: -301 break; -302 case SPLIT_TABLE_REGION_CREATE_DAUGHTER_REGIONS: -303 // Doing nothing, as re-open parent region would clean up daughter region directories. -304 break; -305 case SPLIT_TABLE_REGION_CLOSE_PARENT_REGION: -306 openParentRegion(env); -307 break; -308 case SPLIT_TABLE_REGION_PRE_OPERATION: -309 postRollBackSplitRegion(env); -310 break; -311 case SPLIT_TABLE_REGION_PREPARE: -312 break; // nothing to do -313 default: -314 throw new UnsupportedOperationException(this + " unhandled state=" + state); -315 } -316 } catch (IOException e) { -317 // This will be retried. Unless there is a bug in the code, -318 // this should be just a "temporary error" (e.g. network down) -319 LOG.warn("pid=" + getProcId() + " failed rollback attempt step " + state + -320 " for splitting the region " -321 + getParentRegion().getEncodedName() + " in table " + getTableName(), e); -322 throw e; -323 } -324 } -325 -326 /* -327 * Check whether we are in the state that can be rollback -328 */ -329 @Override -330 protected boolean isRollbackSupported(final SplitTableRegionState state) { -331 switch (state) { -332 case SPLIT_TABLE_REGION_POST_OPERATION: -333 case SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS: -334 case SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META: -335 case SPLIT_TABLE_REGION_UPDATE_META: -336 // It is not safe to rollback if we reach to these states. -337 return false; -338 default: -339 break; -340 } -341 return true; -342 } -343 -344 @Override -345 protected SplitTableRegionState getState(final int stateId) { -346 return SplitTableRegionState.forNumber(stateId); -347 } -348 -349 @Override -350 protected int getStateId(final SplitTableRegionState state) { -351 return state.getNumber(); -352 } -353 -354 @Override -355 protected SplitTableRegionState getInitialState() { -356 return SplitTableRegionState.SPLIT_TABLE_REGION_PREPARE; -357 } -358 -359 @Override -360 protected void serializeStateData(ProcedureStateSerializer serializer) -361 throws IOException { -362 super.serializeStateData(serializer); -363 -364 final MasterProcedureProtos.SplitTableRegionStateData.Builder splitTableRegionMsg = -365 MasterProcedureProtos.SplitTableRegionStateData.newBuilder() -366 .setUserInfo(MasterProcedureUtil.toProtoUserInfo(getUser())) -367 .setParentRegionInfo(ProtobufUtil.toRegionInfo(getRegion())) -368 .addChildRegionInfo(ProtobufUtil.toRegionInfo(daughter_1_RI)) -369 .addChildRegionInfo(ProtobufUtil.toRegionInfo(daughter_2_RI)); -370 serializer.serialize(splitTableRegionMsg.build()); -371 } -372 -373 @Override -374 protected void deserializeStateData(ProcedureStateSerializer serializer) -375 throws IOException { -376 super.deserializeStateData(serializer); -377 -378 final MasterProcedureProtos.SplitTableRegionStateData splitTableRegionsMsg = -379 serializer.deserialize(MasterProcedureProtos.SplitTableRegionStateData.class); -380 setUser(MasterProcedureUtil.toUserInfo(splitTableRegionsMsg.getUserInfo())); -381 setRegion(ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getParentRegionInfo())); -382 assert(splitTableRegionsMsg.getChildRegionInfoCount() == 2); -383 daughter_1_RI = ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getChildRegionInfo(0)); -384 daughter_2_RI = ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getChildRegionInfo(1)); -385 } -386 -387 @Override -388 public void toStringClassDetails(StringBuilder sb) { -389 sb.append(getClass().getSimpleName()); -390 sb.append(" table="); -391 sb.append(getTableName()); -392 sb.append(", parent="); -393 sb.append(getParentRegion().getShortNameToLog()); -394 sb.append(", daughterA="); -395 sb.append(daughter_1_RI.getShortNameToLog()); -396 sb.append(", daughterB="); -397 sb.append(daughter_2_RI.getShortNameToLog()); -398 } -399 -400 private RegionInfo getParentRegion() { -401 return getRegion(); -402 } -403 -404 @Override -405 public TableOperationType getTableOperationType() { -406 return TableOperationType.REGION_SPLIT; -407 } -408 -409 @Override -410 protected ProcedureMetrics getProcedureMetrics(MasterProcedureEnv env) { -411 return env.getAssignmentManager().getAssignmentManagerMetrics().getSplitProcMetrics(); -412 } -413 -414 private byte[] getSplitRow() { -415 return daughter_2_RI.getStartKey(); -416 } -417 -418 private static State [] EXPECTED_SPLIT_STATES = new State [] {State.OPEN, State.CLOSED}; -419 /** -420 * Prepare to Split region. -421 * @param env MasterProcedureEnv -422 * @throws IOException -423 */ -424 @VisibleForTesting -425 public boolean prepareSplitRegion(final MasterProcedureEnv env) throws IOException { -426 // Check whether the region is splittable -427 RegionStateNode node = -428 env.getAssignmentManager().getRegionStates().getRegionStateNode(getParentRegion()); -429 -430 if (node == null) { -431 throw new UnknownRegionException(getParentRegion().getRegionNameAsString()); -432 } -433 -434 RegionInfo parentHRI = node.getRegionInfo(); -435 // Lookup the parent HRI state from the AM, which has the latest updated info. -436 // Protect against the case where concurrent SPLIT requests came in and succeeded -437 // just before us. -438 if (node.isInState(State.SPLIT)) { -439 LOG.info("Split of " + parentHRI + " skipped; state is already SPLIT"); -440 return false; -441 } -442 if (parentHRI.isSplit() || parentHRI.isOffline()) { -443 LOG.info("Split of " + parentHRI + " skipped because offline/split."); -444 return false; -445 } -446 -447 // expected parent to be online or closed -448 if (!node.isInState(EXPECTED_SPLIT_STATES)) { -449 // We may have SPLIT already? -450 setFailure(new IOException("Split " + parentHRI.getRegionNameAsString() + -451 " FAILED because state=" + node.getState() + "; expected " + -452 Arrays.toString(EXPECTED_SPLIT_STATES))); -453 return false; -454 } -455 -456 // Since we have the lock and the master is coordinating the operation -457 // we are always able to split the region -458 if (!env.getMasterServices().isSplitOrMergeEnabled(MasterSwitchType.SPLIT)) { -459 LOG.warn("pid=" + getProcId() + " split switch is off! skip split of " + parentHRI); -460 setFailure(new IOException("Split region " + -461 (parentHRI == null? "null": parentHRI.getRegionNameAsString()) + -462 " failed due to split switch off")); -463 return false; -464 } -465 -466 // set node state as SPLITTING -467 node.setState(State.SPLITTING); -468 -469 return true; -470 } -471 -472 /** -473 * Action before splitting region in a table. -474 * @param env MasterProcedureEnv -475 * @throws IOException -476 * @throws InterruptedException -477 */ -478 private void preSplitRegion(final MasterProcedureEnv env) -479 throws IOException, InterruptedException { -480 final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost(); -481 if (cpHost != null) { -482 cpHost.preSplitRegionAction(getTableName(), getSplitRow(), getUser()); -483 } -484 -485 // TODO: Clean up split and merge. Currently all over the place. -486 // Notify QuotaManager and RegionNormalizer -487 try { -488 env.getMasterServices().getMasterQuotaManager().onRegionSplit(this.getParentRegion()); -489 } catch (QuotaExceededException e) { -490 env.getAssignmentManager().getRegionNormalizer().planSkipped(this.getParentRegion(), -491 NormalizationPlan.PlanType.SPLIT); -492 throw e; -493 } -494 } -495 -496 /** -497 * Action after rollback a split table region action. -498 * @param env MasterProcedureEnv -499 * @throws IOException -500 */ -501 private void postRollBackSplitRegion(final MasterProcedureEnv env) throws IOException { -502 final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost(); -503 if (cpHost != null) { -504 cpHost.postRollBackSplitRegionAction(getUser()); -505 } -506 } -507 -508 /** -509 * Rollback close parent region -510 * @param env MasterProcedureEnv -511 **/ -512 private void openParentRegion(final MasterProcedureEnv env) throws IOException { -513 // Check whether the region is closed; if so, open it in the same server -514 final int regionReplication = getRegionReplication(env); -515 final ServerName serverName = getParentRegionServerName(env); -516 -517 final AssignProcedure[] procs = new AssignProcedure[regionReplication]; -518 for (int i = 0; i < regionReplication; ++i) { -519 final RegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(getParentRegion(), i); -520 procs[i] = env.getAssignmentManager().createAssignProcedure(hri, serverName); -521 } -522 env.getMasterServices().getMasterProcedureExecutor().submitProcedures(procs); -523 } -524 -525 /** -526 * Create daughter regions -527 * @param env MasterProcedureEnv -528 * @throws IOException -529 */ -530 @VisibleForTesting -531 public void createDaughterRegions(final MasterProcedureEnv env) throws IOException { -532 final MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem(); -533 final Path tabledir = FSUtils.getTableDir(mfs.getRootDir(), getTableName()); -534 final FileSystem fs = mfs.getFileSystem(); -535 HRegionFileSystem regionFs = HRegionFileSystem.openRegionFromFileSystem( -536 env.getMasterConfiguration(), fs, tabledir, getParentRegion(), false); -537 regionFs.createSplitsDir(); -538 -539 Pair<Integer, Integer> expectedReferences = splitStoreFiles(env, regionFs); -540 -541 assertReferenceFileCount(fs, expectedReferences.getFirst(), -542 regionFs.getSplitsDir(daughter_1_RI)); -543 //Move the files from the temporary .splits to the final /table/region directory -544 regionFs.commitDaughterRegion(daughter_1_RI); -545 assertReferenceFileCount(fs, expectedReferences.getFirst(), -546 new Path(tabledir, daughter_1_RI.getEncodedName())); -547 -548 assertReferenceFileCount(fs, expectedReferences.getSecond(), -549 regionFs.getSplitsDir(daughter_2_RI)); -550 regionFs.commitDaughterRegion(daughter_2_RI); -551 assertReferenceFileCount(fs, expectedReferences.getSecond(), -552 new Path(tabledir, daughter_2_RI.getEncodedName())); -553 } -554 -555 /** -556 * Create Split directory -557 * @param env MasterProcedureEnv -558 * @throws IOException -559 */ -560 private Pair<Integer, Integer> splitStoreFiles(final MasterProcedureEnv env, -561 final HRegionFileSystem regionFs) throws IOException { -562 final MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem(); -563 final Configuration conf = env.getMasterConfiguration(); -564 // The following code sets up a thread pool executor with as many slots as -565 // there's files to split. It then fires up everything, waits for -566 // completion and finally checks for any exception -567 // -568 // Note: splitStoreFiles creates daughter region dirs under the parent splits dir -569 // Nothing to unroll here if failure -- re-run createSplitsDir will -570 // clean this up. -571 int nbFiles = 0; -572 final Map<String, Collection<StoreFileInfo>> files = -573 new HashMap<String, Collection<StoreFileInfo>>(regionFs.getFamilies().size()); -574 for (String family: regionFs.getFamilies()) { -575 Collection<StoreFileInfo> sfis = regionFs.getStoreFiles(family); -576 if (sfis == null) continue; -577 Collection<StoreFileInfo> filteredSfis = null; -578 for (StoreFileInfo sfi: sfis) { -579 // Filter. There is a lag cleaning up compacted reference files. They get cleared -580 // after a delay in case outstanding Scanners still have references. Because of this, -581 // the listing of the Store content may have straggler reference files. Skip these. -582 // It should be safe to skip references at this point because we checked above with -583 // the region if it thinks it is splittable and if we are here, it thinks it is -584 // splitable. -585 if (sfi.isReference()) { -586 LOG.info("Skipping split of " + sfi + "; presuming ready for archiving."); -587 continue; -588 } -589 if (filteredSfis == null) { -590 filteredSfis = new ArrayList<StoreFileInfo>(sfis.size()); -591 files.put(family, filteredSfis); -592 } -593 filteredSfis.add(sfi); -594 nbFiles++; -595 } -596 } -597 if (nbFiles == 0) { -598 // no file needs to be splitted. -599 return new Pair<Integer, Integer>(0,0); -600 } -601 // Max #threads is the smaller of the number of storefiles or the default max determined above. -602 int maxThreads = Math.min( -603 conf.getInt(HConstants.REGION_SPLIT_THREADS_MAX, -604 conf.getInt(HStore.BLOCKING_STOREFILES_KEY, HStore.DEFAULT_BLOCKING_STOREFILE_COUNT)), -605 nbFiles); -606 LOG.info("pid=" + getProcId() + " splitting " + nbFiles + " storefiles, region=" + -607 getParentRegion().getShortNameToLog() + ", threads=" + maxThreads); -608 final ExecutorService threadPool = Executors.newFixedThreadPool( -609 maxThreads, Threads.getNamedThreadFactory("StoreFileSplitter-%1$d")); -610 final List<Future<Pair<Path,Path>>> futures = new ArrayList<Future<Pair<Path,Path>>>(nbFiles); -611 -612 TableDescriptor htd = env.getMasterServices().getTableDescriptors().get(getTableName()); -613 // Split each store file. -614 for (Map.Entry<String, Collection<StoreFileInfo>>e: files.entrySet()) { -615 byte [] familyName = Bytes.toBytes(e.getKey()); -616 final ColumnFamilyDescriptor hcd = htd.getColumnFamily(familyName); -617 final Collection<StoreFileInfo> storeFiles = e.getValue(); -618 if (storeFiles != null && storeFiles.size() > 0) { -619 final CacheConfig cacheConf = new CacheConfig(conf, hcd); -620 for (StoreFileInfo storeFileInfo: storeFiles) { -621 StoreFileSplitter sfs = -622 new StoreFileSplitter(regionFs, familyName, new HStoreFile(mfs.getFileSystem(), -623 storeFileInfo, conf, cacheConf, hcd.getBloomFilterType(), true)); -624 futures.add(threadPool.submit(sfs)); -625 } -626 } -627 } -628 // Shutdown the pool -629 threadPool.shutdown(); -630 -631 // Wait for all the tasks to finish. -632 // When splits ran on the RegionServer, how-long-to-wait-configuration was named -633 // hbase.regionserver.fileSplitTimeout. If set, use its value. -634 long fileSplitTimeout = conf.getLong("hbase.master.fileSplitTimeout", -635 conf.getLong("hbase.regionserver.fileSplitTimeout", 600000)); -636 try { -637 boolean stillRunning = !threadPool.awaitTermination(fileSplitTimeout, TimeUnit.MILLISECONDS); -638 if (stillRunning) { -639 threadPool.shutdownNow(); -640 // wait for the thread to shutdown completely. -641 while (!threadPool.isTerminated()) { -642 Thread.sleep(50); -643 } -644 throw new IOException("Took too long to split the" + -645 " files and create the references, aborting split"); -646 } -647 } catch (InterruptedException e) { -648 throw (InterruptedIOException)new InterruptedIOException().initCause(e); -649 } -650 -651 int daughterA = 0; -652 int daughterB = 0; -653 // Look for any exception -654 for (Future<Pair<Path, Path>> future : futures) { -655 try { -656 Pair<Path, Path> p = future.get(); -657 daughterA += p.getFirst() != null ? 1 : 0; -658 daughterB += p.getSecond() != null ? 1 : 0; -659 } catch (InterruptedException e) { -660 throw (InterruptedIOException) new InterruptedIOException().initCause(e); -661 } catch (ExecutionException e) { -662 throw new IOException(e); -663 } -664 } -665 -666 if (LOG.isDebugEnabled()) { -667 LOG.debug("pid=" + getProcId() + " split storefiles for region " + -668 getParentRegion().getShortNameToLog() + -669 " Daughter A: " + daughterA + " storefiles, Daughter B: " + -670 daughterB + " storefiles."); -671 } -672 return new Pair<Integer, Integer>(daughterA, daughterB); -673 } -674 -675 private void assertReferenceFileCount(final FileSystem fs, final int expectedReferenceFileCount, -676 final Path dir) throws IOException { -677 if (expectedReferenceFileCount != 0 && -678 expectedReferenceFileCount != FSUtils.getRegionReferenceFileCount(fs, dir)) { -679 throw new IOException("Failing split. Expected reference file count isn't equal."); -680 } -681 } -682 -683 private Pair<Path, Path> splitStoreFile(HRegionFileSystem regionFs, byte[] family, HStoreFile sf) -684 throws IOException { -685 if (LOG.isDebugEnabled()) { -686 LOG.debug("pid=" + getProcId() + " splitting started for store file: " + -687 sf.getPath() + " for region: " + getParentRegion().getShortNameToLog()); -688 } -689 -690 final byte[] splitRow = getSplitRow(); -691 final String familyName = Bytes.toString(family); -692 final Path path_first = regionFs.splitStoreFile(this.daughter_1_RI, familyName, sf, splitRow, -693 false, splitPolicy); -694 final Path path_second = regionFs.splitStoreFile(this.daughter_2_RI, familyName, sf, splitRow, -695 true, splitPolicy); -696 if (LOG.isDebugEnabled()) { -697 LOG.debug("pid=" + getProcId() + " splitting complete for store file: " + -698 sf.getPath() + " for region: " + getParentRegion().getShortNameToLog()); -699 } -700 return new Pair<Path,Path>(path_first, path_second); -701 } -702 -703 /** -704 * Utility class used to do the file splitting / reference writing -705 * in parallel instead of sequentially. -706 */ -707 private class StoreFileSplitter implements Callable<Pair<Path,Path>> { -708 private final HRegionFileSystem regionFs; -709 private final byte[] family; -710 private final HStoreFile sf; -711 -712 /** -713 * Constructor that takes what it needs to split -714 * @param regionFs the file system -715 * @param family Family that contains the store file -716 * @param sf which file -717 */ -718 public StoreFileSplitter(HRegionFileSystem regionFs, byte[] family, HStoreFile sf) { -719 this.regionFs = regionFs; -720 this.sf = sf; -721 this.family = family; -722 } -723 -724 @Override -725 public Pair<Path,Path> call() throws IOException { -726 return splitStoreFile(regionFs, family, sf); -727 } -728 } -729 -730 /** -731 * Post split region actions before the Point-of-No-Return step -732 * @param env MasterProcedureEnv -733 **/ -734 private void preSplitRegionBeforeMETA(final MasterProcedureEnv env) -735 throws IOException, InterruptedException { -736 final List<Mutation> metaEntries = new ArrayList<Mutation>(); -737 final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost(); -738 if (cpHost != null) { -739 cpHost.preSplitBeforeMETAAction(getSplitRow(), metaEntries, getUser()); -740 try { -741 for (Mutation p : metaEntries) { -742 RegionInfo.parseRegionName(p.getRow()); -743 } -744 } catch (IOException e) { -745 LOG.error("pid=" + getProcId() + " row key of mutation from coprocessor not parsable as " -746 + "region name." -747 + "Mutations from coprocessor should only for hbase:meta table."); -748 throw e; -749 } -750 } -751 } -752 -753 /** -754 * Add daughter regions to META -755 * @param env MasterProcedureEnv -756 * @throws IOException -757 */ -758 private void updateMetaForDaughterRegions(final MasterProcedureEnv env) throws IOException { -759 env.getAssignmentManager().markRegionAsSplit(getParentRegion(), getParentRegionServerName(env), -760 daughter_1_RI, daughter_2_RI); -761 } -762 -763 /** -764 * Pre split region actions after the Point-of-No-Return step -765 * @param env MasterProcedureEnv -766 **/ -767 private void preSplitRegionAfterMETA(final MasterProcedureEnv env) -768 throws IOException, InterruptedException { -769 final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost(); -770 if (cpHost != null) { -771 cpHost.preSplitAfterMETAAction(getUser()); -772 } -773 } -774 -775 /** -776 * Post split region actions -777 * @param env MasterProcedureEnv -778 **/ -779 private void postSplitRegion(final MasterProcedureEnv env) throws IOException { -780 final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost(); -781 if (cpHost != null) { -782 cpHost.postCompletedSplitRegionAction(daughter_1_RI, daughter_2_RI, getUser()); -783 } -784 } -785 -786 private ServerName getParentRegionServerName(final MasterProcedureEnv env) { -787 return env.getMasterServices().getAssignmentManager() -788 .getRegionStates().getRegionServerOfRegion(getParentRegion()); -789 } -790 -791 private UnassignProcedure[] createUnassignProcedures(final MasterProcedureEnv env, -792 final int regionReplication) { -793 final UnassignProcedure[] procs = new UnassignProcedure[regionReplication]; -794 for (int i = 0; i < procs.length; ++i) { -795 final RegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(getParentRegion(), i); -796 procs[i] = env.getAssignmentManager().createUnassignProcedure(hri, null, true); -797 } -798 return procs; -799 } -800 -801 private AssignProcedure[] createAssignProcedures(final MasterProcedureEnv env, -802 final int regionReplication) { -803 final ServerName targetServer = getParentRegionServerName(env); -804 final AssignProcedure[] procs = new AssignProcedure[regionReplication * 2]; -805 int procsIdx = 0; -806 for (int i = 0; i < regionReplication; ++i) { -807 final RegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(daughter_1_RI, i); -808 procs[procsIdx++] = env.getAssignmentManager().createAssignProcedure(hri, targetServer); -809 } -810 for (int i = 0; i < regionReplication; ++i) { -811 final RegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(daughter_2_RI, i); -812 procs[procsIdx++] = env.getAssignmentManager().createAssignProcedure(hri, targetServer); -813 } -814 return procs; -815 } -816 -817 private int getRegionReplication(final MasterProcedureEnv env) throws IOException { -818 final TableDescriptor htd = env.getMasterServices().getTableDescriptors().get(getTableName()); -819 return htd.getRegionReplication(); -820 } -821 -822 /** -823 * The procedure could be restarted from a different machine. If the variable is null, we need to -824 * retrieve it. -825 * @return traceEnabled -826 */ -827 private boolean isTraceEnabled() { -828 if (traceEnabled == null) { -829 traceEnabled = LOG.isTraceEnabled(); -830 } -831 return traceEnabled; -832 } -833} +273 setFailure("master-split-regions", e); +274 } +275 } +276 // if split fails, need to call ((HRegion)parent).clearSplit() when it is a force split +277 return Flow.HAS_MORE_STATE; +278 } +279 +280 /** +281 * To rollback {@link SplitTableRegionProcedure}, an AssignProcedure is asynchronously +282 * submitted for parent region to be split (rollback doesn't wait on the completion of the +283 * AssignProcedure) . This can be improved by changing rollback() to support sub-procedures. +284 * See HBASE-19851 for details. +285 */ +286 @Override +287 protected void rollbackState(final MasterProcedureEnv env, final SplitTableRegionState state) +288 throws IOException, InterruptedException { +289 if (isTraceEnabled()) { +290 LOG.trace(this + " rollback state=" + state); +291 } +292 +293 try { +294 switch (state) { +295 case SPLIT_TABLE_REGION_POST_OPERATION: +296 case SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS: +297 case SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META: +298 case SPLIT_TABLE_REGION_UPDATE_META: +299 // PONR +300 throw new UnsupportedOperationException(this + " unhandled state=" + state); +301 case SPLIT_TABLE_REGION_PRE_OPERATION_BEFORE_META: +302 break; +303 case SPLIT_TABLE_REGION_CREATE_DAUGHTER_REGIONS: +304 // Doing nothing, as re-open parent region would clean up daughter region directories. +305 break; +306 case SPLIT_TABLE_REGION_CLOSE_PARENT_REGION: +307 openParentRegion(env); +308 break; +309 case SPLIT_TABLE_REGION_PRE_OPERATION: +310 postRollBackSplitRegion(env); +311 break; +312 case SPLIT_TABLE_REGION_PREPARE: +313 break; // nothing to do +314 default: +315 throw new UnsupportedOperationException(this + " unhandled state=" + state); +316 } +317 } catch (IOException e) { +318 // This will be retried. Unless there is a bug in the code, +319 // this should be just a "temporary error" (e.g. network down) +320 LOG.warn("pid=" + getProcId() + " failed rollback attempt step " + state + +321 " for splitting the region " +322 + getParentRegion().getEncodedName() + " in table " + getTableName(), e); +323 throw e; +324 } +325 } +326 +327 /* +328 * Check whether we are in the state that can be rollback +329 */ +330 @Override +331 protected boolean isRollbackSupported(final SplitTableRegionState state) { +332 switch (state) { +333 case SPLIT_TABLE_REGION_POST_OPERATION: +334 case SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS: +335 case SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META: +336 case SPLIT_TABLE_REGION_UPDATE_META: +337 // It is not safe to rollback if we reach to these states. +338 return false; +339 default: +340 break; +341 } +342 return true; +343 } +344 +345 @Override +346 protected SplitTableRegionState getState(final int stateId) { +347 return SplitTableRegionState.forNumber(stateId); +348 } +349 +350 @Override +351 protected int getStateId(final SplitTableRegionState state) { +352 return state.getNumber(); +353 } +354 +355 @Override +356 protected SplitTableRegionState getInitialState() { +357 return SplitTableRegionState.SPLIT_TABLE_REGION_PREPARE; +358 } +359 +360 @Override +361 protected void serializeStateData(ProcedureStateSerializer serializer) +362 throws IOException { +363 super.serializeStateData(serializer); +364 +365 final MasterProcedureProtos.SplitTableRegionStateData.Builder splitTableRegionMsg = +366 MasterProcedureProtos.SplitTableRegionStateData.newBuilder() +367 .setUserInfo(MasterProcedureUtil.toProtoUserInfo(getUser())) +368 .setParentRegionInfo(ProtobufUtil.toRegionInfo(getRegion())) +369 .addChildRegionInfo(ProtobufUtil.toRegionInfo(daughter_1_RI)) +370 .addChildRegionInfo(ProtobufUtil.toRegionInfo(daughter_2_RI)); +371 serializer.serialize(splitTableRegionMsg.build()); +372 } +373 +374 @Override +375 protected void deserializeStateData(ProcedureStateSerializer serializer) +376 throws IOException { +377 super.deserializeStateData(serializer); +378 +379 final MasterProcedureProtos.SplitTableRegionStateData splitTableRegionsMsg = +380 serializer.deserialize(MasterProcedureProtos.SplitTableRegionStateData.class); +381 setUser(MasterProcedureUtil.toUserInfo(splitTableRegionsMsg.getUserInfo())); +382 setRegion(ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getParentRegionInfo())); +383 assert(splitTableRegionsMsg.getChildRegionInfoCount() == 2); +384 daughter_1_RI = ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getChildRegionInfo(0)); +385 daughter_2_RI = ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getChildRegionInfo(1)); +386 } +387 +388 @Override +389 public void toStringClassDetails(StringBuilder sb) { +390 sb.append(getClass().getSimpleName()); +391 sb.append(" table="); +392 sb.append(getTableName()); +393 sb.append(", parent="); +394 sb.append(getParentRegion().getShortNameToLog()); +395 sb.append(", daughterA="); +396 sb.append(daughter_1_RI.getShortNameToLog()); +397 sb.append(", daughterB="); +398 sb.append(daughter_2_RI.getShortNameToLog()); +399 } +400 +401 private RegionInfo getParentRegion() { +402 return getRegion(); +403 } +404 +405 @Override +406 public TableOperationType getTableOperationType() { +407 return TableOperationType.REGION_SPLIT; +408 } +409 +410 @Override +411 protected ProcedureMetrics getProcedureMetrics(MasterProcedureEnv env) { +412 return env.getAssignmentManager().getAssignmentManagerMetrics().getSplitProcMetrics(); +413 } +414 +415 private byte[] getSplitRow() { +416 return daughter_2_RI.getStartKey(); +417 } +418 +419 private static State [] EXPECTED_SPLIT_STATES = new State [] {State.OPEN, State.CLOSED}; +420 /** +421 * Prepare to Split region. +422 * @param env MasterProcedureEnv +423 * @throws IOException +424 */ +425 @VisibleForTesting +426 public boolean prepareSplitRegion(final MasterProcedureEnv env) throws IOException { +427 // Check whether the region is splittable +428 RegionStateNode node = +429 env.getAssignmentManager().getRegionStates().getRegionStateNode(getParentRegion()); +430 +431 if (node == null) { +432 throw new UnknownRegionException(getParentRegion().getRegionNameAsString()); +433 } +434 +435 RegionInfo parentHRI = node.getRegionInfo(); +436 // Lookup the parent HRI state from the AM, which has the latest updated info. +437 // Protect against the case where concurrent SPLIT requests came in and succeeded +438 // just before us. +439 if (node.isInState(State.SPLIT)) { +440 LOG.info("Split of " + parentHRI + " skipped; state is already SPLIT"); +441 return false; +442 } +443 if (parentHRI.isSplit() || parentHRI.isOffline()) { +444 LOG.info("Split of " + parentHRI + " skipped because offline/split."); +445 return false; +446 } +447 +448 // expected parent to be online or closed +449 if (!node.isInState(EXPECTED_SPLIT_STATES)) { +450 // We may have SPLIT already? +451 setFailure(new IOException("Split " + parentHRI.getRegionNameAsString() + +452 " FAILED because state=" + node.getState() + "; expected " + +453 Arrays.toString(EXPECTED_SPLIT_STATES))); +454 return false; +455 } +456 +457 // Since we have the lock and the master is coordinating the operation +458 // we are always able to split the region +459 if (!env.getMasterServices().isSplitOrMergeEnabled(MasterSwitchType.SPLIT)) { +460 LOG.warn("pid=" + getProcId() + " split switch is off! skip split of " + parentHRI); +461 setFailure(new IOException("Split region " + +462 (parentHRI == null? "null": parentHRI.getRegionNameAsString()) + +463 " failed due to split switch off")); +464 return false; +465 } +466 +467 // set node state as SPLITTING +468 node.setState(State.SPLITTING); +469 +470 return true; +471 } +472 +473 /** +474 * Action before splitting region in a table. +475 * @param env MasterProcedureEnv +476 * @throws IOException +477 * @throws InterruptedException +478 */ +479 private void preSplitRegion(final MasterProcedureEnv env) +480 throws IOException, InterruptedException { +481 final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost(); +482 if (cpHost != null) { +483 cpHost.preSplitRegionAction(getTableName(), getSplitRow(), getUser()); +484 } +485 +486 // TODO: Clean up split and merge. Currently all over the place. +487 // Notify QuotaManager and RegionNormalizer +488 try { +489 env.getMasterServices().getMasterQuotaManager().onRegionSplit(this.getParentRegion()); +490 } catch (QuotaExceededException e) { +491 env.getAssignmentManager().getRegionNormalizer().planSkipped(this.getParentRegion(), +492 NormalizationPlan.PlanType.SPLIT); +493 throw e; +494 } +495 } +496 +497 /** +498 * Action after rollback a split table region action. +499 * @param env MasterProcedureEnv +500 * @throws IOException +501 */ +502 private void postRollBackSplitRegion(final MasterProcedureEnv env) throws IOException { +503 final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost(); +504 if (cpHost != null) { +505 cpHost.postRollBackSplitRegionAction(getUser()); +506 } +507 } +508 +509 /** +510 * Rollback close parent region +511 * @param env MasterProcedureEnv +512 **/ +513 private void openParentRegion(final MasterProcedureEnv env) throws IOException { +514 // Check whether the region is closed; if so, open it in the same server +515 final int regionReplication = getRegionReplication(env); +516 final ServerName serverName = getParentRegionServerName(env); +517 +518 final AssignProcedure[] procs = new AssignProcedure[regionReplication]; +519 for (int i = 0; i < regionReplication; ++i) { +520 final RegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(getParentRegion(), i); +521 procs[i] = env.getAssignmentManager().createAssignProcedure(hri, serverName); +522 } +523 env.getMasterServices().getMasterProcedureExecutor().submitProcedures(procs); +524 } +525 +526 /** +527 * Create daughter regions +528 * @param env MasterProcedureEnv +529 * @throws IOException +530 */ +531 @VisibleForTesting +532 public void createDaughterRegions(final MasterProcedureEnv env) throws IOException { +533 final MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem(); +534 final Path tabledir = FSUtils.getTableDir(mfs.getRootDir(), getTableName()); +535 final FileSystem fs = mfs.getFileSystem(); +536 HRegionFileSystem regionFs = HRegionFileSystem.openRegionFromFileSystem( +537 env.getMasterConfiguration(), fs, tabledir, getParentRegion(), false); +538 regionFs.createSplitsDir(); +539 +540 Pair<Integer, Integer> expectedReferences = splitStoreFiles(env, regionFs); +541 +542 assertReferenceFileCount(fs, expectedReferences.getFirst(), +543 regionFs.getSplitsDir(daughter_1_RI)); +544 //Move the files from the temporary .splits to the final /table/region directory +545 regionFs.commitDaughterRegion(daughter_1_RI); +546 assertReferenceFileCount(fs, expectedReferences.getFirst(), +547 new Path(tabledir, daughter_1_RI.getEncodedName())); +548 +549 assertReferenceFileCount(fs, expectedReferences.getSecond(), +550 regionFs.getSplitsDir(daughter_2_RI)); +551 regionFs.commitDaughterRegion(daughter_2_RI); +552 assertReferenceFileCount(fs, expectedReferences.getSecond(), +553 new Path(tabledir, daughter_2_RI.getEncodedName())); +554 } +555 +556 /** +557 * Create Split directory +558 * @param env MasterProcedureEnv +559 * @throws IOException +560 */ +561 private Pair<Integer, Integer> splitStoreFiles(final MasterProcedureEnv env, +562 final HRegionFileSystem regionFs) throws IOException { +563 final MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem(); +564 final Configuration conf = env.getMasterConfiguration(); +565 // The following code sets up a thread pool executor with as many slots as +566 // there's files to split. It then fires up everything, waits for +567 // completion and finally checks for any exception +568 // +569 // Note: splitStoreFiles creates daughter region dirs under the parent splits dir +570 // Nothing to unroll here if failure -- re-run createSplitsDir will +571 // clean this up. +572 int nbFiles = 0; +573 final Map<String, Collection<StoreFileInfo>> files = +574 new HashMap<String, Collection<StoreFileInfo>>(regionFs.getFamilies().size()); +575 for (String family: regionFs.getFamilies()) { +576 Collection<StoreFileInfo> sfis = regionFs.getStoreFiles(family); +577 if (sfis == null) continue; +578 Collection<StoreFileInfo> filteredSfis = null; +579 for (StoreFileInfo sfi: sfis) { +580 // Filter. There is a lag cleaning up compacted reference files. They get cleared +581 // after a delay in case outstanding Scanners still have references. Because of this, +582 // the listing of the Store content may have straggler reference files. Skip these. +583 // It should be safe to skip references at this point because we checked above with +584 // the region if it thinks it is splittable and if we are here, it thinks it is +585 // splitable. +586 if (sfi.isReference()) { +587 LOG.info("Skipping split of " + sfi + "; presuming ready for archiving."); +588 continue; +589 } +590 if (f