Return-Path: X-Original-To: apmail-ambari-commits-archive@www.apache.org Delivered-To: apmail-ambari-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id B83961823C for ; Tue, 25 Aug 2015 13:07:29 +0000 (UTC) Received: (qmail 55860 invoked by uid 500); 25 Aug 2015 13:07:29 -0000 Delivered-To: apmail-ambari-commits-archive@ambari.apache.org Received: (qmail 55831 invoked by uid 500); 25 Aug 2015 13:07:29 -0000 Mailing-List: contact commits-help@ambari.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: ambari-dev@ambari.apache.org Delivered-To: mailing list commits@ambari.apache.org Received: (qmail 55815 invoked by uid 99); 25 Aug 2015 13:07:29 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 25 Aug 2015 13:07:29 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 752D4E00D6; Tue, 25 Aug 2015 13:07:29 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: jonathanhurley@apache.org To: commits@ambari.apache.org Message-Id: <6fdf0777d57a4db19eb8ca6fa74212a7@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: ambari git commit: AMBARI-12867 - Do Not Automatically Abort Stack Repository Installation When A Host Timed Out (jonathanhurley) Date: Tue, 25 Aug 2015 13:07:29 +0000 (UTC) Repository: ambari Updated Branches: refs/heads/branch-2.1 ea04dac05 -> 20346a34c AMBARI-12867 - Do Not Automatically Abort Stack Repository Installation When A Host Timed Out (jonathanhurley) Project: http://git-wip-us.apache.org/repos/asf/ambari/repo Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/20346a34 Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/20346a34 Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/20346a34 
Branch: refs/heads/branch-2.1 Commit: 20346a34c497dcfb8ef3bd5cbcd9c867dd2ec474 Parents: ea04dac Author: Jonathan Hurley Authored: Mon Aug 24 19:28:03 2015 -0400 Committer: Jonathan Hurley Committed: Tue Aug 25 09:07:19 2015 -0400 ---------------------------------------------------------------------- .../java/org/apache/ambari/server/Role.java | 1 + .../ClusterStackVersionResourceProvider.java | 111 +++++++++++-------- ...ClusterStackVersionResourceProviderTest.java | 11 +- 3 files changed, 77 insertions(+), 46 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/ambari/blob/20346a34/ambari-server/src/main/java/org/apache/ambari/server/Role.java ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/java/org/apache/ambari/server/Role.java b/ambari-server/src/main/java/org/apache/ambari/server/Role.java index 636df3f..c684981 100644 --- a/ambari-server/src/main/java/org/apache/ambari/server/Role.java +++ b/ambari-server/src/main/java/org/apache/ambari/server/Role.java @@ -113,6 +113,7 @@ public class Role { public static final Role METRICS_MONITOR = valueOf("METRICS_MONITOR"); public static final Role AMS_SERVICE_CHECK = valueOf("AMBARI_METRICS_SERVICE_CHECK"); public static final Role ACCUMULO_CLIENT = valueOf("ACCUMULO_CLIENT"); + public static final Role INSTALL_PACKAGES = valueOf("install_packages"); private String name = null; http://git-wip-us.apache.org/repos/asf/ambari/blob/20346a34/ambari-server/src/main/java/org/apache/ambari/server/controller/internal/ClusterStackVersionResourceProvider.java ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/java/org/apache/ambari/server/controller/internal/ClusterStackVersionResourceProvider.java b/ambari-server/src/main/java/org/apache/ambari/server/controller/internal/ClusterStackVersionResourceProvider.java index 972226d..6590d00 100644 --- 
a/ambari-server/src/main/java/org/apache/ambari/server/controller/internal/ClusterStackVersionResourceProvider.java +++ b/ambari-server/src/main/java/org/apache/ambari/server/controller/internal/ClusterStackVersionResourceProvider.java @@ -20,7 +20,6 @@ package org.apache.ambari.server.controller.internal; import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.JDK_LOCATION; import java.util.ArrayList; -import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; @@ -61,7 +60,7 @@ import org.apache.ambari.server.orm.dao.ClusterDAO; import org.apache.ambari.server.orm.dao.ClusterVersionDAO; import org.apache.ambari.server.orm.dao.HostVersionDAO; import org.apache.ambari.server.orm.dao.RepositoryVersionDAO; -import org.apache.ambari.server.orm.dao.StackDAO; +import org.apache.ambari.server.orm.entities.ClusterEntity; import org.apache.ambari.server.orm.entities.ClusterVersionEntity; import org.apache.ambari.server.orm.entities.HostEntity; import org.apache.ambari.server.orm.entities.HostVersionEntity; @@ -78,7 +77,7 @@ import org.apache.ambari.server.state.ServiceInfo; import org.apache.ambari.server.state.ServiceOsSpecific; import org.apache.ambari.server.state.StackId; import org.apache.ambari.server.utils.StageUtils; -import org.apache.ambari.server.orm.entities.ClusterEntity; +import org.apache.commons.lang.StringUtils; import com.google.gson.Gson; import com.google.inject.Inject; @@ -93,17 +92,30 @@ public class ClusterStackVersionResourceProvider extends AbstractControllerResou // ----- Property ID constants --------------------------------------------- - protected static final String CLUSTER_STACK_VERSION_ID_PROPERTY_ID = PropertyHelper.getPropertyId("ClusterStackVersions", "id"); - protected static final String CLUSTER_STACK_VERSION_CLUSTER_NAME_PROPERTY_ID = PropertyHelper.getPropertyId("ClusterStackVersions", "cluster_name"); - protected static final String 
CLUSTER_STACK_VERSION_STACK_PROPERTY_ID = PropertyHelper.getPropertyId("ClusterStackVersions", "stack"); - protected static final String CLUSTER_STACK_VERSION_VERSION_PROPERTY_ID = PropertyHelper.getPropertyId("ClusterStackVersions", "version"); - protected static final String CLUSTER_STACK_VERSION_STATE_PROPERTY_ID = PropertyHelper.getPropertyId("ClusterStackVersions", "state"); - protected static final String CLUSTER_STACK_VERSION_HOST_STATES_PROPERTY_ID = PropertyHelper.getPropertyId("ClusterStackVersions", "host_states"); - protected static final String CLUSTER_STACK_VERSION_REPOSITORY_VERSION_PROPERTY_ID = PropertyHelper.getPropertyId("ClusterStackVersions", "repository_version"); + protected static final String CLUSTER_STACK_VERSION_ID_PROPERTY_ID = PropertyHelper.getPropertyId("ClusterStackVersions", "id"); + protected static final String CLUSTER_STACK_VERSION_CLUSTER_NAME_PROPERTY_ID = PropertyHelper.getPropertyId("ClusterStackVersions", "cluster_name"); + protected static final String CLUSTER_STACK_VERSION_STACK_PROPERTY_ID = PropertyHelper.getPropertyId("ClusterStackVersions", "stack"); + protected static final String CLUSTER_STACK_VERSION_VERSION_PROPERTY_ID = PropertyHelper.getPropertyId("ClusterStackVersions", "version"); + protected static final String CLUSTER_STACK_VERSION_STATE_PROPERTY_ID = PropertyHelper.getPropertyId("ClusterStackVersions", "state"); + protected static final String CLUSTER_STACK_VERSION_HOST_STATES_PROPERTY_ID = PropertyHelper.getPropertyId("ClusterStackVersions", "host_states"); + protected static final String CLUSTER_STACK_VERSION_REPOSITORY_VERSION_PROPERTY_ID = PropertyHelper.getPropertyId("ClusterStackVersions", "repository_version"); + protected static final String CLUSTER_STACK_VERSION_STAGE_SUCCESS_FACTOR = PropertyHelper.getPropertyId("ClusterStackVersions", "success_factor"); protected static final String INSTALL_PACKAGES_ACTION = "install_packages"; protected static final String INSTALL_PACKAGES_FULL_NAME = "Install 
version"; + /** + * The default success factor that will be used when determining if a stage's + * failure should cause other stages to abort. Consider a scenario with 1000 + * hosts, broken up into 10 stages. Each stage would have 100 hosts. If the + * success factor was 100%, then any failure in stage 1 would cause all 9 + * other stages to abort. If set to 90%, then 10 hosts would need to fail for + * the other stages to abort. This is necessary to prevent the abortion of + * stages based on 1 or 2 errant hosts failing in a large cluster's stack + * distribution. + */ + private static final float INSTALL_PACKAGES_SUCCESS_FACTOR = 0.85f; + @SuppressWarnings("serial") private static Set pkPropertyIds = new HashSet() { { @@ -126,6 +138,7 @@ public class ClusterStackVersionResourceProvider extends AbstractControllerResou add(CLUSTER_STACK_VERSION_HOST_STATES_PROPERTY_ID); add(CLUSTER_STACK_VERSION_STATE_PROPERTY_ID); add(CLUSTER_STACK_VERSION_REPOSITORY_VERSION_PROPERTY_ID); + add(CLUSTER_STACK_VERSION_STAGE_SUCCESS_FACTOR); } }; @@ -147,9 +160,6 @@ public class ClusterStackVersionResourceProvider extends AbstractControllerResou private static HostVersionDAO hostVersionDAO; @Inject - private static StackDAO stackDAO; - - @Inject private static RepositoryVersionDAO repositoryVersionDAO; @Inject @@ -269,14 +279,14 @@ public class ClusterStackVersionResourceProvider extends AbstractControllerResou if (request.getProperties().size() != 1) { throw new UnsupportedOperationException("Multiple requests cannot be executed at the same time."); } + Map propertyMap = iterator.next(); - Set requiredProperties = new HashSet(){{ - add(CLUSTER_STACK_VERSION_CLUSTER_NAME_PROPERTY_ID); - add(CLUSTER_STACK_VERSION_REPOSITORY_VERSION_PROPERTY_ID); - add(CLUSTER_STACK_VERSION_STACK_PROPERTY_ID); - add(CLUSTER_STACK_VERSION_VERSION_PROPERTY_ID); - }}; + Set requiredProperties = new HashSet(); + requiredProperties.add(CLUSTER_STACK_VERSION_CLUSTER_NAME_PROPERTY_ID); + 
requiredProperties.add(CLUSTER_STACK_VERSION_REPOSITORY_VERSION_PROPERTY_ID); + requiredProperties.add(CLUSTER_STACK_VERSION_STACK_PROPERTY_ID); + requiredProperties.add(CLUSTER_STACK_VERSION_VERSION_PROPERTY_ID); for (String requiredProperty : requiredProperties) { if (! propertyMap.containsKey(requiredProperty)) { @@ -318,13 +328,22 @@ public class ClusterStackVersionResourceProvider extends AbstractControllerResou stackId = currentStackVersion; } + // why does the JSON body parser convert JSON primitives into strings!? + Float successFactor = INSTALL_PACKAGES_SUCCESS_FACTOR; + String successFactorProperty = (String) propertyMap.get(CLUSTER_STACK_VERSION_STAGE_SUCCESS_FACTOR); + if (StringUtils.isNotBlank(successFactorProperty)) { + successFactor = Float.valueOf(successFactorProperty); + } + RepositoryVersionEntity repoVersionEnt = repositoryVersionDAO.findByStackAndVersion( stackId, desiredRepoVersion); + if (repoVersionEnt == null) { throw new IllegalArgumentException(String.format( "Repo version %s is not available for stack %s", desiredRepoVersion, stackId)); } + List operatingSystems = repoVersionEnt.getOperatingSystems(); Map> perOsRepos = new HashMap>(); for (OperatingSystemEntity operatingSystem : operatingSystems) { @@ -348,27 +367,32 @@ public class ClusterStackVersionResourceProvider extends AbstractControllerResou } ArrayList stages = new ArrayList(batchCount); - for (int batchId = 1; batchId <= batchCount ; batchId++) { + for (int batchId = 1; batchId <= batchCount; batchId++) { // Create next stage String stageName; if (batchCount > 1) { stageName = INSTALL_PACKAGES_FULL_NAME; } else { - stageName = String.format(INSTALL_PACKAGES_FULL_NAME + - ". Batch %d of %d", batchId, batchCount); + stageName = String.format(INSTALL_PACKAGES_FULL_NAME + ". 
Batch %d of %d", batchId, + batchCount); } - Stage stage = stageFactory.createNew(req.getId(), - "/tmp/ambari", - cluster.getClusterName(), - cluster.getClusterId(), - stageName, - "{}", - "{}", - hostParamsJson - ); + + Stage stage = stageFactory.createNew(req.getId(), "/tmp/ambari", cluster.getClusterName(), + cluster.getClusterId(), stageName, "{}", "{}", hostParamsJson); + + // if you have 1000 hosts (10 stages with 100 installs), we want to ensure + // that a single failure doesn't cause all other stages to abort; set the + // success factor ratio in order to tolerate some failures in a single + // stage + stage.getSuccessFactors().put(Role.INSTALL_PACKAGES, successFactor); + + // set and increment stage ID stage.setStageId(stageId); - stages.add(stage); stageId++; + + // add the stage that was just created + stages.add(stage); + // Populate with commands for host for (int i = 0; i < maxTasks && hostsForClusterIter.hasNext(); i++) { Host host = hostsForClusterIter.next(); @@ -381,6 +405,7 @@ public class ClusterStackVersionResourceProvider extends AbstractControllerResou try { ClusterVersionEntity existingCSVer = clusterVersionDAO.findByClusterAndStackAndVersion( clName, stackId, desiredRepoVersion); + if (existingCSVer == null) { try { // Create/persist new cluster stack version @@ -453,12 +478,11 @@ public class ClusterStackVersionResourceProvider extends AbstractControllerResou final String packageList = gson.toJson(packages); final String repoList = gson.toJson(repoInfo); - Map params = new HashMap() {{ - put("stack_id", stackId.getStackId()); - put("repository_version", desiredRepoVersion); - put("base_urls", repoList); - put("package_list", packageList); - }}; + Map params = new HashMap(); + params.put("stack_id", stackId.getStackId()); + params.put("repository_version", desiredRepoVersion); + params.put("base_urls", repoList); + params.put("package_list", packageList); // add host to this stage RequestResourceFilter filter = new 
RequestResourceFilter(null, null, @@ -505,11 +529,10 @@ public class ClusterStackVersionResourceProvider extends AbstractControllerResou } Map propertyMap = iterator.next(); - Set requiredProperties = new HashSet() {{ - add(CLUSTER_STACK_VERSION_CLUSTER_NAME_PROPERTY_ID); - add(CLUSTER_STACK_VERSION_REPOSITORY_VERSION_PROPERTY_ID); - add(CLUSTER_STACK_VERSION_STATE_PROPERTY_ID); - }}; + Set requiredProperties = new HashSet(); + requiredProperties.add(CLUSTER_STACK_VERSION_CLUSTER_NAME_PROPERTY_ID); + requiredProperties.add(CLUSTER_STACK_VERSION_REPOSITORY_VERSION_PROPERTY_ID); + requiredProperties.add(CLUSTER_STACK_VERSION_STATE_PROPERTY_ID); for (String requiredProperty : requiredProperties) { if (!propertyMap.containsKey(requiredProperty)) { @@ -571,7 +594,7 @@ public class ClusterStackVersionResourceProvider extends AbstractControllerResou command.setCommandParams(args); command.setClusterName(clName); finalizeUpgradeAction.setExecutionCommand(command); - + HostRoleCommand hostRoleCommand = hostRoleCommandFactory.create(defaultHostName, Role.AMBARI_SERVER_ACTION, null, null); finalizeUpgradeAction.setHostRoleCommand(hostRoleCommand); http://git-wip-us.apache.org/repos/asf/ambari/blob/20346a34/ambari-server/src/test/java/org/apache/ambari/server/controller/internal/ClusterStackVersionResourceProviderTest.java ---------------------------------------------------------------------- diff --git a/ambari-server/src/test/java/org/apache/ambari/server/controller/internal/ClusterStackVersionResourceProviderTest.java b/ambari-server/src/test/java/org/apache/ambari/server/controller/internal/ClusterStackVersionResourceProviderTest.java index 1dfa2fb..ece45f9 100644 --- a/ambari-server/src/test/java/org/apache/ambari/server/controller/internal/ClusterStackVersionResourceProviderTest.java +++ b/ambari-server/src/test/java/org/apache/ambari/server/controller/internal/ClusterStackVersionResourceProviderTest.java @@ -39,6 +39,7 @@ import java.util.Map; import 
java.util.Properties; import java.util.Set; +import org.apache.ambari.server.Role; import org.apache.ambari.server.actionmanager.ActionManager; import org.apache.ambari.server.actionmanager.ExecutionCommandWrapper; import org.apache.ambari.server.actionmanager.HostRoleCommand; @@ -58,7 +59,6 @@ import org.apache.ambari.server.controller.spi.ResourceProvider; import org.apache.ambari.server.controller.utilities.PropertyHelper; import org.apache.ambari.server.orm.GuiceJpaInitializer; import org.apache.ambari.server.orm.InMemoryDefaultTestModule; -import org.apache.ambari.server.orm.PersistenceType; import org.apache.ambari.server.orm.dao.ClusterDAO; import org.apache.ambari.server.orm.dao.HostDAO; import org.apache.ambari.server.orm.dao.RepositoryVersionDAO; @@ -224,6 +224,9 @@ public class ClusterStackVersionResourceProviderTest { expect(stage.getExecutionCommandWrapper(anyObject(String.class), anyObject(String.class))). andReturn(executionCommandWrapper).anyTimes(); + Map successFactors = new HashMap<>(); + expect(stage.getSuccessFactors()).andReturn(successFactors).atLeastOnce(); + // Check that we create proper stage count expect(stageFactory.createNew(anyLong(), anyObject(String.class), anyObject(String.class), anyLong(), @@ -268,10 +271,14 @@ public class ClusterStackVersionResourceProviderTest { Request request = PropertyHelper.getCreateRequest(propertySet, null); RequestStatus status = provider.createResources(request); + Assert.assertNotNull(status); // verify - verify(managementController, response, clusters, stageFactory); + verify(managementController, response, clusters, stageFactory, stage); + // check that the success factor was populated in the stage + Float successFactor = successFactors.get(Role.INSTALL_PACKAGES); + Assert.assertEquals(Float.valueOf(0.85f), successFactor); }