ambari-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jonathanhur...@apache.org
Subject ambari git commit: AMBARI-12867 - Do Not Automatically Abort Stack Repository Installation When A Host Timed Out (jonathanhurley)
Date Tue, 25 Aug 2015 13:07:29 GMT
Repository: ambari
Updated Branches:
  refs/heads/branch-2.1 ea04dac05 -> 20346a34c


AMBARI-12867 - Do Not Automatically Abort Stack Repository Installation When A Host Timed
Out (jonathanhurley)


Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/20346a34
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/20346a34
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/20346a34

Branch: refs/heads/branch-2.1
Commit: 20346a34c497dcfb8ef3bd5cbcd9c867dd2ec474
Parents: ea04dac
Author: Jonathan Hurley <jhurley@hortonworks.com>
Authored: Mon Aug 24 19:28:03 2015 -0400
Committer: Jonathan Hurley <jhurley@hortonworks.com>
Committed: Tue Aug 25 09:07:19 2015 -0400

----------------------------------------------------------------------
 .../java/org/apache/ambari/server/Role.java     |   1 +
 .../ClusterStackVersionResourceProvider.java    | 111 +++++++++++--------
 ...ClusterStackVersionResourceProviderTest.java |  11 +-
 3 files changed, 77 insertions(+), 46 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/ambari/blob/20346a34/ambari-server/src/main/java/org/apache/ambari/server/Role.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/Role.java b/ambari-server/src/main/java/org/apache/ambari/server/Role.java
index 636df3f..c684981 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/Role.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/Role.java
@@ -113,6 +113,7 @@ public class Role {
   public static final Role METRICS_MONITOR = valueOf("METRICS_MONITOR");
   public static final Role AMS_SERVICE_CHECK = valueOf("AMBARI_METRICS_SERVICE_CHECK");
   public static final Role ACCUMULO_CLIENT = valueOf("ACCUMULO_CLIENT");
+  public static final Role INSTALL_PACKAGES = valueOf("install_packages");
 
   private String name = null;
 

http://git-wip-us.apache.org/repos/asf/ambari/blob/20346a34/ambari-server/src/main/java/org/apache/ambari/server/controller/internal/ClusterStackVersionResourceProvider.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/controller/internal/ClusterStackVersionResourceProvider.java
b/ambari-server/src/main/java/org/apache/ambari/server/controller/internal/ClusterStackVersionResourceProvider.java
index 972226d..6590d00 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/controller/internal/ClusterStackVersionResourceProvider.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/controller/internal/ClusterStackVersionResourceProvider.java
@@ -20,7 +20,6 @@ package org.apache.ambari.server.controller.internal;
 import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.JDK_LOCATION;
 
 import java.util.ArrayList;
-import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
@@ -61,7 +60,7 @@ import org.apache.ambari.server.orm.dao.ClusterDAO;
 import org.apache.ambari.server.orm.dao.ClusterVersionDAO;
 import org.apache.ambari.server.orm.dao.HostVersionDAO;
 import org.apache.ambari.server.orm.dao.RepositoryVersionDAO;
-import org.apache.ambari.server.orm.dao.StackDAO;
+import org.apache.ambari.server.orm.entities.ClusterEntity;
 import org.apache.ambari.server.orm.entities.ClusterVersionEntity;
 import org.apache.ambari.server.orm.entities.HostEntity;
 import org.apache.ambari.server.orm.entities.HostVersionEntity;
@@ -78,7 +77,7 @@ import org.apache.ambari.server.state.ServiceInfo;
 import org.apache.ambari.server.state.ServiceOsSpecific;
 import org.apache.ambari.server.state.StackId;
 import org.apache.ambari.server.utils.StageUtils;
-import org.apache.ambari.server.orm.entities.ClusterEntity;
+import org.apache.commons.lang.StringUtils;
 
 import com.google.gson.Gson;
 import com.google.inject.Inject;
@@ -93,17 +92,30 @@ public class ClusterStackVersionResourceProvider extends AbstractControllerResou
 
   // ----- Property ID constants ---------------------------------------------
 
-  protected static final String CLUSTER_STACK_VERSION_ID_PROPERTY_ID                   =
PropertyHelper.getPropertyId("ClusterStackVersions", "id");
-  protected static final String CLUSTER_STACK_VERSION_CLUSTER_NAME_PROPERTY_ID         =
PropertyHelper.getPropertyId("ClusterStackVersions", "cluster_name");
-  protected static final String CLUSTER_STACK_VERSION_STACK_PROPERTY_ID                =
PropertyHelper.getPropertyId("ClusterStackVersions", "stack");
-  protected static final String CLUSTER_STACK_VERSION_VERSION_PROPERTY_ID              =
PropertyHelper.getPropertyId("ClusterStackVersions", "version");
-  protected static final String CLUSTER_STACK_VERSION_STATE_PROPERTY_ID                =
PropertyHelper.getPropertyId("ClusterStackVersions", "state");
-  protected static final String CLUSTER_STACK_VERSION_HOST_STATES_PROPERTY_ID          =
PropertyHelper.getPropertyId("ClusterStackVersions", "host_states");
-  protected static final String CLUSTER_STACK_VERSION_REPOSITORY_VERSION_PROPERTY_ID   =
PropertyHelper.getPropertyId("ClusterStackVersions", "repository_version");
+  protected static final String CLUSTER_STACK_VERSION_ID_PROPERTY_ID = PropertyHelper.getPropertyId("ClusterStackVersions",
"id");
+  protected static final String CLUSTER_STACK_VERSION_CLUSTER_NAME_PROPERTY_ID = PropertyHelper.getPropertyId("ClusterStackVersions",
"cluster_name");
+  protected static final String CLUSTER_STACK_VERSION_STACK_PROPERTY_ID = PropertyHelper.getPropertyId("ClusterStackVersions",
"stack");
+  protected static final String CLUSTER_STACK_VERSION_VERSION_PROPERTY_ID = PropertyHelper.getPropertyId("ClusterStackVersions",
"version");
+  protected static final String CLUSTER_STACK_VERSION_STATE_PROPERTY_ID = PropertyHelper.getPropertyId("ClusterStackVersions",
"state");
+  protected static final String CLUSTER_STACK_VERSION_HOST_STATES_PROPERTY_ID = PropertyHelper.getPropertyId("ClusterStackVersions",
"host_states");
+  protected static final String CLUSTER_STACK_VERSION_REPOSITORY_VERSION_PROPERTY_ID  = PropertyHelper.getPropertyId("ClusterStackVersions",
"repository_version");
+  protected static final String CLUSTER_STACK_VERSION_STAGE_SUCCESS_FACTOR  = PropertyHelper.getPropertyId("ClusterStackVersions",
"success_factor");
 
   protected static final String INSTALL_PACKAGES_ACTION = "install_packages";
   protected static final String INSTALL_PACKAGES_FULL_NAME = "Install version";
 
+  /**
+   * The default success factor that will be used when determining if a stage's
+   * failure should cause other stages to abort. Consider a scenario with 1000
+   * hosts, broken up into 10 stages. Each stage would have 100 hosts. If the
+   * success factor was 100%, then any failure in stage 1 would cause all 9
+   * other stages to abort. If set to 90%, then 10 hosts would need to fail for
+   * the other stages to abort. This is necessary to prevent the abortion of
+   * stages based on 1 or 2 errant hosts failing in a large cluster's stack
+   * distribution.
+   */
+  private static final float INSTALL_PACKAGES_SUCCESS_FACTOR = 0.85f;
+
   @SuppressWarnings("serial")
   private static Set<String> pkPropertyIds = new HashSet<String>() {
     {
@@ -126,6 +138,7 @@ public class ClusterStackVersionResourceProvider extends AbstractControllerResou
       add(CLUSTER_STACK_VERSION_HOST_STATES_PROPERTY_ID);
       add(CLUSTER_STACK_VERSION_STATE_PROPERTY_ID);
       add(CLUSTER_STACK_VERSION_REPOSITORY_VERSION_PROPERTY_ID);
+      add(CLUSTER_STACK_VERSION_STAGE_SUCCESS_FACTOR);
     }
   };
 
@@ -147,9 +160,6 @@ public class ClusterStackVersionResourceProvider extends AbstractControllerResou
   private static HostVersionDAO hostVersionDAO;
 
   @Inject
-  private static StackDAO stackDAO;
-
-  @Inject
   private static RepositoryVersionDAO repositoryVersionDAO;
 
   @Inject
@@ -269,14 +279,14 @@ public class ClusterStackVersionResourceProvider extends AbstractControllerResou
     if (request.getProperties().size() != 1) {
       throw new UnsupportedOperationException("Multiple requests cannot be executed at the
same time.");
     }
+
     Map<String, Object> propertyMap = iterator.next();
 
-    Set<String> requiredProperties = new HashSet<String>(){{
-      add(CLUSTER_STACK_VERSION_CLUSTER_NAME_PROPERTY_ID);
-      add(CLUSTER_STACK_VERSION_REPOSITORY_VERSION_PROPERTY_ID);
-      add(CLUSTER_STACK_VERSION_STACK_PROPERTY_ID);
-      add(CLUSTER_STACK_VERSION_VERSION_PROPERTY_ID);
-    }};
+    Set<String> requiredProperties = new HashSet<String>();
+    requiredProperties.add(CLUSTER_STACK_VERSION_CLUSTER_NAME_PROPERTY_ID);
+    requiredProperties.add(CLUSTER_STACK_VERSION_REPOSITORY_VERSION_PROPERTY_ID);
+    requiredProperties.add(CLUSTER_STACK_VERSION_STACK_PROPERTY_ID);
+    requiredProperties.add(CLUSTER_STACK_VERSION_VERSION_PROPERTY_ID);
 
     for (String requiredProperty : requiredProperties) {
       if (! propertyMap.containsKey(requiredProperty)) {
@@ -318,13 +328,22 @@ public class ClusterStackVersionResourceProvider extends AbstractControllerResou
       stackId = currentStackVersion;
     }
 
+    // why does the JSON body parser convert JSON primitives into strings!?
+    Float successFactor = INSTALL_PACKAGES_SUCCESS_FACTOR;
+    String successFactorProperty = (String) propertyMap.get(CLUSTER_STACK_VERSION_STAGE_SUCCESS_FACTOR);
+    if (StringUtils.isNotBlank(successFactorProperty)) {
+      successFactor = Float.valueOf(successFactorProperty);
+    }
+
     RepositoryVersionEntity repoVersionEnt = repositoryVersionDAO.findByStackAndVersion(
         stackId, desiredRepoVersion);
+
     if (repoVersionEnt == null) {
       throw new IllegalArgumentException(String.format(
               "Repo version %s is not available for stack %s",
               desiredRepoVersion, stackId));
     }
+
     List<OperatingSystemEntity> operatingSystems = repoVersionEnt.getOperatingSystems();
     Map<String, List<RepositoryEntity>> perOsRepos = new HashMap<String, List<RepositoryEntity>>();
     for (OperatingSystemEntity operatingSystem : operatingSystems) {
@@ -348,27 +367,32 @@ public class ClusterStackVersionResourceProvider extends AbstractControllerResou
     }
 
     ArrayList<Stage> stages = new ArrayList<Stage>(batchCount);
-    for (int batchId = 1; batchId <= batchCount ; batchId++) {
+    for (int batchId = 1; batchId <= batchCount; batchId++) {
       // Create next stage
       String stageName;
       if (batchCount > 1) {
         stageName = INSTALL_PACKAGES_FULL_NAME;
       } else {
-        stageName = String.format(INSTALL_PACKAGES_FULL_NAME +
-                ". Batch %d of %d", batchId, batchCount);
+        stageName = String.format(INSTALL_PACKAGES_FULL_NAME + ". Batch %d of %d", batchId,
+            batchCount);
       }
-      Stage stage = stageFactory.createNew(req.getId(),
-              "/tmp/ambari",
-              cluster.getClusterName(),
-              cluster.getClusterId(),
-              stageName,
-              "{}",
-              "{}",
-              hostParamsJson
-      );
+
+      Stage stage = stageFactory.createNew(req.getId(), "/tmp/ambari", cluster.getClusterName(),
+          cluster.getClusterId(), stageName, "{}", "{}", hostParamsJson);
+
+      // if you have 1000 hosts (10 stages with 100 installs), we want to ensure
+      // that a single failure doesn't cause all other stages to abort; set the
+      // success factor ratio in order to tolerate some failures in a single
+      // stage
+      stage.getSuccessFactors().put(Role.INSTALL_PACKAGES, successFactor);
+
+      // set and increment stage ID
       stage.setStageId(stageId);
-      stages.add(stage);
       stageId++;
+
+      // add the stage that was just created
+      stages.add(stage);
+
       // Populate with commands for host
       for (int i = 0; i < maxTasks && hostsForClusterIter.hasNext(); i++) {
         Host host = hostsForClusterIter.next();
@@ -381,6 +405,7 @@ public class ClusterStackVersionResourceProvider extends AbstractControllerResou
     try {
       ClusterVersionEntity existingCSVer = clusterVersionDAO.findByClusterAndStackAndVersion(
           clName, stackId, desiredRepoVersion);
+
       if (existingCSVer == null) {
         try {
           // Create/persist new cluster stack version
@@ -453,12 +478,11 @@ public class ClusterStackVersionResourceProvider extends AbstractControllerResou
     final String packageList = gson.toJson(packages);
     final String repoList = gson.toJson(repoInfo);
 
-    Map<String, String> params = new HashMap<String, String>() {{
-      put("stack_id", stackId.getStackId());
-      put("repository_version", desiredRepoVersion);
-      put("base_urls", repoList);
-      put("package_list", packageList);
-    }};
+    Map<String, String> params = new HashMap<String, String>();
+    params.put("stack_id", stackId.getStackId());
+    params.put("repository_version", desiredRepoVersion);
+    params.put("base_urls", repoList);
+    params.put("package_list", packageList);
 
     // add host to this stage
     RequestResourceFilter filter = new RequestResourceFilter(null, null,
@@ -505,11 +529,10 @@ public class ClusterStackVersionResourceProvider extends AbstractControllerResou
       }
       Map<String, Object> propertyMap = iterator.next();
 
-      Set<String> requiredProperties = new HashSet<String>() {{
-        add(CLUSTER_STACK_VERSION_CLUSTER_NAME_PROPERTY_ID);
-        add(CLUSTER_STACK_VERSION_REPOSITORY_VERSION_PROPERTY_ID);
-        add(CLUSTER_STACK_VERSION_STATE_PROPERTY_ID);
-      }};
+      Set<String> requiredProperties = new HashSet<String>();
+      requiredProperties.add(CLUSTER_STACK_VERSION_CLUSTER_NAME_PROPERTY_ID);
+      requiredProperties.add(CLUSTER_STACK_VERSION_REPOSITORY_VERSION_PROPERTY_ID);
+      requiredProperties.add(CLUSTER_STACK_VERSION_STATE_PROPERTY_ID);
 
       for (String requiredProperty : requiredProperties) {
         if (!propertyMap.containsKey(requiredProperty)) {
@@ -571,7 +594,7 @@ public class ClusterStackVersionResourceProvider extends AbstractControllerResou
       command.setCommandParams(args);
       command.setClusterName(clName);
       finalizeUpgradeAction.setExecutionCommand(command);
-      
+
       HostRoleCommand hostRoleCommand = hostRoleCommandFactory.create(defaultHostName,
               Role.AMBARI_SERVER_ACTION, null, null);
       finalizeUpgradeAction.setHostRoleCommand(hostRoleCommand);

http://git-wip-us.apache.org/repos/asf/ambari/blob/20346a34/ambari-server/src/test/java/org/apache/ambari/server/controller/internal/ClusterStackVersionResourceProviderTest.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/java/org/apache/ambari/server/controller/internal/ClusterStackVersionResourceProviderTest.java
b/ambari-server/src/test/java/org/apache/ambari/server/controller/internal/ClusterStackVersionResourceProviderTest.java
index 1dfa2fb..ece45f9 100644
--- a/ambari-server/src/test/java/org/apache/ambari/server/controller/internal/ClusterStackVersionResourceProviderTest.java
+++ b/ambari-server/src/test/java/org/apache/ambari/server/controller/internal/ClusterStackVersionResourceProviderTest.java
@@ -39,6 +39,7 @@ import java.util.Map;
 import java.util.Properties;
 import java.util.Set;
 
+import org.apache.ambari.server.Role;
 import org.apache.ambari.server.actionmanager.ActionManager;
 import org.apache.ambari.server.actionmanager.ExecutionCommandWrapper;
 import org.apache.ambari.server.actionmanager.HostRoleCommand;
@@ -58,7 +59,6 @@ import org.apache.ambari.server.controller.spi.ResourceProvider;
 import org.apache.ambari.server.controller.utilities.PropertyHelper;
 import org.apache.ambari.server.orm.GuiceJpaInitializer;
 import org.apache.ambari.server.orm.InMemoryDefaultTestModule;
-import org.apache.ambari.server.orm.PersistenceType;
 import org.apache.ambari.server.orm.dao.ClusterDAO;
 import org.apache.ambari.server.orm.dao.HostDAO;
 import org.apache.ambari.server.orm.dao.RepositoryVersionDAO;
@@ -224,6 +224,9 @@ public class ClusterStackVersionResourceProviderTest {
     expect(stage.getExecutionCommandWrapper(anyObject(String.class), anyObject(String.class))).
             andReturn(executionCommandWrapper).anyTimes();
 
+    Map<Role, Float> successFactors = new HashMap<>();
+    expect(stage.getSuccessFactors()).andReturn(successFactors).atLeastOnce();
+
     // Check that we create proper stage count
     expect(stageFactory.createNew(anyLong(), anyObject(String.class),
             anyObject(String.class), anyLong(),
@@ -268,10 +271,14 @@ public class ClusterStackVersionResourceProviderTest {
     Request request = PropertyHelper.getCreateRequest(propertySet, null);
 
     RequestStatus status = provider.createResources(request);
+    Assert.assertNotNull(status);
 
     // verify
-    verify(managementController, response, clusters, stageFactory);
+    verify(managementController, response, clusters, stageFactory, stage);
 
+    // check that the success factor was populated in the stage
+    Float successFactor = successFactors.get(Role.INSTALL_PACKAGES);
+    Assert.assertEquals(Float.valueOf(0.85f), successFactor);
   }
 
 


Mime
View raw message