ambari-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jonathanhur...@apache.org
Subject ambari git commit: AMBARI-12867 - Do Not Automatically Abort Stack Repository Installation When A Host Timed Out (jonathanhurley
Date Tue, 25 Aug 2015 13:03:52 GMT
Repository: ambari
Updated Branches:
  refs/heads/trunk 5b0acfbdc -> bba679959


AMBARI-12867 - Do Not Automatically Abort Stack Repository Installation When A Host Timed
Out (jonathanhurley


Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/bba67995
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/bba67995
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/bba67995

Branch: refs/heads/trunk
Commit: bba679959b3edc16db507faeacd84d33167bbcf4
Parents: 5b0acfb
Author: Jonathan Hurley <jhurley@hortonworks.com>
Authored: Mon Aug 24 19:28:03 2015 -0400
Committer: Jonathan Hurley <jhurley@hortonworks.com>
Committed: Tue Aug 25 09:03:24 2015 -0400

----------------------------------------------------------------------
 .../java/org/apache/ambari/server/Role.java     |   1 +
 .../ClusterStackVersionResourceProvider.java    | 112 +++++++++++--------
 ...ClusterStackVersionResourceProviderTest.java |  17 +--
 3 files changed, 76 insertions(+), 54 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/ambari/blob/bba67995/ambari-server/src/main/java/org/apache/ambari/server/Role.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/Role.java b/ambari-server/src/main/java/org/apache/ambari/server/Role.java
index 636df3f..c684981 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/Role.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/Role.java
@@ -113,6 +113,7 @@ public class Role {
   public static final Role METRICS_MONITOR = valueOf("METRICS_MONITOR");
   public static final Role AMS_SERVICE_CHECK = valueOf("AMBARI_METRICS_SERVICE_CHECK");
   public static final Role ACCUMULO_CLIENT = valueOf("ACCUMULO_CLIENT");
+  public static final Role INSTALL_PACKAGES = valueOf("install_packages");
 
   private String name = null;
 

http://git-wip-us.apache.org/repos/asf/ambari/blob/bba67995/ambari-server/src/main/java/org/apache/ambari/server/controller/internal/ClusterStackVersionResourceProvider.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/controller/internal/ClusterStackVersionResourceProvider.java
b/ambari-server/src/main/java/org/apache/ambari/server/controller/internal/ClusterStackVersionResourceProvider.java
index 6133885..2f3e959 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/controller/internal/ClusterStackVersionResourceProvider.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/controller/internal/ClusterStackVersionResourceProvider.java
@@ -58,11 +58,9 @@ import org.apache.ambari.server.controller.spi.UnsupportedPropertyException;
 import org.apache.ambari.server.controller.utilities.PropertyHelper;
 import org.apache.ambari.server.events.ActionFinalReportReceivedEvent;
 import org.apache.ambari.server.events.publishers.AmbariEventPublisher;
-import org.apache.ambari.server.orm.dao.ClusterDAO;
 import org.apache.ambari.server.orm.dao.ClusterVersionDAO;
 import org.apache.ambari.server.orm.dao.HostVersionDAO;
 import org.apache.ambari.server.orm.dao.RepositoryVersionDAO;
-import org.apache.ambari.server.orm.dao.StackDAO;
 import org.apache.ambari.server.orm.entities.ClusterVersionEntity;
 import org.apache.ambari.server.orm.entities.HostVersionEntity;
 import org.apache.ambari.server.orm.entities.OperatingSystemEntity;
@@ -79,6 +77,7 @@ import org.apache.ambari.server.state.ServiceInfo;
 import org.apache.ambari.server.state.ServiceOsSpecific;
 import org.apache.ambari.server.state.StackId;
 import org.apache.ambari.server.utils.StageUtils;
+import org.apache.commons.lang.StringUtils;
 
 import com.google.gson.Gson;
 import com.google.inject.Inject;
@@ -93,17 +92,30 @@ public class ClusterStackVersionResourceProvider extends AbstractControllerResou
 
   // ----- Property ID constants ---------------------------------------------
 
-  protected static final String CLUSTER_STACK_VERSION_ID_PROPERTY_ID                   =
PropertyHelper.getPropertyId("ClusterStackVersions", "id");
-  protected static final String CLUSTER_STACK_VERSION_CLUSTER_NAME_PROPERTY_ID         =
PropertyHelper.getPropertyId("ClusterStackVersions", "cluster_name");
-  protected static final String CLUSTER_STACK_VERSION_STACK_PROPERTY_ID                =
PropertyHelper.getPropertyId("ClusterStackVersions", "stack");
-  protected static final String CLUSTER_STACK_VERSION_VERSION_PROPERTY_ID              =
PropertyHelper.getPropertyId("ClusterStackVersions", "version");
-  protected static final String CLUSTER_STACK_VERSION_STATE_PROPERTY_ID                =
PropertyHelper.getPropertyId("ClusterStackVersions", "state");
-  protected static final String CLUSTER_STACK_VERSION_HOST_STATES_PROPERTY_ID          =
PropertyHelper.getPropertyId("ClusterStackVersions", "host_states");
-  protected static final String CLUSTER_STACK_VERSION_REPOSITORY_VERSION_PROPERTY_ID   =
PropertyHelper.getPropertyId("ClusterStackVersions", "repository_version");
+  protected static final String CLUSTER_STACK_VERSION_ID_PROPERTY_ID = PropertyHelper.getPropertyId("ClusterStackVersions",
"id");
+  protected static final String CLUSTER_STACK_VERSION_CLUSTER_NAME_PROPERTY_ID = PropertyHelper.getPropertyId("ClusterStackVersions",
"cluster_name");
+  protected static final String CLUSTER_STACK_VERSION_STACK_PROPERTY_ID = PropertyHelper.getPropertyId("ClusterStackVersions",
"stack");
+  protected static final String CLUSTER_STACK_VERSION_VERSION_PROPERTY_ID = PropertyHelper.getPropertyId("ClusterStackVersions",
"version");
+  protected static final String CLUSTER_STACK_VERSION_STATE_PROPERTY_ID = PropertyHelper.getPropertyId("ClusterStackVersions",
"state");
+  protected static final String CLUSTER_STACK_VERSION_HOST_STATES_PROPERTY_ID = PropertyHelper.getPropertyId("ClusterStackVersions",
"host_states");
+  protected static final String CLUSTER_STACK_VERSION_REPOSITORY_VERSION_PROPERTY_ID  = PropertyHelper.getPropertyId("ClusterStackVersions",
"repository_version");
+  protected static final String CLUSTER_STACK_VERSION_STAGE_SUCCESS_FACTOR  = PropertyHelper.getPropertyId("ClusterStackVersions",
"success_factor");
 
   protected static final String INSTALL_PACKAGES_ACTION = "install_packages";
   protected static final String INSTALL_PACKAGES_FULL_NAME = "Install version";
 
+  /**
+   * The default success factor that will be used when determining if a stage's
+   * failure should cause other stages to abort. Consider a scenario with 1000
+   * hosts, broken up into 10 stages. Each stage would have 100 hosts. If the
+   * success factor was 100%, then any failure in stage 1 woudl cause all 9
+   * other stages to abort. If set to 90%, then 10 hosts would need to fail for
+   * the other stages to abort. This is necessary to prevent the abortion of
+   * stages based on 1 or 2 errant hosts failing in a large cluster's stack
+   * distribution.
+   */
+  private static final float INSTALL_PACKAGES_SUCCESS_FACTOR = 0.85f;
+
   @SuppressWarnings("serial")
   private static Set<String> pkPropertyIds = new HashSet<String>() {
     {
@@ -126,6 +138,7 @@ public class ClusterStackVersionResourceProvider extends AbstractControllerResou
       add(CLUSTER_STACK_VERSION_HOST_STATES_PROPERTY_ID);
       add(CLUSTER_STACK_VERSION_STATE_PROPERTY_ID);
       add(CLUSTER_STACK_VERSION_REPOSITORY_VERSION_PROPERTY_ID);
+      add(CLUSTER_STACK_VERSION_STAGE_SUCCESS_FACTOR);
     }
   };
 
@@ -147,9 +160,6 @@ public class ClusterStackVersionResourceProvider extends AbstractControllerResou
   private static HostVersionDAO hostVersionDAO;
 
   @Inject
-  private static StackDAO stackDAO;
-
-  @Inject
   private static RepositoryVersionDAO repositoryVersionDAO;
 
   @Inject
@@ -164,9 +174,6 @@ public class ClusterStackVersionResourceProvider extends AbstractControllerResou
   private static StageFactory stageFactory;
 
   @Inject
-  private static ClusterDAO clusterDAO;
-
-  @Inject
   private static RequestFactory requestFactory;
 
   @Inject
@@ -272,14 +279,14 @@ public class ClusterStackVersionResourceProvider extends AbstractControllerResou
     if (request.getProperties().size() != 1) {
       throw new UnsupportedOperationException("Multiple requests cannot be executed at the
same time.");
     }
+
     Map<String, Object> propertyMap = iterator.next();
 
-    Set<String> requiredProperties = new HashSet<String>(){{
-      add(CLUSTER_STACK_VERSION_CLUSTER_NAME_PROPERTY_ID);
-      add(CLUSTER_STACK_VERSION_REPOSITORY_VERSION_PROPERTY_ID);
-      add(CLUSTER_STACK_VERSION_STACK_PROPERTY_ID);
-      add(CLUSTER_STACK_VERSION_VERSION_PROPERTY_ID);
-    }};
+    Set<String> requiredProperties = new HashSet<String>();
+    requiredProperties.add(CLUSTER_STACK_VERSION_CLUSTER_NAME_PROPERTY_ID);
+    requiredProperties.add(CLUSTER_STACK_VERSION_REPOSITORY_VERSION_PROPERTY_ID);
+    requiredProperties.add(CLUSTER_STACK_VERSION_STACK_PROPERTY_ID);
+    requiredProperties.add(CLUSTER_STACK_VERSION_VERSION_PROPERTY_ID);
 
     for (String requiredProperty : requiredProperties) {
       if (! propertyMap.containsKey(requiredProperty)) {
@@ -319,13 +326,22 @@ public class ClusterStackVersionResourceProvider extends AbstractControllerResou
       stackId = currentStackVersion;
     }
 
+    // why does the JSON body parser convert JSON primitives into strings!?
+    Float successFactor = INSTALL_PACKAGES_SUCCESS_FACTOR;
+    String successFactorProperty = (String) propertyMap.get(CLUSTER_STACK_VERSION_STAGE_SUCCESS_FACTOR);
+    if (StringUtils.isNotBlank(successFactorProperty)) {
+      successFactor = Float.valueOf(successFactorProperty);
+    }
+
     RepositoryVersionEntity repoVersionEnt = repositoryVersionDAO.findByStackAndVersion(
         stackId, desiredRepoVersion);
+
     if (repoVersionEnt == null) {
       throw new IllegalArgumentException(String.format(
               "Repo version %s is not available for stack %s",
               desiredRepoVersion, stackId));
     }
+
     List<OperatingSystemEntity> operatingSystems = repoVersionEnt.getOperatingSystems();
     Map<String, List<RepositoryEntity>> perOsRepos = new HashMap<String, List<RepositoryEntity>>();
     for (OperatingSystemEntity operatingSystem : operatingSystems) {
@@ -351,27 +367,32 @@ public class ClusterStackVersionResourceProvider extends AbstractControllerResou
     }
 
     ArrayList<Stage> stages = new ArrayList<Stage>(batchCount);
-    for (int batchId = 1; batchId <= batchCount ; batchId++) {
+    for (int batchId = 1; batchId <= batchCount; batchId++) {
       // Create next stage
       String stageName;
       if (batchCount > 1) {
         stageName = INSTALL_PACKAGES_FULL_NAME;
       } else {
-        stageName = String.format(INSTALL_PACKAGES_FULL_NAME +
-                ". Batch %d of %d", batchId, batchCount);
+        stageName = String.format(INSTALL_PACKAGES_FULL_NAME + ". Batch %d of %d", batchId,
+            batchCount);
       }
-      Stage stage = stageFactory.createNew(req.getId(),
-              "/tmp/ambari",
-              cluster.getClusterName(),
-              cluster.getClusterId(),
-              stageName,
-              "{}",
-              "{}",
-              hostParamsJson
-      );
+
+      Stage stage = stageFactory.createNew(req.getId(), "/tmp/ambari", cluster.getClusterName(),
+          cluster.getClusterId(), stageName, "{}", "{}", hostParamsJson);
+
+      // if you have 1000 hosts (10 stages with 100 installs), we want to ensure
+      // that a single failure doesn't cause all other stages to abort; set the
+      // success factor ratio in order to tolerate some failures in a single
+      // stage
+      stage.getSuccessFactors().put(Role.INSTALL_PACKAGES, successFactor);
+
+      // set and increment stage ID
       stage.setStageId(stageId);
-      stages.add(stage);
       stageId++;
+
+      // add the stage that was just created
+      stages.add(stage);
+
       // Populate with commands for host
       for (int i = 0; i < maxTasks && hostsForClusterIter.hasNext(); i++) {
         Host host = hostsForClusterIter.next();
@@ -389,6 +410,7 @@ public class ClusterStackVersionResourceProvider extends AbstractControllerResou
     try {
       ClusterVersionEntity existingCSVer = clusterVersionDAO.findByClusterAndStackAndVersion(
           clName, stackId, desiredRepoVersion);
+
       if (existingCSVer == null) {
         try {
           // Create/persist new cluster stack version
@@ -467,12 +489,11 @@ public class ClusterStackVersionResourceProvider extends AbstractControllerResou
     final String packageList = gson.toJson(packages);
     final String repoList = gson.toJson(repoInfo);
 
-    Map<String, String> params = new HashMap<String, String>() {{
-      put("stack_id", stackId.getStackId());
-      put("repository_version", desiredRepoVersion);
-      put("base_urls", repoList);
-      put("package_list", packageList);
-    }};
+    Map<String, String> params = new HashMap<String, String>();
+    params.put("stack_id", stackId.getStackId());
+    params.put("repository_version", desiredRepoVersion);
+    params.put("base_urls", repoList);
+    params.put("package_list", packageList);
 
     // add host to this stage
     RequestResourceFilter filter = new RequestResourceFilter(null, null,
@@ -571,11 +592,10 @@ public class ClusterStackVersionResourceProvider extends AbstractControllerResou
       }
       Map<String, Object> propertyMap = iterator.next();
 
-      Set<String> requiredProperties = new HashSet<String>() {{
-        add(CLUSTER_STACK_VERSION_CLUSTER_NAME_PROPERTY_ID);
-        add(CLUSTER_STACK_VERSION_REPOSITORY_VERSION_PROPERTY_ID);
-        add(CLUSTER_STACK_VERSION_STATE_PROPERTY_ID);
-      }};
+      Set<String> requiredProperties = new HashSet<String>();
+      requiredProperties.add(CLUSTER_STACK_VERSION_CLUSTER_NAME_PROPERTY_ID);
+      requiredProperties.add(CLUSTER_STACK_VERSION_REPOSITORY_VERSION_PROPERTY_ID);
+      requiredProperties.add(CLUSTER_STACK_VERSION_STATE_PROPERTY_ID);
 
       for (String requiredProperty : requiredProperties) {
         if (!propertyMap.containsKey(requiredProperty)) {
@@ -634,7 +654,7 @@ public class ClusterStackVersionResourceProvider extends AbstractControllerResou
       command.setCommandParams(args);
       command.setClusterName(clName);
       finalizeUpgradeAction.setExecutionCommand(command);
-      
+
       HostRoleCommand hostRoleCommand = hostRoleCommandFactory.create(defaultHostName,
               Role.AMBARI_SERVER_ACTION, null, null);
       finalizeUpgradeAction.setHostRoleCommand(hostRoleCommand);

http://git-wip-us.apache.org/repos/asf/ambari/blob/bba67995/ambari-server/src/test/java/org/apache/ambari/server/controller/internal/ClusterStackVersionResourceProviderTest.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/java/org/apache/ambari/server/controller/internal/ClusterStackVersionResourceProviderTest.java
b/ambari-server/src/test/java/org/apache/ambari/server/controller/internal/ClusterStackVersionResourceProviderTest.java
index a56823b..5b24edc 100644
--- a/ambari-server/src/test/java/org/apache/ambari/server/controller/internal/ClusterStackVersionResourceProviderTest.java
+++ b/ambari-server/src/test/java/org/apache/ambari/server/controller/internal/ClusterStackVersionResourceProviderTest.java
@@ -31,7 +31,6 @@ import static org.easymock.EasyMock.verify;
 
 import java.lang.reflect.Field;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.LinkedHashMap;
@@ -41,6 +40,7 @@ import java.util.Map;
 import java.util.Properties;
 import java.util.Set;
 
+import org.apache.ambari.server.Role;
 import org.apache.ambari.server.actionmanager.ActionManager;
 import org.apache.ambari.server.actionmanager.ExecutionCommandWrapper;
 import org.apache.ambari.server.actionmanager.HostRoleCommand;
@@ -61,7 +61,6 @@ import org.apache.ambari.server.controller.spi.ResourceProvider;
 import org.apache.ambari.server.controller.utilities.PropertyHelper;
 import org.apache.ambari.server.orm.GuiceJpaInitializer;
 import org.apache.ambari.server.orm.InMemoryDefaultTestModule;
-import org.apache.ambari.server.orm.PersistenceType;
 import org.apache.ambari.server.orm.dao.ClusterDAO;
 import org.apache.ambari.server.orm.dao.ClusterVersionDAO;
 import org.apache.ambari.server.orm.dao.HostDAO;
@@ -70,7 +69,6 @@ import org.apache.ambari.server.orm.dao.ResourceTypeDAO;
 import org.apache.ambari.server.orm.dao.StackDAO;
 import org.apache.ambari.server.orm.entities.ClusterEntity;
 import org.apache.ambari.server.orm.entities.ClusterVersionEntity;
-import org.apache.ambari.server.orm.entities.HostEntity;
 import org.apache.ambari.server.orm.entities.RepositoryVersionEntity;
 import org.apache.ambari.server.orm.entities.ResourceEntity;
 import org.apache.ambari.server.orm.entities.ResourceTypeEntity;
@@ -266,6 +264,9 @@ public class ClusterStackVersionResourceProviderTest {
     expect(stage.getExecutionCommandWrapper(anyObject(String.class), anyObject(String.class))).
             andReturn(executionCommandWrapper).anyTimes();
 
+    Map<Role, Float> successFactors = new HashMap<>();
+    expect(stage.getSuccessFactors()).andReturn(successFactors).atLeastOnce();
+
     // Check that we create proper stage count
     expect(stageFactory.createNew(anyLong(), anyObject(String.class),
             anyObject(String.class), anyLong(),
@@ -314,18 +315,18 @@ public class ClusterStackVersionResourceProviderTest {
 
     propertySet.add(properties);
 
-
-
-
-
     // create the request
     Request request = PropertyHelper.getCreateRequest(propertySet, null);
 
     RequestStatus status = provider.createResources(request);
+    Assert.assertNotNull(status);
 
     // verify
-    verify(managementController, response, clusters, stageFactory);
+    verify(managementController, response, clusters, stageFactory, stage);
 
+    // check that the success factor was populated in the stage
+    Float successFactor = successFactors.get(Role.INSTALL_PACKAGES);
+    Assert.assertEquals(Float.valueOf(0.85f), successFactor);
   }
 
 


Mime
View raw message