flink-issues mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "ASF GitHub Bot (JIRA)" <j...@apache.org>
Subject [jira] [Commented] (FLINK-7076) Implement container release to support dynamic scaling
Date Fri, 03 Nov 2017 15:28:07 GMT

    [ https://issues.apache.org/jira/browse/FLINK-7076?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16237751#comment-16237751
] 

ASF GitHub Bot commented on FLINK-7076:
---------------------------------------

Github user tillrohrmann commented on a diff in the pull request:

    https://github.com/apache/flink/pull/4729#discussion_r148811288
  
    --- Diff: flink-yarn/src/test/java/org/apache/flink/yarn/YarnResourceManagerTest.java
---
    @@ -0,0 +1,354 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements.  See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership.  The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.flink.yarn;
    +
    +import org.apache.flink.api.common.JobID;
    +import org.apache.flink.api.common.time.Time;
    +import org.apache.flink.configuration.Configuration;
    +import org.apache.flink.configuration.ResourceManagerOptions;
    +import org.apache.flink.runtime.clusterframework.types.AllocationID;
    +import org.apache.flink.runtime.clusterframework.types.ResourceID;
    +import org.apache.flink.runtime.clusterframework.types.ResourceProfile;
    +import org.apache.flink.runtime.concurrent.ScheduledExecutor;
    +import org.apache.flink.runtime.concurrent.ScheduledExecutorServiceAdapter;
    +import org.apache.flink.runtime.heartbeat.HeartbeatServices;
    +import org.apache.flink.runtime.heartbeat.TestingHeartbeatServices;
    +import org.apache.flink.runtime.highavailability.HighAvailabilityServices;
    +import org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices;
    +import org.apache.flink.runtime.leaderelection.TestingLeaderElectionService;
    +import org.apache.flink.runtime.metrics.MetricRegistry;
    +import org.apache.flink.runtime.registration.RegistrationResponse;
    +import org.apache.flink.runtime.resourcemanager.JobLeaderIdService;
    +import org.apache.flink.runtime.resourcemanager.ResourceManagerConfiguration;
    +import org.apache.flink.runtime.resourcemanager.SlotRequest;
    +import org.apache.flink.runtime.resourcemanager.slotmanager.SlotManager;
    +import org.apache.flink.runtime.resourcemanager.slotmanager.SlotManagerException;
    +import org.apache.flink.runtime.rpc.FatalErrorHandler;
    +import org.apache.flink.runtime.rpc.RpcService;
    +import org.apache.flink.runtime.rpc.TestingRpcService;
    +import org.apache.flink.runtime.taskexecutor.SlotReport;
    +import org.apache.flink.runtime.taskexecutor.TaskExecutorGateway;
    +import org.apache.flink.runtime.testutils.DirectScheduledExecutorService;
    +import org.apache.flink.runtime.util.TestingFatalErrorHandler;
    +
    +import org.apache.flink.util.TestLogger;
    +
    +import org.apache.flink.shaded.guava18.com.google.common.collect.ImmutableList;
    +
    +import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
    +import org.apache.hadoop.yarn.api.records.ApplicationId;
    +import org.apache.hadoop.yarn.api.records.Container;
    +import org.apache.hadoop.yarn.api.records.ContainerId;
    +import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
    +import org.apache.hadoop.yarn.api.records.NodeId;
    +import org.apache.hadoop.yarn.api.records.Priority;
    +import org.apache.hadoop.yarn.api.records.Resource;
    +import org.apache.hadoop.yarn.client.api.AMRMClient;
    +import org.apache.hadoop.yarn.client.api.NMClient;
    +import org.apache.hadoop.yarn.client.api.async.AMRMClientAsync;
    +import org.junit.After;
    +import org.junit.Before;
    +import org.junit.Rule;
    +import org.junit.Test;
    +import org.junit.rules.TemporaryFolder;
    +import org.slf4j.Logger;
    +import org.slf4j.LoggerFactory;
    +
    +import java.io.File;
    +import java.util.HashMap;
    +import java.util.Map;
    +import java.util.UUID;
    +import java.util.concurrent.TimeUnit;
    +
    +import static org.apache.flink.yarn.YarnConfigKeys.ENV_APP_ID;
    +import static org.apache.flink.yarn.YarnConfigKeys.ENV_CLIENT_HOME_DIR;
    +import static org.apache.flink.yarn.YarnConfigKeys.ENV_CLIENT_SHIP_FILES;
    +import static org.apache.flink.yarn.YarnConfigKeys.ENV_FLINK_CLASSPATH;
    +import static org.apache.flink.yarn.YarnConfigKeys.ENV_HADOOP_USER_NAME;
    +import static org.apache.flink.yarn.YarnConfigKeys.FLINK_JAR_PATH;
    +import static org.junit.Assert.assertTrue;
    +import static org.mockito.Matchers.any;
    +import static org.mockito.Matchers.eq;
    +import static org.mockito.Mockito.mock;
    +import static org.mockito.Mockito.verify;
    +
    +/**
    + * General tests for the YARN resource manager component.
    + */
    +public class YarnResourceManagerTest extends TestLogger {
    +
    +	private static final Logger LOG = LoggerFactory.getLogger(YarnResourceManagerTest.class);
    +
    +	private static Configuration flinkConfig = new Configuration();
    +
    +	private static Map<String, String> env = new HashMap<>();
    +
    +	private static final Time timeout = Time.seconds(10L);
    +
    +	@Rule
    +	public TemporaryFolder folder = new TemporaryFolder();
    +
    +	@Before
    +	public void setup() {
    +		flinkConfig.setInteger(ResourceManagerOptions.CONTAINERIZED_HEAP_CUTOFF_MIN, 100);
    +		File root = folder.getRoot();
    +		File home = new File(root, "home");
    +		boolean created = home.mkdir();
    +		assertTrue(created);
    +
    +		env.put(ENV_APP_ID, "foo");
    +		env.put(ENV_CLIENT_HOME_DIR, home.getAbsolutePath());
    +		env.put(ENV_CLIENT_SHIP_FILES, "");
    +		env.put(ENV_FLINK_CLASSPATH, "");
    +		env.put(ENV_HADOOP_USER_NAME, "foo");
    +		env.put(FLINK_JAR_PATH, root.toURI().toString());
    +	}
    +
    +	@After
    +	public void teardown() {
    +		env.clear();
    +	}
    +
    +	static class TestingYarnResourceManager extends YarnResourceManager {
    +		public AMRMClientAsync<AMRMClient.ContainerRequest> mockResourceManagerClient;
    +		public NMClient mockNMClient;
    +
    +		public TestingYarnResourceManager(
    +				RpcService rpcService,
    +				String resourceManagerEndpointId,
    +				ResourceID resourceId,
    +				Configuration flinkConfig,
    +				Map<String, String> env,
    +				ResourceManagerConfiguration resourceManagerConfiguration,
    +				HighAvailabilityServices highAvailabilityServices,
    +				HeartbeatServices heartbeatServices,
    +				SlotManager slotManager,
    +				MetricRegistry metricRegistry,
    +				JobLeaderIdService jobLeaderIdService,
    +				FatalErrorHandler fatalErrorHandler,
    +				AMRMClientAsync<AMRMClient.ContainerRequest> mockResourceManagerClient,
    +				NMClient mockNMClient) {
    +			super(rpcService, resourceManagerEndpointId, resourceId, flinkConfig, env,
    +					resourceManagerConfiguration, highAvailabilityServices, heartbeatServices,
    +					slotManager, metricRegistry, jobLeaderIdService, fatalErrorHandler);
    +			this.mockNMClient = mockNMClient;
    +			this.mockResourceManagerClient = mockResourceManagerClient;
    +		}
    +
    +		public void runInMainThread(Runnable runnable) {
    +			super.getMainThreadExecutor().execute(runnable);
    +		}
    +
    +		@Override
    +		protected AMRMClientAsync<AMRMClient.ContainerRequest> createAndStartResourceManagerClient()
{
    +			return mockResourceManagerClient;
    +		}
    +
    +		@Override
    +		protected NMClient createAndStartNodeManagerClient() {
    +			return mockNMClient;
    +		}
    +	}
    +
    +	static class Context {
    +
    +		// services
    +		TestingRpcService rpcService;
    +		TestingFatalErrorHandler fatalErrorHandler;
    +		MockResourceManagerRuntimeServices rmServices;
    +
    +		// RM
    +		ResourceManagerConfiguration rmConfiguration;
    +		ResourceID rmResourceID;
    +		static final String RM_ADDRESS = "resourceManager";
    +		TestingYarnResourceManager resourceManager;
    +
    +		// domain objects for test purposes
    +		final ResourceProfile resourceProfile1 = new ResourceProfile(1.0, 200);
    +
    +		public ContainerId task = ContainerId.newInstance(
    +				ApplicationAttemptId.newInstance(ApplicationId.newInstance(1L, 0), 0), 1);
    +		public String taskHost = "host1";
    +
    +		SlotReport slotReport = new SlotReport();
    +
    +		public NMClient mockNMClient = mock(NMClient.class);
    +		public AMRMClientAsync<AMRMClient.ContainerRequest> mockResourceManagerClient
=
    +				mock(AMRMClientAsync.class);
    +
    +		/**
    +		 * Create mock RM dependencies.
    +		 */
    +		Context() throws Exception {
    +			rpcService = new TestingRpcService();
    +			fatalErrorHandler = new TestingFatalErrorHandler();
    +			rmServices = new MockResourceManagerRuntimeServices();
    +
    +			// resource manager
    +			rmConfiguration = new ResourceManagerConfiguration(
    +					Time.seconds(5L),
    +					Time.seconds(5L));
    +			rmResourceID = ResourceID.generate();
    +			resourceManager =
    +					new TestingYarnResourceManager(
    +							rpcService,
    +							RM_ADDRESS,
    +							rmResourceID,
    +							flinkConfig,
    +							env,
    +							rmConfiguration,
    +							rmServices.highAvailabilityServices,
    +							rmServices.heartbeatServices,
    +							rmServices.slotManager,
    +							rmServices.metricRegistry,
    +							rmServices.jobLeaderIdService,
    +							fatalErrorHandler,
    +							mockResourceManagerClient,
    +							mockNMClient
    +					);
    +		}
    +
    +		/**
    +		 * Mock services needed by the resource manager.
    +		 */
    +		class MockResourceManagerRuntimeServices {
    +
    +			public final ScheduledExecutor scheduledExecutor;
    +			public final TestingHighAvailabilityServices highAvailabilityServices;
    +			public final HeartbeatServices heartbeatServices;
    +			public final MetricRegistry metricRegistry;
    +			public final TestingLeaderElectionService rmLeaderElectionService;
    +			public final JobLeaderIdService jobLeaderIdService;
    +			public final SlotManager slotManager;
    +
    +			public UUID rmLeaderSessionId;
    +
    +			MockResourceManagerRuntimeServices() throws Exception {
    +				scheduledExecutor = mock(ScheduledExecutor.class);
    +				highAvailabilityServices = new TestingHighAvailabilityServices();
    +				rmLeaderElectionService = new TestingLeaderElectionService();
    +				highAvailabilityServices.setResourceManagerLeaderElectionService(rmLeaderElectionService);
    +				heartbeatServices = new TestingHeartbeatServices(5L, 5L, scheduledExecutor);
    +				metricRegistry = mock(MetricRegistry.class);
    --- End diff --
    
    here we could use the `NoOpMetricRegistry`.


> Implement container release to support dynamic scaling
> ------------------------------------------------------
>
>                 Key: FLINK-7076
>                 URL: https://issues.apache.org/jira/browse/FLINK-7076
>             Project: Flink
>          Issue Type: Sub-task
>          Components: ResourceManager
>            Reporter: Till Rohrmann
>            Assignee: Shuyi Chen
>            Priority: Major
>              Labels: flip-6
>
> In order to support dynamic scaling, the {{YarnResourceManager}} has to be able to dynamically
free containers. We have to implement the {{YarnResourceManager#stopWorker}} method.



--
This message was sent by Atlassian JIRA
(v6.4.14#64029)

Mime
View raw message