hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From gtcarre...@apache.org
Subject [45/50] [abbrv] hadoop git commit: YARN-3995. Some of the NM events are not getting published due race condition when AM container finishes in NM (Naganarasimha G R via sjlee)
Date Wed, 20 Jan 2016 09:14:02 GMT
YARN-3995. Some of the NM events are not getting published due race condition when AM container
finishes in NM (Naganarasimha G R via sjlee)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/5157c306
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/5157c306
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/5157c306

Branch: refs/heads/feature-YARN-2928
Commit: 5157c306d8915e0cf07921b0f713508848ffe451
Parents: 0352b97
Author: Sangjin Lee <sjlee@apache.org>
Authored: Mon Jan 11 10:09:34 2016 -0800
Committer: Li Lu <gtcarrera9@apache.org>
Committed: Tue Jan 19 18:03:31 2016 -0800

----------------------------------------------------------------------
 hadoop-yarn-project/CHANGES.txt                 |  3 +++
 .../hadoop/yarn/conf/YarnConfiguration.java     |  5 ++++
 .../src/main/resources/yarn-default.xml         |  7 ++++++
 .../PerNodeTimelineCollectorsAuxService.java    | 25 +++++++++++++-------
 ...TestPerNodeTimelineCollectorsAuxService.java | 11 +++++----
 5 files changed, 38 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/5157c306/hadoop-yarn-project/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index 5bac262..7827d77 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -208,6 +208,9 @@ Branch YARN-2928: Timeline Server Next Generation: Phase 1
     YARN-4350. TestDistributedShell fails for V2 scenarios. (Naganarasimha G R
     via varunsaxena)
 
+    YARN-3995. Some of the NM events are not getting published due race
+    condition when AM container finishes in NM (Naganarasimha G R via sjlee)
+
 Trunk - Unreleased
 
   INCOMPATIBLE CHANGES

http://git-wip-us.apache.org/repos/asf/hadoop/blob/5157c306/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
index 61b37c6..6a12d8f 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
@@ -1756,6 +1756,11 @@ public class YarnConfiguration extends Configuration {
   public static final int
       DEFAULT_TIMELINE_SERVICE_WRITER_FLUSH_INTERVAL_SECONDS = 60;
 
+  public static final String ATS_APP_COLLECTOR_LINGER_PERIOD_IN_MS =
+      TIMELINE_SERVICE_PREFIX + "app-collector.linger-period.ms";
+
+  public static final int DEFAULT_ATS_APP_COLLECTOR_LINGER_PERIOD_IN_MS = 1000;
+
   // mark app-history related configs @Private as application history is going
   // to be integrated into the timeline service
   @Private

http://git-wip-us.apache.org/repos/asf/hadoop/blob/5157c306/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
index 077fb5d..b521599 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
@@ -2074,6 +2074,13 @@
     <value>60</value>
   </property>
 
+  <property>
+    <description>Time period till which the application collector will be alive
+     in NM, after the  application master container finishes.</description>
+    <name>yarn.timeline-service.app-collector.linger-period.ms</name>
+    <value>1000</value>
+  </property>
+
   <!--  Shared Cache Configuration -->
 
   <property>

http://git-wip-us.apache.org/repos/asf/hadoop/blob/5157c306/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice/src/main/java/org/apache/hadoop/yarn/server/timelineservice/collector/PerNodeTimelineCollectorsAuxService.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice/src/main/java/org/apache/hadoop/yarn/server/timelineservice/collector/PerNodeTimelineCollectorsAuxService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice/src/main/java/org/apache/hadoop/yarn/server/timelineservice/collector/PerNodeTimelineCollectorsAuxService.java
index 0319e34..b738530 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice/src/main/java/org/apache/hadoop/yarn/server/timelineservice/collector/PerNodeTimelineCollectorsAuxService.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice/src/main/java/org/apache/hadoop/yarn/server/timelineservice/collector/PerNodeTimelineCollectorsAuxService.java
@@ -19,6 +19,9 @@
 package org.apache.hadoop.yarn.server.timelineservice.collector;
 
 import java.nio.ByteBuffer;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -54,6 +57,8 @@ public class PerNodeTimelineCollectorsAuxService extends AuxiliaryService {
   private static final int SHUTDOWN_HOOK_PRIORITY = 30;
 
   private final NodeTimelineCollectorManager collectorManager;
+  private long collectorLingerPeriod;
+  private ScheduledExecutorService scheduler;
 
   public PerNodeTimelineCollectorsAuxService() {
     this(new NodeTimelineCollectorManager());
@@ -70,6 +75,10 @@ public class PerNodeTimelineCollectorsAuxService extends AuxiliaryService {
     if (!YarnConfiguration.timelineServiceV2Enabled(conf)) {
       throw new YarnException("Timeline service v2 is not enabled");
     }
+    collectorLingerPeriod =
+        conf.getLong(YarnConfiguration.ATS_APP_COLLECTOR_LINGER_PERIOD_IN_MS,
+            YarnConfiguration.DEFAULT_ATS_APP_COLLECTOR_LINGER_PERIOD_IN_MS);
+    scheduler = Executors.newSingleThreadScheduledExecutor();
     collectorManager.init(conf);
     super.serviceInit(conf);
   }
@@ -82,6 +91,12 @@ public class PerNodeTimelineCollectorsAuxService extends AuxiliaryService {
 
   @Override
   protected void serviceStop() throws Exception {
+    scheduler.shutdown();
+    if (!scheduler.awaitTermination(collectorLingerPeriod,
+        TimeUnit.MILLISECONDS)) {
+      LOG.warn(
+          "Scheduler terminated before removing the application collectors");
+    }
     collectorManager.stop();
     super.serviceStop();
   }
@@ -141,17 +156,11 @@ public class PerNodeTimelineCollectorsAuxService extends AuxiliaryService {
     if (context.getContainerType() == ContainerType.APPLICATION_MASTER) {
       final ApplicationId appId =
           context.getContainerId().getApplicationAttemptId().getApplicationId();
-      new Thread(new Runnable() {
+      scheduler.schedule(new Runnable() {
         public void run() {
-          try {
-            // TODO Temporary Fix until solution for YARN-3995 is finalized.
-            Thread.sleep(1000l);
-          } catch (InterruptedException e) {
-            e.printStackTrace();
-          }
           removeApplication(appId);
         }
-      }).start();
+      }, collectorLingerPeriod, TimeUnit.MILLISECONDS);
     }
   }
 

http://git-wip-us.apache.org/repos/asf/hadoop/blob/5157c306/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice/src/test/java/org/apache/hadoop/yarn/server/timelineservice/collector/TestPerNodeTimelineCollectorsAuxService.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice/src/test/java/org/apache/hadoop/yarn/server/timelineservice/collector/TestPerNodeTimelineCollectorsAuxService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice/src/test/java/org/apache/hadoop/yarn/server/timelineservice/collector/TestPerNodeTimelineCollectorsAuxService.java
index 4fdf47e..f2775d5 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice/src/test/java/org/apache/hadoop/yarn/server/timelineservice/collector/TestPerNodeTimelineCollectorsAuxService.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice/src/test/java/org/apache/hadoop/yarn/server/timelineservice/collector/TestPerNodeTimelineCollectorsAuxService.java
@@ -22,12 +22,14 @@ import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
-import static org.mockito.Mockito.any;
+import static org.mockito.Matchers.any;
 import static org.mockito.Mockito.doReturn;
 import static org.mockito.Mockito.mock;
 import static org.mockito.Mockito.spy;
 import static org.mockito.Mockito.when;
 
+import java.io.IOException;
+
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.util.ExitUtil;
 import org.apache.hadoop.util.Shell;
@@ -45,8 +47,6 @@ import org.apache.hadoop.yarn.server.api.protocolrecords.GetTimelineCollectorCon
 import org.junit.After;
 import org.junit.Test;
 
-import java.io.IOException;
-
 public class TestPerNodeTimelineCollectorsAuxService {
   private ApplicationAttemptId appAttemptId;
   private PerNodeTimelineCollectorsAuxService auxService;
@@ -103,8 +103,9 @@ public class TestPerNodeTimelineCollectorsAuxService {
     when(context.getContainerType()).thenReturn(
         ContainerType.APPLICATION_MASTER);
     auxService.stopContainer(context);
-
-    // TODO Temporary Fix until solution for YARN-3995 is finalized
+    // auxService should have the app's collector and need to remove only after
+    // a configured period
+    assertTrue(auxService.hasApplication(appAttemptId.getApplicationId()));
     for (int i = 0; i < 4; i++) {
       Thread.sleep(500l);
       if (!auxService.hasApplication(appAttemptId.getApplicationId())) {


Mime
View raw message