reef-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "ASF GitHub Bot (JIRA)" <j...@apache.org>
Subject [jira] [Commented] (REEF-2025) A new module containing the new Java bridge
Date Thu, 07 Jun 2018 23:34:03 GMT

    [ https://issues.apache.org/jira/browse/REEF-2025?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16505499#comment-16505499
] 

ASF GitHub Bot commented on REEF-2025:
--------------------------------------

motus commented on a change in pull request #1466: [REEF-2025] A new module containing the
new Java bridge
URL: https://github.com/apache/reef/pull/1466#discussion_r193917609
 
 

 ##########
 File path: lang/java/reef-bridge-proto-java/src/main/java/org/apache/reef/bridge/driver/service/grpc/GRPCDriverService.java
 ##########
 @@ -0,0 +1,1084 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.reef.bridge.driver.service.grpc;
+
+import com.google.protobuf.ByteString;
+import io.grpc.*;
+import io.grpc.stub.StreamObserver;
+import org.apache.commons.lang.StringUtils;
+import org.apache.reef.bridge.driver.service.DriverClientException;
+import org.apache.reef.bridge.driver.service.IDriverService;
+import org.apache.reef.bridge.service.parameters.DriverClientCommand;
+import org.apache.reef.bridge.proto.*;
+import org.apache.reef.bridge.proto.Void;
+import org.apache.reef.driver.context.ActiveContext;
+import org.apache.reef.driver.context.ClosedContext;
+import org.apache.reef.driver.context.ContextMessage;
+import org.apache.reef.driver.context.FailedContext;
+import org.apache.reef.driver.evaluator.*;
+import org.apache.reef.driver.restart.DriverRestartCompleted;
+import org.apache.reef.driver.restart.DriverRestarted;
+import org.apache.reef.driver.task.*;
+import org.apache.reef.runtime.common.driver.context.EvaluatorContext;
+import org.apache.reef.runtime.common.driver.evaluator.AllocatedEvaluatorImpl;
+import org.apache.reef.runtime.common.driver.idle.IdleMessage;
+import org.apache.reef.runtime.common.utils.ExceptionCodec;
+import org.apache.reef.tang.annotations.Parameter;
+import org.apache.reef.tang.formats.ConfigurationSerializer;
+import org.apache.reef.util.OSUtils;
+import org.apache.reef.util.Optional;
+import org.apache.reef.wake.EventHandler;
+import org.apache.reef.wake.remote.ports.TcpPortProvider;
+import org.apache.reef.wake.time.Clock;
+import org.apache.reef.wake.time.event.Alarm;
+import org.apache.reef.wake.time.event.StartTime;
+import org.apache.reef.wake.time.event.StopTime;
+
+import javax.inject.Inject;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.Future;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+/**
+ * GRPC DriverBridgeService that interacts with higher-level languages.
+ */
+public final class GRPCDriverService implements IDriverService {
+  private static final Logger LOG = Logger.getLogger(GRPCDriverService.class.getName());
+
+  private static final Void VOID = Void.newBuilder().build();
+
+  private Process driverProcess;
+
+  private enum StreamType { STDOUT, STDERR }
+
+  private Server server;
+
+  private DriverClientGrpc.DriverClientFutureStub clientStub;
+
+  private final Clock clock;
+
+  private final ExceptionCodec exceptionCodec;
+
+  private final ConfigurationSerializer configurationSerializer;
+
+  private final EvaluatorRequestor evaluatorRequestor;
+
+  private final JVMProcessFactory jvmProcessFactory;
+
+  private final CLRProcessFactory clrProcessFactory;
+
+  private final TcpPortProvider tcpPortProvider;
+
+  private final String driverClientCommand;
+
+  private final Map<String, AllocatedEvaluator> allocatedEvaluatorMap = new HashMap<>();
+
+  private final Map<String, ActiveContext> activeContextMap = new HashMap<>();
+
+  private final Map<String, RunningTask> runningTaskMap = new HashMap<>();
+
+  private boolean stopped = false;
+
+  /**
+   * Injectable constructor that stores the collaborators of this service.
+   * @param clock the clock that is stopped when this service stops
+   * @param evaluatorRequestor the evaluator requestor kept for later use
+   * @param configurationSerializer the configuration serializer kept for later use
+   * @param jvmProcessFactory factory for JVM evaluator processes
+   * @param clrProcessFactory factory for CLR evaluator processes
+   * @param tcpPortProvider supplies the candidate ports for the gRPC server
+   * @param exceptionCodec codec used to serialize exceptions sent to the driver client
+   * @param driverClientCommand command line used to launch the driver client process
+   */
+  @Inject
+  private GRPCDriverService(
+      final Clock clock,
+      final EvaluatorRequestor evaluatorRequestor,
+      final ConfigurationSerializer configurationSerializer,
+      final JVMProcessFactory jvmProcessFactory,
+      final CLRProcessFactory clrProcessFactory,
+      final TcpPortProvider tcpPortProvider,
+      final ExceptionCodec exceptionCodec,
+      @Parameter(DriverClientCommand.class) final String driverClientCommand) {
+    // Assignments follow the parameter order.
+    this.clock = clock;
+    this.evaluatorRequestor = evaluatorRequestor;
+    this.configurationSerializer = configurationSerializer;
+    this.jvmProcessFactory = jvmProcessFactory;
+    this.clrProcessFactory = clrProcessFactory;
+    this.tcpPortProvider = tcpPortProvider;
+    this.exceptionCodec = exceptionCodec;
+    this.driverClientCommand = driverClientCommand;
+  }
+
+  private void start() throws IOException, InterruptedException {
+    for (final Integer port : this.tcpPortProvider) {
+      try {
+        this.server = ServerBuilder.forPort(port)
+            .addService(new DriverBridgeServiceImpl())
+            .build()
+            .start();
+        LOG.info("Server started, listening on " + port);
+        break;
+      } catch (IOException e) {
+        LOG.log(Level.WARNING, "Unable to bind to port [{0}]", port);
+      }
+    }
+    if (this.server == null || this.server.isTerminated()) {
+      throw new IOException("Unable to start gRPC server");
+    } else {
+      final String cmd = this.driverClientCommand + " " + this.server.getPort();
+      final List<String> cmdOs = OSUtils.isWindows() ?
+          Arrays.asList("cmd.exe", "/c", cmd) : Arrays.asList("/bin/sh", "-c", cmd);
+      LOG.log(Level.INFO, "CMD: " + cmdOs);
+      this.driverProcess = new ProcessBuilder()
+          .command(cmdOs)
+          .redirectError(new File("driverclient.stderr"))
+          .redirectOutput(new File("driverclient.stdout"))
+          .directory(new File(System.getProperty("user.dir")))
+          .start();
+      synchronized (this) {
+        int attempts = 10; // give some time
+        // wait for driver client process to register
+        while (attempts-- > 0 && this.clientStub == null && driverProcessIsAlive())
{
+          LOG.log(Level.INFO, "waiting for driver process to register");
+          this.wait(1000); // a second
+        }
+      }
+      if (driverProcessIsAlive()) {
+        Thread closeChildThread = new Thread() {
+          public void run() {
+            synchronized (GRPCDriverService.this) {
+              if (GRPCDriverService.this.driverProcess != null) {
+                GRPCDriverService.this.driverProcess.destroy();
+                GRPCDriverService.this.driverProcess = null;
+              }
+            }
+          }
+        };
+        Runtime.getRuntime().addShutdownHook(closeChildThread);
+      }
+    }
+  }
+
+  /**
+   * Stops this service without an error cause.
+   */
+  private void stop() {
+    this.stop(null);
+  }
+
+  /**
+   * Stops the clock, shuts down the gRPC server and destroys the driver client process.
+   * Only the first call does any work; later calls just log.
+   * @param t the error that caused the stop, or null for a regular stop
+   */
+  private void stop(final Throwable t) {
+    LOG.log(Level.INFO, "STOP: gRPC Driver Service", t);
+    if (this.stopped) {
+      return;
+    }
+    try {
+      if (!this.clock.isClosed()) {
+        if (t == null) {
+          this.clock.stop();
+        } else {
+          this.clock.stop(t);
+        }
+      }
+      if (this.server != null) {
+        LOG.log(Level.INFO, "Shutdown gRPC");
+        this.server.shutdown();
+        this.server = null;
+      }
+      if (this.driverProcess != null) {
+        LOG.log(Level.INFO, "shutdown driver process");
+        dump();
+        this.driverProcess.destroy();
+        this.driverProcess = null;
+      }
+    } finally {
+      // Mark as stopped even if one of the shutdown steps threw.
+      LOG.log(Level.INFO, "COMPLETED STOP: gRPC Driver Service");
+      this.stopped = true;
+    }
+  }
+
+  private void dump() {
+    if (!driverProcessIsAlive()) {
+      LOG.log(Level.INFO, "Exit code: " + this.driverProcess.exitValue());
+    }
+    dumpStream(StreamType.STDOUT);
+    dumpStream(StreamType.STDERR);
+  }
+
+  private void dumpStream(final StreamType type) {
+    StringBuffer buffer = new StringBuffer();
+
+    String name = "";
+    InputStream stream = null;
+    switch(type) {
+    case STDOUT:
+      name = "stdout";
+      stream = this.driverProcess.getInputStream();
+      break;
+    case STDERR:
+      name = "stderr";
+      stream = this.driverProcess.getErrorStream();
+      break;
+    default:
+      LOG.log(Level.WARNING, "Invalid stream type value");
+    }
+
+    LOG.log(Level.INFO, "capturing driver process " + name);
+    try {
+      int nextChar;
+      buffer.append("\n==============================================\n");
+      while ((nextChar = stream.read()) != -1) {
+        buffer.append((char) nextChar);
+      }
+      buffer.append("\n==============================================\n");
+    } catch (IOException e) {
+      LOG.log(Level.WARNING, "Error while capturing output stream: " + e.getMessage());
+    }
+    LOG.log(Level.INFO, buffer.toString());
+  }
+
+  /**
+   * Reports whether the driver process is currently running.
+   * The check asks the process for its exit value; an
+   * {@link IllegalThreadStateException} from that call means it has not exited yet.
+   * @return true if a driver process exists and has not exited, false otherwise
+   */
+  private boolean driverProcessIsAlive() {
+    if (this.driverProcess == null) {
+      return false;
+    }
+    try {
+      this.driverProcess.exitValue();
+      return false;
+    } catch (IllegalThreadStateException stillRunning) {
+      return true;
+    }
+  }
+
+  private EvaluatorDescriptorInfo toEvaluatorDescriptorInfo(final EvaluatorDescriptor descriptor)
{
+    if (descriptor == null) {
+      return null;
+    } else {
+      return EvaluatorDescriptorInfo.newBuilder()
+          .setCores(descriptor.getNumberOfCores())
+          .setMemory(descriptor.getMemory())
+          .setRuntimeName(descriptor.getRuntimeName())
+          .build();
+    }
+  }
+
+  /**
+   * Asks the driver client whether it is idle and reports the answer.
+   * If the call to the client fails or is interrupted, the service is stopped and
+   * the default idle message is returned.
+   * @return the idle status reported by the driver client, or an idle message
+   *     with reason "stub not initialized" when no answer could be obtained
+   */
+  @Override
+  public IdleMessage getIdleStatus() {
+    final String componentName = "Java Bridge DriverService";
+    if (this.clientStub != null) {
+      try {
+        final IdleStatus idleStatus = this.clientStub.idlenessCheckHandler(VOID).get();
+        LOG.log(Level.INFO, "is idle: " + idleStatus.getIsIdle());
+        return new IdleMessage(
+            componentName,
+            idleStatus.getReason(),
+            idleStatus.getIsIdle());
+      } catch (InterruptedException e) {
+        // Restore the interrupt flag so code further up the stack can observe the interruption.
+        Thread.currentThread().interrupt();
+        stop(e);
+      } catch (ExecutionException e) {
+        stop(e);
+      }
+    }
+    return new IdleMessage(
+        componentName,
+        "stub not initialized",
+        true);
+  }
+
+  /**
+   * Starts the gRPC server and the driver client process, then forwards the start time
+   * to the driver client. The service is stopped if startup fails or no client stub is set.
+   * @param startTime the start event whose timestamp is forwarded
+   */
+  @Override
+  public void startHandler(final StartTime startTime) {
+    try {
+      start();
+      synchronized (this) {
+        if (this.clientStub != null) {
+          this.clientStub.startHandler(
+              StartTimeInfo.newBuilder().setStartTime(startTime.getTimestamp()).build());
+        } else {
+          stop(new IllegalStateException("Unable to start driver client"));
+        }
+      }
+    } catch (InterruptedException e) {
+      // Restore the interrupt flag before shutting down so the interruption is not lost.
+      Thread.currentThread().interrupt();
+      stop(e);
+    } catch (IOException e) {
+      stop(e);
+    }
+  }
+
+  /**
+   * Forwards the stop time to the driver client and waits up to five minutes for it to finish.
+   * The service is stopped afterwards in every case. Does nothing when no client stub is set.
+   * @param stopTime the stop event whose timestamp is forwarded
+   * @throws RuntimeException if the driver client reports an error, the call times out,
+   *     the call fails, or the wait is interrupted
+   */
+  @Override
+  public void stopHandler(final StopTime stopTime) {
+    synchronized (this) {
+      if (clientStub != null) {
+        final Future<ExceptionInfo> callCompletion = this.clientStub.stopHandler(
+            StopTimeInfo.newBuilder().setStopTime(stopTime.getTimestamp()).build());
+        try {
+          final ExceptionInfo error = callCompletion.get(5L, TimeUnit.MINUTES);
+          if (!error.getNoError()) {
+            final Optional<Throwable> t = parseException(error);
+            if (t.isPresent()) {
+              // Prefer the cause of the parsed exception when it has one.
+              throw new RuntimeException("driver stop exception",
+                  t.get().getCause() != null ? t.get().getCause() : t.get());
+            } else {
+              throw new RuntimeException(
+                  error.getMessage() != null ? error.getMessage() : error.getName());
+            }
+          }
+        } catch (TimeoutException e) {
+          throw new RuntimeException("stop handler timed out", e);
+        } catch (InterruptedException e) {
+          // Restore the interrupt flag before translating to an unchecked exception.
+          Thread.currentThread().interrupt();
+          throw new RuntimeException(e);
+        } catch (ExecutionException e) {
+          throw new RuntimeException(e);
+        } finally {
+          stop();
+        }
+      }
+    }
+  }
+
+  /**
+   * Records the allocated evaluator under its id and notifies the driver client.
+   * @param eval the evaluator that was allocated
+   */
+  @Override
+  public void allocatedEvaluatorHandler(final AllocatedEvaluator eval) {
+    synchronized (this) {
+      final String evaluatorId = eval.getId();
+      this.allocatedEvaluatorMap.put(evaluatorId, eval);
+      final EvaluatorInfo evaluatorInfo = EvaluatorInfo.newBuilder()
+          .setEvaluatorId(eval.getId())
+          .setDescriptorInfo(toEvaluatorDescriptorInfo(eval.getEvaluatorDescriptor()))
+          .build();
+      this.clientStub.allocatedEvaluatorHandler(evaluatorInfo);
+    }
+  }
+
+  /**
+   * Forgets the completed evaluator and notifies the driver client.
+   * @param eval the evaluator that completed
+   */
+  @Override
+  public void completedEvaluatorHandler(final CompletedEvaluator eval) {
+    synchronized (this) {
+      this.allocatedEvaluatorMap.remove(eval.getId());
+      final EvaluatorInfo evaluatorInfo =
+          EvaluatorInfo.newBuilder().setEvaluatorId(eval.getId()).build();
+      this.clientStub.completedEvaluatorHandler(evaluatorInfo);
+    }
+  }
+
+  /**
+   * Forgets the failed evaluator and notifies the driver client.
+   * @param eval the evaluator that failed
+   */
+  @Override
+  public void failedEvaluatorHandler(final FailedEvaluator eval) {
+    synchronized (this) {
+      this.allocatedEvaluatorMap.remove(eval.getId());
+      final EvaluatorInfo evaluatorInfo =
+          EvaluatorInfo.newBuilder().setEvaluatorId(eval.getId()).build();
+      this.clientStub.failedEvaluatorHandler(evaluatorInfo);
+    }
+  }
+
+  /**
+   * Records the active context under its id and notifies the driver client.
+   * An absent parent id is sent as the empty string.
+   * @param context the context that became active
+   */
+  @Override
+  public void activeContextHandler(final ActiveContext context) {
+    synchronized (this) {
+      this.activeContextMap.put(context.getId(), context);
+      final ContextInfo.Builder contextInfo = ContextInfo.newBuilder();
+      contextInfo.setContextId(context.getId());
+      contextInfo.setEvaluatorId(context.getEvaluatorId());
+      if (context.getParentId().isPresent()) {
+        contextInfo.setParentId(context.getParentId().get());
+      } else {
+        contextInfo.setParentId("");
+      }
+      contextInfo.setEvaluatorDescriptorInfo(
+          toEvaluatorDescriptorInfo(context.getEvaluatorDescriptor()));
+      this.clientStub.activeContextHandler(contextInfo.build());
+    }
+  }
+
+  /**
+   * Forgets the closed context and notifies the driver client.
+   * @param context the context that was closed
+   */
+  @Override
+  public void closedContextHandler(final ClosedContext context) {
+    synchronized (this) {
+      this.activeContextMap.remove(context.getId());
+      final ContextInfo.Builder contextInfo = ContextInfo.newBuilder();
+      contextInfo.setContextId(context.getId());
+      contextInfo.setEvaluatorId(context.getEvaluatorId());
+      contextInfo.setParentId(context.getParentContext().getId());
+      contextInfo.setEvaluatorDescriptorInfo(
+          toEvaluatorDescriptorInfo(context.getEvaluatorDescriptor()));
+      this.clientStub.closedContextHandler(contextInfo.build());
+    }
+  }
+
+  /**
+   * Forgets the failed context and notifies the driver client, attaching failure details.
+   * The details come from the failure reason if present, otherwise from the raw failure data
+   * if present, otherwise from the context converted to an error.
+   * @param context the context that failed
+   */
+  @Override
+  public void failedContextHandler(final FailedContext context) {
+    synchronized (this) {
+      final ContextInfo.Builder contextInfoBuilder = ContextInfo.newBuilder();
+      contextInfoBuilder.setContextId(context.getId());
+      contextInfoBuilder.setEvaluatorId(context.getEvaluatorId());
+      if (context.getParentContext().isPresent()) {
+        contextInfoBuilder.setParentId(context.getParentContext().get().getId());
+      } else {
+        contextInfoBuilder.setParentId("");
+      }
+      contextInfoBuilder.setEvaluatorDescriptorInfo(
+          toEvaluatorDescriptorInfo(context.getEvaluatorDescriptor()));
+
+      final ExceptionInfo.Builder exceptionInfo = ExceptionInfo.newBuilder();
+      if (context.getReason().isPresent()) {
+        final Throwable cause = context.getReason().get();
+        exceptionInfo.setName(cause.toString());
+        exceptionInfo.setMessage(context.getMessage() != null ? context.getMessage() : "");
+        exceptionInfo.setData(ByteString.copyFrom(exceptionCodec.toBytes(cause)));
+      } else if (context.getData().isPresent()) {
+        exceptionInfo.setName(context.toString());
+        // The description wins over the message; the empty string is the last resort.
+        if (context.getDescription().isPresent()) {
+          exceptionInfo.setMessage(context.getDescription().get());
+        } else {
+          exceptionInfo.setMessage(context.getMessage() != null ? context.getMessage() : "");
+        }
+        exceptionInfo.setData(ByteString.copyFrom(context.getData().get()));
+      } else {
+        final Throwable cause = context.asError();
+        exceptionInfo.setName(cause.toString());
+        exceptionInfo.setMessage(context.getMessage() != null ? context.getMessage() : "");
+        exceptionInfo.setData(ByteString.copyFrom(exceptionCodec.toBytes(cause)));
+      }
+      contextInfoBuilder.setException(exceptionInfo.build());
+
+      this.activeContextMap.remove(context.getId());
+      this.clientStub.failedContextHandler(contextInfoBuilder.build());
+    }
+  }
+
+  /**
+   * Forwards a context message (context id, source id, sequence number and payload)
+   * to the driver client.
+   * @param message the message received from a context
+   */
+  @Override
+  public void contextMessageHandler(final ContextMessage message) {
+    synchronized (this) {
+      final ContextMessageInfo.Builder messageInfo = ContextMessageInfo.newBuilder();
+      messageInfo.setContextId(message.getId());
+      messageInfo.setMessageSourceId(message.getMessageSourceID());
+      messageInfo.setSequenceNumber(message.getSequenceNumber());
+      messageInfo.setPayload(ByteString.copyFrom(message.get()));
+      this.clientStub.contextMessageHandler(messageInfo.build());
+    }
+  }
+
+  /**
+   * Records the running task under its id, records its context if not yet known,
+   * and notifies the driver client. An absent parent id is sent as the empty string.
+   * @param task the task that started running
+   */
+  @Override
+  public void runningTaskHandler(final RunningTask task) {
+    synchronized (this) {
+      final ActiveContext context = task.getActiveContext();
+      final boolean contextKnown = this.activeContextMap.containsKey(context.getId());
+      if (!contextKnown) {
+        this.activeContextMap.put(context.getId(), context);
+      }
+      this.runningTaskMap.put(task.getId(), task);
+
+      final ContextInfo.Builder contextInfo = ContextInfo.newBuilder();
+      contextInfo.setContextId(context.getId());
+      contextInfo.setEvaluatorId(context.getEvaluatorId());
+      if (context.getParentId().isPresent()) {
+        contextInfo.setParentId(context.getParentId().get());
+      } else {
+        contextInfo.setParentId("");
+      }
+      contextInfo.setEvaluatorDescriptorInfo(
+          toEvaluatorDescriptorInfo(task.getActiveContext().getEvaluatorDescriptor()));
+
+      final TaskInfo taskInfo = TaskInfo.newBuilder()
+          .setTaskId(task.getId())
+          .setContext(contextInfo.build())
+          .build();
+      this.clientStub.runningTaskHandler(taskInfo);
+    }
+  }
+
+  @Override
+  public void failedTaskHandler(final FailedTask task) {
+    synchronized (this) {
+      if (task.getActiveContext().isPresent() &&
+          !this.activeContextMap.containsKey(task.getActiveContext().get().getId())) {
+        this.activeContextMap.put(task.getActiveContext().get().getId(), task.getActiveContext().get());
+      }
+      final TaskInfo.Builder taskInfoBuilder = TaskInfo.newBuilder()
+          .setTaskId(task.getId());
+      if (task.getActiveContext().isPresent()) {
+        taskInfoBuilder.setContext(ContextInfo.newBuilder()
+            .setContextId(task.getActiveContext().get().getId())
+            .setEvaluatorId(task.getActiveContext().get().getEvaluatorId())
+            .setParentId(task.getActiveContext().get().getParentId().isPresent() ?
+                task.getActiveContext().get().getParentId().get() : "")
+            .build());
+      }
+      if (task.getReason().isPresent()) {
+        final Throwable reason = task.getReason().get();
+        taskInfoBuilder.setException(ExceptionInfo.newBuilder()
 
 Review comment:
   Use `GRPCUtils.createExceptionInfo()` here instead of building the `ExceptionInfo` inline.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


> A new module containing the new Java bridge
> -------------------------------------------
>
>                 Key: REEF-2025
>                 URL: https://issues.apache.org/jira/browse/REEF-2025
>             Project: REEF
>          Issue Type: Sub-task
>          Components: REEF Bridge
>    Affects Versions: 0.17
>            Reporter: Tyson Condie
>            Assignee: Tyson Condie
>            Priority: Major
>             Fix For: 0.17
>
>
> This Jira introduces the module containing the new bridge. 



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Mime
View raw message