beam-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "ASF GitHub Bot (JIRA)" <j...@apache.org>
Subject [jira] [Work logged] (BEAM-4130) Portable Flink runner JobService entry point in a Docker container
Date Wed, 13 Jun 2018 22:26:01 GMT

     [ https://issues.apache.org/jira/browse/BEAM-4130?focusedWorklogId=111707&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-111707
]

ASF GitHub Bot logged work on BEAM-4130:
----------------------------------------

                Author: ASF GitHub Bot
            Created on: 13/Jun/18 22:25
            Start Date: 13/Jun/18 22:25
    Worklog Time Spent: 10m 
      Work Description: angoenka commented on a change in pull request #5493: [BEAM-4130]
Add job submission capabilities to Flink runner.
URL: https://github.com/apache/beam/pull/5493#discussion_r195210651
 
 

 ##########
 File path: runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkJobInvocation.java
 ##########
 @@ -0,0 +1,186 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink;
+
+import static com.google.common.base.Preconditions.checkArgument;
+
+import com.google.common.util.concurrent.FutureCallback;
+import com.google.common.util.concurrent.Futures;
+import com.google.common.util.concurrent.ListenableFuture;
+import com.google.common.util.concurrent.ListeningExecutorService;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.function.Consumer;
+import javax.annotation.Nullable;
+import org.apache.beam.model.jobmanagement.v1.JobApi.JobMessage;
+import org.apache.beam.model.jobmanagement.v1.JobApi.JobState.Enum;
+import org.apache.beam.model.pipeline.v1.RunnerApi;
+import org.apache.beam.runners.core.construction.PipelineOptionsTranslation;
+import org.apache.beam.runners.core.construction.graph.GreedyPipelineFuser;
+import org.apache.beam.runners.fnexecution.jobsubmission.JobInvocation;
+import org.apache.beam.runners.fnexecution.provisioning.JobInfo;
+import org.apache.beam.sdk.PipelineResult;
+import org.apache.beam.sdk.metrics.MetricsEnvironment;
+import org.apache.flink.api.common.JobExecutionResult;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Invocation of a Flink Job via {@link FlinkRunner}.
+ */
+public class FlinkJobInvocation implements JobInvocation {
+  private static final Logger LOG = LoggerFactory.getLogger(FlinkJobInvocation.class);
+
+  public static FlinkJobInvocation create(String id, ListeningExecutorService executorService,
+      RunnerApi.Pipeline pipeline, FlinkPipelineOptions pipelineOptions) {
+    return new FlinkJobInvocation(id, executorService, pipeline, pipelineOptions);
+  }
+
+  private final String id;
+  private final ListeningExecutorService executorService;
+  private final RunnerApi.Pipeline pipeline;
+  private final FlinkPipelineOptions pipelineOptions;
+  private Enum jobState;
+  private List<Consumer<Enum>> stateObservers;
+
+  @Nullable
+  private ListenableFuture<PipelineResult> invocationFuture;
+
+  private FlinkJobInvocation(String id, ListeningExecutorService executorService,
+      RunnerApi.Pipeline pipeline, FlinkPipelineOptions pipelineOptions) {
+    this.id = id;
+    this.executorService = executorService;
+    this.pipeline = pipeline;
+    this.pipelineOptions = pipelineOptions;
+    this.invocationFuture = null;
+    this.jobState = Enum.STOPPED;
+    this.stateObservers = new ArrayList<>();
+  }
+
+  private PipelineResult runPipeline() throws Exception {
+    LOG.trace("Translating pipeline from proto");
+
+    MetricsEnvironment.setMetricsSupported(true);
+
+    LOG.info("Translating pipeline to Flink program.");
+    // Fused pipeline proto.
+    RunnerApi.Pipeline fusedPipeline = GreedyPipelineFuser.fuse(pipeline).toPipeline();
+    JobInfo jobInfo = JobInfo.create(
+        id, pipelineOptions.getJobName(), PipelineOptionsTranslation.toProto(pipelineOptions));
+    final JobExecutionResult result;
+
+    if (!pipelineOptions.isStreaming() && !hasUnboundedPCollections(fusedPipeline))
{
+      // TODO: Do we need to inspect for unbounded sources before fusing?
+      // batch translation
+      FlinkBatchPortablePipelineTranslator translator =
+          FlinkBatchPortablePipelineTranslator.createTranslator();
+      FlinkBatchPortablePipelineTranslator.BatchTranslationContext context =
+          FlinkBatchPortablePipelineTranslator.createTranslationContext(jobInfo);
+      translator.translate(context, fusedPipeline);
+      result = context.getExecutionEnvironment().execute(pipelineOptions.getJobName());
+    } else {
+      // streaming translation
+      FlinkStreamingPortablePipelineTranslator translator =
+          new FlinkStreamingPortablePipelineTranslator();
+      FlinkStreamingPortablePipelineTranslator.StreamingTranslationContext context =
+          FlinkStreamingPortablePipelineTranslator.createTranslationContext(jobInfo);
+      translator.translate(context, fusedPipeline);
+      result = context.getExecutionEnvironment().execute(pipelineOptions.getJobName());
+    }
+
+    return FlinkRunner.createPipelineResult(result, pipelineOptions);
+  }
+
+  @Override
+  public void start() {
+    LOG.trace("Starting job invocation {}", getId());
+    synchronized (this) {
+      setState(Enum.STARTING);
+      invocationFuture = executorService.submit(this::runPipeline);
+      setState(Enum.RUNNING);
+      Futures.addCallback(
+          invocationFuture,
+          new FutureCallback<PipelineResult>() {
+            @Override
+            public void onSuccess(
+                @Nullable PipelineResult pipelineResult) {
+              setState(Enum.DONE);
+            }
+
+            @Override
+            public void onFailure(Throwable throwable) {
+              String message = String.format("Error during job invocation %s.", getId());
+              LOG.error(message, throwable);
+              setState(Enum.FAILED);
+            }
+          },
+          executorService
+      );
+    }
+  }
+
+  @Override
+  public String getId() {
+    return id;
+  }
+
+  @Override
+  public void cancel() {
+    LOG.trace("Canceling job invocation {}", getId());
+    synchronized (this) {
+      if (this.invocationFuture != null) {
+        this.invocationFuture.cancel(true /* mayInterruptIfRunning */);
+      }
+    }
+  }
+
+  @Override
+  public synchronized Enum getState() {
+    return this.jobState;
+  }
+
+  @Override
+  public synchronized void addStateListener(Consumer<Enum> stateStreamObserver) {
+    stateStreamObserver.accept(getState());
+    stateObservers.add(stateStreamObserver);
+  }
+
+  @Override
+  public synchronized void addMessageListener(Consumer<JobMessage> messageStreamObserver)
{
+    LOG.warn("addMessageObserver() not yet implemented.");
+  }
+
+  private synchronized void setState(Enum state) {
+    this.jobState = state;
+    for (Consumer<Enum> observer : stateObservers) {
+      observer.accept(state);
 
 Review comment:
   We should not call external code from a synchronized block.
   We can delegate the execution to an executor which also maintains task ordering.
   
   But we can keep this as it is if we want to block state changes until all the observers are
   done processing.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Issue Time Tracking
-------------------

    Worklog Id:     (was: 111707)
    Time Spent: 2h 50m  (was: 2h 40m)

> Portable Flink runner JobService entry point in a Docker container
> ------------------------------------------------------------------
>
>                 Key: BEAM-4130
>                 URL: https://issues.apache.org/jira/browse/BEAM-4130
>             Project: Beam
>          Issue Type: New Feature
>          Components: runner-flink
>            Reporter: Ben Sidhom
>            Priority: Minor
>          Time Spent: 2h 50m
>  Remaining Estimate: 0h
>
> The portable Flink runner exists as a Job Service that runs somewhere. We need a main
entry point that itself spins up the job service (and artifact staging service). The main
program itself should be packaged into an uberjar such that it can be run locally or submitted
to a Flink deployment via `flink run`.



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Mime
View raw message