beam-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From k...@apache.org
Subject [1/3] beam git commit: Inline PTransform to GraphNode, removing generic design
Date Fri, 24 Feb 2017 23:07:16 GMT
Repository: beam
Updated Branches:
  refs/heads/master 00b789d9e -> 49c9de1e8


Inline PTransform to GraphNode, removing generic design

The GraphNode structure was made more generic to allow the Runner API
and Fn API to share the graph data structure while carrying distinct
payloads on nodes and edges. It seems that the Runner API was already
sufficiently flexible for the Fn API to use its existing payload
design.


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/672d12af
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/672d12af
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/672d12af

Branch: refs/heads/master
Commit: 672d12af848bfab3b7d05c2ec0086157db4604fe
Parents: 00b789d
Author: Kenneth Knowles <klk@google.com>
Authored: Thu Feb 23 17:51:10 2017 -0800
Committer: Kenneth Knowles <klk@google.com>
Committed: Fri Feb 24 14:38:27 2017 -0800

----------------------------------------------------------------------
 .../src/main/proto/beam_runner_api.proto        | 135 +++++++------------
 1 file changed, 48 insertions(+), 87 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/672d12af/sdks/common/runner-api/src/main/proto/beam_runner_api.proto
----------------------------------------------------------------------
diff --git a/sdks/common/runner-api/src/main/proto/beam_runner_api.proto b/sdks/common/runner-api/src/main/proto/beam_runner_api.proto
index c030e73..32c53fb 100644
--- a/sdks/common/runner-api/src/main/proto/beam_runner_api.proto
+++ b/sdks/common/runner-api/src/main/proto/beam_runner_api.proto
@@ -33,10 +33,8 @@ import "google/protobuf/any.proto";
 // A set of mappings from id to message. This is included as an optional field
 // on any proto message that may contain references needing resolution.
 message Components {
-  // (Required) A map from pipeline-scoped id to GraphNode<PTransform, PCollection>
-  //
-  // Each node is required to contain a PTransform specification.
-  map<string, GraphNode> transform_nodes = 1;
+  // (Required) A map from pipeline-scoped id to PTransform.
+  map<string, PTransform> transforms = 1;
 
   // (Required) A map from pipeline-scoped id to PCollection.
   map<string, PCollection> pcollections = 2;
@@ -72,7 +70,6 @@ message MessageWithComponents {
     Coder coder = 2;
     CombinePayload combine_payload = 3;
     FunctionSpec function_spec = 4;
-    GraphNode graph_node = 5;
     ParDoPayload par_do_payload = 6;
     PTransform ptransform = 7;
     PCollection pcollection = 8;
@@ -100,45 +97,56 @@ message Pipeline {
   // this pipeline.
   Components components = 1;
 
-  // (Required) The graph node that is the root of the graph.
-  string root_graph_node = 2;
+  // (Required) The id of the PTransform that is the root of the pipeline.
+  string root_transform_id = 2;
 
   // (Required) Static display data for the pipeline.
   DisplayData display_data = 3;
 }
 
-// A generic node in a bipartite directed hierarchical graph.
-//
-// You can think of this as a GraphNode<NodeT, ConnectionT> where
-// NodeT and ConnectionT are the two types of nodes in the bipartite
-// graph.
-//
-// There is one NodeT on each GraphNode, while the ConnectionT values
-// are named and ordered in the `inputs` and `outputs` fields. For
-// arbitrary graph structures, they are expected to be by-reference.
-//
-// For the Runner API, the type is GraphNode<Transform, PCollection>
-message GraphNode {
+// An applied PTransform! This does not contain the graph data, but only the
+// fields specific to a graph node that is a Runner API transform
+// between PCollections.
+message PTransform {
 
-  // (Required) A URN that describes what kind graph node this is.
+  // (Required) A unique name for the application node.
   //
-  // Specifically, the URN should be enough to decipher the inputs,
-  // outputs, and payload.
-  string urn = 1;
-
-  // (Optional) if this node is contained within a composite, a pointer to the
-  // parent.
-  string parent_id = 2;
+  // Ideally, this should be stable over multiple evolutions of a pipeline
+  // for the purposes of logging and associating pipeline state with a node,
+  // etc.
+  //
+  // If it is not stable, then the runner decides what will happen. But, most
+  // importantly, it must always be here, even if it is autogenerated.
+  string unique_name = 5;
 
-  // (Required) A list of ordered, named inputs to this node.
+  // (Optional) A URN and payload that, together, fully define the semantics
+  // of this transform.
   //
-  // The URN for the graph node may clarify the type of the inputs
-  // (resp. outputs). For example:
+  // If absent, this must be an "anonymous" composite transform.
+  //
+  // For primitive transforms in the Runner API, this is required, and the
+  // payloads are as follows:
+  //
+  //  - when the URN is "urn:beam:transforms:pardo" it is a ParDoPayload
+  //  - when the URN is "urn:beam:transforms:read" it is a ReadPayload
+  //  - when the URN is "urn:beam:transforms:gbk" it is a GroupByKeyPayload
+  //  - when the URN is "urn:beam:transforms:window" it is a WindowPayload
+  //  - when the URN is "urn:beam:transforms:flatten" it is absent
+  //
+  // For some special composite transforms, the payload is also officially
+  // defined:
   //
-  //  - in the Runner API these are PCollections
-  //  - in the Fn API they may be Grpc ports
+  //  - when the URN is "urn:beam:transforms:combine" it is a CombinePayload
   //
-  // The payload for this graph node may clarify the relationship of these
+  UrnWithParameter spec = 1;
+
+  // (Optional) if this node is a composite, a list of the ids of
+  // transforms that it contains.
+  repeated string subtransforms = 2;
+
+  // (Required) A map from local names of inputs to PCollection ids.
+  //
+  // The payload for this transform may clarify the relationship of these
   // inputs. For example:
   //
   //  - for a Flatten transform they are merged
@@ -147,28 +155,20 @@ message GraphNode {
   // All inputs are recorded here so that the topological ordering of
   // the graph is consistent whether or not the payload is understood.
   //
-  repeated Connection inputs = 3;
+  map<string, string> inputs = 3;
 
-  // (Required) A list of ordered, named outputs from this node.
+  // (Required) A map from local names of outputs to PCollection ids.
   //
-  // The URN or payload for the graph node may clarify the type and
-  // relationship of these. For example:
+  // The URN or payload for this transform node may clarify the type and
+  // relationship of these outputs. For example:
   //
   //  - for a ParDo transform, these are tags on PCollections, which will be
   //    embedded in the DoFn.
-  //  - in the Runner API the targets are PCollections
-  //  - in the Fn API the targets may be Grpc ports
   //
-  repeated Connection outputs = 4;
+  map<string, string> outputs = 4;
 
-  // (Required) A payload fully specifying this node.
-  // Schema is determined by the URN.
-  google.protobuf.Any payload = 5;
-
-  message Connection {
-    string name = 1;
-    google.protobuf.Any target = 2;
-  }
+  // (Required) Static display data for this PTransform application.
+  DisplayData display_data = 6;
 }
 
 // A PCollection!
@@ -197,45 +197,6 @@ message PCollection {
   DisplayData display_data = 5;
 }
 
-// An applied PTransform! This does not contain the graph data, but only the
-// fields specific to a graph node that is a Runner API transform
-// between PCollections.
-message PTransform {
-
-  // (Required) A unique name for the application node.
-  //
-  // Ideally, this should be stable over multiple evolutions of a pipeline
-  // for the purposes of logging and associating pipeline state with a node,
-  // etc.
-  //
-  // If it is not stable, then the runner decides what will happen. But, most
-  // importantly, it must always be here, even if it is autogenerated.
-  string unique_name = 1;
-
-  // (Optional) A URN and payload that, together, fully defined the semantics
-  // of this transform.
-  //
-  // If absent, this must be an "anonymous" composite transform.
-  //
-  // For primitive transform in the Runner API, this is required, and the
-  // payloads are as follows:
-  //
-  //  - when the URN is "urn:beam:transforms:pardo" it is a ParDoPayload
-  //  - when the URN is "urn:beam:transforms:read" it is a ReadPayload
-  //  - when the URN is "urn:beam:transforms:gbk" it is a GroupByKeyPayload
-  //  - when the URN is "urn:beam:transforms:window" it is a WindowPayload
-  //  - when the URN is "urn:beam:transforms:flatten" it is absent
-  //
-  // For some special composite transforms, the payload is also well-defined:
-  //
-  //  - when the URN is "urn:beam:transforms:combine" it is a CombinePayload
-  //
-  UrnWithParameter spec = 2;
-
-  // (Required) Static display data for this PTransform application.
-  DisplayData display_data = 4;
-}
-
 // The payload for the primitive ParDo transform.
 message ParDoPayload {
 


Mime
View raw message