beam-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dhalp...@apache.org
Subject [37/50] [abbrv] incubator-beam git commit: Remove the DataflowRunner instructions from examples
Date Tue, 13 Sep 2016 00:41:08 GMT
Remove the DataflowRunner instructions from examples


Project: http://git-wip-us.apache.org/repos/asf/incubator-beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-beam/commit/c92e45dd
Tree: http://git-wip-us.apache.org/repos/asf/incubator-beam/tree/c92e45dd
Diff: http://git-wip-us.apache.org/repos/asf/incubator-beam/diff/c92e45dd

Branch: refs/heads/gearpump-runner
Commit: c92e45dd4019e613a7670e4bb0e1fcc4b7e2c462
Parents: 4bf3a3b
Author: Pei He <peihe@google.com>
Authored: Thu Aug 25 14:20:30 2016 -0700
Committer: Dan Halperin <dhalperi@google.com>
Committed: Mon Sep 12 17:40:12 2016 -0700

----------------------------------------------------------------------
 .../beam/examples/DebuggingWordCount.java       | 16 +++++++------
 .../apache/beam/examples/MinimalWordCount.java  |  7 +++---
 .../apache/beam/examples/WindowedWordCount.java | 22 +++++++----------
 .../org/apache/beam/examples/WordCount.java     | 22 +++++------------
 .../beam/examples/complete/AutoComplete.java    | 25 +++++++-------------
 .../examples/complete/StreamingWordExtract.java |  4 ++--
 .../apache/beam/examples/complete/TfIdf.java    | 18 +++++---------
 .../examples/complete/TopWikipediaSessions.java | 12 ++++------
 .../examples/complete/TrafficMaxLaneFlow.java   |  4 ++--
 .../beam/examples/complete/TrafficRoutes.java   |  4 ++--
 .../examples/cookbook/BigQueryTornadoes.java    | 18 ++++----------
 .../cookbook/CombinePerKeyExamples.java         | 18 ++++----------
 .../examples/cookbook/DatastoreWordCount.java   | 17 ++++++-------
 .../beam/examples/cookbook/DeDupExample.java    | 16 ++++++-------
 .../beam/examples/cookbook/FilterExamples.java  | 21 ++++------------
 .../beam/examples/cookbook/JoinExamples.java    | 18 ++++----------
 .../examples/cookbook/MaxPerKeyExamples.java    | 19 ++++-----------
 .../beam/examples/cookbook/TriggerExample.java  | 16 ++++++-------
 18 files changed, 92 insertions(+), 185 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/c92e45dd/examples/java/src/main/java/org/apache/beam/examples/DebuggingWordCount.java
----------------------------------------------------------------------
diff --git a/examples/java/src/main/java/org/apache/beam/examples/DebuggingWordCount.java
b/examples/java/src/main/java/org/apache/beam/examples/DebuggingWordCount.java
index be3aa41..eb38227 100644
--- a/examples/java/src/main/java/org/apache/beam/examples/DebuggingWordCount.java
+++ b/examples/java/src/main/java/org/apache/beam/examples/DebuggingWordCount.java
@@ -46,12 +46,12 @@ import org.slf4j.LoggerFactory;
  *
  * <p>Basic concepts, also in the MinimalWordCount and WordCount examples:
  * Reading text files; counting a PCollection; executing a Pipeline both locally
- * and using the Dataflow service; defining DoFns.
+ * and using a selected runner; defining DoFns.
  *
  * <p>New Concepts:
  * <pre>
  *   1. Logging to Cloud Logging
- *   2. Controlling Dataflow worker log levels
+ *   2. Controlling worker log levels
  *   3. Creating a custom aggregator
  *   4. Testing your Pipeline via PAssert
  * </pre>
@@ -62,12 +62,14 @@ import org.slf4j.LoggerFactory;
  * }
  * </pre>
  *
- * <p>To execute this pipeline using the Dataflow service and the additional logging
discussed
- * below, specify pipeline configuration:
+ * <p>To change the runner, specify:
+ * <pre>{@code
+ *   --runner=YOUR_SELECTED_RUNNER
+ * }
+ * </pre>
+ *
+ * <p>To use the additional logging discussed below, specify:
  * <pre>{@code
- *   --project=YOUR_PROJECT_ID
- *   --tempLocation=gs://YOUR_TEMP_DIRECTORY
- *   --runner=BlockingDataflowRunner
  *   --workerLogLevelOverrides={"org.apache.beam.examples":"DEBUG"}
  * }
  * </pre>

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/c92e45dd/examples/java/src/main/java/org/apache/beam/examples/MinimalWordCount.java
----------------------------------------------------------------------
diff --git a/examples/java/src/main/java/org/apache/beam/examples/MinimalWordCount.java b/examples/java/src/main/java/org/apache/beam/examples/MinimalWordCount.java
index f28a20c..f772dd5 100644
--- a/examples/java/src/main/java/org/apache/beam/examples/MinimalWordCount.java
+++ b/examples/java/src/main/java/org/apache/beam/examples/MinimalWordCount.java
@@ -66,12 +66,11 @@ public class MinimalWordCount {
 
     // In order to run your pipeline, you need to make following runner specific changes:
     //
-    // CHANGE 1/3: Select a Beam runner, such as BlockingDataflowRunner
-    // or FlinkRunner.
+    // CHANGE 1/3: Select a Beam runner, such as DataflowRunner or FlinkRunner.
     // CHANGE 2/3: Specify runner-required options.
-    // For BlockingDataflowRunner, set project and temp location as follows:
+    // For DataflowRunner, set project and temp location as follows:
     //   DataflowPipelineOptions dataflowOptions = options.as(DataflowPipelineOptions.class);
-    //   dataflowOptions.setRunner(BlockingDataflowRunner.class);
+    //   dataflowOptions.setRunner(DataflowRunner.class);
     //   dataflowOptions.setProject("SET_YOUR_PROJECT_ID_HERE");
     //   dataflowOptions.setTempLocation("gs://SET_YOUR_BUCKET_NAME_HERE/AND_TEMP_DIRECTORY");
     // For FlinkRunner, set the runner as follows. See {@code FlinkPipelineOptions}

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/c92e45dd/examples/java/src/main/java/org/apache/beam/examples/WindowedWordCount.java
----------------------------------------------------------------------
diff --git a/examples/java/src/main/java/org/apache/beam/examples/WindowedWordCount.java b/examples/java/src/main/java/org/apache/beam/examples/WindowedWordCount.java
index 7af354c..c8bd9d3 100644
--- a/examples/java/src/main/java/org/apache/beam/examples/WindowedWordCount.java
+++ b/examples/java/src/main/java/org/apache/beam/examples/WindowedWordCount.java
@@ -54,7 +54,7 @@ import org.joda.time.Instant;
  *
  * <p>Basic concepts, also in the MinimalWordCount, WordCount, and DebuggingWordCount
examples:
  * Reading text files; counting a PCollection; writing to GCS; executing a Pipeline both
locally
- * and using the Dataflow service; defining DoFns; creating a custom aggregator;
+ * and using a selected runner; defining DoFns; creating a custom aggregator;
  * user-defined PTransforms; defining PipelineOptions.
  *
  * <p>New Concepts:
@@ -66,19 +66,13 @@ import org.joda.time.Instant;
  *   5. Writing to BigQuery
  * </pre>
  *
- * <p>To execute this pipeline locally, specify general pipeline configuration:
+ * <p>By default, the examples will run with the {@code DirectRunner}.
+ * To change the runner, specify:
  * <pre>{@code
- *   --project=YOUR_PROJECT_ID
- * }
- * </pre>
- *
- * <p>To execute this pipeline using the Dataflow service, specify pipeline configuration:
- * <pre>{@code
- *   --project=YOUR_PROJECT_ID
- *   --tempLocation=gs://YOUR_TEMP_DIRECTORY
- *   --runner=BlockingDataflowRunner
+ *   --runner=YOUR_SELECTED_RUNNER
  * }
  * </pre>
+ * See examples/java/README.md for instructions about how to configure different runners.
  *
  * <p>Optionally specify the input file path via:
  * {@code --inputFile=gs://INPUT_PATH},
@@ -86,7 +80,7 @@ import org.joda.time.Instant;
  *
  * <p>Specify an output BigQuery dataset and optionally, a table for the output. If
you don't
  * specify the table, one will be created for you using the job name. If you don't specify
the
- * dataset, a dataset called {@code dataflow-examples} must already exist in your project.
+ * dataset, a dataset called {@code beam_examples} must already exist in your project.
  * {@code --bigQueryDataset=YOUR-DATASET --bigQueryTable=YOUR-NEW-TABLE-NAME}.
  *
  * <p>By default, the pipeline will do fixed windowing, on 1-minute windows.  You can
@@ -190,7 +184,7 @@ public class WindowedWordCount {
     Pipeline pipeline = Pipeline.create(options);
 
     /**
-     * Concept #1: the Dataflow SDK lets us run the same pipeline with either a bounded or
+     * Concept #1: the Beam SDK lets us run the same pipeline with either a bounded or
      * unbounded input source.
      */
     PCollection<String> input = pipeline
@@ -229,7 +223,7 @@ public class WindowedWordCount {
 
     PipelineResult result = pipeline.run();
 
-    // dataflowUtils will try to cancel the pipeline before the program exists.
+    // ExampleUtils will try to cancel the pipeline before the program exists.
     exampleUtils.waitToFinish(result);
   }
 }

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/c92e45dd/examples/java/src/main/java/org/apache/beam/examples/WordCount.java
----------------------------------------------------------------------
diff --git a/examples/java/src/main/java/org/apache/beam/examples/WordCount.java b/examples/java/src/main/java/org/apache/beam/examples/WordCount.java
index 793ee4b..498b069 100644
--- a/examples/java/src/main/java/org/apache/beam/examples/WordCount.java
+++ b/examples/java/src/main/java/org/apache/beam/examples/WordCount.java
@@ -48,8 +48,8 @@ import org.apache.beam.sdk.values.PCollection;
  * pipeline, for introduction of additional concepts.
  *
  * <p>For a detailed walkthrough of this example, see
- *   <a href="https://cloud.google.com/dataflow/java-sdk/wordcount-example">
- *   https://cloud.google.com/dataflow/java-sdk/wordcount-example
+ *   <a href="http://beam.incubator.apache.org/use/walkthroughs/">
+ *   http://beam.incubator.apache.org/use/walkthroughs/
  *   </a>
  *
  * <p>Basic concepts, also in the MinimalWordCount example:
@@ -66,27 +66,17 @@ import org.apache.beam.sdk.values.PCollection;
  * <p>Concept #1: you can execute this pipeline either locally or using the selected
runner.
  * These are now command-line options and not hard-coded as they were in the MinimalWordCount
  * example.
- * To execute this pipeline locally, specify general pipeline configuration:
- * <pre>{@code
- *   --project=YOUR_PROJECT_ID
- * }
- * </pre>
- * and a local output file or output prefix on GCS:
+ * To execute this pipeline locally, specify a local output file or output prefix on GCS:
  * <pre>{@code
  *   --output=[YOUR_LOCAL_FILE | gs://YOUR_OUTPUT_PREFIX]
  * }</pre>
  *
- * <p>To execute this pipeline using the Dataflow service, specify pipeline configuration:
+ * <p>To change the runner, specify:
  * <pre>{@code
- *   --project=YOUR_PROJECT_ID
- *   --tempLocation=gs://YOUR_TEMP_DIRECTORY
- *   --runner=BlockingDataflowRunner
+ *   --runner=YOUR_SELECTED_RUNNER
  * }
  * </pre>
- * and an output prefix on GCS:
- * <pre>{@code
- *   --output=gs://YOUR_OUTPUT_PREFIX
- * }</pre>
+ * See examples/java/README.md for instructions about how to configure different runners.
  *
  * <p>The input file defaults to {@code gs://apache-beam-samples/shakespeare/kinglear.txt}
  * and can be overridden with {@code --inputFile}.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/c92e45dd/examples/java/src/main/java/org/apache/beam/examples/complete/AutoComplete.java
----------------------------------------------------------------------
diff --git a/examples/java/src/main/java/org/apache/beam/examples/complete/AutoComplete.java
b/examples/java/src/main/java/org/apache/beam/examples/complete/AutoComplete.java
index 2182e6d..c3ac614 100644
--- a/examples/java/src/main/java/org/apache/beam/examples/complete/AutoComplete.java
+++ b/examples/java/src/main/java/org/apache/beam/examples/complete/AutoComplete.java
@@ -77,24 +77,17 @@ import org.joda.time.Duration;
  * <p>Concepts: Using the same pipeline in both streaming and batch, combiners,
  *              composite transforms.
  *
- * <p>To execute this pipeline using the Dataflow service in batch mode,
- * specify pipeline configuration:
+ * <p>To execute this pipeline in streaming mode, specify:
  * <pre>{@code
- *   --project=YOUR_PROJECT_ID
- *   --tempLocation=gs://YOUR_TEMP_DIRECTORY
- *   --runner=DataflowRunner
- *   --inputFile=gs://path/to/input*.txt
+ *   --streaming
  * }</pre>
  *
- * <p>To execute this pipeline using the Dataflow service in streaming mode,
- * specify pipeline configuration:
+ * <p>To change the runner, specify:
  * <pre>{@code
- *   --project=YOUR_PROJECT_ID
- *   --tempLocation=gs://YOUR_TEMP_DIRECTORY
- *   --runner=DataflowRunner
- *   --inputFile=gs://YOUR_INPUT_DIRECTORY/*.txt
- *   --streaming
- * }</pre>
+ *   --runner=YOUR_SELECTED_RUNNER
+ * }
+ * </pre>
+ * See examples/java/README.md for instructions about how to configure different runners.
  *
  * <p>This will update the Cloud Datastore every 10 seconds based on the last
  * 30 minutes of data received.
@@ -417,7 +410,7 @@ public class AutoComplete {
   /**
    * Options supported by this class.
    *
-   * <p>Inherits standard Dataflow configuration options.
+   * <p>Inherits standard Beam example configuration options.
    */
   private static interface Options
       extends ExampleOptions, ExampleBigQueryTableOptions, StreamingOptions {
@@ -510,7 +503,7 @@ public class AutoComplete {
     // Run the pipeline.
     PipelineResult result = p.run();
 
-    // dataflowUtils will try to cancel the pipeline and the injector before the program
exists.
+    // ExampleUtils will try to cancel the pipeline and the injector before the program exists.
     exampleUtils.waitToFinish(result);
   }
 }

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/c92e45dd/examples/java/src/main/java/org/apache/beam/examples/complete/StreamingWordExtract.java
----------------------------------------------------------------------
diff --git a/examples/java/src/main/java/org/apache/beam/examples/complete/StreamingWordExtract.java
b/examples/java/src/main/java/org/apache/beam/examples/complete/StreamingWordExtract.java
index 869ea69..e8d8950 100644
--- a/examples/java/src/main/java/org/apache/beam/examples/complete/StreamingWordExtract.java
+++ b/examples/java/src/main/java/org/apache/beam/examples/complete/StreamingWordExtract.java
@@ -44,7 +44,7 @@ import org.apache.beam.sdk.transforms.ParDo;
  * a BigQuery table.
  *
  * <p>The example is configured to use the default BigQuery table from the example
common package
- * (there are no defaults for a general Dataflow pipeline).
+ * (there are no defaults for a general Beam pipeline).
  * You can override them by using the {@literal --bigQueryDataset}, and {@literal --bigQueryTable}
  * options. If the BigQuery table do not exist, the example will try to create them.
  *
@@ -141,7 +141,7 @@ public class StreamingWordExtract {
 
     PipelineResult result = pipeline.run();
 
-    // dataflowUtils will try to cancel the pipeline before the program exists.
+    // ExampleUtils will try to cancel the pipeline before the program exists.
     exampleUtils.waitToFinish(result);
   }
 }

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/c92e45dd/examples/java/src/main/java/org/apache/beam/examples/complete/TfIdf.java
----------------------------------------------------------------------
diff --git a/examples/java/src/main/java/org/apache/beam/examples/complete/TfIdf.java b/examples/java/src/main/java/org/apache/beam/examples/complete/TfIdf.java
index 6684553..59bbd49 100644
--- a/examples/java/src/main/java/org/apache/beam/examples/complete/TfIdf.java
+++ b/examples/java/src/main/java/org/apache/beam/examples/complete/TfIdf.java
@@ -65,23 +65,17 @@ import org.slf4j.LoggerFactory;
  *
  * <p>Concepts: joining data; side inputs; logging
  *
- * <p>To execute this pipeline locally, specify general pipeline configuration:
- * <pre>{@code
- *   --project=YOUR_PROJECT_ID
- * }</pre>
- * and a local output file or output prefix on GCS:
+ * <p>To execute this pipeline locally, specify a local output file or output prefix
on GCS:
  * <pre>{@code
  *   --output=[YOUR_LOCAL_FILE | gs://YOUR_OUTPUT_PREFIX]
  * }</pre>
  *
- * <p>To execute this pipeline using the Dataflow service, specify pipeline configuration:
+ * <p>To change the runner, specify:
  * <pre>{@code
- *   --project=YOUR_PROJECT_ID
- *   --tempLocation=gs://YOUR_TEMP_DIRECTORY
- *   --runner=BlockingDataflowRunner
- * and an output prefix on GCS:
- *   --output=gs://YOUR_OUTPUT_PREFIX
- * }</pre>
+ *   --runner=YOUR_SELECTED_RUNNER
+ * }
+ * </pre>
+ * See examples/java/README.md for instructions about how to configure different runners.
  *
  * <p>The default input is {@code gs://apache-beam-samples/shakespeare/} and can be
overridden with
  * {@code --input}.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/c92e45dd/examples/java/src/main/java/org/apache/beam/examples/complete/TopWikipediaSessions.java
----------------------------------------------------------------------
diff --git a/examples/java/src/main/java/org/apache/beam/examples/complete/TopWikipediaSessions.java
b/examples/java/src/main/java/org/apache/beam/examples/complete/TopWikipediaSessions.java
index d597258..0f594d7 100644
--- a/examples/java/src/main/java/org/apache/beam/examples/complete/TopWikipediaSessions.java
+++ b/examples/java/src/main/java/org/apache/beam/examples/complete/TopWikipediaSessions.java
@@ -52,17 +52,13 @@ import org.joda.time.Instant;
  * <p>It is not recommended to execute this pipeline locally, given the size of the
default input
  * data.
  *
- * <p>To execute this pipeline using the Dataflow service, specify pipeline configuration:
+ * <p>To execute this pipeline using a selected runner and an output prefix on GCS,
specify:
  * <pre>{@code
- *   --project=YOUR_PROJECT_ID
- *   --tempLocation=gs://YOUR_TEMP_DIRECTORY
- *   --runner=BlockingDataflowRunner
+ *   --runner=YOUR_SELECTED_RUNNER
+ *   --output=gs://YOUR_OUTPUT_PREFIX
  * }
  * </pre>
- * and an output prefix on GCS:
- * <pre>{@code
- *   --output=gs://YOUR_OUTPUT_PREFIX
- * }</pre>
+ * See examples/java/README.md for instructions about how to configure different runners.
  *
  * <p>The default input is {@code gs://apache-beam-samples/wikipedia_edits/*.json}
and can be
  * overridden with {@code --input}.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/c92e45dd/examples/java/src/main/java/org/apache/beam/examples/complete/TrafficMaxLaneFlow.java
----------------------------------------------------------------------
diff --git a/examples/java/src/main/java/org/apache/beam/examples/complete/TrafficMaxLaneFlow.java
b/examples/java/src/main/java/org/apache/beam/examples/complete/TrafficMaxLaneFlow.java
index e456960..0c367d4 100644
--- a/examples/java/src/main/java/org/apache/beam/examples/complete/TrafficMaxLaneFlow.java
+++ b/examples/java/src/main/java/org/apache/beam/examples/complete/TrafficMaxLaneFlow.java
@@ -66,7 +66,7 @@ import org.joda.time.format.DateTimeFormatter;
  * <p>The pipeline reads traffic sensor data from {@literal --inputFile}.
  *
  * <p>The example is configured to use the default BigQuery table from the example
common package
- * (there are no defaults for a general Dataflow pipeline).
+ * (there are no defaults for a general Beam pipeline).
  * You can override them by using the {@literal --bigQueryDataset}, and {@literal --bigQueryTable}
  * options. If the BigQuery table do not exist, the example will try to create them.
  *
@@ -354,7 +354,7 @@ public class TrafficMaxLaneFlow {
     // Run the pipeline.
     PipelineResult result = pipeline.run();
 
-    // dataflowUtils will try to cancel the pipeline and the injector before the program
exists.
+    // ExampleUtils will try to cancel the pipeline and the injector before the program exists.
     exampleUtils.waitToFinish(result);
   }
 

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/c92e45dd/examples/java/src/main/java/org/apache/beam/examples/complete/TrafficRoutes.java
----------------------------------------------------------------------
diff --git a/examples/java/src/main/java/org/apache/beam/examples/complete/TrafficRoutes.java
b/examples/java/src/main/java/org/apache/beam/examples/complete/TrafficRoutes.java
index 95336c6..14cee4d 100644
--- a/examples/java/src/main/java/org/apache/beam/examples/complete/TrafficRoutes.java
+++ b/examples/java/src/main/java/org/apache/beam/examples/complete/TrafficRoutes.java
@@ -69,7 +69,7 @@ import org.joda.time.format.DateTimeFormatter;
  * <p>The pipeline reads traffic sensor data from {@literal --inputFile}.
  *
  * <p>The example is configured to use the default BigQuery table from the example
common package
- * (there are no defaults for a general Dataflow pipeline).
+ * (there are no defaults for a general Beam pipeline).
  * You can override them by using the {@literal --bigQueryDataset}, and {@literal --bigQueryTable}
  * options. If the BigQuery table do not exist, the example will try to create them.
  *
@@ -365,7 +365,7 @@ public class TrafficRoutes {
     // Run the pipeline.
     PipelineResult result = pipeline.run();
 
-    // dataflowUtils will try to cancel the pipeline and the injector before the program
exists.
+    // ExampleUtils will try to cancel the pipeline and the injector before the program exists.
     exampleUtils.waitToFinish(result);
   }
 

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/c92e45dd/examples/java/src/main/java/org/apache/beam/examples/cookbook/BigQueryTornadoes.java
----------------------------------------------------------------------
diff --git a/examples/java/src/main/java/org/apache/beam/examples/cookbook/BigQueryTornadoes.java
b/examples/java/src/main/java/org/apache/beam/examples/cookbook/BigQueryTornadoes.java
index 439cf02..1e4918d 100644
--- a/examples/java/src/main/java/org/apache/beam/examples/cookbook/BigQueryTornadoes.java
+++ b/examples/java/src/main/java/org/apache/beam/examples/cookbook/BigQueryTornadoes.java
@@ -45,27 +45,17 @@ import org.apache.beam.sdk.values.PCollection;
  * <p>Note: Before running this example, you must create a BigQuery dataset to contain
your output
  * table.
  *
- * <p>To execute this pipeline locally, specify general pipeline configuration:
- * <pre>{@code
- *   --project=YOUR_PROJECT_ID
- * }
- * </pre>
- * and the BigQuery table for the output, with the form
+ * <p>To execute this pipeline locally, specify the BigQuery table for the output with
the form:
  * <pre>{@code
  *   --output=YOUR_PROJECT_ID:DATASET_ID.TABLE_ID
  * }</pre>
  *
- * <p>To execute this pipeline using the Dataflow service, specify pipeline configuration:
+ * <p>To change the runner, specify:
  * <pre>{@code
- *   --project=YOUR_PROJECT_ID
- *   --tempLocation=gs://YOUR_TEMP_DIRECTORY
- *   --runner=BlockingDataflowRunner
+ *   --runner=YOUR_SELECTED_RUNNER
  * }
  * </pre>
- * and the BigQuery table for the output:
- * <pre>{@code
- *   --output=YOUR_PROJECT_ID:DATASET_ID.TABLE_ID
- * }</pre>
+ * See examples/java/README.md for instructions about how to configure different runners.
  *
  * <p>The BigQuery input table defaults to {@code clouddataflow-readonly:samples.weather_stations}
  * and can be overridden with {@code --input}.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/c92e45dd/examples/java/src/main/java/org/apache/beam/examples/cookbook/CombinePerKeyExamples.java
----------------------------------------------------------------------
diff --git a/examples/java/src/main/java/org/apache/beam/examples/cookbook/CombinePerKeyExamples.java
b/examples/java/src/main/java/org/apache/beam/examples/cookbook/CombinePerKeyExamples.java
index 1d280a6..fc11ac9 100644
--- a/examples/java/src/main/java/org/apache/beam/examples/cookbook/CombinePerKeyExamples.java
+++ b/examples/java/src/main/java/org/apache/beam/examples/cookbook/CombinePerKeyExamples.java
@@ -52,27 +52,17 @@ import org.apache.beam.sdk.values.PCollection;
  * <p>Note: Before running this example, you must create a BigQuery dataset to contain
your output
  * table.
  *
- * <p>To execute this pipeline locally, specify general pipeline configuration:
- * <pre>{@code
- *   --project=YOUR_PROJECT_ID
- * }
- * </pre>
- * and the BigQuery table for the output:
+ * <p>To execute this pipeline locally, specify the BigQuery table for the output:
  * <pre>{@code
  *   --output=YOUR_PROJECT_ID:DATASET_ID.TABLE_ID
  * }</pre>
  *
- * <p>To execute this pipeline using the Dataflow service, specify pipeline configuration:
+ * <p>To change the runner, specify:
  * <pre>{@code
- *   --project=YOUR_PROJECT_ID
- *   --tempLocation=gs://<STAGING DIRECTORY>
- *   --runner=BlockingDataflowRunner
+ *   --runner=YOUR_SELECTED_RUNNER
  * }
  * </pre>
- * and the BigQuery table for the output:
- * <pre>{@code
- *   --output=YOUR_PROJECT_ID:DATASET_ID.TABLE_ID
- * }</pre>
+ * See examples/java/README.md for instructions about how to configure different runners.
  *
  * <p>The BigQuery input table defaults to {@code publicdata:samples.shakespeare} and
can
  * be overridden with {@code --input}.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/c92e45dd/examples/java/src/main/java/org/apache/beam/examples/cookbook/DatastoreWordCount.java
----------------------------------------------------------------------
diff --git a/examples/java/src/main/java/org/apache/beam/examples/cookbook/DatastoreWordCount.java
b/examples/java/src/main/java/org/apache/beam/examples/cookbook/DatastoreWordCount.java
index 434e9fb..c0066e6 100644
--- a/examples/java/src/main/java/org/apache/beam/examples/cookbook/DatastoreWordCount.java
+++ b/examples/java/src/main/java/org/apache/beam/examples/cookbook/DatastoreWordCount.java
@@ -58,14 +58,15 @@ import org.apache.beam.sdk.transforms.ParDo;
  *
  * <p>To run this pipeline locally, the following options must be provided:
  * <pre>{@code
- *   --project=YOUR_PROJECT_ID
  *   --output=[YOUR_LOCAL_FILE | gs://YOUR_OUTPUT_PATH]
  * }</pre>
  *
- * <p>To run this example using Dataflow service, you must additionally
- * provide either {@literal --tempLocation} or {@literal --tempLocation}, and
- * select one of the Dataflow pipeline runners, eg
- * {@literal --runner=BlockingDataflowRunner}.
+ * <p>To change the runner, specify:
+ * <pre>{@code
+ *   --runner=YOUR_SELECTED_RUNNER
+ * }
+ * </pre>
+ * See examples/java/README.md for instructions about how to configure different runners.
  *
  * <p><b>Note:</b> this example creates entities with <i>Ancestor
keys</i> to ensure that all
  * entities created are in the same entity group. Similarly, the query used to read from
the Cloud
@@ -239,13 +240,9 @@ public class DatastoreWordCount {
   }
 
   /**
-   * An example to demo how to use {@link DatastoreIO}.  The runner here is
-   * customizable, which means users could pass either {@code DirectRunner}
-   * or {@code DataflowRunner} in the pipeline options.
+   * An example to demo how to use {@link DatastoreIO}.
    */
   public static void main(String args[]) {
-    // The options are used in two places, for Dataflow service, and
-    // building DatastoreIO.Read object
     Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
 
     if (!options.isReadOnly()) {

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/c92e45dd/examples/java/src/main/java/org/apache/beam/examples/cookbook/DeDupExample.java
----------------------------------------------------------------------
diff --git a/examples/java/src/main/java/org/apache/beam/examples/cookbook/DeDupExample.java
b/examples/java/src/main/java/org/apache/beam/examples/cookbook/DeDupExample.java
index 5791710..594d52d 100644
--- a/examples/java/src/main/java/org/apache/beam/examples/cookbook/DeDupExample.java
+++ b/examples/java/src/main/java/org/apache/beam/examples/cookbook/DeDupExample.java
@@ -35,17 +35,15 @@ import org.apache.beam.sdk.util.gcsfs.GcsPath;
  * Demonstrates {@link org.apache.beam.sdk.io.TextIO.Read}/
  * {@link RemoveDuplicates}/{@link org.apache.beam.sdk.io.TextIO.Write}.
  *
- * <p>To execute this pipeline locally, specify general pipeline configuration:
- *   --project=YOUR_PROJECT_ID
- * and a local output file or output prefix on GCS:
+ * <p>To execute this pipeline locally, specify a local output file or output prefix
on GCS:
  *   --output=[YOUR_LOCAL_FILE | gs://YOUR_OUTPUT_PREFIX]
  *
- * <p>To execute this pipeline using the Dataflow service, specify pipeline configuration:
- *   --project=YOUR_PROJECT_ID
- *   --tempLocation=gs://YOUR_TEMP_DIRECTORY
- *   --runner=BlockingDataflowRunner
- * and an output prefix on GCS:
- *   --output=gs://YOUR_OUTPUT_PREFIX
+ * <p>To change the runner, specify:
+ * <pre>{@code
+ *   --runner=YOUR_SELECTED_RUNNER
+ * }
+ * </pre>
+ * See examples/java/README.md for instructions about how to configure different runners.
  *
  * <p>The input defaults to {@code gs://apache-beam-samples/shakespeare/*} and can
be
  * overridden with {@code --input}.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/c92e45dd/examples/java/src/main/java/org/apache/beam/examples/cookbook/FilterExamples.java
----------------------------------------------------------------------
diff --git a/examples/java/src/main/java/org/apache/beam/examples/cookbook/FilterExamples.java
b/examples/java/src/main/java/org/apache/beam/examples/cookbook/FilterExamples.java
index 6c42520..01d668b 100644
--- a/examples/java/src/main/java/org/apache/beam/examples/cookbook/FilterExamples.java
+++ b/examples/java/src/main/java/org/apache/beam/examples/cookbook/FilterExamples.java
@@ -54,12 +54,7 @@ import org.apache.beam.sdk.values.PCollectionView;
  * <p>Note: Before running this example, you must create a BigQuery dataset to contain
your output
  * table.
  *
- * <p>To execute this pipeline locally, specify general pipeline configuration:
- * <pre>{@code
- *   --project=YOUR_PROJECT_ID
- * }
- * </pre>
- * and the BigQuery table for the output:
+ * <p>To execute this pipeline locally, specify the BigQuery table for the output:
  * <pre>{@code
  *   --output=YOUR_PROJECT_ID:DATASET_ID.TABLE_ID
  *   [--monthFilter=<month_number>]
@@ -67,20 +62,12 @@ import org.apache.beam.sdk.values.PCollectionView;
  * </pre>
  * where optional parameter {@code --monthFilter} is set to a number 1-12.
  *
- * <p>To execute this pipeline using the Dataflow service, specify pipeline configuration:
+ * <p>To change the runner, specify:
  * <pre>{@code
- *   --project=YOUR_PROJECT_ID
- *   --tempLocation=gs://YOUR_TEMP_DIRECTORY
- *   --runner=BlockingDataflowRunner
+ *   --runner=YOUR_SELECTED_RUNNER
  * }
  * </pre>
- * and the BigQuery table for the output:
- * <pre>{@code
- *   --output=YOUR_PROJECT_ID:DATASET_ID.TABLE_ID
- *   [--monthFilter=<month_number>]
- * }
- * </pre>
- * where optional parameter {@code --monthFilter} is set to a number 1-12.
+ * See examples/java/README.md for instructions about how to configure different runners.
  *
  * <p>The BigQuery input table defaults to {@code clouddataflow-readonly:samples.weather_stations}
  * and can be overridden with {@code --input}.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/c92e45dd/examples/java/src/main/java/org/apache/beam/examples/cookbook/JoinExamples.java
----------------------------------------------------------------------
diff --git a/examples/java/src/main/java/org/apache/beam/examples/cookbook/JoinExamples.java
b/examples/java/src/main/java/org/apache/beam/examples/cookbook/JoinExamples.java
index 1b91bf1..799cad3 100644
--- a/examples/java/src/main/java/org/apache/beam/examples/cookbook/JoinExamples.java
+++ b/examples/java/src/main/java/org/apache/beam/examples/cookbook/JoinExamples.java
@@ -41,27 +41,17 @@ import org.apache.beam.sdk.values.TupleTag;
  *
  * <p>Concepts: Join operation; multiple input sources.
  *
- * <p>To execute this pipeline locally, specify general pipeline configuration:
- * <pre>{@code
- *   --project=YOUR_PROJECT_ID
- * }
- * </pre>
- * and a local output file or output prefix on GCS:
+ * <p>To execute this pipeline locally, specify a local output file or output prefix
on GCS:
  * <pre>{@code
  *   --output=[YOUR_LOCAL_FILE | gs://YOUR_OUTPUT_PREFIX]
  * }</pre>
  *
- * <p>To execute this pipeline using the Dataflow service, specify pipeline configuration:
+ * <p>To change the runner, specify:
  * <pre>{@code
- *   --project=YOUR_PROJECT_ID
- *   --tempLocation=gs://YOUR_TEMP_DIRECTORY
- *   --runner=BlockingDataflowRunner
+ *   --runner=YOUR_SELECTED_RUNNER
  * }
  * </pre>
- * and an output prefix on GCS:
- * <pre>{@code
- *   --output=gs://YOUR_OUTPUT_PREFIX
- * }</pre>
+ * See examples/java/README.md for instructions about how to configure different runners.
  */
 public class JoinExamples {
 

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/c92e45dd/examples/java/src/main/java/org/apache/beam/examples/cookbook/MaxPerKeyExamples.java
----------------------------------------------------------------------
diff --git a/examples/java/src/main/java/org/apache/beam/examples/cookbook/MaxPerKeyExamples.java
b/examples/java/src/main/java/org/apache/beam/examples/cookbook/MaxPerKeyExamples.java
index 3772a7b..3a4fa26 100644
--- a/examples/java/src/main/java/org/apache/beam/examples/cookbook/MaxPerKeyExamples.java
+++ b/examples/java/src/main/java/org/apache/beam/examples/cookbook/MaxPerKeyExamples.java
@@ -46,27 +46,16 @@ import org.apache.beam.sdk.values.PCollection;
  * <p>Note: Before running this example, you must create a BigQuery dataset to contain
your output
  * table.
  *
- * <p>To execute this pipeline locally, specify general pipeline configuration:
- * <pre>{@code
- *   --project=YOUR_PROJECT_ID
- * }
- * </pre>
- * and the BigQuery table for the output, with the form
+ * <p>To execute this pipeline locally, specify the BigQuery table for the output with
the form:
  * <pre>{@code
  *   --output=YOUR_PROJECT_ID:DATASET_ID.TABLE_ID
  * }</pre>
  *
- * <p>To execute this pipeline using the Dataflow service, specify pipeline configuration:
- * <pre>{@code
- *   --project=YOUR_PROJECT_ID
- *   --tempLocation=gs://YOUR_TEMP_DIRECTORY
- *   --runner=BlockingDataflowRunner
- * }
- * </pre>
- * and the BigQuery table for the output:
+ * <p>To change the runner, specify:
  * <pre>{@code
- *   --output=YOUR_PROJECT_ID:DATASET_ID.TABLE_ID
+ *   --runner=YOUR_SELECTED_RUNNER
  * }</pre>
+ * See examples/java/README.md for instructions about how to configure different runners.
  *
  * <p>The BigQuery input table defaults to {@code clouddataflow-readonly:samples.weather_stations
}
  * and can be overridden with {@code --input}.

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/c92e45dd/examples/java/src/main/java/org/apache/beam/examples/cookbook/TriggerExample.java
----------------------------------------------------------------------
diff --git a/examples/java/src/main/java/org/apache/beam/examples/cookbook/TriggerExample.java
b/examples/java/src/main/java/org/apache/beam/examples/cookbook/TriggerExample.java
index 2630541..68d4d32 100644
--- a/examples/java/src/main/java/org/apache/beam/examples/cookbook/TriggerExample.java
+++ b/examples/java/src/main/java/org/apache/beam/examples/cookbook/TriggerExample.java
@@ -73,15 +73,13 @@ import org.joda.time.Instant;
  *   4. Combining late data and speculative estimates
  * </pre>
  *
- * <p> Before running this example, it will be useful to familiarize yourself with
Dataflow triggers
+ * <p> Before running this example, it will be useful to familiarize yourself with
Beam triggers
  * and understand the concept of 'late data',
- * See:  <a href="https://cloud.google.com/dataflow/model/triggers">
- * https://cloud.google.com/dataflow/model/triggers </a> and
- * <a href="https://cloud.google.com/dataflow/model/windowing#Advanced">
- * https://cloud.google.com/dataflow/model/windowing#Advanced </a>
+ * See: <a href="http://beam.incubator.apache.org/use/walkthroughs/">
+ * http://beam.incubator.apache.org/use/walkthroughs/</a>
  *
  * <p> The example is configured to use the default BigQuery table from the example
common package
- * (there are no defaults for a general Dataflow pipeline).
+ * (there are no defaults for a general Beam pipeline).
  * You can override them by using the {@code --bigQueryDataset}, and {@code --bigQueryTable}
  * options. If the BigQuery table do not exist, the example will try to create them.
  *
@@ -155,7 +153,7 @@ public class TriggerExample {
    * 5             | 60                 | 10:27:20   | 10:27:25
    * 5             | 60                 | 10:29:00   | 11:11:00
    *
-   * <p> Dataflow tracks a watermark which records up to what point in event time the
data is
+   * <p> Beam tracks a watermark which records up to what point in event time the data
is
    * complete. For the purposes of the example, we'll assume the watermark is approximately
15m
    * behind the current processing time. In practice, the actual value would vary over time
based
    * on the systems knowledge of the current delay and contents of the backlog (data
@@ -176,7 +174,7 @@ public class TriggerExample {
     public PCollectionList<TableRow> apply(PCollection<KV<String, Integer>>
flowInfo) {
 
       // Concept #1: The default triggering behavior
-      // By default Dataflow uses a trigger which fires when the watermark has passed the
end of the
+      // By default Beam uses a trigger which fires when the watermark has passed the end
of the
       // window. This would be written {@code Repeatedly.forever(AfterWatermark.pastEndOfWindow())}.
 
       // The system also defaults to dropping late data -- data which arrives after the watermark
@@ -459,7 +457,7 @@ public class TriggerExample {
 
     PipelineResult result = pipeline.run();
 
-    // dataflowUtils will try to cancel the pipeline and the injector before the program
exits.
+    // ExampleUtils will try to cancel the pipeline and the injector before the program exits.
     exampleUtils.waitToFinish(result);
   }
 



Mime
View raw message