beam-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From al...@apache.org
Subject [1/2] beam git commit: [BEAM-1320] Fix documentation in the sdk and fail for new errors
Date Wed, 22 Feb 2017 15:56:21 GMT
Repository: beam
Updated Branches:
  refs/heads/master 8b2e8f295 -> d7ed2e23a


[BEAM-1320] Fix documentation in the sdk and fail for new errors


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/228b18e8
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/228b18e8
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/228b18e8

Branch: refs/heads/master
Commit: 228b18e851e535fecfa3ac5086afd299f91dd1c3
Parents: 8b2e8f2
Author: Sourabh Bajaj <sourabhbajaj@google.com>
Authored: Wed Feb 22 02:34:54 2017 -0800
Committer: Ahmet Altay <altay@google.com>
Committed: Wed Feb 22 07:55:44 2017 -0800

----------------------------------------------------------------------
 .../examples/complete/top_wikipedia_sessions.py | 10 ++--
 .../examples/cookbook/bigquery_tornadoes.py     |  5 +-
 .../examples/cookbook/multiple_output_pardo.py  | 11 +++--
 .../apache_beam/examples/snippets/snippets.py   |  6 +++
 .../io/google_cloud_platform/bigquery.py        |  4 +-
 sdks/python/apache_beam/io/iobase.py            |  2 +
 sdks/python/apache_beam/io/source_test_utils.py | 39 +++++++--------
 sdks/python/apache_beam/io/textio.py            |  3 +-
 sdks/python/apache_beam/metrics/execution.py    |  1 +
 sdks/python/apache_beam/metrics/metricbase.py   |  3 +-
 .../runners/direct/bundle_factory.py            |  6 ++-
 sdks/python/apache_beam/transforms/core.py      |  4 +-
 sdks/python/apache_beam/transforms/display.py   |  1 +
 sdks/python/apache_beam/utils/annotations.py    | 50 +++++++++++---------
 sdks/python/apache_beam/utils/profiler.py       |  6 ++-
 sdks/python/generate_pydoc.sh                   | 10 +++-
 16 files changed, 96 insertions(+), 65 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/228b18e8/sdks/python/apache_beam/examples/complete/top_wikipedia_sessions.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/examples/complete/top_wikipedia_sessions.py b/sdks/python/apache_beam/examples/complete/top_wikipedia_sessions.py
index d7fbe30..e6cab18 100644
--- a/sdks/python/apache_beam/examples/complete/top_wikipedia_sessions.py
+++ b/sdks/python/apache_beam/examples/complete/top_wikipedia_sessions.py
@@ -22,19 +22,21 @@ user with the longest string of edits separated by no more than an hour within
 each 30 day period.
 
 To execute this pipeline locally using the DirectRunner, specify an
-output prefix on GCS:
+output prefix on GCS:::
+
   --output gs://YOUR_OUTPUT_PREFIX
 
 To execute this pipeline using the Google Cloud Dataflow service, specify
-pipeline configuration in addition to the above:
+pipeline configuration in addition to the above:::
+
   --job_name NAME_FOR_YOUR_JOB
   --project YOUR_PROJECT_ID
   --staging_location gs://YOUR_STAGING_DIRECTORY
   --temp_location gs://YOUR_TEMPORARY_DIRECTORY
   --runner DataflowRunner
 
-The default input is gs://dataflow-samples/wikipedia_edits/*.json and can be
-overridden with --input.
+The default input is ``gs://dataflow-samples/wikipedia_edits/*.json`` and can
+be overridden with --input.
 """
 
 from __future__ import absolute_import

http://git-wip-us.apache.org/repos/asf/beam/blob/228b18e8/sdks/python/apache_beam/examples/cookbook/bigquery_tornadoes.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/examples/cookbook/bigquery_tornadoes.py b/sdks/python/apache_beam/examples/cookbook/bigquery_tornadoes.py
index 1eade9d..3857111 100644
--- a/sdks/python/apache_beam/examples/cookbook/bigquery_tornadoes.py
+++ b/sdks/python/apache_beam/examples/cookbook/bigquery_tornadoes.py
@@ -24,8 +24,9 @@ is a boolean field.
 
 The workflow will compute the number of tornadoes in each month and output
 the results to a table (created if needed) with the following schema:
-  - month: number
-  - tornado_count: number
+
+- month: number
+- tornado_count: number
 
 This example uses the default behavior for BigQuery source and sinks that
 represents table rows as plain Python dictionaries.

http://git-wip-us.apache.org/repos/asf/beam/blob/228b18e8/sdks/python/apache_beam/examples/cookbook/multiple_output_pardo.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/examples/cookbook/multiple_output_pardo.py b/sdks/python/apache_beam/examples/cookbook/multiple_output_pardo.py
index 26e97c7..978e4ed 100644
--- a/sdks/python/apache_beam/examples/cookbook/multiple_output_pardo.py
+++ b/sdks/python/apache_beam/examples/cookbook/multiple_output_pardo.py
@@ -24,25 +24,28 @@ the corresponding result (a PCollection) for that side output.
 
 This is a slightly modified version of the basic wordcount example. In this
 example words are divided into 2 buckets as shorts words (3 characters in length
-or less) and words (all other words). There will be 3 output files:
+or less) and words (all other words). There will be 3 output files:::
 
   [OUTPUT]-chars        :   Character count for the input.
   [OUTPUT]-short-words  :   Word count for short words only.
   [OUTPUT]-words        :   Word count for all other words.
 
 To execute this pipeline locally, specify a local output file or output prefix
-on GCS:
+on GCS:::
+
   --output [YOUR_LOCAL_FILE | gs://YOUR_OUTPUT_PREFIX]
 
 To execute this pipeline using the Google Cloud Dataflow service, specify
-pipeline configuration:
+pipeline configuration:::
+
   --project YOUR_PROJECT_ID
   --staging_location gs://YOUR_STAGING_DIRECTORY
   --temp_location gs://YOUR_TEMP_DIRECTORY
   --job_name YOUR_JOB_NAME
   --runner DataflowRunner
 
-and an output prefix on GCS:
+and an output prefix on GCS:::
+
   --output gs://YOUR_OUTPUT_PREFIX
 """
 

http://git-wip-us.apache.org/repos/asf/beam/blob/228b18e8/sdks/python/apache_beam/examples/snippets/snippets.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/examples/snippets/snippets.py b/sdks/python/apache_beam/examples/snippets/snippets.py
index e7f28b0..b5dfe8f 100644
--- a/sdks/python/apache_beam/examples/snippets/snippets.py
+++ b/sdks/python/apache_beam/examples/snippets/snippets.py
@@ -583,6 +583,7 @@ def model_custom_source(count):
   distributed to a large number of end users.
 
   This method runs two pipelines.
+
   (1) A pipeline that uses ``CountingSource`` directly using the ``df.Read``
       transform.
   (2) A pipeline that uses a custom ``PTransform`` that wraps
@@ -591,6 +592,7 @@ def model_custom_source(count):
   Args:
     count: the size of the counting source to be used in the pipeline
            demonstrated in this method.
+
   """
 
   import apache_beam as beam
@@ -708,6 +710,7 @@ def model_custom_sink(simplekv, KVs, final_table_name_no_ptransform,
   distributed to a large number of end users.
 
   This method runs two pipelines.
+
   (1) A pipeline that uses ``SimpleKVSink`` directly using the ``df.Write``
       transform.
   (2) A pipeline that uses a custom ``PTransform`` that wraps
@@ -715,10 +718,13 @@ def model_custom_sink(simplekv, KVs, final_table_name_no_ptransform,
 
   Args:
     simplekv: an object that mocks the key-value storage.
+
     KVs: the set of key-value pairs to be written in the example pipeline.
+
     final_table_name_no_ptransform: the prefix of final set of tables to be
                                     created by the example pipeline that uses
                                     ``SimpleKVSink`` directly.
+
     final_table_name_with_ptransform: the prefix of final set of tables to be
                                       created by the example pipeline that uses
                                       a ``PTransform`` that wraps

http://git-wip-us.apache.org/repos/asf/beam/blob/228b18e8/sdks/python/apache_beam/io/google_cloud_platform/bigquery.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/io/google_cloud_platform/bigquery.py b/sdks/python/apache_beam/io/google_cloud_platform/bigquery.py
index a5d5ab2..56b3e46 100644
--- a/sdks/python/apache_beam/io/google_cloud_platform/bigquery.py
+++ b/sdks/python/apache_beam/io/google_cloud_platform/bigquery.py
@@ -43,7 +43,7 @@ completely every time a ParDo DoFn gets executed. In the example below the
 lambda function implementing the DoFn for the Map transform will get on each
 call *one* row of the main table and *all* rows of the side table. The runner
 may use some caching techniques to share the side inputs between calls in order
-to avoid excessive reading:
+to avoid excessive reading:::
 
   main_table = pipeline | 'very_big' >> beam.io.Read(beam.io.BigQuerySource()
   side_table = pipeline | 'not_big' >> beam.io.Read(beam.io.BigQuerySource()
@@ -69,7 +69,7 @@ a query.
 
 Users may provide a query to read from rather than reading all of a BigQuery
 table. If specified, the result obtained by executing the specified query will
-be used as the data of the input transform.
+be used as the data of the input transform.::
 
   query_results = pipeline | beam.io.Read(beam.io.BigQuerySource(
       query='SELECT year, mean_temp FROM samples.weather_stations'))

http://git-wip-us.apache.org/repos/asf/beam/blob/228b18e8/sdks/python/apache_beam/io/iobase.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/io/iobase.py b/sdks/python/apache_beam/io/iobase.py
index 2905111..bd56466 100644
--- a/sdks/python/apache_beam/io/iobase.py
+++ b/sdks/python/apache_beam/io/iobase.py
@@ -86,6 +86,7 @@ class BoundedSource(HasDisplayData):
     ``RangeTracker``.
 
   A runner will perform reading the source in two steps.
+
   (1) Method ``get_range_tracker()`` will be invoked with start and end
       positions to obtain a ``RangeTracker`` for the range of positions the
       runner intends to read. Source must define a default initial start and end
@@ -137,6 +138,7 @@ class BoundedSource(HasDisplayData):
 
     Framework may invoke ``read()`` method with the RangeTracker object returned
     here to read data from the source.
+
     Args:
       start_position: starting position of the range. If 'None' default start
                       position of the source must be used.

http://git-wip-us.apache.org/repos/asf/beam/blob/228b18e8/sdks/python/apache_beam/io/source_test_utils.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/io/source_test_utils.py b/sdks/python/apache_beam/io/source_test_utils.py
index f5e599f..07de738 100644
--- a/sdks/python/apache_beam/io/source_test_utils.py
+++ b/sdks/python/apache_beam/io/source_test_utils.py
@@ -364,28 +364,29 @@ def assertSplitAtFractionSucceedsAndConsistent(source,
                                                split_fraction):
   """Verifies some consistency properties of dynamic work rebalancing.
 
-  Equivalent to the following pseudocode:
-
-  original_range_tracker = source.getRangeTracker(None, None)
-  original_reader = source.read(original_range_tracker)
-  items_before_split = read N items from original_reader
-  suggested_split_position = original_range_tracker.position_for_fraction(
-    split_fraction)
-  original_stop_position - original_range_tracker.stop_position()
-  split_result = range_tracker.try_split()
-  split_position, split_fraction = split_result
-  primary_range_tracker = source.get_range_tracker(
-    original_range_tracker.start_position(), split_position)
-  residual_range_tracker = source.get_range_tracker(split_position,
-    original_stop_position)
-
-  assert that: items when reading source.read(primary_range_tracker) ==
-    items_before_split + items from continuing to read 'original_reader'
-  assert that: items when reading source.read(original_range_tracker) =
-    items when reading source.read(primary_range_tracker) + items when reading
+  Equivalent to the following pseudocode:::
+
+    original_range_tracker = source.getRangeTracker(None, None)
+    original_reader = source.read(original_range_tracker)
+    items_before_split = read N items from original_reader
+    suggested_split_position = original_range_tracker.position_for_fraction(
+      split_fraction)
+    original_stop_position - original_range_tracker.stop_position()
+    split_result = range_tracker.try_split()
+    split_position, split_fraction = split_result
+    primary_range_tracker = source.get_range_tracker(
+      original_range_tracker.start_position(), split_position)
+    residual_range_tracker = source.get_range_tracker(split_position,
+      original_stop_position)
+
+    assert that: items when reading source.read(primary_range_tracker) ==
+      items_before_split + items from continuing to read 'original_reader'
+    assert that: items when reading source.read(original_range_tracker) =
+      items when reading source.read(primary_range_tracker) + items when reading
     source.read(residual_range_tracker)
 
   Args:
+
     source: source to perform dynamic work rebalancing on.
     num_items_to_read_before_split: number of items to read before splitting.
     split_fraction: fraction to split at.

http://git-wip-us.apache.org/repos/asf/beam/blob/228b18e8/sdks/python/apache_beam/io/textio.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/io/textio.py b/sdks/python/apache_beam/io/textio.py
index a06ad4b..2ddaf02 100644
--- a/sdks/python/apache_beam/io/textio.py
+++ b/sdks/python/apache_beam/io/textio.py
@@ -336,7 +336,8 @@ class ReadFromText(PTransform):
   UTF-8 encoding. Supports newline delimiters '\n' and '\r\n'.
 
   This implementation only supports reading text encoded using UTF-8 or ASCII.
-  This does not support other encodings such as UTF-16 or UTF-32."""
+  This does not support other encodings such as UTF-16 or UTF-32.
+  """
   def __init__(
       self,
       file_pattern=None,

http://git-wip-us.apache.org/repos/asf/beam/blob/228b18e8/sdks/python/apache_beam/metrics/execution.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/metrics/execution.py b/sdks/python/apache_beam/metrics/execution.py
index 3ba1735..f6c8990 100644
--- a/sdks/python/apache_beam/metrics/execution.py
+++ b/sdks/python/apache_beam/metrics/execution.py
@@ -21,6 +21,7 @@ Internal classes for Metrics API.
 The classes in this file keep shared state, and organize metrics information.
 
 Available classes:
+
 - MetricKey - Internal key for a metric.
 - MetricResult - Current status of a metric's updates/commits.
 - MetricsEnvironment - Keeps track of MetricsContainer and other metrics

http://git-wip-us.apache.org/repos/asf/beam/blob/228b18e8/sdks/python/apache_beam/metrics/metricbase.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/metrics/metricbase.py b/sdks/python/apache_beam/metrics/metricbase.py
index 1ad6962..fa0ca75 100644
--- a/sdks/python/apache_beam/metrics/metricbase.py
+++ b/sdks/python/apache_beam/metrics/metricbase.py
@@ -18,9 +18,10 @@
 """
 The classes in this file are interfaces for metrics. They are not intended
 to be subclassed or created directly by users. To work with and access metrics,
- users should use the classes and methods exposed in metric.py.
+users should use the classes and methods exposed in metric.py.
 
 Available classes:
+
 - Metric - Base interface of a metrics object.
 - Counter - Counter metric interface. Allows a count to be incremented or
     decremented during pipeline execution.

http://git-wip-us.apache.org/repos/asf/beam/blob/228b18e8/sdks/python/apache_beam/runners/direct/bundle_factory.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/runners/direct/bundle_factory.py b/sdks/python/apache_beam/runners/direct/bundle_factory.py
index 38a0484..63319af 100644
--- a/sdks/python/apache_beam/runners/direct/bundle_factory.py
+++ b/sdks/python/apache_beam/runners/direct/bundle_factory.py
@@ -62,7 +62,8 @@ class Bundle(object):
   to restore WindowedValues upon get_elements() call.
 
   When this optimization is not desired, it can be avoided by an option when
-  creating bundles, like:
+  creating bundles, like:::
+
     b = Bundle(stacked=False)
   """
 
@@ -71,7 +72,8 @@ class Bundle(object):
 
     It must be initialized from a single WindowedValue.
 
-    Example:
+    Example:::
+
       s = StackedWindowedValues(windowed_value)
       if (another_windowed_value.timestamp == s.timestamp and
           another_windowed_value.windows == s.windows):

http://git-wip-us.apache.org/repos/asf/beam/blob/228b18e8/sdks/python/apache_beam/transforms/core.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/transforms/core.py b/sdks/python/apache_beam/transforms/core.py
index 2efe38d..7abd784 100644
--- a/sdks/python/apache_beam/transforms/core.py
+++ b/sdks/python/apache_beam/transforms/core.py
@@ -142,8 +142,6 @@ class DoFn(WithTypeHints, HasDisplayData):
 
     Args:
       element: The element to be processed
-      context: a DoFnProcessContext object containing. See the
-        DoFnProcessContext documentation for details.
       *args: side inputs
       **kwargs: keyword side inputs
     """
@@ -915,7 +913,7 @@ class CombinePerKey(PTransformWithSideInputs):
   Args:
     pcoll: input pcollection.
     fn: instance of CombineFn to apply to all values under the same key in
-      pcoll, or a callable whose signature is f(iterable, *args, **kwargs)
+      pcoll, or a callable whose signature is ``f(iterable, *args, **kwargs)``
       (e.g., sum, max).
     *args: arguments and side inputs, passed directly to the CombineFn.
     **kwargs: arguments and side inputs, passed directly to the CombineFn.

http://git-wip-us.apache.org/repos/asf/beam/blob/228b18e8/sdks/python/apache_beam/transforms/display.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/transforms/display.py b/sdks/python/apache_beam/transforms/display.py
index 6e74512..5e25060 100644
--- a/sdks/python/apache_beam/transforms/display.py
+++ b/sdks/python/apache_beam/transforms/display.py
@@ -25,6 +25,7 @@ add static display data to a component, you can override the display_data
 method of the HasDisplayData class.
 
 Available classes:
+
 - HasDisplayData - Components that inherit from this class can have static
     display data shown in the UI.
 - DisplayDataItem - This class represents static display data elements.

http://git-wip-us.apache.org/repos/asf/beam/blob/228b18e8/sdks/python/apache_beam/utils/annotations.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/utils/annotations.py b/sdks/python/apache_beam/utils/annotations.py
index aa53554..1ec0848 100644
--- a/sdks/python/apache_beam/utils/annotations.py
+++ b/sdks/python/apache_beam/utils/annotations.py
@@ -25,33 +25,37 @@ Both 'deprecated' and 'experimental' annotations can specify the
 current recommended version to use by means of a 'current' parameter.
 
 The following example illustrates how to annotate coexisting versions of the
-same function 'multiply'.
-def multiply(arg1, arg2):
-  print arg1, '*', arg2, '=',
-  return arg1*arg2
+same function 'multiply'.::
+
+  def multiply(arg1, arg2):
+    print arg1, '*', arg2, '=',
+    return arg1*arg2
 
 # This annotation marks 'old_multiply' as deprecated since 'v.1' and suggests
-# using 'multiply' instead.
-@deprecated(since='v.1', current='multiply')
-def old_multiply(arg1, arg2):
-  result = 0
-  for i in xrange(arg1):
-      result += arg2
-  print arg1, '*', arg2, '(the old way)=',
-  return result
+# using 'multiply' instead.::
+
+  @deprecated(since='v.1', current='multiply')
+  def old_multiply(arg1, arg2):
+    result = 0
+    for i in xrange(arg1):
+        result += arg2
+    print arg1, '*', arg2, '(the old way)=',
+    return result
 
 # This annotation marks 'exp_multiply' as experimental and suggests
-# using 'multiply' instead.
-@experimental(since='v.1', current='multiply')
-def exp_multiply(arg1, arg2):
-  print arg1, '*', arg2, '(the experimental way)=',
-  return (arg1*arg2)*(arg1/arg2)*(arg2/arg1)
-
-# Set a warning filter to control how often warnings are produced
-warnings.simplefilter("always")
-print multiply(5, 6)
-print old_multiply(5,6)
-print exp_multiply(5,6)
+# using 'multiply' instead.::
+
+  @experimental(since='v.1', current='multiply')
+  def exp_multiply(arg1, arg2):
+    print arg1, '*', arg2, '(the experimental way)=',
+    return (arg1*arg2)*(arg1/arg2)*(arg2/arg1)
+
+# Set a warning filter to control how often warnings are produced.::
+
+  warnings.simplefilter("always")
+  print multiply(5, 6)
+  print old_multiply(5,6)
+  print exp_multiply(5,6)
 """
 
 import warnings

http://git-wip-us.apache.org/repos/asf/beam/blob/228b18e8/sdks/python/apache_beam/utils/profiler.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/utils/profiler.py b/sdks/python/apache_beam/utils/profiler.py
index 3599f98..2a2df17 100644
--- a/sdks/python/apache_beam/utils/profiler.py
+++ b/sdks/python/apache_beam/utils/profiler.py
@@ -72,7 +72,8 @@ class Profile(object):
 
 class MemoryReporter(object):
   """A memory reporter that reports the memory usage and heap profile.
-  Usage:
+  Usage:::
+
     mr = MemoryReporter(interval_second=30.0)
     mr.start()
     while ...
@@ -88,7 +89,8 @@ class MemoryReporter(object):
       while ...
         <do some thing>
 
-  Also it could report on demand without continuous reporting.
+  Also it could report on demand without continuous reporting.::
+
     mr = MemoryReporter()  # default interval 60s but not started.
     <do something>
     mr.report_once()

http://git-wip-us.apache.org/repos/asf/beam/blob/228b18e8/sdks/python/generate_pydoc.sh
----------------------------------------------------------------------
diff --git a/sdks/python/generate_pydoc.sh b/sdks/python/generate_pydoc.sh
index f96a649..d680f30 100755
--- a/sdks/python/generate_pydoc.sh
+++ b/sdks/python/generate_pydoc.sh
@@ -29,7 +29,10 @@ set -e
 mkdir -p target/docs/source
 
 # Exclude autogenerated API message definition files that aren't part of SDK.
-excluded_internal_clients=(apache_beam/internal/clients/)
+excluded_internal_clients=(
+    apache_beam/internal/clients/
+    apache_beam/io/google_cloud_platform/internal/clients/
+    apache_beam/runners/google_cloud_dataflow/internal/clients/)
 python $(type -p sphinx-apidoc) -f -o target/docs/source apache_beam \
     "${excluded_internal_clients[@]}"
 
@@ -59,8 +62,11 @@ EOF
 
 # Build the documentation using sphinx
 python $(type -p sphinx-build) -q target/docs/source target/docs/_build -c target/docs/source \
-    -w "/tmp/sphinx-build.warnings.log"
+    -w "target/docs/sphinx-build.warnings.log"
 
 # Message is useful only when this script is run locally.  In a remote
 # test environment, this path will be removed when the test completes.
 echo "Browse to file://$PWD/target/docs/_build/index.html"
+
+# Fail if there are errors in docs
+! grep -q "ERROR:" target/docs/sphinx-build.warnings.log


Mime
View raw message