beam-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From rober...@apache.org
Subject [1/2] incubator-beam git commit: Making sure that GcsBufferedReader implements the iterator protocol
Date Fri, 07 Oct 2016 23:40:32 GMT
Repository: incubator-beam
Updated Branches:
  refs/heads/python-sdk 4c1ad11d1 -> 7d0758b64


Making sure that GcsBufferedReader implements the iterator protocol

Adding raise StopIteration to GcsBufferedReader.next

Adding unit tests, and __next__ for Python 3

Fixing some lint issues in unit test

Improving test readability.


Project: http://git-wip-us.apache.org/repos/asf/incubator-beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-beam/commit/07c7aafb
Tree: http://git-wip-us.apache.org/repos/asf/incubator-beam/tree/07c7aafb
Diff: http://git-wip-us.apache.org/repos/asf/incubator-beam/diff/07c7aafb

Branch: refs/heads/python-sdk
Commit: 07c7aafbab1f4e700f30467dbd84cea3ca19d76e
Parents: 4c1ad11
Author: polecito.em@gmail.com <polecito.em@gmail.com>
Authored: Thu Sep 29 15:41:25 2016 -0700
Committer: Robert Bradshaw <robertwb@gmail.com>
Committed: Fri Oct 7 16:40:23 2016 -0700

----------------------------------------------------------------------
 sdks/python/apache_beam/io/gcsio.py      | 16 ++++++++++++++++
 sdks/python/apache_beam/io/gcsio_test.py | 21 +++++++++++++++++++++
 2 files changed, 37 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/07c7aafb/sdks/python/apache_beam/io/gcsio.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/io/gcsio.py b/sdks/python/apache_beam/io/gcsio.py
index 5a83004..9fcce5b 100644
--- a/sdks/python/apache_beam/io/gcsio.py
+++ b/sdks/python/apache_beam/io/gcsio.py
@@ -307,6 +307,22 @@ class GcsBufferedReader(object):
   def _get_object_metadata(self, get_request):
     return self.client.objects.Get(get_request)
 
+  def __iter__(self):
+    return self
+
+  def __next__(self):
+    """Read one line delimited by '\\n' from the file.
+    """
+    return self.next()
+
+  def next(self):
+    """Read one line delimited by '\\n' from the file.
+    """
+    line = self.readline()
+    if not line:
+      raise StopIteration
+    return line
+
   def read(self, size=-1):
     """Read data from a GCS file.
 

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/07c7aafb/sdks/python/apache_beam/io/gcsio_test.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/io/gcsio_test.py b/sdks/python/apache_beam/io/gcsio_test.py
index 919e9d2..2e9945a 100644
--- a/sdks/python/apache_beam/io/gcsio_test.py
+++ b/sdks/python/apache_beam/io/gcsio_test.py
@@ -341,6 +341,27 @@ class TestGCSIO(unittest.TestCase):
           f.read(end - start + 1), random_file.contents[start:end + 1])
       self.assertEqual(f.tell(), end + 1)
 
+  def test_file_iterator(self):
+    file_name = 'gs://gcsio-test/iterating_file'
+    lines = []
+    line_count = 10
+    for _ in range(line_count):
+      line_length = random.randint(100, 500)
+      line = os.urandom(line_length).replace('\n', ' ') + '\n'
+      lines.append(line)
+
+    contents = ''.join(lines)
+    bucket, name = gcsio.parse_gcs_path(file_name)
+    self.client.objects.add_file(FakeFile(bucket, name, contents, 1))
+
+    f = self.gcs.open(file_name)
+
+    read_lines = 0
+    for line in f:
+      read_lines += 1
+
+    self.assertEqual(read_lines, line_count)
+
   def test_file_read_line(self):
     file_name = 'gs://gcsio-test/read_line_file'
     lines = []


Mime
View raw message