arrow-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From w...@apache.org
Subject arrow git commit: ARROW-727: [Python] Ensure that NativeFile.write accepts any bytes, unicode, or object providing buffer protocol. Rename build_arrow_buffer to pyarrow.frombuffer
Date Thu, 30 Mar 2017 22:42:58 GMT
Repository: arrow
Updated Branches:
  refs/heads/master ba4f478e7 -> edd6cfcd9


ARROW-727: [Python] Ensure that NativeFile.write accepts any bytes, unicode, or object providing
the buffer protocol. Rename build_arrow_buffer to pyarrow.frombuffer

Author: Wes McKinney <wes.mckinney@twosigma.com>

Closes #464 from wesm/ARROW-727 and squashes the following commits:

c93edb0 [Wes McKinney] Rename build_arrow_buffer to pyarrow.frombuffer. Ensure that NativeFile.write
accepts any bytes, unicode, or object providing buffer protocol


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/edd6cfcd
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/edd6cfcd
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/edd6cfcd

Branch: refs/heads/master
Commit: edd6cfcd9bfc02b2ed093f22acf830a57422f7b3
Parents: ba4f478
Author: Wes McKinney <wes.mckinney@twosigma.com>
Authored: Thu Mar 30 18:42:52 2017 -0400
Committer: Wes McKinney <wes.mckinney@twosigma.com>
Committed: Thu Mar 30 18:42:52 2017 -0400

----------------------------------------------------------------------
 python/pyarrow/__init__.py      |  3 ++-
 python/pyarrow/io.pyx           | 16 ++++++++++------
 python/pyarrow/tests/test_io.py | 29 +++++++++++++++++++++++------
 3 files changed, 35 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/edd6cfcd/python/pyarrow/__init__.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
index c6f0be0..dce4389 100644
--- a/python/pyarrow/__init__.py
+++ b/python/pyarrow/__init__.py
@@ -43,7 +43,8 @@ from pyarrow.error import ArrowException
 
 from pyarrow.filesystem import Filesystem, HdfsClient, LocalFilesystem
 from pyarrow.io import (HdfsFile, NativeFile, PythonFileInterface,
-                        Buffer, InMemoryOutputStream, BufferReader)
+                        Buffer, InMemoryOutputStream, BufferReader,
+                        frombuffer)
 
 from pyarrow.ipc import FileReader, FileWriter, StreamReader, StreamWriter
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/edd6cfcd/python/pyarrow/io.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/io.pyx b/python/pyarrow/io.pyx
index d528bdc..d64427a 100644
--- a/python/pyarrow/io.pyx
+++ b/python/pyarrow/io.pyx
@@ -126,14 +126,18 @@ cdef class NativeFile:
 
     def write(self, data):
         """
-        Write bytes-like (unicode, encoded to UTF-8) to file
+        Write bytes from any object implementing the buffer protocol (bytes,
+        bytearray, ndarray, pyarrow.Buffer)
         """
         self._assert_writeable()
 
-        data = tobytes(data)
+        if isinstance(data, six.string_types):
+            data = tobytes(data)
 
-        cdef const uint8_t* buf = <const uint8_t*> cp.PyBytes_AS_STRING(data)
-        cdef int64_t bufsize = len(data)
+        cdef Buffer arrow_buffer = frombuffer(data)
+
+        cdef const uint8_t* buf = arrow_buffer.buffer.get().data()
+        cdef int64_t bufsize = len(arrow_buffer)
         with nogil:
             check_status(self.wr_file.get().Write(buf, bufsize))
 
@@ -505,7 +509,7 @@ cdef class BufferReader(NativeFile):
         if isinstance(obj, Buffer):
             self.buffer = obj
         else:
-            self.buffer = build_arrow_buffer(obj)
+            self.buffer = frombuffer(obj)
 
         self.rd_file.reset(new CBufferReader(self.buffer.buffer))
         self.is_readable = 1
@@ -513,7 +517,7 @@ cdef class BufferReader(NativeFile):
         self.is_open = True
 
 
-def build_arrow_buffer(object obj):
+def frombuffer(object obj):
     """
     Construct an Arrow buffer from a Python bytes object
     """

http://git-wip-us.apache.org/repos/asf/arrow/blob/edd6cfcd/python/pyarrow/tests/test_io.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_io.py b/python/pyarrow/tests/test_io.py
index 9cd15c4..15c5e6b 100644
--- a/python/pyarrow/tests/test_io.py
+++ b/python/pyarrow/tests/test_io.py
@@ -23,6 +23,7 @@ import pytest
 import numpy as np
 
 from pyarrow.compat import u, guid
+import pyarrow as pa
 import pyarrow.io as io
 
 # ----------------------------------------------------------------------
@@ -127,28 +128,29 @@ def test_bytes_reader_retains_parent_reference():
 def test_buffer_bytes():
     val = b'some data'
 
-    buf = io.build_arrow_buffer(val)
+    buf = pa.frombuffer(val)
     assert isinstance(buf, io.Buffer)
 
     result = buf.to_pybytes()
 
     assert result == val
 
+
 def test_buffer_memoryview():
     val = b'some data'
 
-    buf = io.build_arrow_buffer(val)
+    buf = pa.frombuffer(val)
     assert isinstance(buf, io.Buffer)
 
     result = memoryview(buf)
 
     assert result == val
 
+
 def test_buffer_bytearray():
     val = bytearray(b'some data')
 
-
-    buf = io.build_arrow_buffer(val)
+    buf = pa.frombuffer(val)
     assert isinstance(buf, io.Buffer)
 
     result = bytearray(buf)
@@ -159,7 +161,7 @@ def test_buffer_bytearray():
 def test_buffer_memoryview_is_immutable():
     val = b'some data'
 
-    buf = io.build_arrow_buffer(val)
+    buf = pa.frombuffer(val)
     assert isinstance(buf, io.Buffer)
 
     result = memoryview(buf)
@@ -198,21 +200,36 @@ def test_inmemory_write_after_closed():
     with pytest.raises(IOError):
         f.write(b'not ok')
 
+
 def test_buffer_protocol_ref_counting():
     import gc
 
     def make_buffer(bytes_obj):
-        return bytearray(io.build_arrow_buffer(bytes_obj))
+        return bytearray(pa.frombuffer(bytes_obj))
 
     buf = make_buffer(b'foo')
     gc.collect()
     assert buf == b'foo'
 
 
+def test_nativefile_write_memoryview():
+    f = io.InMemoryOutputStream()
+    data = b'ok'
+
+    arr = np.frombuffer(data, dtype='S1')
+
+    f.write(arr)
+    f.write(bytearray(data))
+
+    buf = f.get_result()
+
+    assert buf.to_pybytes() == data * 2
+
 
 # ----------------------------------------------------------------------
 # OS files and memory maps
 
+
 @pytest.fixture
 def sample_disk_data(request):
     SIZE = 4096


Mime
View raw message