arrow-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From u..@apache.org
Subject arrow git commit: ARROW-1421: [Python] Extend Python serialization API to accept non-list types
Date Tue, 29 Aug 2017 08:22:53 GMT
Repository: arrow
Updated Branches:
  refs/heads/master e6456b6b3 -> a009aab0c


ARROW-1421: [Python] Extend Python serialization API to accept non-list types

Author: Philipp Moritz <pcmoritz@gmail.com>

Closes #1005 from pcmoritz/serialize-objects and squashes the following commits:

b194df3 [Philipp Moritz] fix comments
5d34215 [Philipp Moritz] do the wrapping in cython
91d5d38 [Philipp Moritz] allow serialization of arbitrary python objects


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/a009aab0
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/a009aab0
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/a009aab0

Branch: refs/heads/master
Commit: a009aab0c956c8b6e1e451e2bc62857cb3912a1a
Parents: e6456b6
Author: Philipp Moritz <pcmoritz@gmail.com>
Authored: Tue Aug 29 10:22:48 2017 +0200
Committer: Uwe L. Korn <uwelk@xhochy.com>
Committed: Tue Aug 29 10:22:48 2017 +0200

----------------------------------------------------------------------
 python/pyarrow/plasma.pyx                  | 7 ++-----
 python/pyarrow/serialization.pxi           | 8 +++++---
 python/pyarrow/tests/test_serialization.py | 6 +++---
 3 files changed, 10 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/a009aab0/python/pyarrow/plasma.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/plasma.pyx b/python/pyarrow/plasma.pyx
index 515b600..b72e49b 100644
--- a/python/pyarrow/plasma.pyx
+++ b/python/pyarrow/plasma.pyx
@@ -388,9 +388,7 @@ cdef class PlasmaClient:
         """
         cdef ObjectID target_id = (object_id if object_id
                                    else ObjectID.from_random())
-        # TODO(pcm): Make serialization code support non-sequences and
-        # get rid of packing the value into a list here (and unpacking in get)
-        serialized = pyarrow.serialize([value])
+        serialized = pyarrow.serialize(value)
         buffer = self.create(target_id, serialized.total_bytes)
         stream = pyarrow.FixedSizeBufferOutputStream(buffer)
         stream.set_memcopy_threads(4)
@@ -426,8 +424,7 @@ cdef class PlasmaClient:
                 # buffers[i] is None if this object was not available within
                 # the timeout
                 if buffers[i]:
-                    value, = pyarrow.deserialize(buffers[i])
-                    results.append(value)
+                    results.append(pyarrow.deserialize(buffers[i]))
                 else:
                     results.append(ObjectNotAvailable)
             return results

http://git-wip-us.apache.org/repos/asf/arrow/blob/a009aab0/python/pyarrow/serialization.pxi
----------------------------------------------------------------------
diff --git a/python/pyarrow/serialization.pxi b/python/pyarrow/serialization.pxi
index 3ee34ee..062121f 100644
--- a/python/pyarrow/serialization.pxi
+++ b/python/pyarrow/serialization.pxi
@@ -172,8 +172,9 @@ cdef class SerializedPyObject:
             check_status(DeserializeObject(self.data, <PyObject*> self.base,
                                            &result))
 
-        # This is necessary to avoid a memory leak
-        return PyObject_to_object(result)
+        # PyObject_to_object is necessary to avoid a memory leak;
+        # also unpack the list the object was wrapped in in serialize
+        return PyObject_to_object(result)[0]
 
     def to_buffer(self):
         """
@@ -197,8 +198,9 @@ def serialize(object value):
     serialized : SerializedPyObject
     """
     cdef SerializedPyObject serialized = SerializedPyObject()
+    wrapped_value = [value]
     with nogil:
-        check_status(SerializeObject(value, &serialized.data))
+        check_status(SerializeObject(wrapped_value, &serialized.data))
     return serialized
 
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/a009aab0/python/pyarrow/tests/test_serialization.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_serialization.py b/python/pyarrow/tests/test_serialization.py
index 12bf65b..b2aa4af 100644
--- a/python/pyarrow/tests/test_serialization.py
+++ b/python/pyarrow/tests/test_serialization.py
@@ -221,16 +221,16 @@ def large_memory_map(tmpdir_factory):
 def test_primitive_serialization(large_memory_map):
     with pa.memory_map(large_memory_map, mode="r+") as mmap:
         for obj in PRIMITIVE_OBJECTS:
-            serialization_roundtrip([obj], mmap)
+            serialization_roundtrip(obj, mmap)
 
 
 def test_complex_serialization(large_memory_map):
     with pa.memory_map(large_memory_map, mode="r+") as mmap:
         for obj in COMPLEX_OBJECTS:
-            serialization_roundtrip([obj], mmap)
+            serialization_roundtrip(obj, mmap)
 
 
 def test_custom_serialization(large_memory_map):
     with pa.memory_map(large_memory_map, mode="r+") as mmap:
         for obj in CUSTOM_OBJECTS:
-            serialization_roundtrip([obj], mmap)
+            serialization_roundtrip(obj, mmap)


Mime
View raw message