arrow-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From w...@apache.org
Subject [arrow] branch master updated: ARROW-1893: [Python] Convert memoryview to bytes when loading from pickle in Python 2.7
Date Thu, 07 Dec 2017 16:01:03 GMT
This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new aaa978b  ARROW-1893: [Python] Convert memoryview to bytes when loading from pickle in Python 2.7
aaa978b is described below

commit aaa978b9c072d3c436838ad2a35b0f6bde1891b6
Author: Wes McKinney <wes.mckinney@twosigma.com>
AuthorDate: Thu Dec 7 11:00:56 2017 -0500

    ARROW-1893: [Python] Convert memoryview to bytes when loading from pickle in Python 2.7
    
    It seems somewhere in the 2.7.x series, Python 2.7 acquired the ability to load from memoryview.
    To be on the safe side, we'll always convert memoryview to bytes. Here's a related workaround
    from IPython:
    
    https://github.com/ipython/ipython_genutils/blob/master/ipython_genutils/py3compat.py#L153
    
    Author: Wes McKinney <wes.mckinney@twosigma.com>
    
    Closes #1398 from wesm/ARROW-1893 and squashes the following commits:
    
    cbe69134 [Wes McKinney] Not all versions of Python 2.7 can load pickles directly from memoryview
---
 python/pyarrow/compat.py             |  4 +++-
 python/pyarrow/serialization.py      | 13 +++++++++----
 python/pyarrow/tests/test_parquet.py | 20 +++++++++++---------
 3 files changed, 23 insertions(+), 14 deletions(-)

diff --git a/python/pyarrow/compat.py b/python/pyarrow/compat.py
index 866cbdd..1b19ca0 100644
--- a/python/pyarrow/compat.py
+++ b/python/pyarrow/compat.py
@@ -70,7 +70,7 @@ else:
 
 
 if PY2:
-    import cPickle
+    import cPickle as builtin_pickle
 
     try:
         from cdecimal import Decimal
@@ -107,6 +107,8 @@ if PY2:
     def unichar(s):
         return unichr(s)
 else:
+    import pickle as builtin_pickle
+
     unicode_type = str
     def lzip(*x):
         return list(zip(*x))
diff --git a/python/pyarrow/serialization.py b/python/pyarrow/serialization.py
index b6d2b02..3059dfc 100644
--- a/python/pyarrow/serialization.py
+++ b/python/pyarrow/serialization.py
@@ -16,18 +16,19 @@
 # under the License.
 
 from collections import OrderedDict, defaultdict
+import six
 import sys
-import pickle
 
 import numpy as np
 
 from pyarrow import serialize_pandas, deserialize_pandas
+from pyarrow.compat import builtin_pickle
 from pyarrow.lib import _default_serialization_context, frombuffer
 
 try:
     import cloudpickle
 except ImportError:
-    cloudpickle = pickle
+    cloudpickle = builtin_pickle
 
 
 # ----------------------------------------------------------------------
@@ -44,12 +45,16 @@ def _deserialize_numpy_array_list(data):
 
 
 def _pickle_to_buffer(x):
-    pickled = pickle.dumps(x, protocol=pickle.HIGHEST_PROTOCOL)
+    pickled = builtin_pickle.dumps(x, protocol=builtin_pickle.HIGHEST_PROTOCOL)
     return frombuffer(pickled)
 
 
 def _load_pickle_from_buffer(data):
-    return pickle.loads(memoryview(data))
+    as_memoryview = memoryview(data)
+    if six.PY2:
+        return builtin_pickle.loads(as_memoryview.tobytes())
+    else:
+        return builtin_pickle.loads(as_memoryview)
 
 
 _serialize_numpy_array_pickle = _pickle_to_buffer
diff --git a/python/pyarrow/tests/test_parquet.py b/python/pyarrow/tests/test_parquet.py
index d17d89e..2543e7d 100644
--- a/python/pyarrow/tests/test_parquet.py
+++ b/python/pyarrow/tests/test_parquet.py
@@ -1570,19 +1570,21 @@ carat        cut  color  clarity  depth  table  price     x     y     z
     tm.assert_frame_equal(result, expected)
 
 
-@pytest.mark.parametrize('precision', range(1, 39))
-def test_decimal_roundtrip(tmpdir, precision):
+def test_decimal_roundtrip(tmpdir):
     num_values = 10
 
     columns = {}
 
-    for scale in range(0, precision + 1):
-        with util.random_seed(0):
-            random_decimal_values = [
-                util.randdecimal(precision, scale) for _ in range(num_values)
-            ]
-        column_name = 'dec_precision_{:d}_scale_{:d}'.format(precision, scale)
-        columns[column_name] = random_decimal_values
+    for precision in range(1, 39):
+        for scale in range(0, precision + 1):
+            with util.random_seed(0):
+                random_decimal_values = [
+                    util.randdecimal(precision, scale)
+                    for _ in range(num_values)
+                ]
+            column_name = ('dec_precision_{:d}_scale_{:d}'
+                           .format(precision, scale))
+            columns[column_name] = random_decimal_values
 
     expected = pd.DataFrame(columns)
     filename = tmpdir.join('decimals.parquet')

-- 
To stop receiving notification emails like this one, please contact
commits@arrow.apache.org.

Mime
View raw message