arrow-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From w...@apache.org
Subject arrow git commit: ARROW-1593: [Python] Pass through preserve_index to RecordBatch.from_pandas in serialize_pandas
Date Tue, 10 Oct 2017 01:03:00 GMT
Repository: arrow
Updated Branches:
  refs/heads/master a0555c04d -> bf2e3ab29


ARROW-1593: [Python] Pass through preserve_index to RecordBatch.from_pandas in serialize_pandas

Author: Wes McKinney <wes.mckinney@twosigma.com>

Closes #1190 from wesm/ARROW-1593 and squashes the following commits:

4cfde4b4 [Wes McKinney] Also test passing preserve_index=True
04dc0171 [Wes McKinney] Pass through preserve_index to RecordBatch.from_pandas in serialize_pandas


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/bf2e3ab2
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/bf2e3ab2
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/bf2e3ab2

Branch: refs/heads/master
Commit: bf2e3ab2979ce59002c616369c62822696b7948f
Parents: a0555c0
Author: Wes McKinney <wes.mckinney@twosigma.com>
Authored: Mon Oct 9 21:02:48 2017 -0400
Committer: Wes McKinney <wes.mckinney@twosigma.com>
Committed: Mon Oct 9 21:02:48 2017 -0400

----------------------------------------------------------------------
 python/pyarrow/ipc.py            |  8 ++++++--
 python/pyarrow/pandas_compat.py  |  4 ++--
 python/pyarrow/tests/test_ipc.py | 13 +++++++++++++
 3 files changed, 21 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/bf2e3ab2/python/pyarrow/ipc.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/ipc.py b/python/pyarrow/ipc.py
index 8cb6cdd..f264f08 100644
--- a/python/pyarrow/ipc.py
+++ b/python/pyarrow/ipc.py
@@ -142,7 +142,7 @@ def open_file(source, footer_offset=None):
     return RecordBatchFileReader(source, footer_offset=footer_offset)
 
 
-def serialize_pandas(df, nthreads=None):
+def serialize_pandas(df, nthreads=None, preserve_index=True):
     """Serialize a pandas DataFrame into a buffer protocol compatible object.
 
     Parameters
@@ -150,13 +150,17 @@ def serialize_pandas(df, nthreads=None):
     df : pandas.DataFrame
     nthreads : int, default None
         Number of threads to use for conversion to Arrow, default all CPUs
+    preserve_index : boolean, default True
+        If True, preserve the pandas index data, otherwise the result will have
+        a default RangeIndex
 
     Returns
     -------
     buf : buffer
         An object compatible with the buffer protocol
     """
-    batch = pa.RecordBatch.from_pandas(df, nthreads=nthreads)
+    batch = pa.RecordBatch.from_pandas(df, nthreads=nthreads,
+                                       preserve_index=preserve_index)
     sink = pa.BufferOutputStream()
     writer = pa.RecordBatchStreamWriter(sink, batch.schema)
     writer.write_batch(batch)

http://git-wip-us.apache.org/repos/asf/arrow/blob/bf2e3ab2/python/pyarrow/pandas_compat.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py
index a071e56..5592d8d 100644
--- a/python/pyarrow/pandas_compat.py
+++ b/python/pyarrow/pandas_compat.py
@@ -485,8 +485,8 @@ def table_to_blockmanager(options, table, memory_pool, nthreads=1):
             )
         ]
         new_levels = [
-            level if level.dtype == dtype else level.astype(dtype)
-            for level, dtype in levels_dtypes
+            _level if _level.dtype == _dtype else _level.astype(_dtype)
+            for _level, _dtype in levels_dtypes
         ]
         columns = pd.MultiIndex(
             levels=new_levels,

http://git-wip-us.apache.org/repos/asf/arrow/blob/bf2e3ab2/python/pyarrow/tests/test_ipc.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_ipc.py b/python/pyarrow/tests/test_ipc.py
index 0d5b673..fcde582 100644
--- a/python/pyarrow/tests/test_ipc.py
+++ b/python/pyarrow/tests/test_ipc.py
@@ -416,6 +416,19 @@ def test_pandas_serialize_round_trip_not_string_columns():
     assert_frame_equal(result, df)
 
 
+def test_serialize_pandas_no_preserve_index():
+    df = pd.DataFrame({'a': [1, 2, 3]}, index=[1, 2, 3])
+    expected = pd.DataFrame({'a': [1, 2, 3]})
+
+    buf = pa.serialize_pandas(df, preserve_index=False)
+    result = pa.deserialize_pandas(buf)
+    assert_frame_equal(result, expected)
+
+    buf = pa.serialize_pandas(df, preserve_index=True)
+    result = pa.deserialize_pandas(buf)
+    assert_frame_equal(result, df)
+
+
 def test_schema_batch_serialize_methods():
     nrows = 5
     df = pd.DataFrame({


Mime
View raw message