arrow-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From w...@apache.org
Subject arrow git commit: ARROW-622 [Python] deprecate timestamps_to_ms in .from_pandas()
Date Thu, 07 Sep 2017 18:09:07 GMT
Repository: arrow
Updated Branches:
  refs/heads/master 3f2fa0381 -> b698227e9


ARROW-622 [Python] deprecate timestamps_to_ms in .from_pandas()

xref https://github.com/pandas-dev/pandas/issues/17438

This issue (ARROW-622) was not fully resolved in https://github.com/apache/arrow/pull/944

Author: Jeff Reback <jeff@reback.net>

Closes #1046 from jreback/warn and squashes the following commits:

382592f [Jeff Reback] deprecate timestamps_to_ms in .from_pandas()


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/b698227e
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/b698227e
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/b698227e

Branch: refs/heads/master
Commit: b698227e9c20bfc24dbf04b747b062881f1f1ad7
Parents: 3f2fa03
Author: Jeff Reback <jeff@reback.net>
Authored: Thu Sep 7 14:09:01 2017 -0400
Committer: Wes McKinney <wes.mckinney@twosigma.com>
Committed: Thu Sep 7 14:09:01 2017 -0400

----------------------------------------------------------------------
 python/pyarrow/array.pxi                    |  2 +
 python/pyarrow/table.pxi                    |  3 ++
 python/pyarrow/tests/test_convert_pandas.py | 62 ++++++------------------
 3 files changed, 19 insertions(+), 48 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/b698227e/python/pyarrow/array.pxi
----------------------------------------------------------------------
diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index 20e778d..a693f45 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -121,6 +121,8 @@ cdef class Array:
             compatibility with other functionality like Parquet I/O which
             only supports milliseconds.
 
+            .. deprecated:: 0.7.0
+
         memory_pool: MemoryPool, optional
             Specific memory pool to use to allocate the resulting Arrow array.
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/b698227e/python/pyarrow/table.pxi
----------------------------------------------------------------------
diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi
index 245371f..fc6099f 100644
--- a/python/pyarrow/table.pxi
+++ b/python/pyarrow/table.pxi
@@ -723,6 +723,9 @@ cdef class Table:
             Convert datetime columns to ms resolution. This is needed for
             compability with other functionality like Parquet I/O which
             only supports milliseconds.
+
+            .. deprecated:: 0.7.0
+
         schema : pyarrow.Schema, optional
             The expected schema of the Arrow Table. This can be used to
             indicate the type of columns if we cannot infer it automatically.

http://git-wip-us.apache.org/repos/asf/arrow/blob/b698227e/python/pyarrow/tests/test_convert_pandas.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_convert_pandas.py b/python/pyarrow/tests/test_convert_pandas.py
index 52290d6..6442434 100644
--- a/python/pyarrow/tests/test_convert_pandas.py
+++ b/python/pyarrow/tests/test_convert_pandas.py
@@ -69,10 +69,10 @@ class TestPandasConversion(unittest.TestCase):
         pass
 
     def _check_pandas_roundtrip(self, df, expected=None, nthreads=1,
-                                timestamps_to_ms=False, expected_schema=None,
+                                expected_schema=None,
                                 check_dtype=True, schema=None,
                                 check_index=False):
-        table = pa.Table.from_pandas(df, timestamps_to_ms=timestamps_to_ms,
+        table = pa.Table.from_pandas(df,
                                      schema=schema, preserve_index=check_index)
         result = table.to_pandas(nthreads=nthreads)
         if expected_schema:
@@ -92,9 +92,8 @@ class TestPandasConversion(unittest.TestCase):
         tm.assert_series_equal(s, result)
 
     def _check_array_roundtrip(self, values, expected=None, mask=None,
-                               timestamps_to_ms=False, type=None):
-        arr = pa.Array.from_pandas(values, timestamps_to_ms=timestamps_to_ms,
-                                   mask=mask, type=type)
+                               type=None):
+        arr = pa.Array.from_pandas(values, mask=mask, type=type)
         result = arr.to_pandas()
 
         values_nulls = pd.isnull(values)
@@ -334,21 +333,6 @@ class TestPandasConversion(unittest.TestCase):
     def test_timestamps_notimezone_no_nulls(self):
         df = pd.DataFrame({
             'datetime64': np.array([
-                '2007-07-13T01:23:34.123',
-                '2006-01-13T12:34:56.432',
-                '2010-08-13T05:46:57.437'],
-                dtype='datetime64[ms]')
-        })
-        field = pa.field('datetime64', pa.timestamp('ms'))
-        schema = pa.schema([field])
-        self._check_pandas_roundtrip(
-            df,
-            timestamps_to_ms=True,
-            expected_schema=schema,
-        )
-
-        df = pd.DataFrame({
-            'datetime64': np.array([
                 '2007-07-13T01:23:34.123456789',
                 '2006-01-13T12:34:56.432539784',
                 '2010-08-13T05:46:57.437699912'],
@@ -357,7 +341,8 @@ class TestPandasConversion(unittest.TestCase):
         field = pa.field('datetime64', pa.timestamp('ns'))
         schema = pa.schema([field])
         self._check_pandas_roundtrip(
-            df, expected_schema=schema,
+            df,
+            expected_schema=schema,
         )
 
     def test_timestamps_to_ms_explicit_schema(self):
@@ -365,8 +350,10 @@ class TestPandasConversion(unittest.TestCase):
         df = pd.DataFrame({'datetime': [datetime(2017, 1, 1)]})
         pa_type = pa.from_numpy_dtype(df['datetime'].dtype)
 
-        arr = pa.Array.from_pandas(df['datetime'], type=pa_type,
-                                   timestamps_to_ms=True)
+        with tm.assert_produces_warning(FutureWarning,
+                                        check_stacklevel=False):
+            arr = pa.Array.from_pandas(df['datetime'], type=pa_type,
+                                       timestamps_to_ms=True)
 
         tm.assert_almost_equal(df['datetime'].values.astype('M8[ms]'),
                                arr.to_pandas())
@@ -374,21 +361,6 @@ class TestPandasConversion(unittest.TestCase):
     def test_timestamps_notimezone_nulls(self):
         df = pd.DataFrame({
             'datetime64': np.array([
-                '2007-07-13T01:23:34.123',
-                None,
-                '2010-08-13T05:46:57.437'],
-                dtype='datetime64[ms]')
-        })
-        field = pa.field('datetime64', pa.timestamp('ms'))
-        schema = pa.schema([field])
-        self._check_pandas_roundtrip(
-            df,
-            timestamps_to_ms=True,
-            expected_schema=schema,
-        )
-
-        df = pd.DataFrame({
-            'datetime64': np.array([
                 '2007-07-13T01:23:34.123456789',
                 None,
                 '2010-08-13T05:46:57.437699912'],
@@ -397,7 +369,8 @@ class TestPandasConversion(unittest.TestCase):
         field = pa.field('datetime64', pa.timestamp('ns'))
         schema = pa.schema([field])
         self._check_pandas_roundtrip(
-            df, expected_schema=schema,
+            df,
+            expected_schema=schema,
         )
 
     def test_timestamps_with_timezone(self):
@@ -410,7 +383,7 @@ class TestPandasConversion(unittest.TestCase):
         })
         df['datetime64'] = (df['datetime64'].dt.tz_localize('US/Eastern')
                             .to_frame())
-        self._check_pandas_roundtrip(df, timestamps_to_ms=True)
+        self._check_pandas_roundtrip(df)
 
         self._check_series_roundtrip(df['datetime64'])
 
@@ -425,15 +398,8 @@ class TestPandasConversion(unittest.TestCase):
         })
         df['datetime64'] = (df['datetime64'].dt.tz_localize('US/Eastern')
                             .to_frame())
-        self._check_pandas_roundtrip(df)
-
-    def test_timestamp_with_tz_to_pandas_type(self):
-        from pyarrow.compat import DatetimeTZDtype
 
-        tz = 'America/Los_Angeles'
-        t = pa.timestamp('ns', tz=tz)
-
-        assert t.to_pandas_dtype() == DatetimeTZDtype('ns', tz=tz)
+        self._check_pandas_roundtrip(df)
 
     def test_date_infer(self):
         df = pd.DataFrame({


Mime
View raw message