Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 73B55200BC1 for ; Wed, 16 Nov 2016 22:18:59 +0100 (CET) Received: by cust-asf.ponee.io (Postfix) id 727CE160B08; Wed, 16 Nov 2016 21:18:59 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 9295E160B02 for ; Wed, 16 Nov 2016 22:18:58 +0100 (CET) Received: (qmail 67414 invoked by uid 500); 16 Nov 2016 21:18:57 -0000 Mailing-List: contact commits-help@arrow.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@arrow.apache.org Delivered-To: mailing list commits@arrow.apache.org Received: (qmail 67405 invoked by uid 99); 16 Nov 2016 21:18:57 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 16 Nov 2016 21:18:57 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id A47CEE08B3; Wed, 16 Nov 2016 21:18:57 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: wesm@apache.org To: commits@arrow.apache.org Message-Id: <51bfd81f8a5e44a4b41db538ab72daee@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: arrow git commit: ARROW-371: Handle pandas-nullable types correctly Date: Wed, 16 Nov 2016 21:18:57 +0000 (UTC) archived-at: Wed, 16 Nov 2016 21:18:59 -0000 Repository: arrow Updated Branches: refs/heads/master 48f9780a8 -> 78288b5fc ARROW-371: Handle pandas-nullable types correctly Author: Uwe L. Korn Closes #205 from xhochy/ARROW-371 and squashes the following commits: 1f73e8b [Uwe L. Korn] ARROW-371: Handle pandas-nullable types correctly Project: http://git-wip-us.apache.org/repos/asf/arrow/repo Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/78288b5f Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/78288b5f Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/78288b5f Branch: refs/heads/master Commit: 78288b5fca8ff527257e487d45c7e68f7dbd8cd2 Parents: 48f9780 Author: Uwe L. Korn Authored: Wed Nov 16 16:18:50 2016 -0500 Committer: Wes McKinney Committed: Wed Nov 16 16:18:50 2016 -0500 ---------------------------------------------------------------------- python/pyarrow/tests/test_convert_pandas.py | 22 +++++++++++- python/src/pyarrow/adapters/pandas.cc | 46 ++++++++++++------------ 2 files changed, 44 insertions(+), 24 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/arrow/blob/78288b5f/python/pyarrow/tests/test_convert_pandas.py ---------------------------------------------------------------------- diff --git a/python/pyarrow/tests/test_convert_pandas.py b/python/pyarrow/tests/test_convert_pandas.py index 5530299..b527ca7 100644 --- a/python/pyarrow/tests/test_convert_pandas.py +++ b/python/pyarrow/tests/test_convert_pandas.py @@ -165,7 +165,7 @@ class TestPandasConversion(unittest.TestCase): expected = pd.DataFrame({'strings': values * repeats}) self._check_pandas_roundtrip(df, expected) - def test_timestamps_notimezone(self): + def test_timestamps_notimezone_no_nulls(self): df = pd.DataFrame({ 'datetime64': np.array([ '2007-07-13T01:23:34.123', @@ -184,6 +184,26 @@ class TestPandasConversion(unittest.TestCase): }) self._check_pandas_roundtrip(df, timestamps_to_ms=False) + def test_timestamps_notimezone_nulls(self): + df = pd.DataFrame({ + 'datetime64': np.array([ + '2007-07-13T01:23:34.123', + None, + '2010-08-13T05:46:57.437'], + dtype='datetime64[ms]') + }) + df.info() + self._check_pandas_roundtrip(df, timestamps_to_ms=True) + + df = pd.DataFrame({ + 'datetime64': np.array([ + '2007-07-13T01:23:34.123456789', + None, + '2010-08-13T05:46:57.437699912'], + dtype='datetime64[ns]') + }) + self._check_pandas_roundtrip(df, timestamps_to_ms=False) + # def test_category(self): # repeats = 1000 # values = [b'foo', None, u'bar', 'qux', np.nan] http://git-wip-us.apache.org/repos/asf/arrow/blob/78288b5f/python/src/pyarrow/adapters/pandas.cc ---------------------------------------------------------------------- diff --git a/python/src/pyarrow/adapters/pandas.cc b/python/src/pyarrow/adapters/pandas.cc index 6a3966b..1f5b700 100644 --- a/python/src/pyarrow/adapters/pandas.cc +++ b/python/src/pyarrow/adapters/pandas.cc @@ -489,20 +489,20 @@ struct arrow_traits { static constexpr int npy_type = NPY_BOOL; static constexpr bool supports_nulls = false; static constexpr bool is_boolean = true; - static constexpr bool is_integer = false; - static constexpr bool is_floating = false; + static constexpr bool is_pandas_numeric_not_nullable = false; + static constexpr bool is_pandas_numeric_nullable = false; }; -#define INT_DECL(TYPE) \ - template <> \ - struct arrow_traits { \ - static constexpr int npy_type = NPY_##TYPE; \ - static constexpr bool supports_nulls = false; \ - static constexpr double na_value = NAN; \ - static constexpr bool is_boolean = false; \ - static constexpr bool is_integer = true; \ - static constexpr bool is_floating = false; \ - typedef typename npy_traits::value_type T; \ +#define INT_DECL(TYPE) \ + template <> \ + struct arrow_traits { \ + static constexpr int npy_type = NPY_##TYPE; \ + static constexpr bool supports_nulls = false; \ + static constexpr double na_value = NAN; \ + static constexpr bool is_boolean = false; \ + static constexpr bool is_pandas_numeric_not_nullable = true; \ + static constexpr bool is_pandas_numeric_nullable = false; \ + typedef typename npy_traits::value_type T; \ }; INT_DECL(INT8); @@ -520,8 +520,8 @@ struct arrow_traits { static constexpr bool supports_nulls = true; static constexpr float na_value = NAN; static constexpr bool is_boolean = false; - static constexpr bool is_integer = false; - static constexpr bool is_floating = true; + static constexpr bool is_pandas_numeric_not_nullable = false; + static constexpr bool is_pandas_numeric_nullable = true; typedef typename npy_traits::value_type T; }; @@ -531,8 +531,8 @@ struct arrow_traits { static constexpr bool supports_nulls = true; static constexpr double na_value = NAN; static constexpr bool is_boolean = false; - static constexpr bool is_integer = false; - static constexpr bool is_floating = true; + static constexpr bool is_pandas_numeric_not_nullable = false; + static constexpr bool is_pandas_numeric_nullable = true; typedef typename npy_traits::value_type T; }; @@ -542,8 +542,8 @@ struct arrow_traits { static constexpr bool supports_nulls = true; static constexpr int64_t na_value = std::numeric_limits::min(); static constexpr bool is_boolean = false; - static constexpr bool is_integer = true; - static constexpr bool is_floating = false; + static constexpr bool is_pandas_numeric_not_nullable = false; + static constexpr bool is_pandas_numeric_nullable = true; typedef typename npy_traits::value_type T; }; @@ -552,8 +552,8 @@ struct arrow_traits { static constexpr int npy_type = NPY_OBJECT; static constexpr bool supports_nulls = true; static constexpr bool is_boolean = false; - static constexpr bool is_integer = false; - static constexpr bool is_floating = false; + static constexpr bool is_pandas_numeric_not_nullable = false; + static constexpr bool is_pandas_numeric_nullable = false; }; @@ -655,7 +655,7 @@ class ArrowDeserializer { template inline typename std::enable_if< - arrow_traits::is_floating, Status>::type + arrow_traits::is_pandas_numeric_nullable, Status>::type ConvertValues(const std::shared_ptr& arr) { typedef typename arrow_traits::T T; @@ -668,7 +668,7 @@ class ArrowDeserializer { T* out_values = reinterpret_cast(PyArray_DATA(out_)); for (int64_t i = 0; i < arr->length(); ++i) { - out_values[i] = arr->IsNull(i) ? NAN : in_values[i]; + out_values[i] = arr->IsNull(i) ? arrow_traits::na_value : in_values[i]; } } else { // Zero-Copy. We can pass the data pointer directly to NumPy. @@ -683,7 +683,7 @@ class ArrowDeserializer { // Integer specialization template inline typename std::enable_if< - arrow_traits::is_integer, Status>::type + arrow_traits::is_pandas_numeric_not_nullable, Status>::type ConvertValues(const std::shared_ptr& arr) { typedef typename arrow_traits::T T;