arrow-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From w...@apache.org
Subject arrow git commit: ARROW-1630: [Serialization] Support Python datetime objects
Date Thu, 12 Oct 2017 16:12:40 GMT
Repository: arrow
Updated Branches:
  refs/heads/master 60cb1c372 -> 0d1e69c23


ARROW-1630: [Serialization] Support Python datetime objects

An additional pair of eyes would be helpful: somewhat strangely, the tests are passing for
some datetime objects and not for others.

Author: Philipp Moritz <pcmoritz@gmail.com>

Closes #1153 from pcmoritz/serialize-datetime and squashes the following commits:

f3696ae4 [Philipp Moritz] add numpy to LICENSE.txt
a94bca7d [Philipp Moritz] put PyDateTime_IMPORT higher up
0ae645e9 [Philipp Moritz] windows fixes
cbd1b222 [Philipp Moritz] get rid of gmtime_r
f3ea6699 [Philipp Moritz] use numpy datetime code to implement time conversions
e644f4f5 [Philipp Moritz] linting
f38cbd46 [Philipp Moritz] fixes
6e549c47 [Philipp Moritz] serialize datetime


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/0d1e69c2
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/0d1e69c2
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/0d1e69c2

Branch: refs/heads/master
Commit: 0d1e69c2391f90d1eec60cd8304b956b855fe1c1
Parents: 60cb1c3
Author: Philipp Moritz <pcmoritz@gmail.com>
Authored: Thu Oct 12 12:12:35 2017 -0400
Committer: Wes McKinney <wes.mckinney@twosigma.com>
Committed: Thu Oct 12 12:12:35 2017 -0400

----------------------------------------------------------------------
 LICENSE.txt                                |  39 ++++
 NOTICE.txt                                 |   5 -
 cpp/src/arrow/python/arrow_to_python.cc    |   8 +
 cpp/src/arrow/python/python_to_arrow.cc    |  14 ++
 cpp/src/arrow/python/util/datetime.h       | 255 +++++++++++++++++++-----
 python/pyarrow/tests/test_serialization.py |  24 +++
 6 files changed, 286 insertions(+), 59 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/0d1e69c2/LICENSE.txt
----------------------------------------------------------------------
diff --git a/LICENSE.txt b/LICENSE.txt
index 34879f6..00cb9ec 100644
--- a/LICENSE.txt
+++ b/LICENSE.txt
@@ -359,3 +359,42 @@ distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
+
+--------------------------------------------------------------------------------
+
+This project includes code from the NumPy project.
+
+https://github.com/numpy/numpy/blob/e1f191c46f2eebd6cb892a4bfe14d9dd43a06c4e/numpy/core/src/multiarray/multiarraymodule.c#L2910
+
+https://github.com/numpy/numpy/blob/68fd82271b9ea5a9e50d4e761061dfcca851382a/numpy/core/src/multiarray/datetime.c
+
+Copyright (c) 2005-2017, NumPy Developers.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+    * Redistributions of source code must retain the above copyright
+       notice, this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above
+       copyright notice, this list of conditions and the following
+       disclaimer in the documentation and/or other materials provided
+       with the distribution.
+
+    * Neither the name of the NumPy Developers nor the names of any
+       contributors may be used to endorse or promote products derived
+       from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

http://git-wip-us.apache.org/repos/asf/arrow/blob/0d1e69c2/NOTICE.txt
----------------------------------------------------------------------
diff --git a/NOTICE.txt b/NOTICE.txt
index c02e75f..875ad74 100644
--- a/NOTICE.txt
+++ b/NOTICE.txt
@@ -8,11 +8,6 @@ This product includes software from the SFrame project (BSD, 3-clause).
 * Copyright (C) 2015 Dato, Inc.
 * Copyright (c) 2009 Carnegie Mellon University.
 
-This product includes software from the Numpy project (BSD-new)
- https://github.com/numpy/numpy/blob/e1f191c46f2eebd6cb892a4bfe14d9dd43a06c4e/numpy/core/src/multiarray/multiarraymodule.c#L2910
- * Copyright (c) 1995, 1996, 1997 Jim Hugunin, hugunin@mit.edu
- * Copyright (c) 2005 Travis E. Oliphant oliphant@ee.byu.edu Brigham Young University
-
 This product includes software from the Feather project (Apache 2.0)
 https://github.com/wesm/feather
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/0d1e69c2/cpp/src/arrow/python/arrow_to_python.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/arrow_to_python.cc b/cpp/src/arrow/python/arrow_to_python.cc
index b4f4a41..de05a23 100644
--- a/cpp/src/arrow/python/arrow_to_python.cc
+++ b/cpp/src/arrow/python/arrow_to_python.cc
@@ -28,6 +28,7 @@
 #include "arrow/python/helpers.h"
 #include "arrow/python/numpy_convert.h"
 #include "arrow/python/python_to_arrow.h"
+#include "arrow/python/util/datetime.h"
 #include "arrow/table.h"
 #include "arrow/util/logging.h"
 
@@ -126,6 +127,12 @@ Status GetValue(PyObject* context, const Array& arr, int64_t index, int32_t type
     case Type::DOUBLE:
       *result = PyFloat_FromDouble(static_cast<const DoubleArray&>(arr).Value(index));
       return Status::OK();
+    case Type::DATE64: {
+      RETURN_NOT_OK(PyDateTime_from_int(static_cast<const Date64Array&>(arr).Value(index),
+                                        TimeUnit::MICRO, result));
+      RETURN_IF_PYERROR();
+      return Status::OK();
+    }
     case Type::STRUCT: {
       const auto& s = static_cast<const StructArray&>(arr);
       const auto& l = static_cast<const ListArray&>(*s.field(0));
@@ -248,6 +255,7 @@ Status ReadSerializedObject(io::RandomAccessFile* src, SerializedPyObject* out)
 Status DeserializeObject(PyObject* context, const SerializedPyObject& obj, PyObject* base,
                          PyObject** out) {
   PyAcquireGIL lock;
+  PyDateTime_IMPORT;
   return DeserializeList(context, *obj.batch->column(0), 0, obj.batch->num_rows(), base,
                          obj.tensors, out);
 }

http://git-wip-us.apache.org/repos/asf/arrow/blob/0d1e69c2/cpp/src/arrow/python/python_to_arrow.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/python_to_arrow.cc b/cpp/src/arrow/python/python_to_arrow.cc
index a693a08..e2d7452 100644
--- a/cpp/src/arrow/python/python_to_arrow.cc
+++ b/cpp/src/arrow/python/python_to_arrow.cc
@@ -36,6 +36,7 @@
 #include "arrow/python/helpers.h"
 #include "arrow/python/numpy_convert.h"
 #include "arrow/python/platform.h"
+#include "arrow/python/util/datetime.h"
 #include "arrow/tensor.h"
 #include "arrow/util/logging.h"
 
@@ -59,6 +60,7 @@ class SequenceBuilder {
         strings_(pool),
         floats_(::arrow::float32(), pool),
         doubles_(::arrow::float64(), pool),
+        date64s_(::arrow::date64(), pool),
         tensor_indices_(::arrow::int32(), pool),
         list_offsets_({0}),
         tuple_offsets_({0}),
@@ -125,6 +127,11 @@ class SequenceBuilder {
     return AppendPrimitive(data, &double_tag_, &doubles_);
   }
 
+  /// Appending a Date64 timestamp to the sequence
+  Status AppendDate64(const int64_t timestamp) {
+    return AppendPrimitive(timestamp, &date64_tag_, &date64s_);
+  }
+
   /// Appending a tensor to the sequence
   ///
   /// \param tensor_index Index of the tensor in the object.
@@ -217,6 +224,7 @@ class SequenceBuilder {
     RETURN_NOT_OK(AddElement(bytes_tag_, &bytes_));
     RETURN_NOT_OK(AddElement(float_tag_, &floats_));
     RETURN_NOT_OK(AddElement(double_tag_, &doubles_));
+    RETURN_NOT_OK(AddElement(date64_tag_, &date64s_));
     RETURN_NOT_OK(AddElement(tensor_tag_, &tensor_indices_));
 
     RETURN_NOT_OK(AddSubsequence(list_tag_, list_data, list_offsets_, "list"));
@@ -244,6 +252,7 @@ class SequenceBuilder {
   StringBuilder strings_;
   FloatBuilder floats_;
   DoubleBuilder doubles_;
+  Date64Builder date64s_;
 
   // We use an Int32Builder here to distinguish the tensor indices from
   // the ints_ above (see the case Type::INT32 in get_value in python.cc).
@@ -267,6 +276,7 @@ class SequenceBuilder {
   int8_t bytes_tag_ = -1;
   int8_t float_tag_ = -1;
   int8_t double_tag_ = -1;
+  int8_t date64_tag_ = -1;
 
   int8_t tensor_tag_ = -1;
   int8_t list_tag_ = -1;
@@ -485,6 +495,9 @@ Status Append(PyObject* context, PyObject* elem, SequenceBuilder* builder,
                                  subdicts, tensors_out));
   } else if (elem == Py_None) {
     RETURN_NOT_OK(builder->AppendNone());
+  } else if (PyDateTime_CheckExact(elem)) {
+    PyDateTime_DateTime* datetime = reinterpret_cast<PyDateTime_DateTime*>(elem);
+    RETURN_NOT_OK(builder->AppendDate64(PyDateTime_to_us(datetime)));
   } else {
     // Attempt to serialize the object using the custom callback.
     PyObject* serialized_object;
@@ -656,6 +669,7 @@ std::shared_ptr<RecordBatch> MakeBatch(std::shared_ptr<Array> data) {
 
 Status SerializeObject(PyObject* context, PyObject* sequence, SerializedPyObject* out) {
   PyAcquireGIL lock;
+  PyDateTime_IMPORT;
   std::vector<PyObject*> sequences = {sequence};
   std::shared_ptr<Array> array;
   std::vector<PyObject*> py_tensors;

http://git-wip-us.apache.org/repos/asf/arrow/blob/0d1e69c2/cpp/src/arrow/python/util/datetime.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/util/datetime.h b/cpp/src/arrow/python/util/datetime.h
index 4ebef72..01fbc18 100644
--- a/cpp/src/arrow/python/util/datetime.h
+++ b/cpp/src/arrow/python/util/datetime.h
@@ -18,12 +18,149 @@
 #ifndef PYARROW_UTIL_DATETIME_H
 #define PYARROW_UTIL_DATETIME_H
 
+#include <algorithm>
+#include <sstream>
+
 #include <datetime.h>
+#include "arrow/status.h"
+#include "arrow/util/logging.h"
 #include "arrow/python/platform.h"
 
 namespace arrow {
 namespace py {
 
+// The following code is adapted from
+// https://github.com/numpy/numpy/blob/master/numpy/core/src/multiarray/datetime.c
+
+// Days per month, regular year and leap year
+static int64_t _days_per_month_table[2][12] = {
+    { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 },
+    { 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }
+};
+
+static bool is_leapyear(int64_t year) {
+    return (year & 0x3) == 0 && // year % 4 == 0
+           ((year % 100) != 0 ||
+            (year % 400) == 0);
+}
+
+// Calculates the days offset from the 1970 epoch.
+static int64_t get_days_from_date(int64_t date_year,
+                                  int64_t date_month,
+                                  int64_t date_day) {
+    int64_t i, month;
+    int64_t year, days = 0;
+    int64_t *month_lengths;
+
+    year = date_year - 1970;
+    days = year * 365;
+
+    // Adjust for leap years
+    if (days >= 0) {
+        // 1968 is the closest leap year before 1970.
+        // Exclude the current year, so add 1.
+        year += 1;
+        // Add one day for each 4 years
+        days += year / 4;
+        // 1900 is the closest previous year divisible by 100
+        year += 68;
+        // Subtract one day for each 100 years
+        days -= year / 100;
+        // 1600 is the closest previous year divisible by 400
+        year += 300;
+        // Add one day for each 400 years
+        days += year / 400;
+    } else {
+        // 1972 is the closest leap year after 1970.
+        // Include the current year, so subtract 2.
+        year -= 2;
+        // Subtract one day for each 4 years
+        days += year / 4;
+        // 2000 is the closest later year divisible by 100
+        year -= 28;
+        // Add one day for each 100 years
+        days -= year / 100;
+        // 2000 is also the closest later year divisible by 400
+        // Subtract one day for each 400 years
+        days += year / 400;
+    }
+
+    month_lengths = _days_per_month_table[is_leapyear(date_year)];
+    month = date_month - 1;
+
+    // Add the months
+    for (i = 0; i < month; ++i) {
+        days += month_lengths[i];
+    }
+
+    // Add the days
+    days += date_day - 1;
+
+    return days;
+}
+
+// Modifies '*days_' to be the day offset within the year,
+// and returns the year.
+static int64_t days_to_yearsdays(int64_t* days_) {
+    const int64_t days_per_400years = (400*365 + 100 - 4 + 1);
+    // Adjust so it's relative to the year 2000 (divisible by 400)
+    int64_t days = (*days_) - (365*30 + 7);
+    int64_t year;
+
+    // Break down the 400 year cycle to get the year and day within the year
+    if (days >= 0) {
+        year = 400 * (days / days_per_400years);
+        days = days % days_per_400years;
+    } else {
+        year = 400 * ((days - (days_per_400years - 1)) / days_per_400years);
+        days = days % days_per_400years;
+        if (days < 0) {
+            days += days_per_400years;
+        }
+    }
+
+    // Work out the year/day within the 400 year cycle
+    if (days >= 366) {
+        year += 100 * ((days-1) / (100*365 + 25 - 1));
+        days = (days-1) % (100*365 + 25 - 1);
+        if (days >= 365) {
+            year += 4 * ((days+1) / (4*365 + 1));
+            days = (days+1) % (4*365 + 1);
+            if (days >= 366) {
+                year += (days-1) / 365;
+                days = (days-1) % 365;
+            }
+        }
+    }
+
+    *days_ = days;
+    return year + 2000;
+}
+
+// Extracts the month and year and day number from a number of days
+static void get_date_from_days(int64_t days,
+                               int64_t* date_year,
+                               int64_t* date_month,
+                               int64_t* date_day) {
+    int64_t *month_lengths, i;
+
+    *date_year = days_to_yearsdays(&days);
+    month_lengths = _days_per_month_table[is_leapyear(*date_year)];
+
+    for (i = 0; i < 12; ++i) {
+        if (days < month_lengths[i]) {
+            *date_month = i + 1;
+            *date_day = days + 1;
+            return;
+        } else {
+            days -= month_lengths[i];
+        }
+    }
+
+    // Should never get here
+    return;
+}
+
 static inline int64_t PyTime_to_us(PyObject* pytime) {
   return (static_cast<int64_t>(PyDateTime_TIME_GET_HOUR(pytime)) * 3600000000LL +
           static_cast<int64_t>(PyDateTime_TIME_GET_MINUTE(pytime)) * 60000000LL +
@@ -31,9 +168,28 @@ static inline int64_t PyTime_to_us(PyObject* pytime) {
           PyDateTime_TIME_GET_MICROSECOND(pytime));
 }
 
-static inline Status PyTime_from_int(int64_t val, const TimeUnit::type unit,
-                                     PyObject** out) {
-  int64_t hour = 0, minute = 0, second = 0, microsecond = 0;
+
+// Splitting time quantities, for example splitting total seconds into
+// minutes and remaining seconds. After we run
+// int64_t remaining = split_time(total, quotient, &next)
+// we have
+// total = next * quotient + remaining. Handles negative values by propagating
+// them: If total is negative, next will be negative and remaining will
+// always be non-negative.
+static inline int64_t split_time(int64_t total, int64_t quotient, int64_t* next) {
+  int64_t r = total % quotient;
+  if (r < 0) {
+    *next = total / quotient - 1;
+    return r + quotient;
+  } else {
+    *next = total / quotient;
+    return r;
+  }
+}
+
+static inline Status PyTime_convert_int(int64_t val, const TimeUnit::type unit,
+                                        int64_t *hour, int64_t *minute,
+                                        int64_t *second, int64_t *microsecond) {
   switch (unit) {
     case TimeUnit::NANO:
       if (val % 1000 != 0) {
@@ -44,75 +200,66 @@ static inline Status PyTime_from_int(int64_t val, const TimeUnit::type unit,
       val /= 1000;
     // fall through
     case TimeUnit::MICRO:
-      microsecond = val - (val / 1000000LL) * 1000000LL;
-      val /= 1000000LL;
-      second = val - (val / 60) * 60;
-      val /= 60;
-      minute = val - (val / 60) * 60;
-      hour = val / 60;
+      *microsecond = split_time(val, 1000000LL, &val);
+      *second = split_time(val, 60, &val);
+      *minute = split_time(val, 60, hour);
       break;
     case TimeUnit::MILLI:
-      microsecond = (val - (val / 1000) * 1000) * 1000;
-      val /= 1000;
+      *microsecond = split_time(val, 1000, &val) * 1000;
     // fall through
     case TimeUnit::SECOND:
-      second = val - (val / 60) * 60;
-      val /= 60;
-      minute = val - (val / 60) * 60;
-      hour = val / 60;
+      *second = split_time(val, 60, &val);
+      *minute = split_time(val, 60, hour);
       break;
     default:
       break;
   }
+  return Status::OK();
+}
+
+static inline Status PyTime_from_int(int64_t val, const TimeUnit::type unit,
+                                     PyObject** out) {
+  int64_t hour = 0, minute = 0, second = 0, microsecond = 0;
+  RETURN_NOT_OK(PyTime_convert_int(val, unit, &hour, &minute, &second, &microsecond));
   *out = PyTime_FromTime(static_cast<int32_t>(hour), static_cast<int32_t>(minute),
                          static_cast<int32_t>(second), static_cast<int32_t>(microsecond));
   return Status::OK();
 }
 
+static inline Status PyDateTime_from_int(int64_t val, const TimeUnit::type unit,
+                                         PyObject** out) {
+  int64_t hour = 0, minute = 0, second = 0, microsecond = 0;
+  RETURN_NOT_OK(PyTime_convert_int(val, unit, &hour, &minute, &second, &microsecond));
+  int64_t total_days = 0;
+  hour = split_time(hour, 24, &total_days);
+  int64_t year = 0, month = 0, day = 0;
+  get_date_from_days(total_days, &year, &month, &day);
+  *out = PyDateTime_FromDateAndTime(static_cast<int32_t>(year),
+                                    static_cast<int32_t>(month),
+                                    static_cast<int32_t>(day),
+                                    static_cast<int32_t>(hour),
+                                    static_cast<int32_t>(minute),
+                                    static_cast<int32_t>(second),
+                                    static_cast<int32_t>(microsecond));
+  return Status::OK();
+}
+
 static inline int64_t PyDate_to_ms(PyDateTime_Date* pydate) {
-  struct tm date;
-  memset(&date, 0, sizeof(struct tm));
-  date.tm_year = PyDateTime_GET_YEAR(pydate) - 1900;
-  date.tm_mon = PyDateTime_GET_MONTH(pydate) - 1;
-  date.tm_mday = PyDateTime_GET_DAY(pydate);
-  struct tm epoch;
-  memset(&epoch, 0, sizeof(struct tm));
-
-  epoch.tm_year = 70;
-  epoch.tm_mday = 1;
-#ifdef _MSC_VER
-  // Milliseconds since the epoch
-  const int64_t current_timestamp = static_cast<int64_t>(_mkgmtime64(&date));
-  const int64_t epoch_timestamp = static_cast<int64_t>(_mkgmtime64(&epoch));
-  return (current_timestamp - epoch_timestamp) * 1000LL;
-#else
-  return lrint(difftime(mktime(&date), mktime(&epoch)) * 1000);
-#endif
+  int64_t total_seconds = 0;
+  total_seconds += PyDateTime_DATE_GET_SECOND(pydate);
+  total_seconds += PyDateTime_DATE_GET_MINUTE(pydate) * 60;
+  total_seconds += PyDateTime_DATE_GET_HOUR(pydate) * 3600;
+  int64_t days = get_days_from_date(PyDateTime_GET_YEAR(pydate),
+                                    PyDateTime_GET_MONTH(pydate),
+                                    PyDateTime_GET_DAY(pydate));
+  total_seconds += days * 24 * 3600;
+  return total_seconds * 1000;
 }
 
 static inline int64_t PyDateTime_to_us(PyDateTime_DateTime* pydatetime) {
-  struct tm datetime;
-  memset(&datetime, 0, sizeof(struct tm));
-  datetime.tm_year = PyDateTime_GET_YEAR(pydatetime) - 1900;
-  datetime.tm_mon = PyDateTime_GET_MONTH(pydatetime) - 1;
-  datetime.tm_mday = PyDateTime_GET_DAY(pydatetime);
-  datetime.tm_hour = PyDateTime_DATE_GET_HOUR(pydatetime);
-  datetime.tm_min = PyDateTime_DATE_GET_MINUTE(pydatetime);
-  datetime.tm_sec = PyDateTime_DATE_GET_SECOND(pydatetime);
+  int64_t ms = PyDate_to_ms(reinterpret_cast<PyDateTime_Date*>(pydatetime));
   int us = PyDateTime_DATE_GET_MICROSECOND(pydatetime);
-  struct tm epoch;
-  memset(&epoch, 0, sizeof(struct tm));
-  epoch.tm_year = 70;
-  epoch.tm_mday = 1;
-#ifdef _MSC_VER
-  // Microseconds since the epoch
-  const int64_t current_timestamp = static_cast<int64_t>(_mkgmtime64(&datetime));
-  const int64_t epoch_timestamp = static_cast<int64_t>(_mkgmtime64(&epoch));
-  return (current_timestamp - epoch_timestamp) * 1000000L + us;
-#else
-  return static_cast<int64_t>(
-      lrint(difftime(mktime(&datetime), mktime(&epoch))) * 1000000 + us);
-#endif
+  return ms * 1000 + us;
 }
 
 static inline int32_t PyDate_to_days(PyDateTime_Date* pydate) {

http://git-wip-us.apache.org/repos/asf/arrow/blob/0d1e69c2/python/pyarrow/tests/test_serialization.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_serialization.py b/python/pyarrow/tests/test_serialization.py
index 09fc4aa..5441b9f 100644
--- a/python/pyarrow/tests/test_serialization.py
+++ b/python/pyarrow/tests/test_serialization.py
@@ -20,6 +20,7 @@ from __future__ import division
 import pytest
 
 from collections import namedtuple, OrderedDict, defaultdict
+import datetime
 import string
 import sys
 
@@ -311,6 +312,29 @@ def test_numpy_serialization(large_memory_map):
             serialization_roundtrip(obj, mmap)
 
 
+def test_datetime_serialization(large_memory_map):
+    data = [# Principia Mathematica published
+            datetime.datetime(year=1687, month=7, day=5),
+            # Some random date
+            datetime.datetime(year=1911, month=6, day=3, hour=4,
+                              minute=55, second=44),
+            # End of WWI
+            datetime.datetime(year=1918, month=11, day=11),
+            # Beginning of UNIX time
+            datetime.datetime(year=1970, month=1, day=1),
+            # The Berlin wall falls
+            datetime.datetime(year=1989, month=11, day=9),
+            # Another random date
+            datetime.datetime(year=2011, month=6, day=3, hour=4,
+                              minute=0, second=3),
+            # Another random date
+            datetime.datetime(year=1970, month=1, day=3, hour=4,
+                              minute=0, second=0)]
+    with pa.memory_map(large_memory_map, mode="r+") as mmap:
+        for d in data:
+            serialization_roundtrip(d, mmap)
+
+
 def test_numpy_immutable(large_memory_map):
     with pa.memory_map(large_memory_map, mode="r+") as mmap:
         obj = np.zeros([10])


Mime
View raw message