arrow-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From w...@apache.org
Subject arrow git commit: ARROW-632: [Python] Add support for FixedWidthBinary type
Date Thu, 30 Mar 2017 23:12:55 GMT
Repository: arrow
Updated Branches:
  refs/heads/master edd6cfcd9 -> 4915ecf1e


ARROW-632: [Python] Add support for FixedWidthBinary type

Author: Phillip Cloud <cpcloud@gmail.com>

Closes #461 from cpcloud/ARROW-632 and squashes the following commits:

134644a [Phillip Cloud] ARROW-632: [Python] Add support for FixedWidthBinary type


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/4915ecf1
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/4915ecf1
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/4915ecf1

Branch: refs/heads/master
Commit: 4915ecf1e1dba625d916604d30f2575e4ddb6439
Parents: edd6cfc
Author: Phillip Cloud <cpcloud@gmail.com>
Authored: Thu Mar 30 19:12:49 2017 -0400
Committer: Wes McKinney <wes.mckinney@twosigma.com>
Committed: Thu Mar 30 19:12:49 2017 -0400

----------------------------------------------------------------------
 .gitignore                                    |   3 +
 cpp/src/arrow/builder.cc                      |   1 +
 cpp/src/arrow/ipc/ipc-read-write-benchmark.cc |   4 +-
 cpp/src/arrow/ipc/reader.cc                   |   2 +-
 cpp/src/arrow/python/builtin_convert.cc       | 101 +++++++++++++---
 cpp/src/arrow/python/builtin_convert.h        |  17 ++-
 cpp/src/arrow/python/pandas_convert.cc        | 131 ++++++++++++++++++---
 cpp/src/arrow/util/logging.h                  |   7 +-
 python/pyarrow/__init__.py                    |   5 +-
 python/pyarrow/array.pxd                      |   8 ++
 python/pyarrow/array.pyx                      |  16 ++-
 python/pyarrow/includes/libarrow.pxd          |   8 ++
 python/pyarrow/includes/pyarrow.pxd           |   3 +
 python/pyarrow/scalar.pxd                     |   5 +
 python/pyarrow/scalar.pyx                     |  19 ++-
 python/pyarrow/schema.pxd                     |   6 +
 python/pyarrow/schema.pyx                     |  42 +++++--
 python/pyarrow/tests/test_convert_builtin.py  |  13 ++
 python/pyarrow/tests/test_convert_pandas.py   |  17 +++
 python/pyarrow/tests/test_scalars.py          |  14 +++
 20 files changed, 367 insertions(+), 55 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/4915ecf1/.gitignore
----------------------------------------------------------------------
diff --git a/.gitignore b/.gitignore
index a00cbba..5e28b36 100644
--- a/.gitignore
+++ b/.gitignore
@@ -24,3 +24,6 @@
 *.dylib
 .build_cache_dir
 MANIFEST
+
+cpp/.idea/
+python/.eggs/
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/arrow/blob/4915ecf1/cpp/src/arrow/builder.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/builder.cc b/cpp/src/arrow/builder.cc
index 52a785d..82b6214 100644
--- a/cpp/src/arrow/builder.cc
+++ b/cpp/src/arrow/builder.cc
@@ -542,6 +542,7 @@ Status MakeBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type,
     BUILDER_CASE(DOUBLE, DoubleBuilder);
     BUILDER_CASE(STRING, StringBuilder);
     BUILDER_CASE(BINARY, BinaryBuilder);
+    BUILDER_CASE(FIXED_WIDTH_BINARY, FixedWidthBinaryBuilder);
     case Type::LIST: {
       std::shared_ptr<ArrayBuilder> value_builder;
       std::shared_ptr<DataType> value_type =

http://git-wip-us.apache.org/repos/asf/arrow/blob/4915ecf1/cpp/src/arrow/ipc/ipc-read-write-benchmark.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/ipc-read-write-benchmark.cc b/cpp/src/arrow/ipc/ipc-read-write-benchmark.cc
index 1aecdbc..b385929 100644
--- a/cpp/src/arrow/ipc/ipc-read-write-benchmark.cc
+++ b/cpp/src/arrow/ipc/ipc-read-write-benchmark.cc
@@ -80,7 +80,7 @@ static void BM_WriteRecordBatch(benchmark::State& state) {  // NOLINT non-const
     int32_t metadata_length;
     int64_t body_length;
     if (!ipc::WriteRecordBatch(*record_batch, 0, &stream, &metadata_length, &body_length,
-             default_memory_pool())
+            default_memory_pool())
              .ok()) {
       state.SkipWithError("Failed to write!");
     }
@@ -101,7 +101,7 @@ static void BM_ReadRecordBatch(benchmark::State& state) {  // NOLINT non-const r
   int32_t metadata_length;
   int64_t body_length;
   if (!ipc::WriteRecordBatch(*record_batch, 0, &stream, &metadata_length, &body_length,
-           default_memory_pool())
+          default_memory_pool())
            .ok()) {
     state.SkipWithError("Failed to write!");
   }

http://git-wip-us.apache.org/repos/asf/arrow/blob/4915ecf1/cpp/src/arrow/ipc/reader.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/reader.cc b/cpp/src/arrow/ipc/reader.cc
index b47b773..00ea20c 100644
--- a/cpp/src/arrow/ipc/reader.cc
+++ b/cpp/src/arrow/ipc/reader.cc
@@ -32,8 +32,8 @@
 #include "arrow/ipc/util.h"
 #include "arrow/status.h"
 #include "arrow/table.h"
-#include "arrow/type.h"
 #include "arrow/tensor.h"
+#include "arrow/type.h"
 #include "arrow/util/logging.h"
 
 namespace arrow {

http://git-wip-us.apache.org/repos/asf/arrow/blob/4915ecf1/cpp/src/arrow/python/builtin_convert.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/builtin_convert.cc b/cpp/src/arrow/python/builtin_convert.cc
index 6e59845..72e8677 100644
--- a/cpp/src/arrow/python/builtin_convert.cc
+++ b/cpp/src/arrow/python/builtin_convert.cc
@@ -23,6 +23,7 @@
 
 #include "arrow/api.h"
 #include "arrow/status.h"
+#include "arrow/util/logging.h"
 
 #include "arrow/python/helpers.h"
 #include "arrow/python/util/datetime.h"
@@ -200,18 +201,25 @@ class SeqVisitor {
   int nesting_histogram_[MAX_NESTING_LEVELS];
 };
 
-// Non-exhaustive type inference
-Status InferArrowType(PyObject* obj, int64_t* size, std::shared_ptr<DataType>* out_type) {
-  *size = PySequence_Size(obj);
+Status InferArrowSize(PyObject* obj, int64_t* size) {
+  *size = static_cast<int64_t>(PySequence_Size(obj));
   if (PyErr_Occurred()) {
     // Not a sequence
     PyErr_Clear();
     return Status::TypeError("Object is not a sequence");
   }
+  return Status::OK();
+}
+
+// Non-exhaustive type inference
+Status InferArrowTypeAndSize(
+    PyObject* obj, int64_t* size, std::shared_ptr<DataType>* out_type) {
+  RETURN_NOT_OK(InferArrowSize(obj, size));
 
   // For 0-length sequences, refuse to guess
   if (*size == 0) { *out_type = null(); }
 
+  PyDateTime_IMPORT;
   SeqVisitor seq_visitor;
   RETURN_NOT_OK(seq_visitor.Visit(obj));
   RETURN_NOT_OK(seq_visitor.Validate());
@@ -253,7 +261,7 @@ class TypedConverter : public SeqConverter {
 class BoolConverter : public TypedConverter<BooleanBuilder> {
  public:
   Status AppendData(PyObject* seq) override {
-    Py_ssize_t size = PySequence_Size(seq);
+    int64_t size = static_cast<int64_t>(PySequence_Size(seq));
     RETURN_NOT_OK(typed_builder_->Reserve(size));
     for (int64_t i = 0; i < size; ++i) {
       OwnedRef item(PySequence_GetItem(seq, i));
@@ -275,14 +283,14 @@ class Int64Converter : public TypedConverter<Int64Builder> {
  public:
   Status AppendData(PyObject* seq) override {
     int64_t val;
-    Py_ssize_t size = PySequence_Size(seq);
+    int64_t size = static_cast<int64_t>(PySequence_Size(seq));
     RETURN_NOT_OK(typed_builder_->Reserve(size));
     for (int64_t i = 0; i < size; ++i) {
       OwnedRef item(PySequence_GetItem(seq, i));
       if (item.obj() == Py_None) {
         typed_builder_->AppendNull();
       } else {
-        val = PyLong_AsLongLong(item.obj());
+        val = static_cast<int64_t>(PyLong_AsLongLong(item.obj()));
         RETURN_IF_PYERROR();
         typed_builder_->Append(val);
       }
@@ -294,7 +302,7 @@ class Int64Converter : public TypedConverter<Int64Builder> {
 class DateConverter : public TypedConverter<Date64Builder> {
  public:
   Status AppendData(PyObject* seq) override {
-    Py_ssize_t size = PySequence_Size(seq);
+    int64_t size = static_cast<int64_t>(PySequence_Size(seq));
     RETURN_NOT_OK(typed_builder_->Reserve(size));
     for (int64_t i = 0; i < size; ++i) {
       OwnedRef item(PySequence_GetItem(seq, i));
@@ -312,7 +320,7 @@ class DateConverter : public TypedConverter<Date64Builder> {
 class TimestampConverter : public TypedConverter<TimestampBuilder> {
  public:
   Status AppendData(PyObject* seq) override {
-    Py_ssize_t size = PySequence_Size(seq);
+    int64_t size = static_cast<int64_t>(PySequence_Size(seq));
     RETURN_NOT_OK(typed_builder_->Reserve(size));
     for (int64_t i = 0; i < size; ++i) {
       OwnedRef item(PySequence_GetItem(seq, i));
@@ -334,7 +342,8 @@ class TimestampConverter : public TypedConverter<TimestampBuilder> {
         epoch.tm_year = 70;
         epoch.tm_mday = 1;
         // Microseconds since the epoch
-        int64_t val = lrint(difftime(mktime(&datetime), mktime(&epoch))) * 1000000 + us;
+        int64_t val = static_cast<int64_t>(
+            lrint(difftime(mktime(&datetime), mktime(&epoch))) * 1000000 + us);
         typed_builder_->Append(val);
       }
     }
@@ -346,7 +355,7 @@ class DoubleConverter : public TypedConverter<DoubleBuilder> {
  public:
   Status AppendData(PyObject* seq) override {
     double val;
-    Py_ssize_t size = PySequence_Size(seq);
+    int64_t size = static_cast<int64_t>(PySequence_Size(seq));
     RETURN_NOT_OK(typed_builder_->Reserve(size));
     for (int64_t i = 0; i < size; ++i) {
       OwnedRef item(PySequence_GetItem(seq, i));
@@ -369,7 +378,7 @@ class BytesConverter : public TypedConverter<BinaryBuilder> {
     PyObject* bytes_obj;
     OwnedRef tmp;
     const char* bytes;
-    int64_t length;
+    Py_ssize_t length;
     Py_ssize_t size = PySequence_Size(seq);
     for (int64_t i = 0; i < size; ++i) {
       item = PySequence_GetItem(seq, i);
@@ -385,7 +394,8 @@ class BytesConverter : public TypedConverter<BinaryBuilder> {
       } else if (PyBytes_Check(item)) {
         bytes_obj = item;
       } else {
-        return Status::TypeError("Non-string value encountered");
+        return Status::TypeError(
+            "Value that cannot be converted to bytes was encountered");
       }
       // No error checking
       length = PyBytes_GET_SIZE(bytes_obj);
@@ -396,6 +406,41 @@ class BytesConverter : public TypedConverter<BinaryBuilder> {
   }
 };
 
+class FixedWidthBytesConverter : public TypedConverter<FixedWidthBinaryBuilder> {
+ public:
+  Status AppendData(PyObject* seq) override {
+    PyObject* item;
+    PyObject* bytes_obj;
+    OwnedRef tmp;
+    Py_ssize_t expected_length = std::dynamic_pointer_cast<FixedWidthBinaryType>(
+        typed_builder_->type())->byte_width();
+    Py_ssize_t size = PySequence_Size(seq);
+    for (int64_t i = 0; i < size; ++i) {
+      item = PySequence_GetItem(seq, i);
+      OwnedRef holder(item);
+
+      if (item == Py_None) {
+        RETURN_NOT_OK(typed_builder_->AppendNull());
+        continue;
+      } else if (PyUnicode_Check(item)) {
+        tmp.reset(PyUnicode_AsUTF8String(item));
+        RETURN_IF_PYERROR();
+        bytes_obj = tmp.obj();
+      } else if (PyBytes_Check(item)) {
+        bytes_obj = item;
+      } else {
+        return Status::TypeError(
+            "Value that cannot be converted to bytes was encountered");
+      }
+      // No error checking
+      RETURN_NOT_OK(CheckPythonBytesAreFixedLength(bytes_obj, expected_length));
+      RETURN_NOT_OK(typed_builder_->Append(
+          reinterpret_cast<const uint8_t*>(PyBytes_AS_STRING(bytes_obj))));
+    }
+    return Status::OK();
+  }
+};
+
 class UTF8Converter : public TypedConverter<StringBuilder> {
  public:
   Status AppendData(PyObject* seq) override {
@@ -403,7 +448,7 @@ class UTF8Converter : public TypedConverter<StringBuilder> {
     PyObject* bytes_obj;
     OwnedRef tmp;
     const char* bytes;
-    int64_t length;
+    Py_ssize_t length;
     Py_ssize_t size = PySequence_Size(seq);
     for (int64_t i = 0; i < size; ++i) {
       item = PySequence_GetItem(seq, i);
@@ -465,6 +510,8 @@ std::shared_ptr<SeqConverter> GetConverter(const std::shared_ptr<DataType>& type
       return std::make_shared<DoubleConverter>();
     case Type::BINARY:
       return std::make_shared<BytesConverter>();
+    case Type::FIXED_WIDTH_BINARY:
+      return std::make_shared<FixedWidthBytesConverter>();
     case Type::STRING:
       return std::make_shared<UTF8Converter>();
     case Type::LIST:
@@ -472,7 +519,6 @@ std::shared_ptr<SeqConverter> GetConverter(const std::shared_ptr<DataType>& type
     case Type::STRUCT:
     default:
       return nullptr;
-      break;
   }
 }
 
@@ -492,6 +538,7 @@ Status ListConverter::Init(const std::shared_ptr<ArrayBuilder>& builder) {
 
 Status AppendPySequence(PyObject* obj, const std::shared_ptr<DataType>& type,
     const std::shared_ptr<ArrayBuilder>& builder) {
+  PyDateTime_IMPORT;
   std::shared_ptr<SeqConverter> converter = GetConverter(type);
   if (converter == nullptr) {
     std::stringstream ss;
@@ -506,9 +553,12 @@ Status AppendPySequence(PyObject* obj, const std::shared_ptr<DataType>& type,
 Status ConvertPySequence(PyObject* obj, MemoryPool* pool, std::shared_ptr<Array>* out) {
   std::shared_ptr<DataType> type;
   int64_t size;
-  PyDateTime_IMPORT;
-  RETURN_NOT_OK(InferArrowType(obj, &size, &type));
+  RETURN_NOT_OK(InferArrowTypeAndSize(obj, &size, &type));
+  return ConvertPySequence(obj, pool, out, type, size);
+}
 
+Status ConvertPySequence(PyObject* obj, MemoryPool* pool, std::shared_ptr<Array>* out,
+    const std::shared_ptr<DataType>& type, int64_t size) {
   // Handle NA / NullType case
   if (type->type == Type::NA) {
     out->reset(new NullArray(size));
@@ -519,9 +569,26 @@ Status ConvertPySequence(PyObject* obj, MemoryPool* pool, std::shared_ptr<Array>
   std::shared_ptr<ArrayBuilder> builder;
   RETURN_NOT_OK(MakeBuilder(pool, type, &builder));
   RETURN_NOT_OK(AppendPySequence(obj, type, builder));
-
   return builder->Finish(out);
 }
 
+Status ConvertPySequence(PyObject* obj, MemoryPool* pool, std::shared_ptr<Array>* out,
+    const std::shared_ptr<DataType>& type) {
+  int64_t size;
+  RETURN_NOT_OK(InferArrowSize(obj, &size));
+  return ConvertPySequence(obj, pool, out, type, size);
+}
+
+Status CheckPythonBytesAreFixedLength(PyObject* obj, Py_ssize_t expected_length) {
+  const Py_ssize_t length = PyBytes_GET_SIZE(obj);
+  if (length != expected_length) {
+    std::stringstream ss;
+    ss << "Found byte string of length " << length << ", expected length is "
+       << expected_length;
+    return Status::TypeError(ss.str());
+  }
+  return Status::OK();
+}
+
 }  // namespace py
 }  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/4915ecf1/cpp/src/arrow/python/builtin_convert.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/builtin_convert.h b/cpp/src/arrow/python/builtin_convert.h
index 7b50990..00ff0fd 100644
--- a/cpp/src/arrow/python/builtin_convert.h
+++ b/cpp/src/arrow/python/builtin_convert.h
@@ -38,16 +38,31 @@ class Status;
 
 namespace py {
 
-ARROW_EXPORT arrow::Status InferArrowType(
+ARROW_EXPORT arrow::Status InferArrowTypeAndSize(
     PyObject* obj, int64_t* size, std::shared_ptr<arrow::DataType>* out_type);
+ARROW_EXPORT arrow::Status InferArrowSize(PyObject* obj, int64_t* size);
 
 ARROW_EXPORT arrow::Status AppendPySequence(PyObject* obj,
     const std::shared_ptr<arrow::DataType>& type,
     const std::shared_ptr<arrow::ArrayBuilder>& builder);
 
+// Type and size inference
 ARROW_EXPORT
 Status ConvertPySequence(PyObject* obj, MemoryPool* pool, std::shared_ptr<Array>* out);
 
+// Size inference
+ARROW_EXPORT
+Status ConvertPySequence(PyObject* obj, MemoryPool* pool, std::shared_ptr<Array>* out,
+    const std::shared_ptr<DataType>& type);
+
+// No inference
+ARROW_EXPORT
+Status ConvertPySequence(PyObject* obj, MemoryPool* pool, std::shared_ptr<Array>* out,
+    const std::shared_ptr<DataType>& type, int64_t size);
+
+ARROW_EXPORT Status CheckPythonBytesAreFixedLength(
+    PyObject* obj, Py_ssize_t expected_length);
+
 }  // namespace py
 }  // namespace arrow
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/4915ecf1/cpp/src/arrow/python/pandas_convert.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/pandas_convert.cc b/cpp/src/arrow/python/pandas_convert.cc
index db2e90e..68a8d7d 100644
--- a/cpp/src/arrow/python/pandas_convert.cc
+++ b/cpp/src/arrow/python/pandas_convert.cc
@@ -147,8 +147,8 @@ Status CheckFlatNumpyArray(PyArrayObject* numpy_array, int np_type) {
   return Status::OK();
 }
 
-Status AppendObjectStrings(StringBuilder& string_builder, PyObject** objects,
-    int64_t objects_length, bool* have_bytes) {
+Status AppendObjectStrings(int64_t objects_length, StringBuilder* builder,
+    PyObject** objects, bool* have_bytes) {
   PyObject* obj;
 
   for (int64_t i = 0; i < objects_length; ++i) {
@@ -160,15 +160,45 @@ Status AppendObjectStrings(StringBuilder& string_builder, PyObject** objects,
         return Status::TypeError("failed converting unicode to UTF8");
       }
       const int32_t length = static_cast<int32_t>(PyBytes_GET_SIZE(obj));
-      Status s = string_builder.Append(PyBytes_AS_STRING(obj), length);
+      Status s = builder->Append(PyBytes_AS_STRING(obj), length);
       Py_DECREF(obj);
       if (!s.ok()) { return s; }
     } else if (PyBytes_Check(obj)) {
       *have_bytes = true;
       const int32_t length = static_cast<int32_t>(PyBytes_GET_SIZE(obj));
-      RETURN_NOT_OK(string_builder.Append(PyBytes_AS_STRING(obj), length));
+      RETURN_NOT_OK(builder->Append(PyBytes_AS_STRING(obj), length));
     } else {
-      string_builder.AppendNull();
+      builder->AppendNull();
+    }
+  }
+
+  return Status::OK();
+}
+
+static Status AppendObjectFixedWidthBytes(int64_t objects_length, int byte_width,
+    FixedWidthBinaryBuilder* builder, PyObject** objects) {
+  PyObject* obj;
+
+  for (int64_t i = 0; i < objects_length; ++i) {
+    obj = objects[i];
+    if (PyUnicode_Check(obj)) {
+      obj = PyUnicode_AsUTF8String(obj);
+      if (obj == NULL) {
+        PyErr_Clear();
+        return Status::TypeError("failed converting unicode to UTF8");
+      }
+
+      RETURN_NOT_OK(CheckPythonBytesAreFixedLength(obj, byte_width));
+      Status s =
+          builder->Append(reinterpret_cast<const uint8_t*>(PyBytes_AS_STRING(obj)));
+      Py_DECREF(obj);
+      RETURN_NOT_OK(s);
+    } else if (PyBytes_Check(obj)) {
+      RETURN_NOT_OK(CheckPythonBytesAreFixedLength(obj, byte_width));
+      RETURN_NOT_OK(
+          builder->Append(reinterpret_cast<const uint8_t*>(PyBytes_AS_STRING(obj))));
+    } else {
+      builder->AppendNull();
     }
   }
 
@@ -192,6 +222,13 @@ struct WrapBytes<BinaryArray> {
   }
 };
 
+template <>
+struct WrapBytes<FixedWidthBinaryArray> {
+  static inline PyObject* Wrap(const uint8_t* data, int64_t length) {
+    return PyBytes_FromStringAndSize(reinterpret_cast<const char*>(data), length);
+  }
+};
+
 static inline bool ListTypeSupported(const Type::type type_id) {
   switch (type_id) {
     case Type::UINT8:
@@ -226,7 +263,7 @@ class PandasConverter : public TypeVisitor {
         arr_(reinterpret_cast<PyArrayObject*>(ao)),
         mask_(nullptr) {
     if (mo != nullptr && mo != Py_None) { mask_ = reinterpret_cast<PyArrayObject*>(mo); }
-    length_ = PyArray_SIZE(arr_);
+    length_ = static_cast<int64_t>(PyArray_SIZE(arr_));
   }
 
   bool is_strided() const {
@@ -241,7 +278,7 @@ class PandasConverter : public TypeVisitor {
     RETURN_NOT_OK(null_bitmap_->Resize(null_bytes));
 
     null_bitmap_data_ = null_bitmap_->mutable_data();
-    memset(null_bitmap_data_, 0, null_bytes);
+    memset(null_bitmap_data_, 0, static_cast<size_t>(null_bytes));
 
     return Status::OK();
   }
@@ -321,6 +358,8 @@ class PandasConverter : public TypeVisitor {
       const std::shared_ptr<DataType>& type, std::shared_ptr<Array>* out);
 
   Status ConvertObjectStrings(std::shared_ptr<Array>* out);
+  Status ConvertObjectFixedWidthBytes(
+      const std::shared_ptr<DataType>& type, std::shared_ptr<Array>* out);
   Status ConvertBooleans(std::shared_ptr<Array>* out);
   Status ConvertDates(std::shared_ptr<Array>* out);
   Status ConvertLists(const std::shared_ptr<DataType>& type, std::shared_ptr<Array>* out);
@@ -402,13 +441,13 @@ Status PandasConverter::ConvertObjectStrings(std::shared_ptr<Array>* out) {
   // and unicode mixed in the object array
 
   PyObject** objects = reinterpret_cast<PyObject**>(PyArray_DATA(arr_));
-  StringBuilder string_builder(pool_);
-  RETURN_NOT_OK(string_builder.Resize(length_));
+  StringBuilder builder(pool_);
+  RETURN_NOT_OK(builder.Resize(length_));
 
   Status s;
   bool have_bytes = false;
-  RETURN_NOT_OK(AppendObjectStrings(string_builder, objects, length_, &have_bytes));
-  RETURN_NOT_OK(string_builder.Finish(out));
+  RETURN_NOT_OK(AppendObjectStrings(length_, &builder, objects, &have_bytes));
+  RETURN_NOT_OK(builder.Finish(out));
 
   if (have_bytes) {
     const auto& arr = static_cast<const StringArray&>(*out->get());
@@ -418,6 +457,20 @@ Status PandasConverter::ConvertObjectStrings(std::shared_ptr<Array>* out) {
   return Status::OK();
 }
 
+Status PandasConverter::ConvertObjectFixedWidthBytes(
+    const std::shared_ptr<DataType>& type, std::shared_ptr<Array>* out) {
+  PyAcquireGIL lock;
+
+  PyObject** objects = reinterpret_cast<PyObject**>(PyArray_DATA(arr_));
+  FixedWidthBinaryBuilder builder(pool_, type);
+  RETURN_NOT_OK(builder.Resize(length_));
+  RETURN_NOT_OK(AppendObjectFixedWidthBytes(length_,
+      std::dynamic_pointer_cast<FixedWidthBinaryType>(builder.type())->byte_width(),
+      &builder, objects));
+  RETURN_NOT_OK(builder.Finish(out));
+  return Status::OK();
+}
+
 Status PandasConverter::ConvertBooleans(std::shared_ptr<Array>* out) {
   PyAcquireGIL lock;
 
@@ -474,6 +527,8 @@ Status PandasConverter::ConvertObjects(std::shared_ptr<Array>* out) {
     switch (type_->type) {
       case Type::STRING:
         return ConvertObjectStrings(out);
+      case Type::FIXED_WIDTH_BINARY:
+        return ConvertObjectFixedWidthBytes(type_, out);
       case Type::BOOL:
         return ConvertBooleans(out);
       case Type::DATE64:
@@ -543,7 +598,7 @@ inline Status PandasConverter::ConvertTypedLists(
       int64_t size;
       std::shared_ptr<DataType> inferred_type;
       RETURN_NOT_OK(list_builder.Append(true));
-      RETURN_NOT_OK(InferArrowType(objects[i], &size, &inferred_type));
+      RETURN_NOT_OK(InferArrowTypeAndSize(objects[i], &size, &inferred_type));
       if (inferred_type->type != type->type) {
         std::stringstream ss;
         ss << inferred_type->ToString() << " cannot be converted to " << type->ToString();
@@ -577,14 +632,14 @@ inline Status PandasConverter::ConvertTypedLists<NPY_OBJECT, StringType>(
       // TODO(uwe): Support more complex numpy array structures
       RETURN_NOT_OK(CheckFlatNumpyArray(numpy_array, NPY_OBJECT));
 
-      int64_t size = PyArray_DIM(numpy_array, 0);
+      int64_t size = static_cast<int64_t>(PyArray_DIM(numpy_array, 0));
       auto data = reinterpret_cast<PyObject**>(PyArray_DATA(numpy_array));
-      RETURN_NOT_OK(AppendObjectStrings(*value_builder.get(), data, size, &have_bytes));
+      RETURN_NOT_OK(AppendObjectStrings(size, value_builder.get(), data, &have_bytes));
     } else if (PyList_Check(objects[i])) {
       int64_t size;
       std::shared_ptr<DataType> inferred_type;
       RETURN_NOT_OK(list_builder.Append(true));
-      RETURN_NOT_OK(InferArrowType(objects[i], &size, &inferred_type));
+      RETURN_NOT_OK(InferArrowTypeAndSize(objects[i], &size, &inferred_type));
       if (inferred_type->type != Type::STRING) {
         std::stringstream ss;
         ss << inferred_type->ToString() << " cannot be converted to STRING.";
@@ -832,7 +887,7 @@ inline void ConvertIntegerWithNulls(const ChunkedArray& data, double* out_values
     // Upcast to double, set NaN as appropriate
 
     for (int i = 0; i < arr->length(); ++i) {
-      *out_values++ = prim_arr->IsNull(i) ? NAN : in_values[i];
+      *out_values++ = prim_arr->IsNull(i) ? NAN : static_cast<double>(in_values[i]);
     }
   }
 }
@@ -924,6 +979,36 @@ inline Status ConvertBinaryLike(const ChunkedArray& data, PyObject** out_values)
   return Status::OK();
 }
 
+inline Status ConvertFixedWidthBinary(const ChunkedArray& data, PyObject** out_values) {
+  PyAcquireGIL lock;
+  for (int c = 0; c < data.num_chunks(); c++) {
+    auto arr = static_cast<FixedWidthBinaryArray*>(data.chunk(c).get());
+
+    const uint8_t* data_ptr;
+    int32_t length =
+        std::dynamic_pointer_cast<FixedWidthBinaryType>(arr->type())->byte_width();
+    const bool has_nulls = data.null_count() > 0;
+    for (int64_t i = 0; i < arr->length(); ++i) {
+      if (has_nulls && arr->IsNull(i)) {
+        Py_INCREF(Py_None);
+        *out_values = Py_None;
+      } else {
+        data_ptr = arr->GetValue(i);
+        *out_values = WrapBytes<FixedWidthBinaryArray>::Wrap(data_ptr, length);
+        if (*out_values == nullptr) {
+          PyErr_Clear();
+          std::stringstream ss;
+          ss << "Wrapping "
+             << std::string(reinterpret_cast<const char*>(data_ptr), length) << " failed";
+          return Status::UnknownError(ss.str());
+        }
+      }
+      ++out_values;
+    }
+  }
+  return Status::OK();
+}
+
 template <typename ArrowType>
 inline Status ConvertListsLike(
     const std::shared_ptr<Column>& col, PyObject** out_values) {
@@ -1058,6 +1143,8 @@ class ObjectBlock : public PandasBlock {
       RETURN_NOT_OK(ConvertBinaryLike<BinaryArray>(data, out_buffer));
     } else if (type == Type::STRING) {
       RETURN_NOT_OK(ConvertBinaryLike<StringArray>(data, out_buffer));
+    } else if (type == Type::FIXED_WIDTH_BINARY) {
+      RETURN_NOT_OK(ConvertFixedWidthBinary(data, out_buffer));
     } else if (type == Type::LIST) {
       auto list_type = std::static_pointer_cast<ListType>(col->type());
       switch (list_type->value_type()->type) {
@@ -1487,6 +1574,7 @@ class DataFrameBlockCreator {
           break;
         case Type::STRING:
         case Type::BINARY:
+        case Type::FIXED_WIDTH_BINARY:
           output_type = PandasBlock::OBJECT;
           break;
         case Type::DATE64:
@@ -1751,6 +1839,7 @@ class ArrowDeserializer {
       CONVERT_CASE(DOUBLE);
       CONVERT_CASE(BINARY);
       CONVERT_CASE(STRING);
+      CONVERT_CASE(FIXED_WIDTH_BINARY);
       CONVERT_CASE(DATE64);
       CONVERT_CASE(TIMESTAMP);
       CONVERT_CASE(DICTIONARY);
@@ -1845,6 +1934,7 @@ class ArrowDeserializer {
     return ConvertBinaryLike<StringArray>(data_, out_values);
   }
 
+  // Binary strings
   template <int T2>
   inline typename std::enable_if<T2 == Type::BINARY, Status>::type ConvertValues() {
     RETURN_NOT_OK(AllocateOutput(NPY_OBJECT));
@@ -1852,6 +1942,15 @@ class ArrowDeserializer {
     return ConvertBinaryLike<BinaryArray>(data_, out_values);
   }
 
+  // Fixed length binary strings
+  template <int TYPE>
+  inline typename std::enable_if<TYPE == Type::FIXED_WIDTH_BINARY, Status>::type
+  ConvertValues() {
+    RETURN_NOT_OK(AllocateOutput(NPY_OBJECT));
+    auto out_values = reinterpret_cast<PyObject**>(PyArray_DATA(arr_));
+    return ConvertFixedWidthBinary(data_, out_values);
+  }
+
 #define CONVERTVALUES_LISTSLIKE_CASE(ArrowType, ArrowEnum) \
   case Type::ArrowEnum:                                    \
     return ConvertListsLike<ArrowType>(col_, out_values);

http://git-wip-us.apache.org/repos/asf/arrow/blob/4915ecf1/cpp/src/arrow/util/logging.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/logging.h b/cpp/src/arrow/util/logging.h
index b22f07d..697d47c 100644
--- a/cpp/src/arrow/util/logging.h
+++ b/cpp/src/arrow/util/logging.h
@@ -38,9 +38,10 @@ namespace arrow {
 #define ARROW_LOG_INTERNAL(level) ::arrow::internal::CerrLog(level)
 #define ARROW_LOG(level) ARROW_LOG_INTERNAL(ARROW_##level)
 
-#define ARROW_CHECK(condition)                               \
-  (condition) ? 0 : ::arrow::internal::FatalLog(ARROW_FATAL) \
-                        << __FILE__ << __LINE__ << " Check failed: " #condition " "
+#define ARROW_CHECK(condition)                           \
+  (condition) ? 0                                        \
+              : ::arrow::internal::FatalLog(ARROW_FATAL) \
+                    << __FILE__ << __LINE__ << " Check failed: " #condition " "
 
 #ifdef NDEBUG
 #define ARROW_DFATAL ARROW_WARNING

http://git-wip-us.apache.org/repos/asf/arrow/blob/4915ecf1/python/pyarrow/__init__.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
index dce4389..66b6038 100644
--- a/python/pyarrow/__init__.py
+++ b/python/pyarrow/__init__.py
@@ -55,7 +55,7 @@ from pyarrow.scalar import (ArrayValue, Scalar, NA, NAType,
                             Int8Value, Int16Value, Int32Value, Int64Value,
                             UInt8Value, UInt16Value, UInt32Value, UInt64Value,
                             FloatValue, DoubleValue, ListValue,
-                            BinaryValue, StringValue)
+                            BinaryValue, StringValue, FixedWidthBinaryValue)
 
 import pyarrow.schema as _schema
 
@@ -65,7 +65,8 @@ from pyarrow.schema import (null, bool_,
                             timestamp, date32, date64,
                             float_, double, binary, string,
                             list_, struct, dictionary, field,
-                            DataType, Field, Schema, schema)
+                            DataType, FixedWidthBinaryType,
+                            Field, Schema, schema)
 
 
 from pyarrow.table import Column, RecordBatch, Table, concat_tables

http://git-wip-us.apache.org/repos/asf/arrow/blob/4915ecf1/python/pyarrow/array.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/array.pxd b/python/pyarrow/array.pxd
index c3e7997..a7241c6 100644
--- a/python/pyarrow/array.pxd
+++ b/python/pyarrow/array.pxd
@@ -24,9 +24,11 @@ from pyarrow.schema cimport DataType
 
 from cpython cimport PyObject
 
+
 cdef extern from "Python.h":
     int PySlice_Check(object)
 
+
 cdef class Array:
     cdef:
         shared_ptr[CArray] sp_array
@@ -38,6 +40,7 @@ cdef class Array:
     cdef init(self, const shared_ptr[CArray]& sp_array)
     cdef getitem(self, int64_t i)
 
+
 cdef object box_array(const shared_ptr[CArray]& sp_array)
 
 
@@ -52,6 +55,7 @@ cdef class NumericArray(Array):
 cdef class IntegerArray(NumericArray):
     pass
 
+
 cdef class FloatingPointArray(NumericArray):
     pass
 
@@ -96,6 +100,10 @@ cdef class DoubleArray(FloatingPointArray):
     pass
 
 
+cdef class FixedWidthBinaryArray(Array):
+    pass
+
+
 cdef class ListArray(Array):
     pass
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/4915ecf1/python/pyarrow/array.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/array.pyx b/python/pyarrow/array.pyx
index 6cae196..289baf2 100644
--- a/python/pyarrow/array.pyx
+++ b/python/pyarrow/array.pyx
@@ -37,6 +37,7 @@ cimport pyarrow.scalar as scalar
 from pyarrow.scalar import NA
 
 from pyarrow.schema cimport (DataType, Field, Schema, DictionaryType,
+                             FixedWidthBinaryType,
                              box_data_type)
 import pyarrow.schema as schema
 
@@ -197,7 +198,11 @@ cdef class Array:
         if type is None:
             check_status(pyarrow.ConvertPySequence(list_obj, pool, &sp_array))
         else:
-            raise NotImplementedError()
+            check_status(
+                pyarrow.ConvertPySequence(
+                    list_obj, pool, &sp_array, type.sp_type
+                )
+            )
 
         return box_array(sp_array)
 
@@ -385,6 +390,7 @@ cdef class Date64Array(NumericArray):
 cdef class TimestampArray(NumericArray):
     pass
 
+
 cdef class Time32Array(NumericArray):
     pass
 
@@ -392,6 +398,7 @@ cdef class Time32Array(NumericArray):
 cdef class Time64Array(NumericArray):
     pass
 
+
 cdef class FloatArray(FloatingPointArray):
     pass
 
@@ -400,6 +407,10 @@ cdef class DoubleArray(FloatingPointArray):
     pass
 
 
+cdef class FixedWidthBinaryArray(Array):
+    pass
+
+
 cdef class ListArray(Array):
     pass
 
@@ -506,7 +517,8 @@ cdef dict _array_classes = {
     Type_LIST: ListArray,
     Type_BINARY: BinaryArray,
     Type_STRING: StringArray,
-    Type_DICTIONARY: DictionaryArray
+    Type_DICTIONARY: DictionaryArray,
+    Type_FIXED_WIDTH_BINARY: FixedWidthBinaryArray,
 }
 
 cdef object box_array(const shared_ptr[CArray]& sp_array):

http://git-wip-us.apache.org/repos/asf/arrow/blob/4915ecf1/python/pyarrow/includes/libarrow.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 8e428b4..b44ade5 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -45,6 +45,7 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
         Type_TIME64" arrow::Type::TIME64"
         Type_BINARY" arrow::Type::BINARY"
         Type_STRING" arrow::Type::STRING"
+        Type_FIXED_WIDTH_BINARY" arrow::Type::FIXED_WIDTH_BINARY"
 
         Type_LIST" arrow::Type::LIST"
         Type_STRUCT" arrow::Type::STRUCT"
@@ -139,6 +140,10 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
     cdef cppclass CStringType" arrow::StringType"(CDataType):
         pass
 
+    cdef cppclass CFixedWidthBinaryType" arrow::FixedWidthBinaryType"(CFixedWidthType):
+        CFixedWidthBinaryType(int byte_width)
+        int byte_width()
+
     cdef cppclass CField" arrow::Field":
         c_string name
         shared_ptr[CDataType] type
@@ -203,6 +208,9 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
     cdef cppclass CDoubleArray" arrow::DoubleArray"(CArray):
         double Value(int i)
 
+    cdef cppclass CFixedWidthBinaryArray" arrow::FixedWidthBinaryArray"(CArray):
+        const uint8_t* GetValue(int i)
+
     cdef cppclass CListArray" arrow::ListArray"(CArray):
         const int32_t* raw_value_offsets()
         int32_t value_offset(int i)

http://git-wip-us.apache.org/repos/asf/arrow/blob/4915ecf1/python/pyarrow/includes/pyarrow.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/includes/pyarrow.pxd b/python/pyarrow/includes/pyarrow.pxd
index c3fdf4b..8142c1c 100644
--- a/python/pyarrow/includes/pyarrow.pxd
+++ b/python/pyarrow/includes/pyarrow.pxd
@@ -30,6 +30,9 @@ cdef extern from "arrow/python/api.h" namespace "arrow::py" nogil:
     shared_ptr[CDataType] GetTimestampType(TimeUnit unit)
     CStatus ConvertPySequence(object obj, CMemoryPool* pool,
                               shared_ptr[CArray]* out)
+    CStatus ConvertPySequence(object obj, CMemoryPool* pool,
+                              shared_ptr[CArray]* out,
+                              const shared_ptr[CDataType]& type)
 
     CStatus PandasDtypeToArrow(object dtype, shared_ptr[CDataType]* type)
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/4915ecf1/python/pyarrow/scalar.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/scalar.pxd b/python/pyarrow/scalar.pxd
index 551aeb9..e9cc3cb 100644
--- a/python/pyarrow/scalar.pxd
+++ b/python/pyarrow/scalar.pxd
@@ -61,6 +61,11 @@ cdef class ListValue(ArrayValue):
 cdef class StringValue(ArrayValue):
     pass
 
+
+cdef class FixedWidthBinaryValue(ArrayValue):
+    pass
+
+
 cdef object box_scalar(DataType type,
                        const shared_ptr[CArray]& sp_array,
                        int64_t index)

http://git-wip-us.apache.org/repos/asf/arrow/blob/4915ecf1/python/pyarrow/scalar.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/scalar.pyx b/python/pyarrow/scalar.pyx
index 1b7e67b..f4a1c9e 100644
--- a/python/pyarrow/scalar.pyx
+++ b/python/pyarrow/scalar.pyx
@@ -224,6 +224,22 @@ cdef class ListValue(ArrayValue):
         return result
 
 
+cdef class FixedWidthBinaryValue(ArrayValue):
+
+    def as_py(self):
+        cdef:
+            CFixedWidthBinaryArray* ap
+            CFixedWidthBinaryType* ap_type
+            int32_t length
+            const char* data
+        ap = <CFixedWidthBinaryArray*> self.sp_array.get()
+        ap_type = <CFixedWidthBinaryType*> ap.type().get()
+        length = ap_type.byte_width()
+        data = <const char*> ap.GetValue(self.index)
+        return cp.PyBytes_FromStringAndSize(data, length)
+
+
+
 cdef dict _scalar_classes = {
     Type_BOOL: BooleanValue,
     Type_UINT8: Int8Value,
@@ -241,7 +257,8 @@ cdef dict _scalar_classes = {
     Type_DOUBLE: DoubleValue,
     Type_LIST: ListValue,
     Type_BINARY: BinaryValue,
-    Type_STRING: StringValue
+    Type_STRING: StringValue,
+    Type_FIXED_WIDTH_BINARY: FixedWidthBinaryValue,
 }
 
 cdef object box_scalar(DataType type, const shared_ptr[CArray]& sp_array,

http://git-wip-us.apache.org/repos/asf/arrow/blob/4915ecf1/python/pyarrow/schema.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/schema.pxd b/python/pyarrow/schema.pxd
index 15ee5f1..c0c2c70 100644
--- a/python/pyarrow/schema.pxd
+++ b/python/pyarrow/schema.pxd
@@ -19,6 +19,7 @@ from pyarrow.includes.common cimport *
 from pyarrow.includes.libarrow cimport (CDataType,
                                         CDictionaryType,
                                         CTimestampType,
+                                        CFixedWidthBinaryType,
                                         CField, CSchema)
 
 cdef class DataType:
@@ -39,6 +40,11 @@ cdef class TimestampType(DataType):
         const CTimestampType* ts_type
 
 
+cdef class FixedWidthBinaryType(DataType):
+    cdef:
+        const CFixedWidthBinaryType* fixed_width_binary_type
+
+
 cdef class Field:
     cdef:
         shared_ptr[CField] sp_field

http://git-wip-us.apache.org/repos/asf/arrow/blob/4915ecf1/python/pyarrow/schema.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/schema.pyx b/python/pyarrow/schema.pyx
index 4f02901..532a318 100644
--- a/python/pyarrow/schema.pyx
+++ b/python/pyarrow/schema.pyx
@@ -28,6 +28,7 @@ from pyarrow.compat import frombytes, tobytes
 from pyarrow.array cimport Array
 from pyarrow.error cimport check_status
 from pyarrow.includes.libarrow cimport (CDataType, CStructType, CListType,
+                                        CFixedWidthBinaryType,
                                         TimeUnit_SECOND, TimeUnit_MILLI,
                                         TimeUnit_MICRO, TimeUnit_NANO,
                                         Type, TimeUnit)
@@ -52,7 +53,7 @@ cdef class DataType:
         return frombytes(self.type.ToString())
 
     def __repr__(self):
-        return 'DataType({0})'.format(str(self))
+        return '{0.__class__.__name__}({0})'.format(self)
 
     def __richcmp__(DataType self, DataType other, int op):
         if op == cpython.Py_EQ:
@@ -69,9 +70,6 @@ cdef class DictionaryType(DataType):
         DataType.init(self, type)
         self.dict_type = <const CDictionaryType*> type.get()
 
-    def __repr__(self):
-        return 'DictionaryType({0})'.format(str(self))
-
 
 cdef class TimestampType(DataType):
 
@@ -92,8 +90,17 @@ cdef class TimestampType(DataType):
             else:
                 return None
 
-    def __repr__(self):
-        return 'TimestampType({0})'.format(str(self))
+
+cdef class FixedWidthBinaryType(DataType):
+
+    cdef init(self, const shared_ptr[CDataType]& type):
+        DataType.init(self, type)
+        self.fixed_width_binary_type = <const CFixedWidthBinaryType*> type.get()
+
+    property byte_width:
+
+        def __get__(self):
+            return self.fixed_width_binary_type.byte_width()
 
 
 cdef class Field:
@@ -348,11 +355,24 @@ def string():
     return primitive_type(la.Type_STRING)
 
 
-def binary():
-    """
-    Binary (PyBytes-like) type
+def binary(int length=-1):
+    """Binary (PyBytes-like) type
+
+    Parameters
+    ----------
+    length : int, optional, default -1
+        If length == -1 then return a variable length binary type. If length is
+        greater than or equal to 0 then return a fixed width binary type of
+        width `length`.
     """
-    return primitive_type(la.Type_BINARY)
+    if length == -1:
+        return primitive_type(la.Type_BINARY)
+
+    cdef FixedWidthBinaryType out = FixedWidthBinaryType()
+    cdef shared_ptr[CDataType] fixed_width_binary_type
+    fixed_width_binary_type.reset(new CFixedWidthBinaryType(length))
+    out.init(fixed_width_binary_type)
+    return out
 
 
 def list_(DataType value_type):
@@ -408,6 +428,8 @@ cdef DataType box_data_type(const shared_ptr[CDataType]& type):
         out = DictionaryType()
     elif type.get().type == la.Type_TIMESTAMP:
         out = TimestampType()
+    elif type.get().type == la.Type_FIXED_WIDTH_BINARY:
+        out = FixedWidthBinaryType()
     else:
         out = DataType()
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/4915ecf1/python/pyarrow/tests/test_convert_builtin.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_convert_builtin.py b/python/pyarrow/tests/test_convert_builtin.py
index 7915f97..9925125 100644
--- a/python/pyarrow/tests/test_convert_builtin.py
+++ b/python/pyarrow/tests/test_convert_builtin.py
@@ -92,6 +92,19 @@ class TestConvertList(unittest.TestCase):
         assert arr.type == pyarrow.binary()
         assert arr.to_pylist() == [b'foo', u1, None]
 
+    def test_fixed_width_bytes(self):
+        data = [b'foof', None, b'barb', b'2346']
+        arr = pyarrow.from_pylist(data, type=pyarrow.binary(4))
+        assert len(arr) == 4
+        assert arr.null_count == 1
+        assert arr.type == pyarrow.binary(4)
+        assert arr.to_pylist() == data
+
+    def test_fixed_width_bytes_does_not_accept_varying_lengths(self):
+        data = [b'foo', None, b'barb', b'2346']
+        with self.assertRaises(pyarrow.error.ArrowException):
+            pyarrow.from_pylist(data, type=pyarrow.binary(4))
+
     def test_date(self):
         data = [datetime.date(2000, 1, 1), None, datetime.date(1970, 1, 1),
                 datetime.date(2040, 2, 26)]

http://git-wip-us.apache.org/repos/asf/arrow/blob/4915ecf1/python/pyarrow/tests/test_convert_pandas.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_convert_pandas.py b/python/pyarrow/tests/test_convert_pandas.py
index ea7a892..f7cb47f 100644
--- a/python/pyarrow/tests/test_convert_pandas.py
+++ b/python/pyarrow/tests/test_convert_pandas.py
@@ -244,6 +244,23 @@ class TestPandasConversion(unittest.TestCase):
         expected = pd.DataFrame({'strings': values2})
         self._check_pandas_roundtrip(df, expected)
 
+    def test_fixed_width_bytes(self):
+        values = [b'foo', None, b'bar', None, None, b'hey']
+        df = pd.DataFrame({'strings': values})
+        schema = A.Schema.from_fields([A.field('strings', A.binary(3))])
+        table = A.Table.from_pandas(df, schema=schema)
+        assert table.schema[0].type == schema[0].type
+        assert table.schema[0].name == schema[0].name
+        result = table.to_pandas()
+        tm.assert_frame_equal(result, df)
+
+    def test_fixed_width_bytes_does_not_accept_varying_lengths(self):
+        values = [b'foo', None, b'ba', None, None, b'hey']
+        df = pd.DataFrame({'strings': values})
+        schema = A.Schema.from_fields([A.field('strings', A.binary(3))])
+        with self.assertRaises(A.error.ArrowException):
+            A.Table.from_pandas(df, schema=schema)
+
     def test_timestamps_notimezone_no_nulls(self):
         df = pd.DataFrame({
             'datetime64': np.array([

http://git-wip-us.apache.org/repos/asf/arrow/blob/4915ecf1/python/pyarrow/tests/test_scalars.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_scalars.py b/python/pyarrow/tests/test_scalars.py
index d56481c..265ce8d 100644
--- a/python/pyarrow/tests/test_scalars.py
+++ b/python/pyarrow/tests/test_scalars.py
@@ -87,6 +87,20 @@ class TestScalars(unittest.TestCase):
         assert v == b'bar'
         assert isinstance(v, bytes)
 
+    def test_fixed_width_bytes(self):
+        data = [b'foof', None, b'barb']
+        arr = A.from_pylist(data, type=A.binary(4))
+
+        v = arr[0]
+        assert isinstance(v, A.FixedWidthBinaryValue)
+        assert v.as_py() == b'foof'
+
+        assert arr[1] is A.NA
+
+        v = arr[2].as_py()
+        assert v == b'barb'
+        assert isinstance(v, bytes)
+
     def test_list(self):
         arr = A.from_pylist([['foo', None], None, ['bar'], []])
 


Mime
View raw message