arrow-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From w...@apache.org
Subject [1/2] arrow git commit: ARROW-54: [Python] Rename package to "pyarrow"
Date Wed, 09 Mar 2016 23:45:11 GMT
Repository: arrow
Updated Branches:
  refs/heads/master 83675273b -> 6fdcd4943


http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/pyarrow/includes/libarrow.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
new file mode 100644
index 0000000..baba112
--- /dev/null
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -0,0 +1,124 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# distutils: language = c++
+
+from pyarrow.includes.common cimport *
+
+cdef extern from "arrow/api.h" namespace "arrow" nogil:
+
+    enum LogicalType" arrow::LogicalType::type":
+        LogicalType_NA" arrow::LogicalType::NA"
+
+        LogicalType_BOOL" arrow::LogicalType::BOOL"
+
+        LogicalType_UINT8" arrow::LogicalType::UINT8"
+        LogicalType_INT8" arrow::LogicalType::INT8"
+        LogicalType_UINT16" arrow::LogicalType::UINT16"
+        LogicalType_INT16" arrow::LogicalType::INT16"
+        LogicalType_UINT32" arrow::LogicalType::UINT32"
+        LogicalType_INT32" arrow::LogicalType::INT32"
+        LogicalType_UINT64" arrow::LogicalType::UINT64"
+        LogicalType_INT64" arrow::LogicalType::INT64"
+
+        LogicalType_FLOAT" arrow::LogicalType::FLOAT"
+        LogicalType_DOUBLE" arrow::LogicalType::DOUBLE"
+
+        LogicalType_STRING" arrow::LogicalType::STRING"
+
+        LogicalType_LIST" arrow::LogicalType::LIST"
+        LogicalType_STRUCT" arrow::LogicalType::STRUCT"
+
+    cdef cppclass CDataType" arrow::DataType":
+        LogicalType type
+        c_bool nullable
+
+        c_bool Equals(const CDataType* other)
+
+        c_string ToString()
+
+    cdef cppclass MemoryPool" arrow::MemoryPool":
+        int64_t bytes_allocated()
+
+    cdef cppclass CListType" arrow::ListType"(CDataType):
+        CListType(const shared_ptr[CDataType]& value_type,
+                  c_bool nullable)
+
+    cdef cppclass CStringType" arrow::StringType"(CDataType):
+        pass
+
+    cdef cppclass CField" arrow::Field":
+        c_string name
+        shared_ptr[CDataType] type
+
+        CField(const c_string& name, const shared_ptr[CDataType]& type)
+
+    cdef cppclass CStructType" arrow::StructType"(CDataType):
+        CStructType(const vector[shared_ptr[CField]]& fields,
+                    c_bool nullable)
+
+    cdef cppclass CSchema" arrow::Schema":  # takes the whole field list, like CStructType
+        CSchema(const vector[shared_ptr[CField]]& fields)
+
+    cdef cppclass CArray" arrow::Array":
+        const shared_ptr[CDataType]& type()
+
+        int32_t length()
+        int32_t null_count()
+        LogicalType logical_type()
+
+        c_bool IsNull(int i)
+
+    cdef cppclass CUInt8Array" arrow::UInt8Array"(CArray):
+        uint8_t Value(int i)
+
+    cdef cppclass CInt8Array" arrow::Int8Array"(CArray):
+        int8_t Value(int i)
+
+    cdef cppclass CUInt16Array" arrow::UInt16Array"(CArray):
+        uint16_t Value(int i)
+
+    cdef cppclass CInt16Array" arrow::Int16Array"(CArray):
+        int16_t Value(int i)
+
+    cdef cppclass CUInt32Array" arrow::UInt32Array"(CArray):
+        uint32_t Value(int i)
+
+    cdef cppclass CInt32Array" arrow::Int32Array"(CArray):
+        int32_t Value(int i)
+
+    cdef cppclass CUInt64Array" arrow::UInt64Array"(CArray):
+        uint64_t Value(int i)
+
+    cdef cppclass CInt64Array" arrow::Int64Array"(CArray):
+        int64_t Value(int i)
+
+    cdef cppclass CFloatArray" arrow::FloatArray"(CArray):
+        float Value(int i)
+
+    cdef cppclass CDoubleArray" arrow::DoubleArray"(CArray):
+        double Value(int i)
+
+    cdef cppclass CListArray" arrow::ListArray"(CArray):
+        const int32_t* offsets()
+        int32_t offset(int i)
+        int32_t value_length(int i)
+        const shared_ptr[CArray]& values()
+        const shared_ptr[CDataType]& value_type()
+
+    cdef cppclass CStringArray" arrow::StringArray"(CListArray):
+        c_string GetString(int i)

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/pyarrow/includes/parquet.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/includes/parquet.pxd b/python/pyarrow/includes/parquet.pxd
new file mode 100644
index 0000000..99a2d42
--- /dev/null
+++ b/python/pyarrow/includes/parquet.pxd
@@ -0,0 +1,51 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# distutils: language = c++
+
+from pyarrow.includes.common cimport *
+
+cdef extern from "parquet/api/reader.h" namespace "parquet_cpp" nogil:
+    cdef cppclass ColumnReader:
+        pass
+
+    cdef cppclass BoolReader(ColumnReader):
+        pass
+
+    cdef cppclass Int32Reader(ColumnReader):
+        pass
+
+    cdef cppclass Int64Reader(ColumnReader):
+        pass
+
+    cdef cppclass Int96Reader(ColumnReader):
+        pass
+
+    cdef cppclass FloatReader(ColumnReader):
+        pass
+
+    cdef cppclass DoubleReader(ColumnReader):
+        pass
+
+    cdef cppclass ByteArrayReader(ColumnReader):
+        pass
+
+    cdef cppclass RowGroupReader:
+        pass
+
+    cdef cppclass ParquetFileReader:
+        pass

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/pyarrow/includes/pyarrow.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/includes/pyarrow.pxd b/python/pyarrow/includes/pyarrow.pxd
new file mode 100644
index 0000000..9a0c004
--- /dev/null
+++ b/python/pyarrow/includes/pyarrow.pxd
@@ -0,0 +1,45 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# distutils: language = c++
+
+from pyarrow.includes.common cimport *
+from pyarrow.includes.libarrow cimport (CArray, CDataType, LogicalType,
+                                        MemoryPool)
+
+cdef extern from "pyarrow/api.h" namespace "pyarrow" nogil:
+    # We can later add more of the common status factory methods as needed
+    cdef Status Status_OK "Status::OK"()
+
+    cdef cppclass Status:
+        Status()
+
+        c_string ToString()
+
+        c_bool ok()
+        c_bool IsOutOfMemory()
+        c_bool IsKeyError()
+        c_bool IsTypeError()
+        c_bool IsIOError()
+        c_bool IsValueError()
+        c_bool IsNotImplemented()
+        c_bool IsArrowError()
+
+    shared_ptr[CDataType] GetPrimitiveType(LogicalType type, c_bool nullable)
+    Status ConvertPySequence(object obj, shared_ptr[CArray]* out)
+
+    MemoryPool* GetMemoryPool()

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/pyarrow/parquet.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/parquet.pyx b/python/pyarrow/parquet.pyx
new file mode 100644
index 0000000..622e7d0
--- /dev/null
+++ b/python/pyarrow/parquet.pyx
@@ -0,0 +1,23 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# cython: profile=False
+# distutils: language = c++
+# cython: embedsignature = True
+
+from pyarrow.compat import frombytes, tobytes
+from pyarrow.includes.parquet cimport *

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/pyarrow/scalar.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/scalar.pxd b/python/pyarrow/scalar.pxd
new file mode 100644
index 0000000..b068457
--- /dev/null
+++ b/python/pyarrow/scalar.pxd
@@ -0,0 +1,66 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from pyarrow.includes.common cimport *
+from pyarrow.includes.libarrow cimport *
+
+from pyarrow.schema cimport DataType
+
+cdef class Scalar:
+    cdef readonly:
+        DataType type
+
+
+cdef class NAType(Scalar):
+    pass
+
+
+cdef class ArrayValue(Scalar):
+    cdef:
+        shared_ptr[CArray] sp_array
+        int index
+
+    cdef void init(self, DataType type,
+                   const shared_ptr[CArray]& sp_array, int index)
+
+    cdef void _set_array(self, const shared_ptr[CArray]& sp_array)
+
+
+cdef class Int8Value(ArrayValue):
+    pass
+
+
+cdef class Int64Value(ArrayValue):
+    pass
+
+
+cdef class ListValue(ArrayValue):
+    cdef readonly:
+        DataType value_type
+
+    cdef:
+        CListArray* ap
+
+    cdef getitem(self, int i)
+
+
+cdef class StringValue(ArrayValue):
+    pass
+
+cdef object box_arrow_scalar(DataType type,
+                             const shared_ptr[CArray]& sp_array,
+                             int index)

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/pyarrow/scalar.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/scalar.pyx b/python/pyarrow/scalar.pyx
new file mode 100644
index 0000000..261a389
--- /dev/null
+++ b/python/pyarrow/scalar.pyx
@@ -0,0 +1,198 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from pyarrow.schema cimport DataType, box_data_type
+
+from pyarrow.compat import frombytes
+import pyarrow.schema as schema
+
+NA = None
+
+cdef class NAType(Scalar):
+
+    def __cinit__(self):
+        global NA
+        if NA is not None:
+            raise Exception('Cannot create multiple NAType instances')
+
+        self.type = schema.null()
+
+    def __repr__(self):
+        return 'NA'
+
+    def as_py(self):
+        return None
+
+NA = NAType()
+
+cdef class ArrayValue(Scalar):
+
+    cdef void init(self, DataType type, const shared_ptr[CArray]& sp_array,
+                   int index):
+        self.type = type
+        self.index = index
+        self._set_array(sp_array)
+
+    cdef void _set_array(self, const shared_ptr[CArray]& sp_array):
+        self.sp_array = sp_array
+
+    def __repr__(self):
+        if hasattr(self, 'as_py'):
+            return repr(self.as_py())
+        else:
+            return Scalar.__repr__(self)
+
+
+cdef class BooleanValue(ArrayValue):
+    pass
+
+
+cdef class Int8Value(ArrayValue):
+
+    def as_py(self):
+        cdef CInt8Array* ap = <CInt8Array*> self.sp_array.get()
+        return ap.Value(self.index)
+
+
+cdef class UInt8Value(ArrayValue):
+
+    def as_py(self):
+        cdef CUInt8Array* ap = <CUInt8Array*> self.sp_array.get()
+        return ap.Value(self.index)
+
+
+cdef class Int16Value(ArrayValue):
+
+    def as_py(self):
+        cdef CInt16Array* ap = <CInt16Array*> self.sp_array.get()
+        return ap.Value(self.index)
+
+
+cdef class UInt16Value(ArrayValue):
+
+    def as_py(self):
+        cdef CUInt16Array* ap = <CUInt16Array*> self.sp_array.get()
+        return ap.Value(self.index)
+
+
+cdef class Int32Value(ArrayValue):
+
+    def as_py(self):
+        cdef CInt32Array* ap = <CInt32Array*> self.sp_array.get()
+        return ap.Value(self.index)
+
+
+cdef class UInt32Value(ArrayValue):
+
+    def as_py(self):
+        cdef CUInt32Array* ap = <CUInt32Array*> self.sp_array.get()
+        return ap.Value(self.index)
+
+
+cdef class Int64Value(ArrayValue):
+
+    def as_py(self):
+        cdef CInt64Array* ap = <CInt64Array*> self.sp_array.get()
+        return ap.Value(self.index)
+
+
+cdef class UInt64Value(ArrayValue):
+
+    def as_py(self):
+        cdef CUInt64Array* ap = <CUInt64Array*> self.sp_array.get()
+        return ap.Value(self.index)
+
+
+cdef class FloatValue(ArrayValue):
+
+    def as_py(self):
+        cdef CFloatArray* ap = <CFloatArray*> self.sp_array.get()
+        return ap.Value(self.index)
+
+
+cdef class DoubleValue(ArrayValue):
+
+    def as_py(self):
+        cdef CDoubleArray* ap = <CDoubleArray*> self.sp_array.get()
+        return ap.Value(self.index)
+
+
+cdef class StringValue(ArrayValue):
+
+    def as_py(self):
+        cdef CStringArray* ap = <CStringArray*> self.sp_array.get()
+        return frombytes(ap.GetString(self.index))
+
+
+cdef class ListValue(ArrayValue):
+
+    def __len__(self):
+        return self.ap.value_length(self.index)
+
+    def __getitem__(self, i):
+        return self.getitem(i)
+
+    def __iter__(self):
+        # Falling off the end finishes the generator (PEP 479-safe)
+        for i in range(len(self)):
+            yield self.getitem(i)
+
+    cdef void _set_array(self, const shared_ptr[CArray]& sp_array):
+        self.sp_array = sp_array
+        self.ap = <CListArray*> sp_array.get()
+        self.value_type = box_data_type(self.ap.value_type())
+
+    cdef getitem(self, int i):
+        cdef int j = self.ap.offset(self.index) + i
+        return box_arrow_scalar(self.value_type, self.ap.values(), j)
+
+    def as_py(self):
+        cdef:
+            int j
+            list result = []
+
+        for j in range(len(self)):
+            result.append(self.getitem(j).as_py())
+
+        return result
+
+
+cdef dict _scalar_classes = {  # logical type -> ArrayValue subclass
+    LogicalType_UINT8: UInt8Value,
+    LogicalType_UINT16: UInt16Value,
+    LogicalType_UINT32: UInt32Value,
+    LogicalType_UINT64: UInt64Value,
+    LogicalType_INT8: Int8Value,
+    LogicalType_INT16: Int16Value,
+    LogicalType_INT32: Int32Value,
+    LogicalType_INT64: Int64Value,
+    LogicalType_FLOAT: FloatValue,
+    LogicalType_DOUBLE: DoubleValue,
+    LogicalType_LIST: ListValue,
+    LogicalType_STRING: StringValue
+}
+
+cdef object box_arrow_scalar(DataType type,
+                             const shared_ptr[CArray]& sp_array,
+                             int index):
+    cdef ArrayValue val
+    if sp_array.get().IsNull(index):
+        return NA
+    else:
+        val = _scalar_classes[type.type.type]()
+        val.init(type, sp_array, index)
+        return val

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/pyarrow/schema.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/schema.pxd b/python/pyarrow/schema.pxd
new file mode 100644
index 0000000..07b9bd0
--- /dev/null
+++ b/python/pyarrow/schema.pxd
@@ -0,0 +1,41 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from pyarrow.includes.common cimport shared_ptr
+from pyarrow.includes.libarrow cimport CDataType, CField, CSchema
+
+cdef class DataType:
+    cdef:
+        shared_ptr[CDataType] sp_type
+        CDataType* type
+
+    cdef init(self, const shared_ptr[CDataType]& type)
+
+cdef class Field:
+    cdef:
+        shared_ptr[CField] sp_field
+        CField* field
+
+    cdef readonly:
+        DataType type
+
+cdef class Schema:
+    cdef:
+        shared_ptr[CSchema] sp_schema
+        CSchema* schema
+
+cdef DataType box_data_type(const shared_ptr[CDataType]& type)

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/pyarrow/schema.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/schema.pyx b/python/pyarrow/schema.pyx
new file mode 100644
index 0000000..ea87872
--- /dev/null
+++ b/python/pyarrow/schema.pyx
@@ -0,0 +1,164 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+########################################
+# Data types, fields, schemas, and so forth
+
+# cython: profile=False
+# distutils: language = c++
+# cython: embedsignature = True
+
+from pyarrow.compat import frombytes, tobytes
+from pyarrow.includes.libarrow cimport *
+cimport pyarrow.includes.pyarrow as pyarrow
+
+cimport cpython
+
+cdef class DataType:
+
+    def __cinit__(self):
+        pass
+
+    cdef init(self, const shared_ptr[CDataType]& type):
+        self.sp_type = type
+        self.type = type.get()
+
+    def __str__(self):
+        return frombytes(self.type.ToString())
+
+    def __repr__(self):
+        return 'DataType({0})'.format(str(self))
+
+    def __richcmp__(DataType self, DataType other, int op):
+        if op == cpython.Py_EQ:
+            return self.type.Equals(other.type)
+        elif op == cpython.Py_NE:
+            return not self.type.Equals(other.type)
+        else:
+            raise TypeError('Invalid comparison')
+
+
+cdef class Field:
+
+    def __cinit__(self, object name, DataType type):
+        self.type = type
+        self.sp_field.reset(new CField(tobytes(name), type.sp_type))
+        self.field = self.sp_field.get()
+
+    def __repr__(self):
+        return 'Field({0!r}, type={1})'.format(self.name, str(self.type))
+
+    property name:
+
+        def __get__(self):
+            return frombytes(self.field.name)
+
+cdef dict _type_cache = {}
+
+cdef DataType primitive_type(LogicalType type, bint nullable=True):
+    if (type, nullable) in _type_cache:
+        return _type_cache[type, nullable]
+
+    cdef DataType out = DataType()
+    out.init(pyarrow.GetPrimitiveType(type, nullable))
+
+    _type_cache[type, nullable] = out
+    return out
+
+#------------------------------------------------------------
+# Type factory functions
+
+def field(name, type):
+    return Field(name, type)
+
+cdef set PRIMITIVE_TYPES = set([
+    LogicalType_NA, LogicalType_BOOL,
+    LogicalType_UINT8, LogicalType_INT8,
+    LogicalType_UINT16, LogicalType_INT16,
+    LogicalType_UINT32, LogicalType_INT32,
+    LogicalType_UINT64, LogicalType_INT64,
+    LogicalType_FLOAT, LogicalType_DOUBLE])
+
+def null():
+    return primitive_type(LogicalType_NA)
+
+def bool_(c_bool nullable=True):
+    return primitive_type(LogicalType_BOOL, nullable)
+
+def uint8(c_bool nullable=True):
+    return primitive_type(LogicalType_UINT8, nullable)
+
+def int8(c_bool nullable=True):
+    return primitive_type(LogicalType_INT8, nullable)
+
+def uint16(c_bool nullable=True):
+    return primitive_type(LogicalType_UINT16, nullable)
+
+def int16(c_bool nullable=True):
+    return primitive_type(LogicalType_INT16, nullable)
+
+def uint32(c_bool nullable=True):
+    return primitive_type(LogicalType_UINT32, nullable)
+
+def int32(c_bool nullable=True):
+    return primitive_type(LogicalType_INT32, nullable)
+
+def uint64(c_bool nullable=True):
+    return primitive_type(LogicalType_UINT64, nullable)
+
+def int64(c_bool nullable=True):
+    return primitive_type(LogicalType_INT64, nullable)
+
+def float_(c_bool nullable=True):
+    return primitive_type(LogicalType_FLOAT, nullable)
+
+def double(c_bool nullable=True):
+    return primitive_type(LogicalType_DOUBLE, nullable)
+
+def string(c_bool nullable=True):
+    """
+    UTF8 string
+    """
+    return primitive_type(LogicalType_STRING, nullable)
+
+def list_(DataType value_type, c_bool nullable=True):
+    cdef DataType out = DataType()
+    out.init(shared_ptr[CDataType](
+        new CListType(value_type.sp_type, nullable)))
+    return out
+
+def struct(fields, c_bool nullable=True):
+    """
+    Create a struct DataType from an iterable of Field objects
+    """
+    cdef:
+        DataType out = DataType()
+        Field field
+        vector[shared_ptr[CField]] c_fields
+
+    for field in fields:
+        c_fields.push_back(field.sp_field)
+
+    out.init(shared_ptr[CDataType](
+        new CStructType(c_fields, nullable)))
+    return out
+
+
+cdef DataType box_data_type(const shared_ptr[CDataType]& type):
+    cdef DataType out = DataType()
+    out.init(type)
+    return out

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/pyarrow/tests/__init__.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/__init__.py b/python/pyarrow/tests/__init__.py
new file mode 100644
index 0000000..e69de29

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/pyarrow/tests/test_array.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
new file mode 100644
index 0000000..034c157
--- /dev/null
+++ b/python/pyarrow/tests/test_array.py
@@ -0,0 +1,63 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from pyarrow.compat import unittest
+import pyarrow
+import pyarrow.formatting as fmt
+
+
+class TestArrayAPI(unittest.TestCase):
+
+    def test_getitem_NA(self):
+        arr = pyarrow.from_pylist([1, None, 2])
+        assert arr[1] is pyarrow.NA
+
+    def test_list_format(self):
+        arr = pyarrow.from_pylist([[1], None, [2, 3]])
+        result = fmt.array_format(arr)
+        expected = """\
+[
+  [1],
+  NA,
+  [2,
+   3]
+]"""
+        assert result == expected
+
+    def test_string_format(self):
+        arr = pyarrow.from_pylist(['foo', None, 'bar'])
+        result = fmt.array_format(arr)
+        expected = """\
+[
+  'foo',
+  NA,
+  'bar'
+]"""
+        assert result == expected
+
+    def test_long_array_format(self):
+        arr = pyarrow.from_pylist(range(100))
+        result = fmt.array_format(arr, window=2)
+        expected = """\
+[
+  0,
+  1,
+  ...
+  98,
+  99
+]"""
+        assert result == expected

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/pyarrow/tests/test_convert_builtin.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_convert_builtin.py b/python/pyarrow/tests/test_convert_builtin.py
new file mode 100644
index 0000000..25f6969
--- /dev/null
+++ b/python/pyarrow/tests/test_convert_builtin.py
@@ -0,0 +1,85 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from pyarrow.compat import unittest
+import pyarrow
+
+
+class TestConvertList(unittest.TestCase):
+
+    def test_boolean(self):
+        pass
+
+    def test_empty_list(self):
+        arr = pyarrow.from_pylist([])
+        assert len(arr) == 0
+        assert arr.null_count == 0
+        assert arr.type == pyarrow.null()
+
+    def test_all_none(self):
+        arr = pyarrow.from_pylist([None, None])
+        assert len(arr) == 2
+        assert arr.null_count == 2
+        assert arr.type == pyarrow.null()
+
+    def test_integer(self):
+        arr = pyarrow.from_pylist([1, None, 3, None])
+        assert len(arr) == 4
+        assert arr.null_count == 2
+        assert arr.type == pyarrow.int64()
+
+    def test_garbage_collection(self):
+        import gc
+        bytes_before = pyarrow.total_allocated_bytes()
+        pyarrow.from_pylist([1, None, 3, None])
+        gc.collect()
+        assert pyarrow.total_allocated_bytes() == bytes_before
+
+    def test_double(self):
+        data = [1.5, 1, None, 2.5, None, None]
+        arr = pyarrow.from_pylist(data)
+        assert len(arr) == 6
+        assert arr.null_count == 3
+        assert arr.type == pyarrow.double()
+
+    def test_string(self):
+        data = ['foo', b'bar', None, 'arrow']
+        arr = pyarrow.from_pylist(data)
+        assert len(arr) == 4
+        assert arr.null_count == 1
+        assert arr.type == pyarrow.string()
+
+    def test_mixed_nesting_levels(self):
+        pyarrow.from_pylist([1, 2, None])
+        pyarrow.from_pylist([[1], [2], None])
+        pyarrow.from_pylist([[1], [2], [None]])
+
+        with self.assertRaises(pyarrow.ArrowException):
+            pyarrow.from_pylist([1, 2, [1]])
+
+        with self.assertRaises(pyarrow.ArrowException):
+            pyarrow.from_pylist([1, 2, []])
+
+        with self.assertRaises(pyarrow.ArrowException):
+            pyarrow.from_pylist([[1], [2], [None, [1]]])
+
+    def test_list_of_int(self):
+        data = [[1, 2, 3], [], None, [1, 2]]
+        arr = pyarrow.from_pylist(data)
+        assert len(arr) == 4
+        assert arr.null_count == 1
+        assert arr.type == pyarrow.list_(pyarrow.int64())

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/pyarrow/tests/test_scalars.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_scalars.py b/python/pyarrow/tests/test_scalars.py
new file mode 100644
index 0000000..021737d
--- /dev/null
+++ b/python/pyarrow/tests/test_scalars.py
@@ -0,0 +1,82 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from pyarrow.compat import unittest, u
+import pyarrow as arrow
+
+
+class TestScalars(unittest.TestCase):
+
+    def test_null_singleton(self):
+        with self.assertRaises(Exception):
+            arrow.NAType()
+
+    def test_bool(self):
+        pass
+
+    def test_int64(self):
+        arr = arrow.from_pylist([1, 2, None])
+
+        v = arr[0]
+        assert isinstance(v, arrow.Int64Value)
+        assert repr(v) == "1"
+        assert v.as_py() == 1
+
+        assert arr[2] is arrow.NA
+
+    def test_double(self):
+        arr = arrow.from_pylist([1.5, None, 3])
+
+        v = arr[0]
+        assert isinstance(v, arrow.DoubleValue)
+        assert repr(v) == "1.5"
+        assert v.as_py() == 1.5
+
+        assert arr[1] is arrow.NA
+
+        v = arr[2]
+        assert v.as_py() == 3.0
+
+    def test_string(self):
+        arr = arrow.from_pylist(['foo', None, u('bar')])
+
+        v = arr[0]
+        assert isinstance(v, arrow.StringValue)
+        assert repr(v) == "'foo'"
+        assert v.as_py() == 'foo'
+
+        assert arr[1] is arrow.NA
+
+        v = arr[2].as_py()
+        assert v == 'bar'
+        assert isinstance(v, str)
+
+    def test_list(self):
+        arr = arrow.from_pylist([['foo', None], None, ['bar'], []])
+
+        v = arr[0]
+        assert len(v) == 2
+        assert isinstance(v, arrow.ListValue)
+        assert repr(v) == "['foo', None]"
+        assert v.as_py() == ['foo', None]
+        assert v[0].as_py() == 'foo'
+        assert v[1] is arrow.NA
+
+        assert arr[1] is arrow.NA
+
+        v = arr[3]
+        assert len(v) == 0

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/pyarrow/tests/test_schema.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_schema.py b/python/pyarrow/tests/test_schema.py
new file mode 100644
index 0000000..0235526
--- /dev/null
+++ b/python/pyarrow/tests/test_schema.py
@@ -0,0 +1,51 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from pyarrow.compat import unittest
+import pyarrow as arrow
+
+
+class TestTypes(unittest.TestCase):
+
+    def test_integers(self):
+        dtypes = ['int8', 'int16', 'int32', 'int64',
+                  'uint8', 'uint16', 'uint32', 'uint64']
+
+        for name in dtypes:
+            factory = getattr(arrow, name)
+            t = factory()
+            t_required = factory(False)
+
+            assert str(t) == name
+            assert str(t_required) == '{0} not null'.format(name)
+
+    def test_list(self):
+        value_type = arrow.int32()
+        list_type = arrow.list_(value_type)
+        assert str(list_type) == 'list<int32>'
+
+    def test_string(self):
+        t = arrow.string()
+        assert str(t) == 'string'
+
+    def test_field(self):
+        t = arrow.string()
+        f = arrow.field('foo', t)
+
+        assert f.name == 'foo'
+        assert f.type is t
+        assert repr(f) == "Field('foo', type=string)"

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/requirements.txt
----------------------------------------------------------------------
diff --git a/python/requirements.txt b/python/requirements.txt
index a82cb20..f42c90c 100644
--- a/python/requirements.txt
+++ b/python/requirements.txt
@@ -1,4 +1,3 @@
 pytest
 numpy>=1.7.0
-pandas>=0.12.0
 six

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/setup.py
----------------------------------------------------------------------
diff --git a/python/setup.py b/python/setup.py
index eb3ff2a..5cc871a 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -27,7 +27,7 @@ import Cython
 import sys
 
 import pkg_resources
-from setuptools import setup
+from setuptools import setup, Extension
 
 import os
 
@@ -40,10 +40,12 @@ from distutils import sysconfig
 is_64_bit = sys.maxsize > 2**32
 
 # Check if this is a debug build of Python.
-if hasattr(sys, 'gettotalrefcount'):
-    build_type = 'Debug'
-else:
-    build_type = 'Release'
+# if hasattr(sys, 'gettotalrefcount'):
+#     build_type = 'Debug'
+# else:
+#     build_type = 'Release'
+
+build_type = 'Debug'
 
 if Cython.__version__ < '0.19.1':
     raise Exception('Please upgrade to Cython 0.19.1 or newer')
@@ -51,7 +53,7 @@ if Cython.__version__ < '0.19.1':
 MAJOR = 0
 MINOR = 1
 MICRO = 0
-VERSION = '%d.%d.%d' % (MAJOR, MINOR, MICRO)
+VERSION = '%d.%d.%ddev' % (MAJOR, MINOR, MICRO)
 
 
 class clean(_clean):
@@ -70,6 +72,9 @@ class build_ext(_build_ext):
     def build_extensions(self):
         numpy_incl = pkg_resources.resource_filename('numpy', 'core/include')
 
+        self.extensions = [ext for ext in self.extensions
+                           if ext.name != '__dummy__']
+
         for ext in self.extensions:
             if (hasattr(ext, 'include_dirs') and
                     numpy_incl not in ext.include_dirs):
@@ -98,6 +103,7 @@ class build_ext(_build_ext):
 
         # The staging directory for the module being built
         build_temp = pjoin(os.getcwd(), self.build_temp)
+        build_lib = os.path.join(os.getcwd(), self.build_lib)
 
         # Change to the build directory
         saved_cwd = os.getcwd()
@@ -124,7 +130,7 @@ class build_ext(_build_ext):
                              static_lib_option, source]
 
             self.spawn(cmake_command)
-            args = ['make']
+            args = ['make', 'VERBOSE=1']
             if 'PYARROW_PARALLEL' in os.environ:
                 args.append('-j{0}'.format(os.environ['PYARROW_PARALLEL']))
             self.spawn(args)
@@ -150,21 +156,19 @@ class build_ext(_build_ext):
         if self.inplace:
             # a bit hacky
             build_lib = saved_cwd
-        else:
-            build_lib = pjoin(os.getcwd(), self.build_lib)
 
         # Move the built libpyarrow library to the place expected by the Python
         # build
         if sys.platform != 'win32':
             name, = glob.glob('libpyarrow.*')
             try:
-                os.makedirs(pjoin(build_lib, 'arrow'))
+                os.makedirs(pjoin(build_lib, 'pyarrow'))
             except OSError:
                 pass
-            shutil.move(name, pjoin(build_lib, 'arrow', name))
+            shutil.move(name, pjoin(build_lib, 'pyarrow', name))
         else:
             shutil.move(pjoin(build_type, 'pyarrow.dll'),
-                        pjoin(build_lib, 'arrow', 'pyarrow.dll'))
+                        pjoin(build_lib, 'pyarrow', 'pyarrow.dll'))
 
         # Move the built C-extension to the place expected by the Python build
         self._found_names = []
@@ -192,7 +196,7 @@ class build_ext(_build_ext):
     def _get_cmake_ext_path(self, name):
         # Get the package directory from build_py
         build_py = self.get_finalized_command('build_py')
-        package_dir = build_py.get_package_dir('arrow')
+        package_dir = build_py.get_package_dir('pyarrow')
         # This is the name of the arrow C-extension
         suffix = sysconfig.get_config_var('EXT_SUFFIX')
         if suffix is None:
@@ -217,23 +221,23 @@ class build_ext(_build_ext):
 
     def get_outputs(self):
         # Just the C extensions
-        cmake_exts = [self._get_cmake_ext_path(name)
-                      for name in self.get_names()]
-        regular_exts = _build_ext.get_outputs(self)
-        return regular_exts + cmake_exts
+        # regular_exts = _build_ext.get_outputs(self)
+        return [self._get_cmake_ext_path(name)
+                for name in self.get_names()]
 
 
-extensions = []
-
 DESC = """\
 Python library for Apache Arrow"""
 
 setup(
-    name="arrow",
-    packages=['arrow', 'arrow.tests'],
+    name="pyarrow",
+    packages=['pyarrow', 'pyarrow.tests'],
     version=VERSION,
-    package_data={'arrow': ['*.pxd', '*.pyx']},
-    ext_modules=extensions,
+    zip_safe=False,
+    package_data={'pyarrow': ['*.pxd', '*.pyx']},
+    # Dummy extension to trigger build_ext
+    ext_modules=[Extension('__dummy__', sources=[])],
+
     cmdclass={
         'clean': clean,
         'build_ext': build_ext
@@ -243,5 +247,5 @@ setup(
     license='Apache License, Version 2.0',
     maintainer="Apache Arrow Developers",
     maintainer_email="dev@arrow.apache.org",
-    test_suite="arrow.tests"
+    test_suite="pyarrow.tests"
 )

http://git-wip-us.apache.org/repos/asf/arrow/blob/6fdcd494/python/src/pyarrow/util/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/python/src/pyarrow/util/CMakeLists.txt b/python/src/pyarrow/util/CMakeLists.txt
index 3fd8bac..4afb4d0 100644
--- a/python/src/pyarrow/util/CMakeLists.txt
+++ b/python/src/pyarrow/util/CMakeLists.txt
@@ -19,19 +19,21 @@
 # pyarrow_test_main
 #######################################
 
-add_library(pyarrow_test_main
-  test_main.cc)
+if (PYARROW_BUILD_TESTS)
+  add_library(pyarrow_test_main
+	test_main.cc)
 
-if (APPLE)
-  target_link_libraries(pyarrow_test_main
-    gtest
-    dl)
-  set_target_properties(pyarrow_test_main
-        PROPERTIES LINK_FLAGS "-undefined dynamic_lookup")
-else()
-  target_link_libraries(pyarrow_test_main
-    gtest
-    pthread
-    dl
-  )
+  if (APPLE)
+	target_link_libraries(pyarrow_test_main
+      gtest
+      dl)
+	set_target_properties(pyarrow_test_main
+      PROPERTIES LINK_FLAGS "-undefined dynamic_lookup")
+  else()
+	target_link_libraries(pyarrow_test_main
+      gtest
+      pthread
+      dl
+	  )
+  endif()
 endif()


Mime
View raw message