arrow-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From w...@apache.org
Subject arrow git commit: ARROW-1327: [Python] Always release GIL before calling check_status in Cython
Date Mon, 07 Aug 2017 14:44:56 GMT
Repository: arrow
Updated Branches:
  refs/heads/master 25439e7fb -> 3200e914d


ARROW-1327: [Python] Always release GIL before calling check_status in Cython

This should prevent deadlock in some multithreaded or subinterpreter contexts. We can be more
mindful of this in the future.

Author: Wes McKinney <wes.mckinney@twosigma.com>

Closes #945 from wesm/ARROW-1327 and squashes the following commits:

d690c5b3 [Wes McKinney] Fix some GIL acquisitions
870e5222 [Wes McKinney] Always release GIL before calling check_status in Cython


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/3200e914
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/3200e914
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/3200e914

Branch: refs/heads/master
Commit: 3200e914d78773bf4a59c3c0a1e1e7164d77fa64
Parents: 25439e7
Author: Wes McKinney <wes.mckinney@twosigma.com>
Authored: Mon Aug 7 10:44:50 2017 -0400
Committer: Wes McKinney <wes.mckinney@twosigma.com>
Committed: Mon Aug 7 10:44:50 2017 -0400

----------------------------------------------------------------------
 cpp/src/arrow/python/builtin_convert.cc |  7 +++++-
 cpp/src/arrow/python/numpy_convert.cc   |  4 +++
 python/pyarrow/_parquet.pyx             | 18 +++++++++-----
 python/pyarrow/array.pxi                | 37 ++++++++++++++++++----------
 python/pyarrow/feather.pxi              |  3 ++-
 python/pyarrow/io-hdfs.pxi              | 12 ++++++---
 python/pyarrow/io.pxi                   |  6 +++--
 python/pyarrow/table.pxi                |  5 ++--
 8 files changed, 63 insertions(+), 29 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/3200e914/cpp/src/arrow/python/builtin_convert.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/builtin_convert.cc b/cpp/src/arrow/python/builtin_convert.cc
index 6eaa37f..d3bf76d 100644
--- a/cpp/src/arrow/python/builtin_convert.cc
+++ b/cpp/src/arrow/python/builtin_convert.cc
@@ -660,6 +660,7 @@ Status AppendPySequence(PyObject* obj, int64_t size,
 }
 
 Status ConvertPySequence(PyObject* obj, MemoryPool* pool, std::shared_ptr<Array>* out)
{
+  PyAcquireGIL lock;
   std::shared_ptr<DataType> type;
   int64_t size;
   RETURN_NOT_OK(InferArrowTypeAndSize(obj, &size, &type));
@@ -668,6 +669,7 @@ Status ConvertPySequence(PyObject* obj, MemoryPool* pool, std::shared_ptr<Array>
 
 Status ConvertPySequence(PyObject* obj, MemoryPool* pool, std::shared_ptr<Array>* out,
                          const std::shared_ptr<DataType>& type, int64_t size) {
+  PyAcquireGIL lock;
   // Handle NA / NullType case
   if (type->id() == Type::NA) {
     out->reset(new NullArray(size));
@@ -684,7 +686,10 @@ Status ConvertPySequence(PyObject* obj, MemoryPool* pool, std::shared_ptr<Array>
 Status ConvertPySequence(PyObject* obj, MemoryPool* pool, std::shared_ptr<Array>* out,
                          const std::shared_ptr<DataType>& type) {
   int64_t size;
-  RETURN_NOT_OK(InferArrowSize(obj, &size));
+  {
+    PyAcquireGIL lock;
+    RETURN_NOT_OK(InferArrowSize(obj, &size));
+  }
   return ConvertPySequence(obj, pool, out, type, size);
 }
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/3200e914/cpp/src/arrow/python/numpy_convert.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/numpy_convert.cc b/cpp/src/arrow/python/numpy_convert.cc
index 95d63b8..61192f3 100644
--- a/cpp/src/arrow/python/numpy_convert.cc
+++ b/cpp/src/arrow/python/numpy_convert.cc
@@ -202,6 +202,8 @@ Status NumPyDtypeToArrow(PyObject* dtype, std::shared_ptr<DataType>*
out) {
 #undef TO_ARROW_TYPE_CASE
 
 Status NdarrayToTensor(MemoryPool* pool, PyObject* ao, std::shared_ptr<Tensor>* out)
{
+  PyAcquireGIL lock;
+
   if (!PyArray_Check(ao)) {
     return Status::TypeError("Did not pass ndarray object");
   }
@@ -234,6 +236,8 @@ Status NdarrayToTensor(MemoryPool* pool, PyObject* ao, std::shared_ptr<Tensor>*
 }
 
 Status TensorToNdarray(const Tensor& tensor, PyObject* base, PyObject** out) {
+  PyAcquireGIL lock;
+
   int type_num;
   RETURN_NOT_OK(GetNumPyType(*tensor.type(), &type_num));
   PyArray_Descr* dtype = PyArray_DescrNewFromType(type_num);

http://git-wip-us.apache.org/repos/asf/arrow/blob/3200e914/python/pyarrow/_parquet.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/_parquet.pyx b/python/pyarrow/_parquet.pyx
index 919e82c..65ca468 100644
--- a/python/pyarrow/_parquet.pyx
+++ b/python/pyarrow/_parquet.pyx
@@ -558,9 +558,14 @@ cdef class ParquetWriter:
         cdef:
             shared_ptr[FileOutputStream] filestream
             shared_ptr[WriterProperties] properties
+            c_string c_where
+            CMemoryPool* pool
 
         if isinstance(where, six.string_types):
-            check_status(FileOutputStream.Open(tobytes(where), &filestream))
+            c_where = tobytes(where)
+            with nogil:
+                check_status(FileOutputStream.Open(c_where,
+                                                   &filestream))
             self.sink = <shared_ptr[OutputStream]> filestream
         else:
             get_writer(where, &self.sink)
@@ -580,11 +585,12 @@ cdef class ParquetWriter:
         self._set_int96_support(&arrow_properties_builder)
         arrow_properties = arrow_properties_builder.build()
 
-        check_status(
-            FileWriter.Open(deref(schema.schema),
-                            maybe_unbox_memory_pool(memory_pool),
-                            self.sink, properties, arrow_properties,
-                            &self.writer))
+        pool = maybe_unbox_memory_pool(memory_pool)
+        with nogil:
+            check_status(
+                FileWriter.Open(deref(schema.schema), pool,
+                                self.sink, properties, arrow_properties,
+                                &self.writer))
 
     cdef void _set_int96_support(self, ArrowWriterProperties.Builder* props):
         if self.use_deprecated_int96_timestamps:

http://git-wip-us.apache.org/repos/asf/arrow/blob/3200e914/python/pyarrow/array.pxi
----------------------------------------------------------------------
diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index 4e0c21c..c0c7ac6 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -44,23 +44,28 @@ def array(object sequence, DataType type=None, MemoryPool memory_pool=None,
     cdef:
         shared_ptr[CArray] sp_array
         CMemoryPool* pool
+        int64_t c_size
 
     pool = maybe_unbox_memory_pool(memory_pool)
     if type is None:
-        check_status(ConvertPySequence(sequence, pool, &sp_array))
+        with nogil:
+            check_status(ConvertPySequence(sequence, pool, &sp_array))
     else:
         if size is None:
-            check_status(
-                ConvertPySequence(
-                    sequence, pool, &sp_array, type.sp_type
+            with nogil:
+                check_status(
+                    ConvertPySequence(
+                        sequence, pool, &sp_array, type.sp_type
+                    )
                 )
-            )
         else:
-            check_status(
-                ConvertPySequence(
-                    sequence, pool, &sp_array, type.sp_type, size
+            c_size = size
+            with nogil:
+                check_status(
+                    ConvertPySequence(
+                        sequence, pool, &sp_array, type.sp_type, c_size
+                    )
                 )
-            )
 
     return pyarrow_wrap_array(sp_array)
 
@@ -91,7 +96,8 @@ cdef class Array:
         self.type = pyarrow_wrap_data_type(self.sp_array.get().type())
 
     def _debug_print(self):
-        check_status(DebugPrint(deref(self.ap), 0))
+        with nogil:
+            check_status(DebugPrint(deref(self.ap), 0))
 
     @staticmethod
     def from_pandas(obj, mask=None, DataType type=None,
@@ -185,7 +191,9 @@ cdef class Array:
                 values, obj.dtype, type, timestamps_to_ms=timestamps_to_ms)
 
             if type is None:
-                check_status(NumPyDtypeToArrow(values.dtype, &c_type))
+                dtype = values.dtype
+                with nogil:
+                    check_status(NumPyDtypeToArrow(dtype, &c_type))
             else:
                 c_type = type.sp_type
 
@@ -319,7 +327,9 @@ strides: {2}""".format(self.type, self.shape, self.strides)
     @staticmethod
     def from_numpy(obj):
         cdef shared_ptr[CTensor] ctensor
-        check_status(NdarrayToTensor(c_default_memory_pool(), obj, &ctensor))
+        with nogil:
+            check_status(NdarrayToTensor(c_default_memory_pool(), obj,
+                                         &ctensor))
         return pyarrow_wrap_tensor(ctensor)
 
     def to_numpy(self):
@@ -329,7 +339,8 @@ strides: {2}""".format(self.type, self.shape, self.strides)
         cdef:
             PyObject* out
 
-        check_status(TensorToNdarray(deref(self.tp), self, &out))
+        with nogil:
+            check_status(TensorToNdarray(deref(self.tp), self, &out))
         return PyObject_to_object(out)
 
     def equals(self, Tensor other):

http://git-wip-us.apache.org/repos/asf/arrow/blob/3200e914/python/pyarrow/feather.pxi
----------------------------------------------------------------------
diff --git a/python/pyarrow/feather.pxi b/python/pyarrow/feather.pxi
index 6faf2f9..6a1fa30 100644
--- a/python/pyarrow/feather.pxi
+++ b/python/pyarrow/feather.pxi
@@ -44,7 +44,8 @@ cdef class FeatherWriter:
         if self.num_rows < 0:
             self.num_rows = 0
         self.writer.get().SetNumRows(self.num_rows)
-        check_status(self.writer.get().Finalize())
+        with nogil:
+            check_status(self.writer.get().Finalize())
 
     def write_array(self, object name, object col, object mask=None):
         cdef Array arr

http://git-wip-us.apache.org/repos/asf/arrow/blob/3200e914/python/pyarrow/io-hdfs.pxi
----------------------------------------------------------------------
diff --git a/python/pyarrow/io-hdfs.pxi b/python/pyarrow/io-hdfs.pxi
index 27e9948..e6285e4 100644
--- a/python/pyarrow/io-hdfs.pxi
+++ b/python/pyarrow/io-hdfs.pxi
@@ -29,7 +29,8 @@ except ImportError:
 
 def have_libhdfs():
     try:
-        check_status(HaveLibHdfs())
+        with nogil:
+            check_status(HaveLibHdfs())
         return True
     except:
         return False
@@ -37,7 +38,8 @@ def have_libhdfs():
 
 def have_libhdfs3():
     try:
-        check_status(HaveLibHdfs3())
+        with nogil:
+            check_status(HaveLibHdfs3())
         return True
     except:
         return False
@@ -73,10 +75,12 @@ cdef class HadoopFileSystem:
             conf.kerb_ticket = tobytes(kerb_ticket)
 
         if driver == 'libhdfs':
-            check_status(HaveLibHdfs())
+            with nogil:
+                check_status(HaveLibHdfs())
             conf.driver = HdfsDriver_LIBHDFS
         else:
-            check_status(HaveLibHdfs3())
+            with nogil:
+                check_status(HaveLibHdfs3())
             conf.driver = HdfsDriver_LIBHDFS3
 
         with nogil:

http://git-wip-us.apache.org/repos/asf/arrow/blob/3200e914/python/pyarrow/io.pxi
----------------------------------------------------------------------
diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi
index cccb173..eda8de7 100644
--- a/python/pyarrow/io.pxi
+++ b/python/pyarrow/io.pxi
@@ -447,7 +447,8 @@ cdef class MemoryMappedFile(NativeFile):
         else:
             raise ValueError('Invalid file mode: {0}'.format(mode))
 
-        check_status(CMemoryMappedFile.Open(c_path, c_mode, &handle))
+        with nogil:
+            check_status(CMemoryMappedFile.Open(c_path, c_mode, &handle))
 
         self.wr_file = <shared_ptr[OutputStream]> handle
         self.rd_file = <shared_ptr[RandomAccessFile]> handle
@@ -642,7 +643,8 @@ cdef class BufferOutputStream(NativeFile):
         self.is_open = True
 
     def get_result(self):
-        check_status(self.wr_file.get().Close())
+        with nogil:
+            check_status(self.wr_file.get().Close())
         self.is_open = False
         return pyarrow_wrap_buffer(<shared_ptr[CBuffer]> self.buffer)
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/3200e914/python/pyarrow/table.pxi
----------------------------------------------------------------------
diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi
index e33c9ba..b9b0899 100644
--- a/python/pyarrow/table.pxi
+++ b/python/pyarrow/table.pxi
@@ -168,8 +168,9 @@ cdef class Column:
         cdef:
             PyObject* out
 
-        check_status(libarrow.ConvertColumnToPandas(self.sp_column,
-                                                    self, &out))
+        with nogil:
+            check_status(libarrow.ConvertColumnToPandas(self.sp_column,
+                                                        self, &out))
 
         return pd.Series(wrap_array_output(out), name=self.name)
 


Mime
View raw message