Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id A4A3D200C4C for ; Tue, 4 Apr 2017 19:18:24 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id A342C160B90; Tue, 4 Apr 2017 17:18:24 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 99D50160B77 for ; Tue, 4 Apr 2017 19:18:23 +0200 (CEST) Received: (qmail 48574 invoked by uid 500); 4 Apr 2017 17:18:22 -0000 Mailing-List: contact commits-help@arrow.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@arrow.apache.org Delivered-To: mailing list commits@arrow.apache.org Received: (qmail 48565 invoked by uid 99); 4 Apr 2017 17:18:22 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 04 Apr 2017 17:18:22 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id ACE6FDFC8E; Tue, 4 Apr 2017 17:18:22 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit From: wesm@apache.org To: commits@arrow.apache.org Message-Id: <7a167e0236244329a6da37025f82e7ba@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: arrow git commit: ARROW-765: [Python] Add more natural Exception type hierarchy for thirdparty users Date: Tue, 4 Apr 2017 17:18:22 +0000 (UTC) archived-at: Tue, 04 Apr 2017 17:18:24 -0000 Repository: arrow Updated Branches: refs/heads/master ec6188efc -> 2aed7845f ARROW-765: [Python] Add more natural Exception type hierarchy for thirdparty users I also took the liberty of changing a number of error types in libarrow_python Author: Wes McKinney Closes #489 from 
wesm/ARROW-765 and squashes the following commits: 74c43df [Wes McKinney] Make a nicer Exception hierarchy, with more intuitive bases for thirdparty users 2a58a1b [Wes McKinney] Add a nicer exception hierarchy. Unknown errors return as ValueError Project: http://git-wip-us.apache.org/repos/asf/arrow/repo Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/2aed7845 Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/2aed7845 Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/2aed7845 Branch: refs/heads/master Commit: 2aed7845fbc9e3d91ab9d16965ee9f6f3abc668b Parents: ec6188e Author: Wes McKinney Authored: Tue Apr 4 13:18:16 2017 -0400 Committer: Wes McKinney Committed: Tue Apr 4 13:18:16 2017 -0400 ---------------------------------------------------------------------- cpp/src/arrow/python/builtin_convert.cc | 8 +-- cpp/src/arrow/python/pandas_convert.cc | 6 +- cpp/src/arrow/status.h | 2 +- python/pyarrow/__init__.py | 8 ++- python/pyarrow/error.pyx | 43 ++++++++++++- python/pyarrow/includes/common.pxd | 4 +- python/pyarrow/tests/test_convert_builtin.py | 78 +++++++++++------------ python/pyarrow/tests/test_convert_pandas.py | 4 +- python/pyarrow/tests/test_feather.py | 2 +- 9 files changed, 101 insertions(+), 54 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/arrow/blob/2aed7845/cpp/src/arrow/python/builtin_convert.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/python/builtin_convert.cc b/cpp/src/arrow/python/builtin_convert.cc index 6a13fdc..25b32ee 100644 --- a/cpp/src/arrow/python/builtin_convert.cc +++ b/cpp/src/arrow/python/builtin_convert.cc @@ -394,7 +394,7 @@ class BytesConverter : public TypedConverter { } else if (PyBytes_Check(item)) { bytes_obj = item; } else { - return Status::TypeError( + return Status::Invalid( "Value that cannot be converted to bytes was encountered"); } // No error checking @@ -429,7 +429,7
@@ class FixedWidthBytesConverter : public TypedConverter { } else if (PyBytes_Check(item)) { bytes_obj = item; } else { - return Status::TypeError( + return Status::Invalid( "Value that cannot be converted to bytes was encountered"); } // No error checking @@ -458,7 +458,7 @@ class UTF8Converter : public TypedConverter { RETURN_NOT_OK(typed_builder_->AppendNull()); continue; } else if (!PyUnicode_Check(item)) { - return Status::TypeError("Non-unicode value encountered"); + return Status::Invalid("Non-unicode value encountered"); } tmp.reset(PyUnicode_AsUTF8String(item)); RETURN_IF_PYERROR(); @@ -585,7 +585,7 @@ Status CheckPythonBytesAreFixedLength(PyObject* obj, Py_ssize_t expected_length) std::stringstream ss; ss << "Found byte string of length " << length << ", expected length is " << expected_length; - return Status::TypeError(ss.str()); + return Status::Invalid(ss.str()); } return Status::OK(); } http://git-wip-us.apache.org/repos/asf/arrow/blob/2aed7845/cpp/src/arrow/python/pandas_convert.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/python/pandas_convert.cc b/cpp/src/arrow/python/pandas_convert.cc index 9577892..48d3489 100644 --- a/cpp/src/arrow/python/pandas_convert.cc +++ b/cpp/src/arrow/python/pandas_convert.cc @@ -161,7 +161,7 @@ static Status AppendObjectStrings( obj = PyUnicode_AsUTF8String(obj); if (obj == NULL) { PyErr_Clear(); - return Status::TypeError("failed converting unicode to UTF8"); + return Status::Invalid("failed converting unicode to UTF8"); } const int32_t length = static_cast(PyBytes_GET_SIZE(obj)); Status s = builder->Append(PyBytes_AS_STRING(obj), length); @@ -200,7 +200,7 @@ static Status AppendObjectFixedWidthBytes(PyArrayObject* arr, PyArrayObject* mas obj = PyUnicode_AsUTF8String(obj); if (obj == NULL) { PyErr_Clear(); - return Status::TypeError("failed converting unicode to UTF8"); + return Status::Invalid("failed converting unicode to UTF8"); } 
RETURN_NOT_OK(CheckPythonBytesAreFixedLength(obj, byte_width)); @@ -482,7 +482,7 @@ Status InvalidConversion(PyObject* obj, const std::string& expected_type_name) { std::stringstream ss; ss << "Python object of type " << cpp_type_name << " is not None and is not a " << expected_type_name << " object"; - return Status::TypeError(ss.str()); + return Status::Invalid(ss.str()); } Status PandasConverter::ConvertDates() { http://git-wip-us.apache.org/repos/asf/arrow/blob/2aed7845/cpp/src/arrow/status.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/status.h b/cpp/src/arrow/status.h index 05f5b74..dd65b75 100644 --- a/cpp/src/arrow/status.h +++ b/cpp/src/arrow/status.h @@ -134,7 +134,7 @@ class ARROW_EXPORT Status { bool IsKeyError() const { return code() == StatusCode::KeyError; } bool IsInvalid() const { return code() == StatusCode::Invalid; } bool IsIOError() const { return code() == StatusCode::IOError; } - + bool IsTypeError() const { return code() == StatusCode::TypeError; } bool IsUnknownError() const { return code() == StatusCode::UnknownError; } bool IsNotImplemented() const { return code() == StatusCode::NotImplemented; } http://git-wip-us.apache.org/repos/asf/arrow/blob/2aed7845/python/pyarrow/__init__.py ---------------------------------------------------------------------- diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py index 6860f98..8c52074 100644 --- a/python/pyarrow/__init__.py +++ b/python/pyarrow/__init__.py @@ -38,7 +38,13 @@ from pyarrow.array import (Array, Tensor, from_pylist, ListArray, StringArray, DictionaryArray) -from pyarrow.error import ArrowException +from pyarrow.error import (ArrowException, + ArrowKeyError, + ArrowInvalid, + ArrowIOError, + ArrowMemoryError, + ArrowNotImplementedError, + ArrowTypeError) from pyarrow.filesystem import Filesystem, HdfsClient, LocalFilesystem from pyarrow.io import (HdfsFile, NativeFile, PythonFileInterface, 
http://git-wip-us.apache.org/repos/asf/arrow/blob/2aed7845/python/pyarrow/error.pyx ---------------------------------------------------------------------- diff --git a/python/pyarrow/error.pyx b/python/pyarrow/error.pyx index b8a82b3..259aeb0 100644 --- a/python/pyarrow/error.pyx +++ b/python/pyarrow/error.pyx @@ -19,13 +19,52 @@ from pyarrow.includes.libarrow cimport CStatus from pyarrow.includes.common cimport c_string from pyarrow.compat import frombytes + class ArrowException(Exception): pass + +class ArrowInvalid(ValueError, ArrowException): + pass + + +class ArrowMemoryError(MemoryError, ArrowException): + pass + + +class ArrowIOError(IOError, ArrowException): + pass + + +class ArrowKeyError(KeyError, ArrowException): + pass + + +class ArrowTypeError(TypeError, ArrowException): + pass + + +class ArrowNotImplementedError(NotImplementedError, ArrowException): + pass + + cdef int check_status(const CStatus& status) nogil except -1: if status.ok(): return 0 - cdef c_string c_message = status.ToString() with gil: - raise ArrowException(frombytes(c_message)) + message = frombytes(status.ToString()) + if status.IsInvalid(): + raise ArrowInvalid(message) + elif status.IsIOError(): + raise ArrowIOError(message) + elif status.IsOutOfMemory(): + raise ArrowMemoryError(message) + elif status.IsKeyError(): + raise ArrowKeyError(message) + elif status.IsNotImplemented(): + raise ArrowNotImplementedError(message) + elif status.IsTypeError(): + raise ArrowTypeError(message) + else: + raise ArrowException(message) http://git-wip-us.apache.org/repos/asf/arrow/blob/2aed7845/python/pyarrow/includes/common.pxd ---------------------------------------------------------------------- diff --git a/python/pyarrow/includes/common.pxd b/python/pyarrow/includes/common.pxd index f689bdc..ab38ff3 100644 --- a/python/pyarrow/includes/common.pxd +++ b/python/pyarrow/includes/common.pxd @@ -43,10 +43,12 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: c_string ToString() c_bool ok() 
+ c_bool IsIOError() c_bool IsOutOfMemory() + c_bool IsInvalid() c_bool IsKeyError() c_bool IsNotImplemented() - c_bool IsInvalid() + c_bool IsTypeError() cdef inline object PyObject_to_object(PyObject* o): http://git-wip-us.apache.org/repos/asf/arrow/blob/2aed7845/python/pyarrow/tests/test_convert_builtin.py ---------------------------------------------------------------------- diff --git a/python/pyarrow/tests/test_convert_builtin.py b/python/pyarrow/tests/test_convert_builtin.py index 15fca56..e2b03d8 100644 --- a/python/pyarrow/tests/test_convert_builtin.py +++ b/python/pyarrow/tests/test_convert_builtin.py @@ -17,7 +17,7 @@ # under the License. from pyarrow.compat import unittest, u # noqa -import pyarrow +import pyarrow as pa import datetime @@ -26,32 +26,32 @@ class TestConvertList(unittest.TestCase): def test_boolean(self): expected = [True, None, False, None] - arr = pyarrow.from_pylist(expected) + arr = pa.from_pylist(expected) assert len(arr) == 4 assert arr.null_count == 2 - assert arr.type == pyarrow.bool_() + assert arr.type == pa.bool_() assert arr.to_pylist() == expected def test_empty_list(self): - arr = pyarrow.from_pylist([]) + arr = pa.from_pylist([]) assert len(arr) == 0 assert arr.null_count == 0 - assert arr.type == pyarrow.null() + assert arr.type == pa.null() assert arr.to_pylist() == [] def test_all_none(self): - arr = pyarrow.from_pylist([None, None]) + arr = pa.from_pylist([None, None]) assert len(arr) == 2 assert arr.null_count == 2 - assert arr.type == pyarrow.null() + assert arr.type == pa.null() assert arr.to_pylist() == [None, None] def test_integer(self): expected = [1, None, 3, None] - arr = pyarrow.from_pylist(expected) + arr = pa.from_pylist(expected) assert len(arr) == 4 assert arr.null_count == 2 - assert arr.type == pyarrow.int64() + assert arr.type == pa.int64() assert arr.to_pylist() == expected def test_garbage_collection(self): @@ -60,25 +60,25 @@ class TestConvertList(unittest.TestCase): # Force the cyclic garbage 
collector to run gc.collect() - bytes_before = pyarrow.total_allocated_bytes() - pyarrow.from_pylist([1, None, 3, None]) + bytes_before = pa.total_allocated_bytes() + pa.from_pylist([1, None, 3, None]) gc.collect() - assert pyarrow.total_allocated_bytes() == bytes_before + assert pa.total_allocated_bytes() == bytes_before def test_double(self): data = [1.5, 1, None, 2.5, None, None] - arr = pyarrow.from_pylist(data) + arr = pa.from_pylist(data) assert len(arr) == 6 assert arr.null_count == 3 - assert arr.type == pyarrow.float64() + assert arr.type == pa.float64() assert arr.to_pylist() == data def test_unicode(self): data = [u'foo', u'bar', None, u'mañana'] - arr = pyarrow.from_pylist(data) + arr = pa.from_pylist(data) assert len(arr) == 4 assert arr.null_count == 1 - assert arr.type == pyarrow.string() + assert arr.type == pa.string() assert arr.to_pylist() == data def test_bytes(self): @@ -86,31 +86,31 @@ class TestConvertList(unittest.TestCase): data = [b'foo', u1.decode('utf-8'), # unicode gets encoded, None] - arr = pyarrow.from_pylist(data) + arr = pa.from_pylist(data) assert len(arr) == 3 assert arr.null_count == 1 - assert arr.type == pyarrow.binary() + assert arr.type == pa.binary() assert arr.to_pylist() == [b'foo', u1, None] def test_fixed_size_bytes(self): data = [b'foof', None, b'barb', b'2346'] - arr = pyarrow.from_pylist(data, type=pyarrow.binary(4)) + arr = pa.from_pylist(data, type=pa.binary(4)) assert len(arr) == 4 assert arr.null_count == 1 - assert arr.type == pyarrow.binary(4) + assert arr.type == pa.binary(4) assert arr.to_pylist() == data def test_fixed_size_bytes_does_not_accept_varying_lengths(self): data = [b'foo', None, b'barb', b'2346'] - with self.assertRaises(pyarrow.error.ArrowException): - pyarrow.from_pylist(data, type=pyarrow.binary(4)) + with self.assertRaises(pa.ArrowInvalid): + pa.from_pylist(data, type=pa.binary(4)) def test_date(self): data = [datetime.date(2000, 1, 1), None, datetime.date(1970, 1, 1), datetime.date(2040, 2,
26)] - arr = pyarrow.from_pylist(data) + arr = pa.from_pylist(data) assert len(arr) == 4 - assert arr.type == pyarrow.date64() + assert arr.type == pa.date64() assert arr.null_count == 1 assert arr[0].as_py() == datetime.date(2000, 1, 1) assert arr[1].as_py() is None @@ -124,9 +124,9 @@ class TestConvertList(unittest.TestCase): datetime.datetime(2006, 1, 13, 12, 34, 56, 432539), datetime.datetime(2010, 8, 13, 5, 46, 57, 437699) ] - arr = pyarrow.from_pylist(data) + arr = pa.from_pylist(data) assert len(arr) == 4 - assert arr.type == pyarrow.timestamp('us') + assert arr.type == pa.timestamp('us') assert arr.null_count == 1 assert arr[0].as_py() == datetime.datetime(2007, 7, 13, 1, 23, 34, 123456) @@ -137,28 +137,28 @@ class TestConvertList(unittest.TestCase): 46, 57, 437699) def test_mixed_nesting_levels(self): - pyarrow.from_pylist([1, 2, None]) - pyarrow.from_pylist([[1], [2], None]) - pyarrow.from_pylist([[1], [2], [None]]) + pa.from_pylist([1, 2, None]) + pa.from_pylist([[1], [2], None]) + pa.from_pylist([[1], [2], [None]]) - with self.assertRaises(pyarrow.ArrowException): - pyarrow.from_pylist([1, 2, [1]]) + with self.assertRaises(pa.ArrowInvalid): + pa.from_pylist([1, 2, [1]]) - with self.assertRaises(pyarrow.ArrowException): - pyarrow.from_pylist([1, 2, []]) + with self.assertRaises(pa.ArrowInvalid): + pa.from_pylist([1, 2, []]) - with self.assertRaises(pyarrow.ArrowException): - pyarrow.from_pylist([[1], [2], [None, [1]]]) + with self.assertRaises(pa.ArrowInvalid): + pa.from_pylist([[1], [2], [None, [1]]]) def test_list_of_int(self): data = [[1, 2, 3], [], None, [1, 2]] - arr = pyarrow.from_pylist(data) + arr = pa.from_pylist(data) assert len(arr) == 4 assert arr.null_count == 1 - assert arr.type == pyarrow.list_(pyarrow.int64()) + assert arr.type == pa.list_(pa.int64()) assert arr.to_pylist() == data def test_mixed_types_fails(self): data = ['a', 1, 2.0] - with self.assertRaises(pyarrow.error.ArrowException): - pyarrow.from_pylist(data) + with 
self.assertRaises(pa.ArrowException): + pa.from_pylist(data) http://git-wip-us.apache.org/repos/asf/arrow/blob/2aed7845/python/pyarrow/tests/test_convert_pandas.py ---------------------------------------------------------------------- diff --git a/python/pyarrow/tests/test_convert_pandas.py b/python/pyarrow/tests/test_convert_pandas.py index 56830a8..87c9c03 100644 --- a/python/pyarrow/tests/test_convert_pandas.py +++ b/python/pyarrow/tests/test_convert_pandas.py @@ -266,7 +266,7 @@ class TestPandasConversion(unittest.TestCase): values = [b'foo', None, b'ba', None, None, b'hey'] df = pd.DataFrame({'strings': values}) schema = A.Schema.from_fields([A.field('strings', A.binary(3))]) - with self.assertRaises(A.error.ArrowException): + with self.assertRaises(A.ArrowInvalid): A.Table.from_pandas(df, schema=schema) def test_timestamps_notimezone_no_nulls(self): @@ -409,7 +409,7 @@ class TestPandasConversion(unittest.TestCase): def test_mixed_types_fails(self): data = pd.DataFrame({'a': ['a', 1, 2.0]}) - with self.assertRaises(A.error.ArrowException): + with self.assertRaises(A.ArrowException): A.Table.from_pandas(data) def test_strided_data_import(self): http://git-wip-us.apache.org/repos/asf/arrow/blob/2aed7845/python/pyarrow/tests/test_feather.py ---------------------------------------------------------------------- diff --git a/python/pyarrow/tests/test_feather.py b/python/pyarrow/tests/test_feather.py index c7b4f1e..cba9464 100644 --- a/python/pyarrow/tests/test_feather.py +++ b/python/pyarrow/tests/test_feather.py @@ -45,7 +45,7 @@ class TestFeatherReader(unittest.TestCase): pass def test_file_not_exist(self): - with self.assertRaises(pa.ArrowException): + with self.assertRaises(pa.ArrowIOError): FeatherReader('test_invalid_file') def _get_null_counts(self, path, columns=None):