arrow-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From w...@apache.org
Subject arrow git commit: ARROW-447: Always return unicode objects for UTF-8 strings
Date Thu, 29 Dec 2016 12:06:22 GMT
Repository: arrow
Updated Branches:
  refs/heads/master 23fe6ae02 -> e15c6a0b3


ARROW-447: Always return unicode objects for UTF-8 strings

As the u() function was not working with Unicode characters, this uses
the u'' literal again, which was re-introduced in Python 3.3. Thus the
tests will fail with Python 3 versions older than 3.3.

Author: Uwe L. Korn <uwelk@xhochy.com>

Closes #260 from xhochy/ARROW-447 and squashes the following commits:

84d3569 [Uwe L. Korn] ARROW-447: Always return unicode objects for UTF-8 strings


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/e15c6a0b
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/e15c6a0b
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/e15c6a0b

Branch: refs/heads/master
Commit: e15c6a0b3c05b5b42c204f34369d127182450ca0
Parents: 23fe6ae
Author: Uwe L. Korn <uwelk@xhochy.com>
Authored: Thu Dec 29 07:05:55 2016 -0500
Committer: Wes McKinney <wes.mckinney@twosigma.com>
Committed: Thu Dec 29 07:05:55 2016 -0500

----------------------------------------------------------------------
 python/pyarrow/scalar.pyx                    | 2 +-
 python/pyarrow/tests/test_convert_builtin.py | 5 +++--
 python/pyarrow/tests/test_convert_pandas.py  | 3 ++-
 python/pyarrow/tests/test_scalars.py         | 7 ++++---
 python/src/pyarrow/adapters/pandas.cc        | 4 ----
 5 files changed, 10 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/e15c6a0b/python/pyarrow/scalar.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/scalar.pyx b/python/pyarrow/scalar.pyx
index a0610a1..30b9040 100644
--- a/python/pyarrow/scalar.pyx
+++ b/python/pyarrow/scalar.pyx
@@ -168,7 +168,7 @@ cdef class StringValue(ArrayValue):
 
     def as_py(self):
         cdef CStringArray* ap = <CStringArray*> self.sp_array.get()
-        return frombytes(ap.GetString(self.index))
+        return ap.GetString(self.index).decode('utf-8')
 
 
 cdef class BinaryValue(ArrayValue):

http://git-wip-us.apache.org/repos/asf/arrow/blob/e15c6a0b/python/pyarrow/tests/test_convert_builtin.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_convert_builtin.py b/python/pyarrow/tests/test_convert_builtin.py
index a5f7aa5..6116742 100644
--- a/python/pyarrow/tests/test_convert_builtin.py
+++ b/python/pyarrow/tests/test_convert_builtin.py
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
@@ -72,12 +73,12 @@ class TestConvertList(unittest.TestCase):
         assert arr.to_pylist() == data
 
     def test_unicode(self):
-        data = [u('foo'), u('bar'), None, u('arrow')]
+        data = [u'foo', u'bar', None, u'mañana']
         arr = pyarrow.from_pylist(data)
         assert len(arr) == 4
         assert arr.null_count == 1
         assert arr.type == pyarrow.string()
-        assert arr.to_pylist() == [u('foo'), u('bar'), None, u('arrow')]
+        assert arr.to_pylist() == data
 
     def test_bytes(self):
         u1 = b'ma\xc3\xb1ana'

http://git-wip-us.apache.org/repos/asf/arrow/blob/e15c6a0b/python/pyarrow/tests/test_convert_pandas.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_convert_pandas.py b/python/pyarrow/tests/test_convert_pandas.py
index 863aa30..bb9f0b3 100644
--- a/python/pyarrow/tests/test_convert_pandas.py
+++ b/python/pyarrow/tests/test_convert_pandas.py
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
@@ -183,7 +184,7 @@ class TestPandasConversion(unittest.TestCase):
 
     def test_unicode(self):
         repeats = 1000
-        values = [u('foo'), None, u('bar'), u('qux'), np.nan]
+        values = [u'foo', None, u'bar', u'mañana', np.nan]
         df = pd.DataFrame({'strings': values * repeats})
 
         self._check_pandas_roundtrip(df)

http://git-wip-us.apache.org/repos/asf/arrow/blob/e15c6a0b/python/pyarrow/tests/test_scalars.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_scalars.py b/python/pyarrow/tests/test_scalars.py
index 19cfacb..62e51f8 100644
--- a/python/pyarrow/tests/test_scalars.py
+++ b/python/pyarrow/tests/test_scalars.py
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
@@ -59,7 +60,7 @@ class TestScalars(unittest.TestCase):
         assert v.as_py() == 3.0
 
     def test_string_unicode(self):
-        arr = A.from_pylist([u('foo'), None, u('bar')])
+        arr = A.from_pylist([u'foo', None, u'mañana'])
 
         v = arr[0]
         assert isinstance(v, A.StringValue)
@@ -68,8 +69,8 @@ class TestScalars(unittest.TestCase):
         assert arr[1] is A.NA
 
         v = arr[2].as_py()
-        assert v == u('bar')
-        assert isinstance(v, str)
+        assert v == u'mañana'
+        assert isinstance(v, unicode_type)
 
     def test_bytes(self):
         arr = A.from_pylist([b'foo', None, u('bar')])

http://git-wip-us.apache.org/repos/asf/arrow/blob/e15c6a0b/python/src/pyarrow/adapters/pandas.cc
----------------------------------------------------------------------
diff --git a/python/src/pyarrow/adapters/pandas.cc b/python/src/pyarrow/adapters/pandas.cc
index 5e5826b..ad18eca 100644
--- a/python/src/pyarrow/adapters/pandas.cc
+++ b/python/src/pyarrow/adapters/pandas.cc
@@ -603,11 +603,7 @@ struct WrapBytes {};
 template <>
 struct WrapBytes<arrow::StringArray> {
   static inline PyObject* Wrap(const uint8_t* data, int64_t length) {
-#if PY_MAJOR_VERSION >= 3
     return PyUnicode_FromStringAndSize(reinterpret_cast<const char*>(data), length);
-#else
-    return PyString_FromStringAndSize(reinterpret_cast<const char*>(data), length);
-#endif
   }
 };
 


Mime
View raw message