kudu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From t...@apache.org
Subject [1/2] kudu git commit: KUDU-1649 - [python] Add Binary Type Support
Date Mon, 31 Oct 2016 23:00:17 GMT
Repository: kudu
Updated Branches:
  refs/heads/master 6f54154fd -> 4ed84b956


KUDU-1649 - [python] Add Binary Type Support

The Python client currently does not support the Binary type.
This patch adds this type and contains test updates.

Change-Id: Ib13539d040b754399c13031003a2cf64c71ba151
Reviewed-on: http://gerrit.cloudera.org:8080/4653
Tested-by: Kudu Jenkins
Reviewed-by: Todd Lipcon <todd@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/b2f38034
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/b2f38034
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/b2f38034

Branch: refs/heads/master
Commit: b2f3803458f9aae2668a756861be3a207932b0cd
Parents: 6f54154
Author: Jordan Birdsell <jordantbirdsell@gmail.com>
Authored: Wed Oct 5 21:06:45 2016 -0400
Committer: Todd Lipcon <todd@apache.org>
Committed: Mon Oct 31 22:44:57 2016 +0000

----------------------------------------------------------------------
 python/kudu/client.pyx              | 29 +++++++++++++++++++++--------
 python/kudu/libkudu_client.pxd      |  3 +++
 python/kudu/tests/test_scanner.py   |  4 ++++
 python/kudu/tests/test_scantoken.py |  4 ++++
 python/kudu/tests/util.py           | 17 +++++++++++++++--
 5 files changed, 47 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/b2f38034/python/kudu/client.pyx
----------------------------------------------------------------------
diff --git a/python/kudu/client.pyx b/python/kudu/client.pyx
index 23e2edb..a555ed6 100644
--- a/python/kudu/client.pyx
+++ b/python/kudu/client.pyx
@@ -821,7 +821,7 @@ cdef class Column:
 
         if (self.spec.type.name[:3] == 'int'):
             val = KuduValue.FromInt(obj)
-        elif (self.spec.type.name == 'string'):
+        elif (self.spec.type.name in ['string', 'binary']):
             if isinstance(obj, unicode):
                 obj = obj.encode('utf8')
 
@@ -1185,6 +1185,12 @@ cdef class Row:
         return cpython.PyBytes_FromStringAndSize(<char*> val.mutable_data(),
                                                  val.size())
 
+    cdef inline get_binary(self, int i):
+        cdef Slice val
+        check_status(self.row.GetBinary(i, &val))
+        return cpython.PyBytes_FromStringAndSize(<char*> val.mutable_data(),
+                                                 val.size())
+
     cdef inline get_unixtime_micros(self, int i):
         cdef int64_t val
         check_status(self.row.GetUnixTimeMicros(i, &val))
@@ -1211,6 +1217,8 @@ cdef class Row:
             return self.get_float(i)
         elif t == KUDU_STRING:
             return frombytes(self.get_string(i))
+        elif t == KUDU_BINARY:
+            return self.get_binary(i)
         elif t == KUDU_UNIXTIME_MICROS:
             return from_unixtime_micros(self.get_unixtime_micros(i))
         else:
@@ -2189,7 +2197,7 @@ cdef class PartialRow:
     cpdef set_loc(self, int i, value):
         cdef:
             DataType t = self.schema.loc_type(i)
-            cdef Slice* slc
+            Slice slc
 
         if value is None:
             self.row.SetNull(i)
@@ -2213,15 +2221,18 @@ cdef class PartialRow:
         elif t == KUDU_DOUBLE:
             self.row.SetDouble(i, <double> value)
         elif t == KUDU_STRING:
-            if not cpython.PyBytes_Check(value):
+            if isinstance(value, unicode):
                 value = value.encode('utf8')
 
-            # TODO: It would be much better not to heap-allocate a Slice object
-            slc = new Slice(cpython.PyBytes_AsString(value))
+            slc = Slice(<char*> value, len(value))
+            self.row.SetStringCopy(i, slc)
+        elif t == KUDU_BINARY:
+            if isinstance(value, unicode):
+                raise TypeError("Unicode objects must be explicitly encoded " +
+                                "before storing in a Binary field.")
 
-            # Not safe to take a reference to PyBytes data for now
-            self.row.SetStringCopy(i, deref(slc))
-            del slc
+            slc = Slice(<char*> value, len(value))
+            self.row.SetBinaryCopy(i, slc)
         elif t == KUDU_UNIXTIME_MICROS:
             self.row.SetUnixTimeMicros(i, <int64_t>
                 to_unixtime_micros(value))
@@ -2329,5 +2340,7 @@ cdef inline cast_pyvalue(DataType t, object o):
         return StringVal(o)
     elif t == KUDU_UNIXTIME_MICROS:
         return UnixtimeMicrosVal(o)
+    elif t == KUDU_BINARY:
+        return StringVal(o)
     else:
         raise TypeError("Cannot cast kudu type <{0}>".format(_type_names[t]))

http://git-wip-us.apache.org/repos/asf/kudu/blob/b2f38034/python/kudu/libkudu_client.pxd
----------------------------------------------------------------------
diff --git a/python/kudu/libkudu_client.pxd b/python/kudu/libkudu_client.pxd
index 25c0f10..4d58a20 100644
--- a/python/kudu/libkudu_client.pxd
+++ b/python/kudu/libkudu_client.pxd
@@ -323,6 +323,9 @@ cdef extern from "kudu/common/partial_row.h" namespace "kudu" nogil:
         Status SetStringCopy(Slice& col_name, Slice& val)
         Status SetStringCopy(int col_idx, Slice& val)
 
+        Status SetBinary(Slice& col_name, Slice& val)
+        Status SetBinary(int col_idx, Slice&val)
+
         Status SetBinaryCopy(const Slice& col_name, const Slice& val)
         Status SetBinaryCopy(int col_idx, const Slice& val)
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/b2f38034/python/kudu/tests/test_scanner.py
----------------------------------------------------------------------
diff --git a/python/kudu/tests/test_scanner.py b/python/kudu/tests/test_scanner.py
index fd39f1a..0b9aeb4 100644
--- a/python/kudu/tests/test_scanner.py
+++ b/python/kudu/tests/test_scanner.py
@@ -259,6 +259,10 @@ class TestScanner(TestScanBase):
         # Does a row check count only
         self._test_float_pred()
 
+    def test_binary_pred(self):
+        # Test a binary predicate
+        self._test_binary_pred()
+
     def test_scan_selection(self):
         """
         This test confirms that setting the scan selection policy on the

http://git-wip-us.apache.org/repos/asf/kudu/blob/b2f38034/python/kudu/tests/test_scantoken.py
----------------------------------------------------------------------
diff --git a/python/kudu/tests/test_scantoken.py b/python/kudu/tests/test_scantoken.py
index e027cf5..115ac30 100644
--- a/python/kudu/tests/test_scantoken.py
+++ b/python/kudu/tests/test_scantoken.py
@@ -240,6 +240,10 @@ class TestScanToken(TestScanBase):
         # Does a row check count only
         self._test_float_pred()
 
+    def test_binary_pred(self):
+        # Test a binary predicate
+        self._test_binary_pred()
+
     def test_scan_selection(self):
         """
         This test confirms that setting the scan selection policy on the

http://git-wip-us.apache.org/repos/asf/kudu/blob/b2f38034/python/kudu/tests/util.py
----------------------------------------------------------------------
diff --git a/python/kudu/tests/util.py b/python/kudu/tests/util.py
index 230f723..a6bbae6 100644
--- a/python/kudu/tests/util.py
+++ b/python/kudu/tests/util.py
@@ -71,6 +71,7 @@ class TestScanBase(KuduTestBase, unittest.TestCase):
         builder.add_column('bool_val', type_=kudu.bool)
         builder.add_column('double_val', type_=kudu.double)
         builder.add_column('int8_val', type_=kudu.int8)
+        builder.add_column('binary_val', type_='binary', compression=kudu.COMPRESSION_SNAPPY, encoding='prefix')
         builder.add_column('float_val', type_=kudu.float)
         schema = builder.build()
 
@@ -86,9 +87,13 @@ class TestScanBase(KuduTestBase, unittest.TestCase):
         # Insert new rows
         self.type_test_rows = [
             (1, datetime.datetime(2016, 1, 1).replace(tzinfo=pytz.utc),
-             "Test One", True, 1.7976931348623157 * (10^308), 127, 3.402823 * (10^38)),
+             "Test One", True, 1.7976931348623157 * (10^308), 127,
+             b'\xce\x99\xce\xbf\xcf\x81\xce\xb4\xce\xb1\xce\xbd\xce\xaf\xce\xb1',
+             3.402823 * (10^38)),
             (2, datetime.datetime.utcnow().replace(tzinfo=pytz.utc),
-             "测试二", False, 200.1, -1, -150.2)
+             "测试二", False, 200.1, -1,
+             b'\xd0\x98\xd0\xbe\xd1\x80\xd0\xb4\xd0\xb0\xd0\xbd\xd0\xb8\xd1\x8f',
+             -150.2)
         ]
         session = self.client.new_session()
         for row in self.type_test_rows:
@@ -208,3 +213,11 @@ class TestScanBase(KuduTestBase, unittest.TestCase):
             row_indexes=slice(0, 1),
             count_only=True
         )
+
+    def _test_binary_pred(self):
+        self.verify_pred_type_scans(
+            preds=[
+                self.type_table['binary_val'] == 'Иордания'
+            ],
+            row_indexes=slice(1, 2)
+        )


Mime
View raw message