Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 8E4C6200BA3 for ; Wed, 5 Oct 2016 20:25:22 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id 8CE82160AC9; Wed, 5 Oct 2016 18:25:22 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id A6BD7160AEA for ; Wed, 5 Oct 2016 20:25:21 +0200 (CEST) Received: (qmail 28936 invoked by uid 500); 5 Oct 2016 18:25:20 -0000 Mailing-List: contact commits-help@kudu.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@kudu.apache.org Delivered-To: mailing list commits@kudu.apache.org Received: (qmail 28771 invoked by uid 99); 5 Oct 2016 18:25:20 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 05 Oct 2016 18:25:20 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 9A787DFB81; Wed, 5 Oct 2016 18:25:20 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: danburkert@apache.org To: commits@kudu.apache.org Message-Id: <0b06957aea864b01b06f9c7ffa2d4277@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: kudu git commit: KUDU-1640 - [python] Add IN-list predicate support Date: Wed, 5 Oct 2016 18:25:20 +0000 (UTC) archived-at: Wed, 05 Oct 2016 18:25:22 -0000 Repository: kudu Updated Branches: refs/heads/master 07d190c3e -> 3830fc972 KUDU-1640 - [python] Add IN-list predicate support This patch adds IN list predicate support for the python client. This patch includes a test. Change-Id: I932dfded62e162cf85e0e12432cf6716311957de Reviewed-on: http://gerrit.cloudera.org:8080/4548 Tested-by: Kudu Jenkins Reviewed-by: Dan Burkert Project: http://git-wip-us.apache.org/repos/asf/kudu/repo Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/3830fc97 Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/3830fc97 Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/3830fc97 Branch: refs/heads/master Commit: 3830fc972b0a810529c87cd4de0bf026668b9b5c Parents: 07d190c Author: Jordan Birdsell Authored: Tue Sep 27 21:14:05 2016 -0400 Committer: Dan Burkert Committed: Wed Oct 5 18:24:57 2016 +0000 ---------------------------------------------------------------------- python/kudu/client.pyx | 101 +++++++++++++++++++++++-------- python/kudu/libkudu_client.pxd | 2 + python/kudu/tests/test_scanner.py | 28 +++++++++ python/kudu/tests/test_scantoken.py | 17 ++++++ 4 files changed, 124 insertions(+), 24 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kudu/blob/3830fc97/python/kudu/client.pyx ---------------------------------------------------------------------- diff --git a/python/kudu/client.pyx b/python/kudu/client.pyx index 0f64c92..29004bb 100644 --- a/python/kudu/client.pyx +++ b/python/kudu/client.pyx @@ -760,34 +760,73 @@ cdef class Column: cdef: KuduPredicate* pred KuduValue* val - Slice* col_name_slice + Slice col_name_slice ComparisonOp cmp_op Predicate result object _name = tobytes(self.name) - col_name_slice = new Slice( _name, - len(_name)) + col_name_slice = Slice( _name, len(_name)) + if op == 0: # < + cmp_op = KUDU_LESS + elif op == 1: # <= + cmp_op = KUDU_LESS_EQUAL + elif op == 2: # == + cmp_op = KUDU_EQUAL + elif op == 4: # > + cmp_op = KUDU_GREATER + elif op == 5: # >= + cmp_op = KUDU_GREATER_EQUAL + else: + raise NotImplementedError - try: - if op == 0: # < - cmp_op = KUDU_LESS - elif op == 1: # <= - cmp_op = KUDU_LESS_EQUAL - elif op == 2: # == - cmp_op = KUDU_EQUAL - elif op == 4: # > - cmp_op = KUDU_GREATER - elif op == 5: # >= - cmp_op = KUDU_GREATER_EQUAL - else: - raise NotImplementedError + val = self.box_value(value) + pred = (self.parent.ptr() + .NewComparisonPredicate(col_name_slice, + cmp_op, val)) - val = self.box_value(value) - pred = (self.parent.ptr() - .NewComparisonPredicate(deref(col_name_slice), - cmp_op, val)) - finally: - del col_name_slice + result = Predicate() + result.init(pred) + + return result + + def in_list(Column self, values): + """ + Creates a new InListPredicate for the Column. If a single value is + provided, then an equality comparison predicate is created. + + Parameters + ---------- + values : list + + Examples + -------- + scanner.add_predicate(table['key'].in_list([1, 2, 3]) + + Returns + ------- + pred : Predicate + """ + cdef: + KuduPredicate* pred + vector[KuduValue*] vals + Slice col_name_slice + Predicate result + object _name = tobytes(self.name) + + col_name_slice = Slice( _name, len(_name)) + + try: + for val in values: + vals.push_back(self.box_value(val)) + except TypeError: + while not vals.empty(): + _val = vals.back() + del _val + vals.pop_back() + raise + + pred = (self.parent.ptr() + .NewInListPredicate(col_name_slice, &vals)) result = Predicate() result.init(pred) @@ -1163,7 +1202,8 @@ cdef class Scanner: def add_predicates(self, preds): """ Add a list of scan predicates to the scanner. Select columns from the - parent table and make comparisons to create predicates. + parent table and make comparisons to create predicates. Returns a + reference to itself to facilitate chaining. Examples -------- @@ -1174,14 +1214,21 @@ cdef class Scanner: Parameters ---------- preds : list of Predicate + + Returns + ------- + self : scanner """ for pred in preds: self.add_predicate(pred) + return self + cpdef add_predicate(self, Predicate pred): """ Add a scan predicates to the scanner. Select columns from the - parent table and make comparisons to create predicates. + parent table and make comparisons to create predicates. Returns + a reference to itself to facilitate chaining. Examples -------- @@ -1191,6 +1238,10 @@ cdef class Scanner: Parameters ---------- pred : kudu.Predicate + + Returns + ------- + self : scanner """ cdef KuduPredicate* clone @@ -1199,6 +1250,8 @@ cdef class Scanner: clone = pred.pred.Clone() check_status(self.scanner.AddConjunctPredicate(clone)) + return self + def set_projected_column_names(self, names): """ Sets the columns to be scanned. http://git-wip-us.apache.org/repos/asf/kudu/blob/3830fc97/python/kudu/libkudu_client.pxd ---------------------------------------------------------------------- diff --git a/python/kudu/libkudu_client.pxd b/python/kudu/libkudu_client.pxd index 091f326..b9022e0 100644 --- a/python/kudu/libkudu_client.pxd +++ b/python/kudu/libkudu_client.pxd @@ -549,6 +549,8 @@ cdef extern from "kudu/client/client.h" namespace "kudu::client" nogil: KuduPredicate* NewComparisonPredicate(const Slice& col_name, ComparisonOp op, KuduValue* value); + KuduPredicate* NewInListPredicate(const Slice& col_name, + vector[KuduValue*]* values) KuduClient* client() # const PartitionSchema& partition_schema() http://git-wip-us.apache.org/repos/asf/kudu/blob/3830fc97/python/kudu/tests/test_scanner.py ---------------------------------------------------------------------- diff --git a/python/kudu/tests/test_scanner.py b/python/kudu/tests/test_scanner.py index b1d8505..e0fcd37 100644 --- a/python/kudu/tests/test_scanner.py +++ b/python/kudu/tests/test_scanner.py @@ -74,6 +74,24 @@ class TestScanner(TestScanBase): self.assertEqual(sorted(tuples), [(20, 'hello_20'), (22, 'hello_22')]) + def test_scan_rows_in_list_predicate(self): + """ + Test scanner with an InList predicate and + a string comparison predicate + """ + key_list = [2, 98] + scanner = self.table.scanner() + scanner.set_fault_tolerant()\ + .add_predicates([ + self.table[0].in_list(key_list), + self.table['string_val'] >= 'hello_9' + ]) + scanner.open() + + tuples = scanner.read_all_tuples() + + self.assertEqual(tuples, [self.tuples[98]]) + def test_index_projection_with_schema(self): scanner = self.table.scanner() scanner.set_projected_column_indexes([0, 1]) @@ -125,6 +143,16 @@ class TestScanner(TestScanBase): with self.assertRaises(kudu.KuduInvalidArgument): scanner.add_predicates([sv >= 1]) + with self.assertRaises(TypeError): + scanner.add_predicates([sv.in_list(['testing', + datetime.datetime.utcnow()])]) + + with self.assertRaises(kudu.KuduInvalidArgument): + scanner.add_predicates([sv.in_list([ + 'hello_20', + 120 + ])]) + def test_scan_batch_by_batch(self): scanner = self.table.scanner() scanner.set_fault_tolerant() http://git-wip-us.apache.org/repos/asf/kudu/blob/3830fc97/python/kudu/tests/test_scantoken.py ---------------------------------------------------------------------- diff --git a/python/kudu/tests/test_scantoken.py b/python/kudu/tests/test_scantoken.py index d3aff74..a5ae256 100644 --- a/python/kudu/tests/test_scantoken.py +++ b/python/kudu/tests/test_scantoken.py @@ -159,3 +159,20 @@ class TestScanToken(TestScanBase): tuples.extend(batch.as_tuples()) self.assertEqual(sorted(self.tuples), tuples) + + def test_scan_rows_in_list_predicate(self): + """ + Test scan token builder/scanner with an InList predicate and + a string comparison predicate + """ + key_list = [2, 98] + builder = self.table.scan_token_builder() + builder.set_fault_tolerant() \ + .add_predicates([ + self.table[0].in_list(key_list), + self.table['string_val'] >= 'hello_9' + ]) + + # Serialize execute and verify + self._subtest_serialize_thread_and_verify(builder.build(), + [self.tuples[98]])