kudu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From a...@apache.org
Subject incubator-kudu git commit: python: add support for specifying partitioning
Date Thu, 26 May 2016 01:16:25 GMT
Repository: incubator-kudu
Updated Branches:
  refs/heads/branch-0.9.x 3d33dfef8 -> 9c8bf8b05


python: add support for specifying partitioning

This adds support for range and hash partitioning when creating
a table from the Python client.

Unfortunately, range partitioning is less-than-useful right now,
since we don't yet have a way to specify the split rows themselves.
I elected to defer that work since it's a bit tricky -- the PartialRow
constructor currently requires a Table object, and the Table object
isn't available until the table has been created.

Change-Id: Ie92c897c559fb3070240c51ceb03fe7c2ccd17ba
Reviewed-on: http://gerrit.cloudera.org:8080/3196
Tested-by: Kudu Jenkins
Reviewed-by: Dan Burkert <dan@cloudera.com>
(cherry picked from commit 7d6913cd0f3f6e7065dbbf0f61bd86553cdffab6)
Reviewed-on: http://gerrit.cloudera.org:8080/3218
Reviewed-by: Jean-Daniel Cryans


Project: http://git-wip-us.apache.org/repos/asf/incubator-kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kudu/commit/9c8bf8b0
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kudu/tree/9c8bf8b0
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kudu/diff/9c8bf8b0

Branch: refs/heads/branch-0.9.x
Commit: 9c8bf8b05ffe161bd78d3354746e8e8008c28b72
Parents: 3d33dfe
Author: Todd Lipcon <todd@apache.org>
Authored: Tue May 24 13:12:30 2016 -0700
Committer: Jean-Daniel Cryans <jdcryans@gerrit.cloudera.org>
Committed: Wed May 25 21:30:39 2016 +0000

----------------------------------------------------------------------
 python/kudu/client.pyx           | 84 +++++++++++++++++++++++++++++++++--
 python/kudu/libkudu_client.pxd   |  4 +-
 python/kudu/tests/test_client.py | 22 +++++++++
 3 files changed, 105 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/9c8bf8b0/python/kudu/client.pyx
----------------------------------------------------------------------
diff --git a/python/kudu/client.pyx b/python/kudu/client.pyx
index c3c4a63..036ecda 100644
--- a/python/kudu/client.pyx
+++ b/python/kudu/client.pyx
@@ -226,7 +226,7 @@ cdef class Client:
         # Nothing yet to clean up here
         pass
 
-    def create_table(self, table_name, Schema schema):
+    def create_table(self, table_name, Schema schema, partitioning=None):
         """
         Creates a new Kudu table from the passed Schema and options.
 
@@ -235,19 +235,39 @@ cdef class Client:
         table_name : string
         schema : kudu.Schema
           Create using kudu.schema_builder
+        partitioning : Partitioning object
         """
         cdef:
             KuduTableCreator* c
             Status s
         c = self.cp.NewTableCreator()
         try:
-            s = (c.table_name(tobytes(table_name))
-                 .schema(schema.schema)
-                 .Create())
+            c.table_name(tobytes(table_name))
+            c.schema(schema.schema)
+            if partitioning is not None:
+                self._apply_partitioning(c, partitioning)
+            s = c.Create()
             check_status(s)
         finally:
             del c
 
+    cdef _apply_partitioning(self, KuduTableCreator* c, part):
+        cdef:
+            vector[string] v
+            PartialRow py_row
+        # Apply hash partitioning.
+        for col_names, num_buckets in part._hash_partitions:
+            v.clear()
+            for n in col_names:
+                v.push_back(tobytes(n))
+            c.add_hash_partitions(v, num_buckets)
+        # Apply range partitioning
+        if part._range_partition_cols is not None:
+            v.clear()
+            for n in part._range_partition_cols:
+                v.push_back(tobytes(n))
+            c.set_range_partition_columns(v)
+
     def delete_table(self, table_name):
         """
         Delete a Kudu table. Raises KuduNotFound if the table does not exist.
@@ -659,6 +679,62 @@ cdef class Column:
         return result
 
 
+class Partitioning(object):
+    """ Argument to Client.create_table(...) to describe table partitioning. """
+
+    def __init__(self):
+        self._hash_partitions = []
+        self._range_partition_cols = None
+
+    def add_hash_partitions(self, column_names, num_buckets):
+        """
+        Adds a set of hash partitions to the table.
+
+        For each set of hash partitions added to the table, the total number of
+        table partitions is multiplied by the number of buckets. For example, if a
+        table is created with 3 split rows, and two hash partitions with 4 and 5
+        buckets respectively, the total number of table partitions will be 80
+        (4 range partitions * 4 hash buckets * 5 hash buckets).
+
+        Parameters
+        ----------
+        column_names : list of string column names on which to partition
+        num_buckets : the number of buckets to create
+
+        Returns
+        -------
+        self: this object
+        """
+        if isinstance(column_names, str):
+            column_names = [column_names]
+        self._hash_partitions.append( (column_names, num_buckets) )
+        return self
+
+    def set_range_partition_columns(self, column_names):
+        """
+        Sets the columns on which the table will be range-partitioned.
+
+        Every column must be a part of the table's primary key. If not set, the
+        table will be created with the primary-key columns as the range-partition
+        columns. If called with an empty vector, the table will be created without
+        range partitioning.
+
+        Parameters
+        ----------
+        column_names : list of string column names on which to partition
+
+        Returns
+        -------
+        self: this object
+        """
+        self._range_partition_cols = column_names
+        return self
+
+    # TODO: implement split_rows.
+    # This is slightly tricky since currently the PartialRow constructor requires a
+    # Table object, which doesn't exist yet. Should we use tuples instead?
+
+
 cdef class Predicate:
 
     """

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/9c8bf8b0/python/kudu/libkudu_client.pxd
----------------------------------------------------------------------
diff --git a/python/kudu/libkudu_client.pxd b/python/kudu/libkudu_client.pxd
index 0c16cd7..8fe4942 100644
--- a/python/kudu/libkudu_client.pxd
+++ b/python/kudu/libkudu_client.pxd
@@ -486,7 +486,9 @@ cdef extern from "kudu/client/client.h" namespace "kudu::client" nogil:
     cdef cppclass KuduTableCreator:
         KuduTableCreator& table_name(string& name)
         KuduTableCreator& schema(KuduSchema* schema)
-        KuduTableCreator& split_keys(vector[string]& keys)
+        KuduTableCreator& add_hash_partitions(vector[string]& columns, int num_buckets)
+        KuduTableCreator& set_range_partition_columns(vector[string]& columns)
+        KuduTableCreator& split_rows(vector[const KuduPartialRow*]& split_rows)
         KuduTableCreator& num_replicas(int n_replicas)
         KuduTableCreator& wait(c_bool wait)
 

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/9c8bf8b0/python/kudu/tests/test_client.py
----------------------------------------------------------------------
diff --git a/python/kudu/tests/test_client.py b/python/kudu/tests/test_client.py
index eb79f73..8174421 100644
--- a/python/kudu/tests/test_client.py
+++ b/python/kudu/tests/test_client.py
@@ -18,6 +18,7 @@
 
 from kudu.compat import unittest, long
 from kudu.tests.common import KuduTestBase
+from kudu.client import Partitioning
 import kudu
 
 
@@ -92,6 +93,27 @@ class TestClient(KuduTestBase, unittest.TestCase):
         self.assertRaises(kudu.KuduNotFound, self.client.table,
                           '__donotexist__')
 
+    def test_create_partitioned_table(self):
+        name = 'partitioned_table'
+        try:
+            self.client.create_table(
+                name, self.schema,
+                partitioning=Partitioning().add_hash_partitions(['key'], 2))
+            # TODO: once the Python client can list partition info, assert that it was
+            # created successfully here.
+            self.client.delete_table(name)
+
+            self.client.create_table(
+                name, self.schema,
+                partitioning=Partitioning().set_range_partition_columns([]))
+            self.client.delete_table(name)
+
+        finally:
+            try:
+                self.client.delete_table(name)
+            except:
+                pass
+
     def test_insert_nonexistent_field(self):
         table = self.client.table(self.ex_table)
         op = table.new_insert()


Mime
View raw message