arrow-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From w...@apache.org
Subject [arrow] branch master updated: ARROW-1849: [GLib] Add input checks to GArrowRecordBatch
Date Fri, 24 Nov 2017 14:32:40 GMT
This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 05bfb26  ARROW-1849: [GLib] Add input checks to GArrowRecordBatch
05bfb26 is described below

commit 05bfb2619b19154eead36e4aa067f440fdedf49a
Author: Kouhei Sutou <kou@clear-code.com>
AuthorDate: Fri Nov 24 09:32:36 2017 -0500

    ARROW-1849: [GLib] Add input checks to GArrowRecordBatch
    
    Author: Kouhei Sutou <kou@clear-code.com>
    
    Closes #1351 from kou/glib-add-input-check-to-record-batch and squashes the following
commits:
    
    c85dde22 [Kouhei Sutou] [GLib] Follow API change in Go example
    3bf1eeb4 [Kouhei Sutou] [GLib] Add index check to column readers for GArrowRecordBatch
    94d9f238 [Kouhei Sutou] Always validate on creating new record batch
---
 c_glib/arrow-glib/record-batch.cpp | 54 ++++++++++++++++++++-----
 c_glib/arrow-glib/record-batch.h   |  7 ++--
 c_glib/example/go/read-batch.go    |  4 +-
 c_glib/example/go/read-stream.go   |  4 +-
 c_glib/example/go/write-batch.go   | 10 ++++-
 c_glib/example/go/write-stream.go  | 10 ++++-
 c_glib/test/test-record-batch.rb   | 81 +++++++++++++++++++++++++++-----------
 7 files changed, 128 insertions(+), 42 deletions(-)

diff --git a/c_glib/arrow-glib/record-batch.cpp b/c_glib/arrow-glib/record-batch.cpp
index f23a0cf..64f2020 100644
--- a/c_glib/arrow-glib/record-batch.cpp
+++ b/c_glib/arrow-glib/record-batch.cpp
@@ -28,6 +28,23 @@
 
 #include <sstream>
 
+static inline bool
+garrow_record_batch_adjust_index(const std::shared_ptr<arrow::RecordBatch> arrow_record_batch,
+                                 gint &i)
+{
+  auto n_columns = arrow_record_batch->num_columns();
+  if (i < 0) {
+    i += n_columns;
+    if (i < 0) {
+      return false;
+    }
+  }
+  if (i >= n_columns) {
+    return false;
+  }
+  return true;
+}
+
 G_BEGIN_DECLS
 
 /**
@@ -135,13 +152,15 @@ garrow_record_batch_class_init(GArrowRecordBatchClass *klass)
  * @schema: The schema of the record batch.
  * @n_rows: The number of the rows in the record batch.
  * @columns: (element-type GArrowArray): The columns in the record batch.
+ * @error: (nullable): Return location for a #GError or %NULL.
  *
- * Returns: A newly created #GArrowRecordBatch.
+ * Returns: (nullable): A newly created #GArrowRecordBatch or %NULL on error.
  */
 GArrowRecordBatch *
 garrow_record_batch_new(GArrowSchema *schema,
                         guint32 n_rows,
-                        GList *columns)
+                        GList *columns,
+                        GError **error)
 {
   std::vector<std::shared_ptr<arrow::Array>> arrow_columns;
   for (GList *node = columns; node; node = node->next) {
@@ -152,7 +171,12 @@ garrow_record_batch_new(GArrowSchema *schema,
   auto arrow_record_batch =
     arrow::RecordBatch::Make(garrow_schema_get_raw(schema),
                              n_rows, arrow_columns);
-  return garrow_record_batch_new_raw(&arrow_record_batch);
+  auto status = arrow_record_batch->Validate();
+  if (garrow_error_check(error, status, "[record-batch][new]")) {
+    return garrow_record_batch_new_raw(&arrow_record_batch);
+  } else {
+    return NULL;
+  }
 }
 
 /**
@@ -192,15 +216,21 @@ garrow_record_batch_get_schema(GArrowRecordBatch *record_batch)
 /**
  * garrow_record_batch_get_column:
  * @record_batch: A #GArrowRecordBatch.
- * @i: The index of the target column.
+ * @i: The index of the target column. If it's negative, index is
+ *   counted backward from the end of the columns. `-1` means the last
+ *   column.
  *
- * Returns: (transfer full): The i-th column in the record batch.
+ * Returns: (transfer full) (nullable): The i-th column in the record batch
+ *   on success, %NULL on out of index.
  */
 GArrowArray *
 garrow_record_batch_get_column(GArrowRecordBatch *record_batch,
-                               guint i)
+                               gint i)
 {
   const auto arrow_record_batch = garrow_record_batch_get_raw(record_batch);
+  if (!garrow_record_batch_adjust_index(arrow_record_batch, i)) {
+    return NULL;
+  }
   auto arrow_column = arrow_record_batch->column(i);
   return garrow_array_new_raw(&arrow_column);
 }
@@ -230,15 +260,21 @@ garrow_record_batch_get_columns(GArrowRecordBatch *record_batch)
 /**
  * garrow_record_batch_get_column_name:
  * @record_batch: A #GArrowRecordBatch.
- * @i: The index of the target column.
+ * @i: The index of the target column. If it's negative, index is
+ *   counted backward from the end of the columns. `-1` means the last
+ *   column.
  *
- * Returns: The name of the i-th column in the record batch.
+ * Returns: (nullable): The name of the i-th column in the record batch
+ *   on success, %NULL on out of index
  */
 const gchar *
 garrow_record_batch_get_column_name(GArrowRecordBatch *record_batch,
-                                    guint i)
+                                    gint i)
 {
   const auto arrow_record_batch = garrow_record_batch_get_raw(record_batch);
+  if (!garrow_record_batch_adjust_index(arrow_record_batch, i)) {
+    return NULL;
+  }
   return arrow_record_batch->column_name(i).c_str();
 }
 
diff --git a/c_glib/arrow-glib/record-batch.h b/c_glib/arrow-glib/record-batch.h
index 021f894..d31edf4 100644
--- a/c_glib/arrow-glib/record-batch.h
+++ b/c_glib/arrow-glib/record-batch.h
@@ -68,17 +68,18 @@ GType garrow_record_batch_get_type(void) G_GNUC_CONST;
 
 GArrowRecordBatch *garrow_record_batch_new(GArrowSchema *schema,
                                            guint32 n_rows,
-                                           GList *columns);
+                                           GList *columns,
+                                           GError **error);
 
 gboolean garrow_record_batch_equal(GArrowRecordBatch *record_batch,
                                    GArrowRecordBatch *other_record_batch);
 
 GArrowSchema *garrow_record_batch_get_schema     (GArrowRecordBatch *record_batch);
 GArrowArray  *garrow_record_batch_get_column     (GArrowRecordBatch *record_batch,
-                                                  guint i);
+                                                  gint i);
 GList        *garrow_record_batch_get_columns    (GArrowRecordBatch *record_batch);
 const gchar  *garrow_record_batch_get_column_name(GArrowRecordBatch *record_batch,
-                                                  guint i);
+                                                  gint i);
 guint         garrow_record_batch_get_n_columns  (GArrowRecordBatch *record_batch);
 gint64        garrow_record_batch_get_n_rows     (GArrowRecordBatch *record_batch);
 GArrowRecordBatch *garrow_record_batch_slice     (GArrowRecordBatch *record_batch,
diff --git a/c_glib/example/go/read-batch.go b/c_glib/example/go/read-batch.go
index ef1a7fb..1472939 100644
--- a/c_glib/example/go/read-batch.go
+++ b/c_glib/example/go/read-batch.go
@@ -57,8 +57,8 @@ func PrintColumnValue(column *arrow.Array, i int64) {
 func PrintRecordBatch(recordBatch *arrow.RecordBatch) {
 	nColumns := recordBatch.GetNColumns()
 	for i := uint32(0); i < nColumns; i++ {
-		column := recordBatch.GetColumn(i)
-		columnName := recordBatch.GetColumnName(i)
+		column := recordBatch.GetColumn(int32(i))
+		columnName := recordBatch.GetColumnName(int32(i))
 		fmt.Printf("  %s: [", columnName)
 		nRows := recordBatch.GetNRows()
 		for j := int64(0); j < nRows; j++ {
diff --git a/c_glib/example/go/read-stream.go b/c_glib/example/go/read-stream.go
index 7bd0764..ed75a96 100644
--- a/c_glib/example/go/read-stream.go
+++ b/c_glib/example/go/read-stream.go
@@ -57,8 +57,8 @@ func PrintColumnValue(column *arrow.Array, i int64) {
 func PrintRecordBatch(recordBatch *arrow.RecordBatch) {
 	nColumns := recordBatch.GetNColumns()
 	for i := uint32(0); i < nColumns; i++ {
-		column := recordBatch.GetColumn(i)
-		columnName := recordBatch.GetColumnName(i)
+		column := recordBatch.GetColumn(int32(i))
+		columnName := recordBatch.GetColumnName(int32(i))
 		fmt.Printf("  %s: [", columnName)
 		nRows := recordBatch.GetNRows()
 		for j := int64(0); j < nRows; j++ {
diff --git a/c_glib/example/go/write-batch.go b/c_glib/example/go/write-batch.go
index 9dbc3c0..f4d03ed 100644
--- a/c_glib/example/go/write-batch.go
+++ b/c_glib/example/go/write-batch.go
@@ -188,7 +188,10 @@ func main() {
 		BuildDoubleArray(),
 	}
 
-	recordBatch := arrow.NewRecordBatch(schema, 4, columns)
+	recordBatch, err := arrow.NewRecordBatch(schema, 4, columns)
+	if err != nil {
+		log.Fatalf("Failed to create record batch #1: %v", err)
+	}
 	_, err = writer.WriteRecordBatch(recordBatch)
 	if err != nil {
 		log.Fatalf("Failed to write record batch #1: %v", err)
@@ -198,7 +201,10 @@ func main() {
 	for i, column := range columns {
 		slicedColumns[i] = column.Slice(1, 3)
 	}
-	recordBatch = arrow.NewRecordBatch(schema, 3, slicedColumns)
+	recordBatch, err = arrow.NewRecordBatch(schema, 3, slicedColumns)
+	if err != nil {
+		log.Fatalf("Failed to create record batch #2: %v", err)
+	}
 	_, err = writer.WriteRecordBatch(recordBatch)
 	if err != nil {
 		log.Fatalf("Failed to write record batch #2: %v", err)
diff --git a/c_glib/example/go/write-stream.go b/c_glib/example/go/write-stream.go
index 244741e..7225156 100644
--- a/c_glib/example/go/write-stream.go
+++ b/c_glib/example/go/write-stream.go
@@ -188,7 +188,10 @@ func main() {
 		BuildDoubleArray(),
 	}
 
-	recordBatch := arrow.NewRecordBatch(schema, 4, columns)
+	recordBatch, err := arrow.NewRecordBatch(schema, 4, columns)
+	if err != nil {
+		log.Fatalf("Failed to create record batch #1: %v", err)
+	}
 	_, err = writer.WriteRecordBatch(recordBatch)
 	if err != nil {
 		log.Fatalf("Failed to write record batch #1: %v", err)
@@ -198,7 +201,10 @@ func main() {
 	for i, column := range columns {
 		slicedColumns[i] = column.Slice(1, 3)
 	}
-	recordBatch = arrow.NewRecordBatch(schema, 3, slicedColumns)
+	recordBatch, err = arrow.NewRecordBatch(schema, 3, slicedColumns)
+	if err != nil {
+		log.Fatalf("Failed to create record batch #2: %v", err)
+	}
 	writer.WriteRecordBatch(recordBatch)
 	_, err = writer.WriteRecordBatch(recordBatch)
 	if err != nil {
diff --git a/c_glib/test/test-record-batch.rb b/c_glib/test/test-record-batch.rb
index 9fd34b7..365922f 100644
--- a/c_glib/test/test-record-batch.rb
+++ b/c_glib/test/test-record-batch.rb
@@ -18,32 +18,53 @@
 class TestTable < Test::Unit::TestCase
   include Helper::Buildable
 
-  def test_new
-    fields = [
-      Arrow::Field.new("visible", Arrow::BooleanDataType.new),
-      Arrow::Field.new("valid", Arrow::BooleanDataType.new),
-    ]
-    schema = Arrow::Schema.new(fields)
-    columns = [
-      build_boolean_array([true]),
-      build_boolean_array([false]),
-    ]
-    record_batch = Arrow::RecordBatch.new(schema, 1, columns)
-    assert_equal(1, record_batch.n_rows)
+  sub_test_case(".new") do
+    def test_valid
+      fields = [
+        Arrow::Field.new("visible", Arrow::BooleanDataType.new),
+        Arrow::Field.new("valid", Arrow::BooleanDataType.new),
+      ]
+      schema = Arrow::Schema.new(fields)
+      columns = [
+        build_boolean_array([true]),
+        build_boolean_array([false]),
+      ]
+      record_batch = Arrow::RecordBatch.new(schema, 1, columns)
+      assert_equal(1, record_batch.n_rows)
+    end
+
+    def test_no_columns
+      fields = [
+        Arrow::Field.new("visible", Arrow::BooleanDataType.new),
+      ]
+      schema = Arrow::Schema.new(fields)
+      message = "[record-batch][new]: " +
+        "Invalid: Number of columns did not match schema"
+      assert_raise(Arrow::Error::Invalid.new(message)) do
+        Arrow::RecordBatch.new(schema, 0, [])
+      end
+    end
   end
 
   sub_test_case("instance methods") do
     def setup
+      @visible_field = Arrow::Field.new("visible", Arrow::BooleanDataType.new)
+      @visible_values = [true, false, true, false, true]
+      @valid_field = Arrow::Field.new("valid", Arrow::BooleanDataType.new)
+      @valid_values = [false, true, false, true, false]
+
       fields = [
-        Arrow::Field.new("visible", Arrow::BooleanDataType.new),
-        Arrow::Field.new("valid", Arrow::BooleanDataType.new),
+        @visible_field,
+        @valid_field,
       ]
       schema = Arrow::Schema.new(fields)
       columns = [
-        build_boolean_array([true, false, true, false, true, false]),
-        build_boolean_array([false, true, false, true, false]),
+        build_boolean_array(@visible_values),
+        build_boolean_array(@valid_values),
       ]
-      @record_batch = Arrow::RecordBatch.new(schema, 5, columns)
+      @record_batch = Arrow::RecordBatch.new(schema,
+                                             @visible_values.size,
+                                             columns)
     end
 
     def test_equal
@@ -53,7 +74,7 @@ class TestTable < Test::Unit::TestCase
       ]
       schema = Arrow::Schema.new(fields)
       columns = [
-        build_boolean_array([true, false, true, false, true, false]),
+        build_boolean_array([true, false, true, false, true]),
         build_boolean_array([false, true, false, true, false]),
       ]
       other_record_batch = Arrow::RecordBatch.new(schema, 5, columns)
@@ -66,12 +87,28 @@ class TestTable < Test::Unit::TestCase
                    @record_batch.schema.fields.collect(&:name))
     end
 
-    def test_column
-      assert_equal(5, @record_batch.get_column(1).length)
+    sub_test_case("#column") do
+      def test_positive
+        assert_equal(build_boolean_array(@valid_values),
+                     @record_batch.get_column(1))
+      end
+
+      def test_negative
+        assert_equal(build_boolean_array(@visible_values),
+                     @record_batch.get_column(-2))
+      end
+
+      def test_positive_out_of_index
+        assert_nil(@record_batch.get_column(2))
+      end
+
+      def test_negative_out_of_index
+        assert_nil(@record_batch.get_column(-3))
+      end
     end
 
     def test_columns
-      assert_equal([6, 5],
+      assert_equal([5, 5],
                    @record_batch.columns.collect(&:length))
     end
 
@@ -94,7 +131,7 @@ class TestTable < Test::Unit::TestCase
 
     def test_to_s
       assert_equal(<<-PRETTY_PRINT, @record_batch.to_s)
-visible: [true, false, true, false, true, false]
+visible: [true, false, true, false, true]
 valid: [false, true, false, true, false]
       PRETTY_PRINT
     end

-- 
To stop receiving notification emails like this one, please contact
['"commits@arrow.apache.org" <commits@arrow.apache.org>'].

Mime
View raw message