arrow-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From w...@apache.org
Subject [arrow] 09/09: ARROW-1862: [GLib] Add GArrowDictionaryArray
Date Fri, 01 Dec 2017 16:50:49 GMT
This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git

commit 1fd3457a77cda15ecf4068f1ced51d0683b519ab
Author: Kouhei Sutou <kou@clear-code.com>
AuthorDate: Wed Nov 29 12:48:39 2017 -0500

    ARROW-1862: [GLib] Add GArrowDictionaryArray
    
    Author: Kouhei Sutou <kou@clear-code.com>
    
    Closes #1365 from kou/glib-dictionary-array and squashes the following commits:
    
    83bfa135 [Kouhei Sutou] [GLib] Add GArrowDictionaryArray
---
 c_glib/arrow-glib/basic-array.cpp         |   3 +
 c_glib/arrow-glib/composite-array.cpp     | 107 ++++++++++++++++++++++++++++++
 c_glib/arrow-glib/composite-array.h       |  21 ++++++
 c_glib/arrow-glib/composite-data-type.cpp |   8 +++
 c_glib/test/test-dictionary-array.rb      |  63 ++++++++++++++++++
 5 files changed, 202 insertions(+)

diff --git a/c_glib/arrow-glib/basic-array.cpp b/c_glib/arrow-glib/basic-array.cpp
index 0698a04..36cf460 100644
--- a/c_glib/arrow-glib/basic-array.cpp
+++ b/c_glib/arrow-glib/basic-array.cpp
@@ -2091,6 +2091,9 @@ garrow_array_new_raw(std::shared_ptr<arrow::Array> *arrow_array)
   case arrow::Type::type::STRUCT:
     type = GARROW_TYPE_STRUCT_ARRAY;
     break;
+  case arrow::Type::type::DICTIONARY:
+    type = GARROW_TYPE_DICTIONARY_ARRAY;
+    break;
   default:
     type = GARROW_TYPE_ARRAY;
     break;
diff --git a/c_glib/arrow-glib/composite-array.cpp b/c_glib/arrow-glib/composite-array.cpp
index 445103d..14cc46d 100644
--- a/c_glib/arrow-glib/composite-array.cpp
+++ b/c_glib/arrow-glib/composite-array.cpp
@@ -44,6 +44,11 @@ G_BEGIN_DECLS
  * or more structs. One struct has zero or more fields. If you don't
  * have Arrow format data, you need to use #GArrowStructArrayBuilder
  * to create a new array.
+ *
+ * #GArrowDictionaryArray is a class for dictionary array. It can
+ * store data with dictionary and indices. It's more space efficient
+ * than a normal array when the array has many repeated values. You can convert a
+ * normal array to dictionary array by garrow_array_dictionary_encode().
  */
 
 G_DEFINE_TYPE(GArrowListArray,               \
@@ -234,4 +239,106 @@ garrow_struct_array_get_fields(GArrowStructArray *array)
   return g_list_reverse(fields);
 }
 
+
+G_DEFINE_TYPE(GArrowDictionaryArray,            \
+              garrow_dictionary_array,          \
+              GARROW_TYPE_ARRAY)
+
+static void
+garrow_dictionary_array_init(GArrowDictionaryArray *object)
+{
+}
+
+static void
+garrow_dictionary_array_class_init(GArrowDictionaryArrayClass *klass)
+{
+}
+
+/**
+ * garrow_dictionary_array_new:
+ * @data_type: The data type of dictionary.
+ * @indices: The indices of values in dictionary.
+ *
+ * Returns: A newly created #GArrowDictionaryArray.
+ *
+ * Since: 0.8.0
+ */
+GArrowDictionaryArray *
+garrow_dictionary_array_new(GArrowDataType *data_type,
+                            GArrowArray *indices)
+{
+  const auto arrow_data_type = garrow_data_type_get_raw(data_type);
+  const auto arrow_indices = garrow_array_get_raw(indices);
+  auto arrow_dictionary_array =
+    std::make_shared<arrow::DictionaryArray>(arrow_data_type,
+                                             arrow_indices);
+  auto arrow_array =
+    std::static_pointer_cast<arrow::Array>(arrow_dictionary_array);
+  return GARROW_DICTIONARY_ARRAY(garrow_array_new_raw(&arrow_array));
+}
+
+/**
+ * garrow_dictionary_array_get_indices:
+ * @array: A #GArrowDictionaryArray.
+ *
+ * Returns: (transfer full): The indices of values in dictionary.
+ *
+ * Since: 0.8.0
+ */
+GArrowArray *
+garrow_dictionary_array_get_indices(GArrowDictionaryArray *array)
+{
+  auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array));
+  auto arrow_dictionary_array =
+    std::static_pointer_cast<arrow::DictionaryArray>(arrow_array);
+  auto arrow_indices = arrow_dictionary_array->indices();
+  return garrow_array_new_raw(&arrow_indices);
+}
+
+/**
+ * garrow_dictionary_array_get_dictionary:
+ * @array: A #GArrowDictionaryArray.
+ *
+ * Returns: (transfer full): The dictionary of this array.
+ *
+ * Since: 0.8.0
+ */
+GArrowArray *
+garrow_dictionary_array_get_dictionary(GArrowDictionaryArray *array)
+{
+  auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array));
+  auto arrow_dictionary_array =
+    std::static_pointer_cast<arrow::DictionaryArray>(arrow_array);
+  auto arrow_dictionary = arrow_dictionary_array->dictionary();
+  return garrow_array_new_raw(&arrow_dictionary);
+}
+
+/**
+ * garrow_dictionary_array_get_dictionary_data_type:
+ * @array: A #GArrowDictionaryArray.
+ *
+ * Returns: (transfer full): The dictionary data type of this array.
+ *
+ * Since: 0.8.0
+ */
+GArrowDictionaryDataType *
+garrow_dictionary_array_get_dictionary_data_type(GArrowDictionaryArray *array)
+{
+  auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array));
+  auto arrow_dictionary_array =
+    std::static_pointer_cast<arrow::DictionaryArray>(arrow_array);
+  auto arrow_dictionary_data_type = arrow_dictionary_array->dict_type();
+  auto const_arrow_data_type =
+    static_cast<const arrow::DataType *>(arrow_dictionary_data_type);
+  auto arrow_data_type = const_cast<arrow::DataType *>(const_arrow_data_type);
+  struct NullDeleter {
+    void operator()(arrow::DataType *data_type) {
+    }
+  };
+  std::shared_ptr<arrow::DataType>
+    shared_arrow_data_type(arrow_data_type, NullDeleter());
+  auto data_type = garrow_data_type_new_raw(&shared_arrow_data_type);
+  return GARROW_DICTIONARY_DATA_TYPE(data_type);
+}
+
 G_END_DECLS
diff --git a/c_glib/arrow-glib/composite-array.h b/c_glib/arrow-glib/composite-array.h
index ebf9554..c59a616 100644
--- a/c_glib/arrow-glib/composite-array.h
+++ b/c_glib/arrow-glib/composite-array.h
@@ -129,4 +129,25 @@ GArrowArray *garrow_struct_array_get_field(GArrowStructArray *array,
                                            gint i);
 GList *garrow_struct_array_get_fields(GArrowStructArray *array);
 
+
+#define GARROW_TYPE_DICTIONARY_ARRAY (garrow_dictionary_array_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowDictionaryArray,
+                         garrow_dictionary_array,
+                         GARROW,
+                         DICTIONARY_ARRAY,
+                         GArrowArray)
+struct _GArrowDictionaryArrayClass
+{
+  GArrowArrayClass parent_class;
+};
+
+GArrowDictionaryArray *
+garrow_dictionary_array_new(GArrowDataType *data_type, GArrowArray *indices);
+GArrowArray *
+garrow_dictionary_array_get_indices(GArrowDictionaryArray *array);
+GArrowArray *
+garrow_dictionary_array_get_dictionary(GArrowDictionaryArray *array);
+GArrowDictionaryDataType *
+garrow_dictionary_array_get_dictionary_data_type(GArrowDictionaryArray *array);
+
 G_END_DECLS
diff --git a/c_glib/arrow-glib/composite-data-type.cpp b/c_glib/arrow-glib/composite-data-type.cpp
index 7ce8a97..5f742e5 100644
--- a/c_glib/arrow-glib/composite-data-type.cpp
+++ b/c_glib/arrow-glib/composite-data-type.cpp
@@ -158,6 +158,8 @@ garrow_dictionary_data_type_class_init(GArrowDictionaryDataTypeClass *klass)
  * @ordered: Whether dictionary contents are ordered or not.
  *
  * Returns: The newly created dictionary data type.
+ *
+ * Since: 0.8.0
  */
 GArrowDictionaryDataType *
 garrow_dictionary_data_type_new(GArrowDataType *index_data_type,
@@ -177,6 +179,8 @@ garrow_dictionary_data_type_new(GArrowDataType *index_data_type,
  * @data_type: The #GArrowDictionaryDataType.
  *
  * Returns: (transfer full): The #GArrowDataType of index.
+ *
+ * Since: 0.8.0
  */
 GArrowDataType *
 garrow_dictionary_data_type_get_index_data_type(GArrowDictionaryDataType *data_type)
@@ -193,6 +197,8 @@ garrow_dictionary_data_type_get_index_data_type(GArrowDictionaryDataType *data_t
  * @data_type: The #GArrowDictionaryDataType.
  *
  * Returns: (transfer full): The dictionary as #GArrowArray.
+ *
+ * Since: 0.8.0
  */
 GArrowArray *
 garrow_dictionary_data_type_get_dictionary(GArrowDictionaryDataType *data_type)
@@ -209,6 +215,8 @@ garrow_dictionary_data_type_get_dictionary(GArrowDictionaryDataType *data_type)
  * @data_type: The #GArrowDictionaryDataType.
  *
  * Returns: Whether dictionary contents are ordered or not.
+ *
+ * Since: 0.8.0
  */
 gboolean
 garrow_dictionary_data_type_is_ordered(GArrowDictionaryDataType *data_type)
diff --git a/c_glib/test/test-dictionary-array.rb b/c_glib/test/test-dictionary-array.rb
new file mode 100644
index 0000000..d4f4b34
--- /dev/null
+++ b/c_glib/test/test-dictionary-array.rb
@@ -0,0 +1,63 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestDictionaryArray < Test::Unit::TestCase
+  include Helper::Buildable
+
+  def setup
+    @index_data_type = Arrow::Int32DataType.new
+    @dictionary = build_string_array(["C", "C++", "Ruby"])
+    @ordered = false
+    @data_type = Arrow::DictionaryDataType.new(@index_data_type,
+                                               @dictionary,
+                                               @ordered)
+  end
+
+  sub_test_case(".new") do
+    def test_new
+      indices = build_int32_array([0, 2, 2, 1, 0])
+      dictionary_array = Arrow::DictionaryArray.new(@data_type, indices)
+      assert_equal(<<-STRING.chomp, dictionary_array.to_s)
+
+-- is_valid: all not null
+-- dictionary: ["C", "C++", "Ruby"]
+-- indices: [0, 2, 2, 1, 0]
+      STRING
+    end
+  end
+
+  sub_test_case("instance methods") do
+    def setup
+      super
+      @indices = build_int32_array([0, 2, 2, 1, 0])
+      @dictionary_array = Arrow::DictionaryArray.new(@data_type, @indices)
+    end
+
+    def test_indices
+      assert_equal(@indices, @dictionary_array.indices)
+    end
+
+    def test_dictionary
+      assert_equal(@dictionary, @dictionary_array.dictionary)
+    end
+
+    def test_dictionary_data_type
+      assert_equal(@data_type,
+                   @dictionary_array.dictionary_data_type)
+    end
+  end
+end

-- 
To stop receiving notification emails like this one, please contact
"commits@arrow.apache.org" <commits@arrow.apache.org>.

Mime
View raw message