parquet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From w...@apache.org
Subject parquet-cpp git commit: PARQUET-603: Implement missing information in schema descriptor
Date Mon, 09 May 2016 05:25:51 GMT
Repository: parquet-cpp
Updated Branches:
  refs/heads/master a80bf0294 -> bf51fc04e


PARQUET-603: Implement missing information in schema descriptor

Author: Deepak Majeti <deepak.majeti@hpe.com>

Closes #97 from majetideepak/LeafToBase and squashes the following commits:

9ded368 [Deepak Majeti] review comments
d80352f [Deepak Majeti] added tests
2a95b67 [Deepak Majeti] Implemented leaf_to_base


Project: http://git-wip-us.apache.org/repos/asf/parquet-cpp/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-cpp/commit/bf51fc04
Tree: http://git-wip-us.apache.org/repos/asf/parquet-cpp/tree/bf51fc04
Diff: http://git-wip-us.apache.org/repos/asf/parquet-cpp/diff/bf51fc04

Branch: refs/heads/master
Commit: bf51fc04e6cf4683ea4050061c0705dc95cf0927
Parents: a80bf02
Author: Deepak Majeti <deepak.majeti@hpe.com>
Authored: Sun May 8 22:25:44 2016 -0700
Committer: Wes McKinney <wesm@apache.org>
Committed: Sun May 8 22:25:44 2016 -0700

----------------------------------------------------------------------
 src/parquet/column/CMakeLists.txt            |  1 +
 src/parquet/column/properties.h              |  1 -
 src/parquet/schema/descriptor.cc             | 16 ++++++++++++----
 src/parquet/schema/descriptor.h              | 11 ++++++++---
 src/parquet/schema/schema-descriptor-test.cc | 10 +++++++++-
 5 files changed, 30 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/bf51fc04/src/parquet/column/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/parquet/column/CMakeLists.txt b/src/parquet/column/CMakeLists.txt
index 4c50c0a..d64be6c 100644
--- a/src/parquet/column/CMakeLists.txt
+++ b/src/parquet/column/CMakeLists.txt
@@ -19,6 +19,7 @@
 install(FILES
   page.h
   levels.h
+  properties.h
   reader.h
   scanner.h
   writer.h

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/bf51fc04/src/parquet/column/properties.h
----------------------------------------------------------------------
diff --git a/src/parquet/column/properties.h b/src/parquet/column/properties.h
index 40d04c3..132b1a6 100644
--- a/src/parquet/column/properties.h
+++ b/src/parquet/column/properties.h
@@ -23,7 +23,6 @@
 
 #include "parquet/util/input.h"
 #include "parquet/util/mem-allocator.h"
-#include "parquet/types.h"
 
 namespace parquet {
 

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/bf51fc04/src/parquet/schema/descriptor.cc
----------------------------------------------------------------------
diff --git a/src/parquet/schema/descriptor.cc b/src/parquet/schema/descriptor.cc
index 01f0421..de63e5e 100644
--- a/src/parquet/schema/descriptor.cc
+++ b/src/parquet/schema/descriptor.cc
@@ -18,6 +18,7 @@
 #include "parquet/schema/descriptor.h"
 
 #include "parquet/exception.h"
+#include "parquet/util/logging.h"
 
 namespace parquet {
 
@@ -42,12 +43,12 @@ void SchemaDescriptor::Init(const NodePtr& schema) {
   leaves_.clear();
 
   for (int i = 0; i < group_->field_count(); ++i) {
-    BuildTree(group_->field(i), 0, 0);
+    BuildTree(group_->field(i), 0, 0, group_->field(i));
   }
 }
 
-void SchemaDescriptor::BuildTree(
-    const NodePtr& node, int16_t max_def_level, int16_t max_rep_level) {
+void SchemaDescriptor::BuildTree(const NodePtr& node, int16_t max_def_level,
+    int16_t max_rep_level, const NodePtr& base) {
   if (node->is_optional()) {
     ++max_def_level;
   } else if (node->is_repeated()) {
@@ -61,11 +62,12 @@ void SchemaDescriptor::BuildTree(
   if (node->is_group()) {
     const GroupNode* group = static_cast<const GroupNode*>(node.get());
     for (int i = 0; i < group->field_count(); ++i) {
-      BuildTree(group->field(i), max_def_level, max_rep_level);
+      BuildTree(group->field(i), max_def_level, max_rep_level, base);
     }
   } else {
     // Primitive node, append to leaves
     leaves_.push_back(ColumnDescriptor(node, max_def_level, max_rep_level, this));
+    leaf_to_base_.emplace(leaves_.size() - 1, base);
   }
 }
 
@@ -81,9 +83,15 @@ ColumnDescriptor::ColumnDescriptor(const schema::NodePtr& node,
 }
 
 const ColumnDescriptor* SchemaDescriptor::Column(int i) const {
+  DCHECK(i >= 0 && i < static_cast<int>(leaves_.size()));
   return &leaves_[i];
 }
 
+const schema::NodePtr& SchemaDescriptor::GetColumnRoot(int i) const {
+  DCHECK(i >= 0 && i < static_cast<int>(leaves_.size()));
+  return leaf_to_base_.find(i)->second;
+}
+
 int ColumnDescriptor::type_scale() const {
   return primitive_node_->decimal_metadata().scale;
 }

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/bf51fc04/src/parquet/schema/descriptor.h
----------------------------------------------------------------------
diff --git a/src/parquet/schema/descriptor.h b/src/parquet/schema/descriptor.h
index 7c04e59..eb6eac6 100644
--- a/src/parquet/schema/descriptor.h
+++ b/src/parquet/schema/descriptor.h
@@ -101,14 +101,19 @@ class SchemaDescriptor {
 
   const schema::NodePtr& schema() const { return schema_; }
 
+  const schema::GroupNode* group() const { return group_; }
+
+  // Returns the top-level node (direct child of the schema root) that is, or contains, leaf column i
+  const schema::NodePtr& GetColumnRoot(int i) const;
+
  private:
   friend class ColumnDescriptor;
 
   schema::NodePtr schema_;
   const schema::GroupNode* group_;
 
-  void BuildTree(
-      const schema::NodePtr& node, int16_t max_def_level, int16_t max_rep_level);
+  void BuildTree(const schema::NodePtr& node, int16_t max_def_level,
+      int16_t max_rep_level, const schema::NodePtr& base);
 
   // Result of leaf node / tree analysis
   std::vector<ColumnDescriptor> leaves_;
@@ -122,7 +127,7 @@ class SchemaDescriptor {
   // -- -- b     |
   // -- -- -- c  |
   // -- -- -- -- d
-  std::unordered_map<int, schema::NodePtr> leaf_to_base_;
+  std::unordered_map<int, const schema::NodePtr> leaf_to_base_;
 };
 
 }  // namespace parquet

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/bf51fc04/src/parquet/schema/schema-descriptor-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/schema/schema-descriptor-test.cc b/src/parquet/schema/schema-descriptor-test.cc
index dd552be..d88cd0d 100644
--- a/src/parquet/schema/schema-descriptor-test.cc
+++ b/src/parquet/schema/schema-descriptor-test.cc
@@ -75,7 +75,8 @@ TEST_F(TestSchemaDescriptor, BuildTree) {
   NodeVector fields;
   NodePtr schema;
 
-  fields.push_back(Int32("a", Repetition::REQUIRED));
+  NodePtr inta = Int32("a", Repetition::REQUIRED);
+  fields.push_back(inta);
   fields.push_back(Int64("b", Repetition::OPTIONAL));
   fields.push_back(ByteArray("c", Repetition::REPEATED));
 
@@ -122,6 +123,13 @@ TEST_F(TestSchemaDescriptor, BuildTree) {
   ASSERT_EQ(descr_.Column(4)->path()->ToDotString(), "bag.records.item2");
   ASSERT_EQ(descr_.Column(5)->path()->ToDotString(), "bag.records.item3");
 
+  ASSERT_EQ(inta.get(), descr_.GetColumnRoot(0).get());
+  ASSERT_EQ(bag.get(), descr_.GetColumnRoot(3).get());
+  ASSERT_EQ(bag.get(), descr_.GetColumnRoot(4).get());
+  ASSERT_EQ(bag.get(), descr_.GetColumnRoot(5).get());
+
+  ASSERT_EQ(schema.get(), descr_.group());
+
   // Init clears the leaves
   descr_.Init(schema);
   ASSERT_EQ(nleaves, descr_.num_columns());


Mime
View raw message