Repository: parquet-cpp
Updated Branches:
refs/heads/master a80bf0294 -> bf51fc04e
PARQUET-603: Implement missing information in schema descriptor
Author: Deepak Majeti <deepak.majeti@hpe.com>
Closes #97 from majetideepak/LeafToBase and squashes the following commits:
9ded368 [Deepak Majeti] review comments
d80352f [Deepak Majeti] added tests
2a95b67 [Deepak Majeti] Implemented leaf_to_base
Project: http://git-wip-us.apache.org/repos/asf/parquet-cpp/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-cpp/commit/bf51fc04
Tree: http://git-wip-us.apache.org/repos/asf/parquet-cpp/tree/bf51fc04
Diff: http://git-wip-us.apache.org/repos/asf/parquet-cpp/diff/bf51fc04
Branch: refs/heads/master
Commit: bf51fc04e6cf4683ea4050061c0705dc95cf0927
Parents: a80bf02
Author: Deepak Majeti <deepak.majeti@hpe.com>
Authored: Sun May 8 22:25:44 2016 -0700
Committer: Wes McKinney <wesm@apache.org>
Committed: Sun May 8 22:25:44 2016 -0700
----------------------------------------------------------------------
src/parquet/column/CMakeLists.txt | 1 +
src/parquet/column/properties.h | 1 -
src/parquet/schema/descriptor.cc | 16 ++++++++++++----
src/parquet/schema/descriptor.h | 11 ++++++++---
src/parquet/schema/schema-descriptor-test.cc | 10 +++++++++-
5 files changed, 30 insertions(+), 9 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/bf51fc04/src/parquet/column/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/parquet/column/CMakeLists.txt b/src/parquet/column/CMakeLists.txt
index 4c50c0a..d64be6c 100644
--- a/src/parquet/column/CMakeLists.txt
+++ b/src/parquet/column/CMakeLists.txt
@@ -19,6 +19,7 @@
install(FILES
page.h
levels.h
+ properties.h
reader.h
scanner.h
writer.h
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/bf51fc04/src/parquet/column/properties.h
----------------------------------------------------------------------
diff --git a/src/parquet/column/properties.h b/src/parquet/column/properties.h
index 40d04c3..132b1a6 100644
--- a/src/parquet/column/properties.h
+++ b/src/parquet/column/properties.h
@@ -23,7 +23,6 @@
#include "parquet/util/input.h"
#include "parquet/util/mem-allocator.h"
-#include "parquet/types.h"
namespace parquet {
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/bf51fc04/src/parquet/schema/descriptor.cc
----------------------------------------------------------------------
diff --git a/src/parquet/schema/descriptor.cc b/src/parquet/schema/descriptor.cc
index 01f0421..de63e5e 100644
--- a/src/parquet/schema/descriptor.cc
+++ b/src/parquet/schema/descriptor.cc
@@ -18,6 +18,7 @@
#include "parquet/schema/descriptor.h"
#include "parquet/exception.h"
+#include "parquet/util/logging.h"
namespace parquet {
@@ -42,12 +43,12 @@ void SchemaDescriptor::Init(const NodePtr& schema) {
leaves_.clear();
for (int i = 0; i < group_->field_count(); ++i) {
- BuildTree(group_->field(i), 0, 0);
+ BuildTree(group_->field(i), 0, 0, group_->field(i));
}
}
-void SchemaDescriptor::BuildTree(
- const NodePtr& node, int16_t max_def_level, int16_t max_rep_level) {
+void SchemaDescriptor::BuildTree(const NodePtr& node, int16_t max_def_level,
+ int16_t max_rep_level, const NodePtr& base) {
if (node->is_optional()) {
++max_def_level;
} else if (node->is_repeated()) {
@@ -61,11 +62,12 @@ void SchemaDescriptor::BuildTree(
if (node->is_group()) {
const GroupNode* group = static_cast<const GroupNode*>(node.get());
for (int i = 0; i < group->field_count(); ++i) {
- BuildTree(group->field(i), max_def_level, max_rep_level);
+ BuildTree(group->field(i), max_def_level, max_rep_level, base);
}
} else {
// Primitive node, append to leaves
leaves_.push_back(ColumnDescriptor(node, max_def_level, max_rep_level, this));
+ leaf_to_base_.emplace(leaves_.size() - 1, base);
}
}
@@ -81,9 +83,15 @@ ColumnDescriptor::ColumnDescriptor(const schema::NodePtr& node,
}
const ColumnDescriptor* SchemaDescriptor::Column(int i) const {
+ DCHECK(i >= 0 && i < static_cast<int>(leaves_.size()));
return &leaves_[i];
}
+const schema::NodePtr& SchemaDescriptor::GetColumnRoot(int i) const {
+ DCHECK(i >= 0 && i < static_cast<int>(leaves_.size()));
+ return leaf_to_base_.find(i)->second;
+}
+
int ColumnDescriptor::type_scale() const {
return primitive_node_->decimal_metadata().scale;
}
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/bf51fc04/src/parquet/schema/descriptor.h
----------------------------------------------------------------------
diff --git a/src/parquet/schema/descriptor.h b/src/parquet/schema/descriptor.h
index 7c04e59..eb6eac6 100644
--- a/src/parquet/schema/descriptor.h
+++ b/src/parquet/schema/descriptor.h
@@ -101,14 +101,19 @@ class SchemaDescriptor {
const schema::NodePtr& schema() const { return schema_; }
+ const schema::GroupNode* group() const { return group_; }
+
+ // Returns the top-level field (a direct child of the schema root) that contains leaf column i
+ const schema::NodePtr& GetColumnRoot(int i) const;
+
private:
friend class ColumnDescriptor;
schema::NodePtr schema_;
const schema::GroupNode* group_;
- void BuildTree(
- const schema::NodePtr& node, int16_t max_def_level, int16_t max_rep_level);
+ void BuildTree(const schema::NodePtr& node, int16_t max_def_level,
+ int16_t max_rep_level, const schema::NodePtr& base);
// Result of leaf node / tree analysis
std::vector<ColumnDescriptor> leaves_;
@@ -122,7 +127,7 @@ class SchemaDescriptor {
// -- -- b |
// -- -- -- c |
// -- -- -- -- d
- std::unordered_map<int, schema::NodePtr> leaf_to_base_;
+ std::unordered_map<int, const schema::NodePtr> leaf_to_base_;
};
} // namespace parquet
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/bf51fc04/src/parquet/schema/schema-descriptor-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/schema/schema-descriptor-test.cc b/src/parquet/schema/schema-descriptor-test.cc
index dd552be..d88cd0d 100644
--- a/src/parquet/schema/schema-descriptor-test.cc
+++ b/src/parquet/schema/schema-descriptor-test.cc
@@ -75,7 +75,8 @@ TEST_F(TestSchemaDescriptor, BuildTree) {
NodeVector fields;
NodePtr schema;
- fields.push_back(Int32("a", Repetition::REQUIRED));
+ NodePtr inta = Int32("a", Repetition::REQUIRED);
+ fields.push_back(inta);
fields.push_back(Int64("b", Repetition::OPTIONAL));
fields.push_back(ByteArray("c", Repetition::REPEATED));
@@ -122,6 +123,13 @@ TEST_F(TestSchemaDescriptor, BuildTree) {
ASSERT_EQ(descr_.Column(4)->path()->ToDotString(), "bag.records.item2");
ASSERT_EQ(descr_.Column(5)->path()->ToDotString(), "bag.records.item3");
+ ASSERT_EQ(inta.get(), descr_.GetColumnRoot(0).get());
+ ASSERT_EQ(bag.get(), descr_.GetColumnRoot(3).get());
+ ASSERT_EQ(bag.get(), descr_.GetColumnRoot(4).get());
+ ASSERT_EQ(bag.get(), descr_.GetColumnRoot(5).get());
+
+ ASSERT_EQ(schema.get(), descr_.group());
+
// Init clears the leaves
descr_.Init(schema);
ASSERT_EQ(nleaves, descr_.num_columns());
|