parquet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From u..@apache.org
Subject parquet-cpp git commit: PARQUET-842: Do not set unnecessary fields in the parquet::SchemaElement
Date Wed, 25 Jan 2017 07:14:46 GMT
Repository: parquet-cpp
Updated Branches:
  refs/heads/master df59ffcf6 -> aacc8b528


PARQUET-842: Do not set unnecessary fields in the parquet::SchemaElement

The Impala Parquet scanner can reject our data otherwise

Author: Wes McKinney <wes.mckinney@twosigma.com>

Closes #226 from wesm/PARQUET-842 and squashes the following commits:

0cfa53c [Wes McKinney] Do not set field_id in SchemaElement until we understand why it is
causing parquet-mr problems
4220b48 [Wes McKinney] Do not set unnecessary fields in the parquet::SchemaElement


Project: http://git-wip-us.apache.org/repos/asf/parquet-cpp/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-cpp/commit/aacc8b52
Tree: http://git-wip-us.apache.org/repos/asf/parquet-cpp/tree/aacc8b52
Diff: http://git-wip-us.apache.org/repos/asf/parquet-cpp/diff/aacc8b52

Branch: refs/heads/master
Commit: aacc8b52816a660f2fb9af525c66c56f845ddf2c
Parents: df59ffc
Author: Wes McKinney <wes.mckinney@twosigma.com>
Authored: Wed Jan 25 08:14:31 2017 +0100
Committer: Uwe L. Korn <uwelk@xhochy.com>
Committed: Wed Jan 25 08:14:31 2017 +0100

----------------------------------------------------------------------
 src/parquet/schema/converter.cc |  2 --
 src/parquet/schema/test-util.h  |  6 ------
 src/parquet/schema/types.cc     | 16 ++++++++--------
 3 files changed, 8 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/aacc8b52/src/parquet/schema/converter.cc
----------------------------------------------------------------------
diff --git a/src/parquet/schema/converter.cc b/src/parquet/schema/converter.cc
index 08eeb66..3b18af3 100644
--- a/src/parquet/schema/converter.cc
+++ b/src/parquet/schema/converter.cc
@@ -96,8 +96,6 @@ class SchemaVisitor : public Node::ConstVisitor {
   void Visit(const Node* node) override {
     format::SchemaElement element;
     node->ToParquet(&element);
-    // Override field_id here as we can get user-generated Nodes without a valid id
-    element.__set_field_id(elements_->size());
     elements_->push_back(element);
 
     if (node->is_group()) {

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/aacc8b52/src/parquet/schema/test-util.h
----------------------------------------------------------------------
diff --git a/src/parquet/schema/test-util.h b/src/parquet/schema/test-util.h
index ee9e225..752b8f3 100644
--- a/src/parquet/schema/test-util.h
+++ b/src/parquet/schema/test-util.h
@@ -42,11 +42,6 @@ static inline SchemaElement NewPrimitive(const std::string& name,
   result.__set_repetition_type(repetition);
   result.__set_type(type);
   result.__set_num_children(0);
-  result.__set_field_id(id);
-  // Set default (non-set) values
-  result.__set_type_length(-1);
-  result.__set_precision(-1);
-  result.__set_scale(-1);
 
   return result;
 }
@@ -57,7 +52,6 @@ static inline SchemaElement NewGroup(const std::string& name,
   result.__set_name(name);
   result.__set_repetition_type(repetition);
   result.__set_num_children(num_children);
-  result.__set_field_id(id);
 
   return result;
 }

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/aacc8b52/src/parquet/schema/types.cc
----------------------------------------------------------------------
diff --git a/src/parquet/schema/types.cc b/src/parquet/schema/types.cc
index 043ef00..7d452c3 100644
--- a/src/parquet/schema/types.cc
+++ b/src/parquet/schema/types.cc
@@ -86,16 +86,16 @@ PrimitiveNode::PrimitiveNode(const std::string& name, Repetition::type
repetitio
       physical_type_(type),
       type_length_(length) {
   std::stringstream ss;
-  decimal_metadata_.isset = false;
+
+  // PARQUET-842: In an earlier revision, decimal_metadata_.isset was being
+  // set to true, but Impala will raise an incompatible metadata in such cases
+  memset(&decimal_metadata_, 0, sizeof(decimal_metadata_));
+
   // Check if the physical and logical types match
   // Mapping referred from Apache parquet-mr as on 2016-02-22
   switch (logical_type) {
     case LogicalType::NONE:
       // Logical type not set
-      // Clients should be able to read these values
-      decimal_metadata_.isset = true;
-      decimal_metadata_.precision = precision;
-      decimal_metadata_.scale = scale;
       break;
     case LogicalType::UTF8:
     case LogicalType::JSON:
@@ -289,7 +289,6 @@ void GroupNode::ToParquet(void* opaque_element) const {
   if (logical_type_ != LogicalType::NONE) {
     element->__set_converted_type(ToThrift(logical_type_));
   }
-  // FIXME: SchemaFlattener does this for us: element->__set_field_id(id_);
 }
 
 void PrimitiveNode::ToParquet(void* opaque_element) const {
@@ -302,8 +301,9 @@ void PrimitiveNode::ToParquet(void* opaque_element) const {
     element->__set_converted_type(ToThrift(logical_type_));
   }
   element->__set_type(ToThrift(physical_type_));
-  // FIXME: SchemaFlattener does this for us: element->__set_field_id(id_);
-  element->__set_type_length(type_length_);
+  if (physical_type_ == Type::FIXED_LEN_BYTE_ARRAY) {
+    element->__set_type_length(type_length_);
+  }
   if (decimal_metadata_.isset) {
     element->__set_precision(decimal_metadata_.precision);
     element->__set_scale(decimal_metadata_.scale);


Mime
View raw message