parquet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From w...@apache.org
Subject [1/3] parquet-cpp git commit: PARQUET-844: Schema, compression consolidation / flattening
Date Thu, 26 Jan 2017 17:02:50 GMT
Repository: parquet-cpp
Updated Branches:
  refs/heads/master 257e65b81 -> 13da51d3f


http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/13da51d3/src/parquet/schema/schema-types-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/schema/schema-types-test.cc b/src/parquet/schema/schema-types-test.cc
deleted file mode 100644
index 37c8b14..0000000
--- a/src/parquet/schema/schema-types-test.cc
+++ /dev/null
@@ -1,311 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <gtest/gtest.h>
-
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "parquet/exception.h"
-#include "parquet/schema/test-util.h"
-#include "parquet/schema/types.h"
-#include "parquet/thrift/parquet_types.h"
-#include "parquet/types.h"
-
-using std::string;
-using std::vector;
-
-namespace parquet {
-
-namespace schema {
-
-// ----------------------------------------------------------------------
-// ColumnPath
-
-TEST(TestColumnPath, TestAttrs) {
-  ColumnPath path(std::vector<std::string>({"toplevel", "leaf"}));
-
-  ASSERT_EQ(path.ToDotString(), "toplevel.leaf");
-
-  std::shared_ptr<ColumnPath> path_ptr = ColumnPath::FromDotString("toplevel.leaf");
-  ASSERT_EQ(path_ptr->ToDotString(), "toplevel.leaf");
-
-  std::shared_ptr<ColumnPath> extended = path_ptr->extend("anotherlevel");
-  ASSERT_EQ(extended->ToDotString(), "toplevel.leaf.anotherlevel");
-}
-
-// ----------------------------------------------------------------------
-// Primitive node
-
-class TestPrimitiveNode : public ::testing::Test {
- public:
-  void SetUp() {
-    name_ = "name";
-    id_ = 5;
-  }
-
-  void Convert(const format::SchemaElement* element) {
-    node_ = PrimitiveNode::FromParquet(element, id_);
-    ASSERT_TRUE(node_->is_primitive());
-    prim_node_ = static_cast<const PrimitiveNode*>(node_.get());
-  }
-
- protected:
-  std::string name_;
-  const PrimitiveNode* prim_node_;
-
-  int id_;
-  std::unique_ptr<Node> node_;
-};
-
-TEST_F(TestPrimitiveNode, Attrs) {
-  PrimitiveNode node1("foo", Repetition::REPEATED, Type::INT32);
-
-  PrimitiveNode node2("bar", Repetition::OPTIONAL, Type::BYTE_ARRAY, LogicalType::UTF8);
-
-  ASSERT_EQ("foo", node1.name());
-
-  ASSERT_TRUE(node1.is_primitive());
-  ASSERT_FALSE(node1.is_group());
-
-  ASSERT_EQ(Repetition::REPEATED, node1.repetition());
-  ASSERT_EQ(Repetition::OPTIONAL, node2.repetition());
-
-  ASSERT_EQ(Node::PRIMITIVE, node1.node_type());
-
-  ASSERT_EQ(Type::INT32, node1.physical_type());
-  ASSERT_EQ(Type::BYTE_ARRAY, node2.physical_type());
-
-  // logical types
-  ASSERT_EQ(LogicalType::NONE, node1.logical_type());
-  ASSERT_EQ(LogicalType::UTF8, node2.logical_type());
-
-  // repetition
-  node1 = PrimitiveNode("foo", Repetition::REQUIRED, Type::INT32);
-  node2 = PrimitiveNode("foo", Repetition::OPTIONAL, Type::INT32);
-  PrimitiveNode node3("foo", Repetition::REPEATED, Type::INT32);
-
-  ASSERT_TRUE(node1.is_required());
-
-  ASSERT_TRUE(node2.is_optional());
-  ASSERT_FALSE(node2.is_required());
-
-  ASSERT_TRUE(node3.is_repeated());
-  ASSERT_FALSE(node3.is_optional());
-}
-
-TEST_F(TestPrimitiveNode, FromParquet) {
-  SchemaElement elt =
-      NewPrimitive(name_, FieldRepetitionType::OPTIONAL, format::Type::INT32, 0);
-  Convert(&elt);
-  ASSERT_EQ(name_, prim_node_->name());
-  ASSERT_EQ(id_, prim_node_->id());
-  ASSERT_EQ(Repetition::OPTIONAL, prim_node_->repetition());
-  ASSERT_EQ(Type::INT32, prim_node_->physical_type());
-  ASSERT_EQ(LogicalType::NONE, prim_node_->logical_type());
-
-  // Test a logical type
-  elt = NewPrimitive(name_, FieldRepetitionType::REQUIRED, format::Type::BYTE_ARRAY, 0);
-  elt.__set_converted_type(ConvertedType::UTF8);
-
-  Convert(&elt);
-  ASSERT_EQ(Repetition::REQUIRED, prim_node_->repetition());
-  ASSERT_EQ(Type::BYTE_ARRAY, prim_node_->physical_type());
-  ASSERT_EQ(LogicalType::UTF8, prim_node_->logical_type());
-
-  // FIXED_LEN_BYTE_ARRAY
-  elt = NewPrimitive(
-      name_, FieldRepetitionType::OPTIONAL, format::Type::FIXED_LEN_BYTE_ARRAY, 0);
-  elt.__set_type_length(16);
-
-  Convert(&elt);
-  ASSERT_EQ(name_, prim_node_->name());
-  ASSERT_EQ(id_, prim_node_->id());
-  ASSERT_EQ(Repetition::OPTIONAL, prim_node_->repetition());
-  ASSERT_EQ(Type::FIXED_LEN_BYTE_ARRAY, prim_node_->physical_type());
-  ASSERT_EQ(16, prim_node_->type_length());
-
-  // ConvertedType::Decimal
-  elt = NewPrimitive(
-      name_, FieldRepetitionType::OPTIONAL, format::Type::FIXED_LEN_BYTE_ARRAY, 0);
-  elt.__set_converted_type(ConvertedType::DECIMAL);
-  elt.__set_type_length(6);
-  elt.__set_scale(2);
-  elt.__set_precision(12);
-
-  Convert(&elt);
-  ASSERT_EQ(Type::FIXED_LEN_BYTE_ARRAY, prim_node_->physical_type());
-  ASSERT_EQ(LogicalType::DECIMAL, prim_node_->logical_type());
-  ASSERT_EQ(6, prim_node_->type_length());
-  ASSERT_EQ(2, prim_node_->decimal_metadata().scale);
-  ASSERT_EQ(12, prim_node_->decimal_metadata().precision);
-}
-
-TEST_F(TestPrimitiveNode, Equals) {
-  PrimitiveNode node1("foo", Repetition::REQUIRED, Type::INT32);
-  PrimitiveNode node2("foo", Repetition::REQUIRED, Type::INT64);
-  PrimitiveNode node3("bar", Repetition::REQUIRED, Type::INT32);
-  PrimitiveNode node4("foo", Repetition::OPTIONAL, Type::INT32);
-  PrimitiveNode node5("foo", Repetition::REQUIRED, Type::INT32);
-
-  ASSERT_TRUE(node1.Equals(&node1));
-  ASSERT_FALSE(node1.Equals(&node2));
-  ASSERT_FALSE(node1.Equals(&node3));
-  ASSERT_FALSE(node1.Equals(&node4));
-  ASSERT_TRUE(node1.Equals(&node5));
-
-  PrimitiveNode flba1("foo", Repetition::REQUIRED, Type::FIXED_LEN_BYTE_ARRAY,
-      LogicalType::DECIMAL, 12, 4, 2);
-
-  PrimitiveNode flba2("foo", Repetition::REQUIRED, Type::FIXED_LEN_BYTE_ARRAY,
-      LogicalType::DECIMAL, 1, 4, 2);
-  flba2.SetTypeLength(12);
-
-  PrimitiveNode flba3("foo", Repetition::REQUIRED, Type::FIXED_LEN_BYTE_ARRAY,
-      LogicalType::DECIMAL, 1, 4, 2);
-  flba3.SetTypeLength(16);
-
-  PrimitiveNode flba4("foo", Repetition::REQUIRED, Type::FIXED_LEN_BYTE_ARRAY,
-      LogicalType::DECIMAL, 12, 4, 0);
-
-  PrimitiveNode flba5("foo", Repetition::REQUIRED, Type::FIXED_LEN_BYTE_ARRAY,
-      LogicalType::NONE, 12, 4, 0);
-
-  ASSERT_TRUE(flba1.Equals(&flba2));
-  ASSERT_FALSE(flba1.Equals(&flba3));
-  ASSERT_FALSE(flba1.Equals(&flba4));
-  ASSERT_FALSE(flba1.Equals(&flba5));
-}
-
-TEST_F(TestPrimitiveNode, PhysicalLogicalMapping) {
-  ASSERT_NO_THROW(
-      PrimitiveNode::Make("foo", Repetition::REQUIRED, Type::INT32, LogicalType::INT_32));
-  ASSERT_NO_THROW(PrimitiveNode::Make(
-      "foo", Repetition::REQUIRED, Type::BYTE_ARRAY, LogicalType::JSON));
-  ASSERT_THROW(
-      PrimitiveNode::Make("foo", Repetition::REQUIRED, Type::INT32, LogicalType::JSON),
-      ParquetException);
-  ASSERT_NO_THROW(PrimitiveNode::Make(
-      "foo", Repetition::REQUIRED, Type::INT64, LogicalType::TIMESTAMP_MILLIS));
-  ASSERT_THROW(
-      PrimitiveNode::Make("foo", Repetition::REQUIRED, Type::INT32, LogicalType::INT_64),
-      ParquetException);
-  ASSERT_THROW(PrimitiveNode::Make(
-                   "foo", Repetition::REQUIRED, Type::BYTE_ARRAY, LogicalType::INT_8),
-      ParquetException);
-  ASSERT_THROW(PrimitiveNode::Make(
-                   "foo", Repetition::REQUIRED, Type::BYTE_ARRAY, LogicalType::INTERVAL),
-      ParquetException);
-  ASSERT_THROW(PrimitiveNode::Make("foo", Repetition::REQUIRED,
-                   Type::FIXED_LEN_BYTE_ARRAY, LogicalType::ENUM),
-      ParquetException);
-  ASSERT_NO_THROW(PrimitiveNode::Make(
-      "foo", Repetition::REQUIRED, Type::BYTE_ARRAY, LogicalType::ENUM));
-  ASSERT_THROW(PrimitiveNode::Make("foo", Repetition::REQUIRED,
-                   Type::FIXED_LEN_BYTE_ARRAY, LogicalType::DECIMAL, 0, 2, 4),
-      ParquetException);
-  ASSERT_THROW(PrimitiveNode::Make("foo", Repetition::REQUIRED, Type::FLOAT,
-                   LogicalType::DECIMAL, 0, 2, 4),
-      ParquetException);
-  ASSERT_THROW(PrimitiveNode::Make("foo", Repetition::REQUIRED,
-                   Type::FIXED_LEN_BYTE_ARRAY, LogicalType::DECIMAL, 0, 4, 0),
-      ParquetException);
-  ASSERT_THROW(PrimitiveNode::Make("foo", Repetition::REQUIRED,
-                   Type::FIXED_LEN_BYTE_ARRAY, LogicalType::DECIMAL, 10, 0, 4),
-      ParquetException);
-  ASSERT_THROW(PrimitiveNode::Make("foo", Repetition::REQUIRED,
-                   Type::FIXED_LEN_BYTE_ARRAY, LogicalType::DECIMAL, 10, 4, -1),
-      ParquetException);
-  ASSERT_THROW(PrimitiveNode::Make("foo", Repetition::REQUIRED,
-                   Type::FIXED_LEN_BYTE_ARRAY, LogicalType::DECIMAL, 10, 2, 4),
-      ParquetException);
-  ASSERT_NO_THROW(PrimitiveNode::Make("foo", Repetition::REQUIRED,
-      Type::FIXED_LEN_BYTE_ARRAY, LogicalType::DECIMAL, 10, 6, 4));
-  ASSERT_NO_THROW(PrimitiveNode::Make("foo", Repetition::REQUIRED,
-      Type::FIXED_LEN_BYTE_ARRAY, LogicalType::INTERVAL, 12));
-  ASSERT_THROW(PrimitiveNode::Make("foo", Repetition::REQUIRED,
-                   Type::FIXED_LEN_BYTE_ARRAY, LogicalType::INTERVAL, 10),
-      ParquetException);
-}
-
-// ----------------------------------------------------------------------
-// Group node
-
-class TestGroupNode : public ::testing::Test {
- public:
-  NodeVector Fields1() {
-    NodeVector fields;
-
-    fields.push_back(Int32("one", Repetition::REQUIRED));
-    fields.push_back(Int64("two"));
-    fields.push_back(Double("three"));
-
-    return fields;
-  }
-};
-
-TEST_F(TestGroupNode, Attrs) {
-  NodeVector fields = Fields1();
-
-  GroupNode node1("foo", Repetition::REPEATED, fields);
-  GroupNode node2("bar", Repetition::OPTIONAL, fields, LogicalType::LIST);
-
-  ASSERT_EQ("foo", node1.name());
-
-  ASSERT_TRUE(node1.is_group());
-  ASSERT_FALSE(node1.is_primitive());
-
-  ASSERT_EQ(fields.size(), node1.field_count());
-
-  ASSERT_TRUE(node1.is_repeated());
-  ASSERT_TRUE(node2.is_optional());
-
-  ASSERT_EQ(Repetition::REPEATED, node1.repetition());
-  ASSERT_EQ(Repetition::OPTIONAL, node2.repetition());
-
-  ASSERT_EQ(Node::GROUP, node1.node_type());
-
-  // logical types
-  ASSERT_EQ(LogicalType::NONE, node1.logical_type());
-  ASSERT_EQ(LogicalType::LIST, node2.logical_type());
-}
-
-TEST_F(TestGroupNode, Equals) {
-  NodeVector f1 = Fields1();
-  NodeVector f2 = Fields1();
-
-  GroupNode group1("group", Repetition::REPEATED, f1);
-  GroupNode group2("group", Repetition::REPEATED, f2);
-  GroupNode group3("group2", Repetition::REPEATED, f2);
-
-  // This is copied in the GroupNode ctor, so this is okay
-  f2.push_back(Float("four", Repetition::OPTIONAL));
-  GroupNode group4("group", Repetition::REPEATED, f2);
-  GroupNode group5("group", Repetition::REPEATED, Fields1());
-
-  ASSERT_TRUE(group1.Equals(&group1));
-  ASSERT_TRUE(group1.Equals(&group2));
-  ASSERT_FALSE(group1.Equals(&group3));
-
-  ASSERT_FALSE(group1.Equals(&group4));
-  ASSERT_FALSE(group5.Equals(&group4));
-}
-
-}  // namespace schema
-
-}  // namespace parquet

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/13da51d3/src/parquet/schema/test-util.h
----------------------------------------------------------------------
diff --git a/src/parquet/schema/test-util.h b/src/parquet/schema/test-util.h
deleted file mode 100644
index 752b8f3..0000000
--- a/src/parquet/schema/test-util.h
+++ /dev/null
@@ -1,63 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-// This module defines an abstract interface for iterating through pages in a
-// Parquet column chunk within a row group. It could be extended in the future
-// to iterate through all data pages in all chunks in a file.
-
-#ifndef PARQUET_SCHEMA_TEST_UTIL_H
-#define PARQUET_SCHEMA_TEST_UTIL_H
-
-#include <string>
-
-#include "parquet/schema/types.h"
-#include "parquet/thrift/parquet_types.h"
-
-using parquet::format::ConvertedType;
-using parquet::format::FieldRepetitionType;
-using parquet::format::SchemaElement;
-
-namespace parquet {
-
-namespace schema {
-
-static inline SchemaElement NewPrimitive(const std::string& name,
-    FieldRepetitionType::type repetition, format::Type::type type, int id = 0) {
-  SchemaElement result;
-  result.__set_name(name);
-  result.__set_repetition_type(repetition);
-  result.__set_type(type);
-  result.__set_num_children(0);
-
-  return result;
-}
-
-static inline SchemaElement NewGroup(const std::string& name,
-    FieldRepetitionType::type repetition, int num_children, int id = 0) {
-  SchemaElement result;
-  result.__set_name(name);
-  result.__set_repetition_type(repetition);
-  result.__set_num_children(num_children);
-
-  return result;
-}
-
-}  // namespace schema
-
-}  // namespace parquet
-
-#endif  // PARQUET_COLUMN_TEST_UTIL_H

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/13da51d3/src/parquet/schema/types.cc
----------------------------------------------------------------------
diff --git a/src/parquet/schema/types.cc b/src/parquet/schema/types.cc
deleted file mode 100644
index 7d452c3..0000000
--- a/src/parquet/schema/types.cc
+++ /dev/null
@@ -1,315 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "parquet/schema/types.h"
-
-#include <algorithm>
-#include <memory>
-
-#include "parquet/exception.h"
-#include "parquet/thrift/parquet_types.h"
-#include "parquet/thrift/util.h"
-
-namespace parquet {
-
-namespace schema {
-
-// ----------------------------------------------------------------------
-// ColumnPath
-
-std::shared_ptr<ColumnPath> ColumnPath::FromDotString(const std::string& dotstring) {
-  std::stringstream ss(dotstring);
-  std::string item;
-  std::vector<std::string> path;
-  while (std::getline(ss, item, '.')) {
-    path.push_back(item);
-  }
-  return std::shared_ptr<ColumnPath>(new ColumnPath(std::move(path)));
-}
-
-std::shared_ptr<ColumnPath> ColumnPath::extend(const std::string& node_name) const {
-  std::vector<std::string> path;
-  path.reserve(path_.size() + 1);
-  path.resize(path_.size() + 1);
-  std::copy(path_.cbegin(), path_.cend(), path.begin());
-  path[path_.size()] = node_name;
-
-  return std::shared_ptr<ColumnPath>(new ColumnPath(std::move(path)));
-}
-
-std::string ColumnPath::ToDotString() const {
-  std::stringstream ss;
-  for (auto it = path_.cbegin(); it != path_.cend(); ++it) {
-    if (it != path_.cbegin()) { ss << "."; }
-    ss << *it;
-  }
-  return ss.str();
-}
-
-const std::vector<std::string>& ColumnPath::ToDotVector() const {
-  return path_;
-}
-
-// ----------------------------------------------------------------------
-// Base node
-
-bool Node::EqualsInternal(const Node* other) const {
-  return type_ == other->type_ && name_ == other->name_ &&
-         repetition_ == other->repetition_ && logical_type_ == other->logical_type_;
-}
-
-void Node::SetParent(const Node* parent) {
-  parent_ = parent;
-}
-
-// ----------------------------------------------------------------------
-// Primitive node
-
-PrimitiveNode::PrimitiveNode(const std::string& name, Repetition::type repetition,
-    Type::type type, LogicalType::type logical_type, int length, int precision, int scale,
-    int id)
-    : Node(Node::PRIMITIVE, name, repetition, logical_type, id),
-      physical_type_(type),
-      type_length_(length) {
-  std::stringstream ss;
-
-  // PARQUET-842: In an earlier revision, decimal_metadata_.isset was being
-  // set to true, but Impala will raise an incompatible metadata in such cases
-  memset(&decimal_metadata_, 0, sizeof(decimal_metadata_));
-
-  // Check if the physical and logical types match
-  // Mapping referred from Apache parquet-mr as on 2016-02-22
-  switch (logical_type) {
-    case LogicalType::NONE:
-      // Logical type not set
-      break;
-    case LogicalType::UTF8:
-    case LogicalType::JSON:
-    case LogicalType::BSON:
-      if (type != Type::BYTE_ARRAY) {
-        ss << LogicalTypeToString(logical_type);
-        ss << " can only annotate BYTE_ARRAY fields";
-        throw ParquetException(ss.str());
-      }
-      break;
-    case LogicalType::DECIMAL:
-      if ((type != Type::INT32) && (type != Type::INT64) && (type != Type::BYTE_ARRAY) &&
-          (type != Type::FIXED_LEN_BYTE_ARRAY)) {
-        ss << "DECIMAL can only annotate INT32, INT64, BYTE_ARRAY, and FIXED";
-        throw ParquetException(ss.str());
-      }
-      if (precision <= 0) {
-        ss << "Invalid DECIMAL precision: " << precision;
-        throw ParquetException(ss.str());
-      }
-      if (scale < 0) {
-        ss << "Invalid DECIMAL scale: " << scale;
-        throw ParquetException(ss.str());
-      }
-      if (scale > precision) {
-        ss << "Invalid DECIMAL scale " << scale;
-        ss << " cannot be greater than precision " << precision;
-        throw ParquetException(ss.str());
-      }
-      decimal_metadata_.isset = true;
-      decimal_metadata_.precision = precision;
-      decimal_metadata_.scale = scale;
-      break;
-    case LogicalType::DATE:
-    case LogicalType::TIME_MILLIS:
-    case LogicalType::UINT_8:
-    case LogicalType::UINT_16:
-    case LogicalType::UINT_32:
-    case LogicalType::INT_8:
-    case LogicalType::INT_16:
-    case LogicalType::INT_32:
-      if (type != Type::INT32) {
-        ss << LogicalTypeToString(logical_type);
-        ss << " can only annotate INT32";
-        throw ParquetException(ss.str());
-      }
-      break;
-    case LogicalType::TIME_MICROS:
-    case LogicalType::TIMESTAMP_MILLIS:
-    case LogicalType::TIMESTAMP_MICROS:
-    case LogicalType::UINT_64:
-    case LogicalType::INT_64:
-      if (type != Type::INT64) {
-        ss << LogicalTypeToString(logical_type);
-        ss << " can only annotate INT64";
-        throw ParquetException(ss.str());
-      }
-      break;
-    case LogicalType::INTERVAL:
-      if ((type != Type::FIXED_LEN_BYTE_ARRAY) || (length != 12)) {
-        ss << "INTERVAL can only annotate FIXED_LEN_BYTE_ARRAY(12)";
-        throw ParquetException(ss.str());
-      }
-      break;
-    case LogicalType::ENUM:
-      if (type != Type::BYTE_ARRAY) {
-        ss << "ENUM can only annotate BYTE_ARRAY fields";
-        throw ParquetException(ss.str());
-      }
-      break;
-    default:
-      ss << LogicalTypeToString(logical_type);
-      ss << " can not be applied to a primitive type";
-      throw ParquetException(ss.str());
-  }
-  if (type == Type::FIXED_LEN_BYTE_ARRAY) {
-    if (length <= 0) {
-      ss << "Invalid FIXED_LEN_BYTE_ARRAY length: " << length;
-      throw ParquetException(ss.str());
-    }
-    type_length_ = length;
-  }
-}
-
-bool PrimitiveNode::EqualsInternal(const PrimitiveNode* other) const {
-  bool is_equal = true;
-  if ((physical_type_ != other->physical_type_) ||
-      (logical_type_ != other->logical_type_)) {
-    return false;
-  }
-  if (logical_type_ == LogicalType::DECIMAL) {
-    is_equal &= (decimal_metadata_.precision == other->decimal_metadata_.precision) &&
-                (decimal_metadata_.scale == other->decimal_metadata_.scale);
-  }
-  if (physical_type_ == Type::FIXED_LEN_BYTE_ARRAY) {
-    is_equal &= (type_length_ == other->type_length_);
-  }
-  return is_equal;
-}
-
-bool PrimitiveNode::Equals(const Node* other) const {
-  if (!Node::EqualsInternal(other)) { return false; }
-  return EqualsInternal(static_cast<const PrimitiveNode*>(other));
-}
-
-void PrimitiveNode::Visit(Node::Visitor* visitor) {
-  visitor->Visit(this);
-}
-
-void PrimitiveNode::VisitConst(Node::ConstVisitor* visitor) const {
-  visitor->Visit(this);
-}
-
-// ----------------------------------------------------------------------
-// Group node
-
-bool GroupNode::EqualsInternal(const GroupNode* other) const {
-  if (this == other) { return true; }
-  if (this->field_count() != other->field_count()) { return false; }
-  for (int i = 0; i < this->field_count(); ++i) {
-    if (!this->field(i)->Equals(other->field(i).get())) { return false; }
-  }
-  return true;
-}
-
-bool GroupNode::Equals(const Node* other) const {
-  if (!Node::EqualsInternal(other)) { return false; }
-  return EqualsInternal(static_cast<const GroupNode*>(other));
-}
-
-void GroupNode::Visit(Node::Visitor* visitor) {
-  visitor->Visit(this);
-}
-
-void GroupNode::VisitConst(Node::ConstVisitor* visitor) const {
-  visitor->Visit(this);
-}
-
-// ----------------------------------------------------------------------
-// Node construction from Parquet metadata
-
-struct NodeParams {
-  explicit NodeParams(const std::string& name) : name(name) {}
-
-  const std::string& name;
-  Repetition::type repetition;
-  LogicalType::type logical_type;
-};
-
-static inline NodeParams GetNodeParams(const format::SchemaElement* element) {
-  NodeParams params(element->name);
-
-  params.repetition = FromThrift(element->repetition_type);
-  if (element->__isset.converted_type) {
-    params.logical_type = FromThrift(element->converted_type);
-  } else {
-    params.logical_type = LogicalType::NONE;
-  }
-  return params;
-}
-
-std::unique_ptr<Node> GroupNode::FromParquet(
-    const void* opaque_element, int node_id, const NodeVector& fields) {
-  const format::SchemaElement* element =
-      static_cast<const format::SchemaElement*>(opaque_element);
-  NodeParams params = GetNodeParams(element);
-  return std::unique_ptr<Node>(new GroupNode(
-      params.name, params.repetition, fields, params.logical_type, node_id));
-}
-
-std::unique_ptr<Node> PrimitiveNode::FromParquet(
-    const void* opaque_element, int node_id) {
-  const format::SchemaElement* element =
-      static_cast<const format::SchemaElement*>(opaque_element);
-  NodeParams params = GetNodeParams(element);
-
-  std::unique_ptr<PrimitiveNode> result =
-      std::unique_ptr<PrimitiveNode>(new PrimitiveNode(params.name, params.repetition,
-          FromThrift(element->type), params.logical_type, element->type_length,
-          element->precision, element->scale, node_id));
-
-  // Return as unique_ptr to the base type
-  return std::unique_ptr<Node>(result.release());
-}
-
-void GroupNode::ToParquet(void* opaque_element) const {
-  format::SchemaElement* element = static_cast<format::SchemaElement*>(opaque_element);
-  element->__set_name(name_);
-  element->__set_num_children(field_count());
-  element->__set_repetition_type(ToThrift(repetition_));
-  if (logical_type_ != LogicalType::NONE) {
-    element->__set_converted_type(ToThrift(logical_type_));
-  }
-}
-
-void PrimitiveNode::ToParquet(void* opaque_element) const {
-  format::SchemaElement* element = static_cast<format::SchemaElement*>(opaque_element);
-
-  element->__set_name(name_);
-  element->__set_num_children(0);
-  element->__set_repetition_type(ToThrift(repetition_));
-  if (logical_type_ != LogicalType::NONE) {
-    element->__set_converted_type(ToThrift(logical_type_));
-  }
-  element->__set_type(ToThrift(physical_type_));
-  if (physical_type_ == Type::FIXED_LEN_BYTE_ARRAY) {
-    element->__set_type_length(type_length_);
-  }
-  if (decimal_metadata_.isset) {
-    element->__set_precision(decimal_metadata_.precision);
-    element->__set_scale(decimal_metadata_.scale);
-  }
-}
-
-}  // namespace schema
-
-}  // namespace parquet

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/13da51d3/src/parquet/schema/types.h
----------------------------------------------------------------------
diff --git a/src/parquet/schema/types.h b/src/parquet/schema/types.h
deleted file mode 100644
index f315480..0000000
--- a/src/parquet/schema/types.h
+++ /dev/null
@@ -1,292 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-// This module contains the logical parquet-cpp types (independent of Thrift
-// structures), schema nodes, and related type tools
-
-#ifndef PARQUET_SCHEMA_TYPES_H
-#define PARQUET_SCHEMA_TYPES_H
-
-#include <cstdint>
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "parquet/types.h"
-#include "parquet/util/macros.h"
-#include "parquet/util/visibility.h"
-
-namespace parquet {
-namespace schema {
-
-// List encodings: using the terminology from Impala to define different styles
-// of representing logical lists (a.k.a. ARRAY types) in Parquet schemas. Since
-// the converted type named in the Parquet metadata is ConvertedType::LIST we
-// use that terminology here. It also helps distinguish from the *_ARRAY
-// primitive types.
-//
-// One-level encoding: Only allows required lists with required cells
-//   repeated value_type name
-//
-// Two-level encoding: Enables optional lists with only required cells
-//   <required/optional> group list
-//     repeated value_type item
-//
-// Three-level encoding: Enables optional lists with optional cells
-//   <required/optional> group bag
-//     repeated group list
-//       <required/optional> value_type item
-//
-// 2- and 1-level encoding are respectively equivalent to 3-level encoding with
-// the non-repeated nodes set to required.
-//
-// The "official" encoding recommended in the Parquet spec is the 3-level, and
-// we use that as the default when creating list types. For semantic completeness
-// we allow the other two. Since all types of encodings will occur "in the
-// wild" we need to be able to interpret the associated definition levels in
-// the context of the actual encoding used in the file.
-//
-// NB: Some Parquet writers may not set ConvertedType::LIST on the repeated
-// SchemaElement, which could make things challenging if we are trying to infer
-// that a sequence of nodes semantically represents an array according to one
-// of these encodings (versus a struct containing an array). We should refuse
-// the temptation to guess, as they say.
-struct ListEncoding {
-  enum type { ONE_LEVEL, TWO_LEVEL, THREE_LEVEL };
-};
-
-struct DecimalMetadata {
-  bool isset;
-  int32_t scale;
-  int32_t precision;
-};
-
-class PARQUET_EXPORT ColumnPath {
- public:
-  ColumnPath() : path_() {}
-  explicit ColumnPath(const std::vector<std::string>& path) : path_(path) {}
-  explicit ColumnPath(std::vector<std::string>&& path) : path_(path) {}
-
-  static std::shared_ptr<ColumnPath> FromDotString(const std::string& dotstring);
-
-  std::shared_ptr<ColumnPath> extend(const std::string& node_name) const;
-  std::string ToDotString() const;
-  const std::vector<std::string>& ToDotVector() const;
-
- protected:
-  std::vector<std::string> path_;
-};
-
-class GroupNode;
-
-// Base class for logical schema types. A type has a name, repetition level,
-// and optionally a logical type (ConvertedType in Parquet metadata parlance)
-class PARQUET_EXPORT Node {
- public:
-  enum type { PRIMITIVE, GROUP };
-
-  Node(Node::type type, const std::string& name, Repetition::type repetition,
-      LogicalType::type logical_type = LogicalType::NONE, int id = -1)
-      : type_(type),
-        name_(name),
-        repetition_(repetition),
-        logical_type_(logical_type),
-        id_(id),
-        parent_(nullptr) {}
-
-  virtual ~Node() {}
-
-  bool is_primitive() const { return type_ == Node::PRIMITIVE; }
-
-  bool is_group() const { return type_ == Node::GROUP; }
-
-  bool is_optional() const { return repetition_ == Repetition::OPTIONAL; }
-
-  bool is_repeated() const { return repetition_ == Repetition::REPEATED; }
-
-  bool is_required() const { return repetition_ == Repetition::REQUIRED; }
-
-  virtual bool Equals(const Node* other) const = 0;
-
-  const std::string& name() const { return name_; }
-
-  Node::type node_type() const { return type_; }
-
-  Repetition::type repetition() const { return repetition_; }
-
-  LogicalType::type logical_type() const { return logical_type_; }
-
-  int id() const { return id_; }
-
-  const Node* parent() const { return parent_; }
-
-  // ToParquet returns an opaque void* to avoid exporting
-  // parquet::SchemaElement into the public API
-  virtual void ToParquet(void* opaque_element) const = 0;
-
-  // Node::Visitor abstract class for walking schemas with the visitor pattern
-  class Visitor {
-   public:
-    virtual ~Visitor() {}
-
-    virtual void Visit(Node* node) = 0;
-  };
-  class ConstVisitor {
-   public:
-    virtual ~ConstVisitor() {}
-
-    virtual void Visit(const Node* node) = 0;
-  };
-
-  virtual void Visit(Visitor* visitor) = 0;
-  virtual void VisitConst(ConstVisitor* visitor) const = 0;
-
- protected:
-  friend class GroupNode;
-
-  Node::type type_;
-  std::string name_;
-  Repetition::type repetition_;
-  LogicalType::type logical_type_;
-  int id_;
-  // Nodes should not be shared, they have a single parent.
-  const Node* parent_;
-
-  bool EqualsInternal(const Node* other) const;
-  void SetParent(const Node* p_parent);
-};
-
-// Save our breath all over the place with these typedefs
-typedef std::shared_ptr<Node> NodePtr;
-typedef std::vector<NodePtr> NodeVector;
-
-// A type that is one of the primitive Parquet storage types. In addition to
-// the other type metadata (name, repetition level, logical type), also has the
-// physical storage type and their type-specific metadata (byte width, decimal
-// parameters)
-class PARQUET_EXPORT PrimitiveNode : public Node {
- public:
-  // FromParquet accepts an opaque void* to avoid exporting
-  // parquet::SchemaElement into the public API
-  static std::unique_ptr<Node> FromParquet(const void* opaque_element, int id);
-
-  static inline NodePtr Make(const std::string& name, Repetition::type repetition,
-      Type::type type, LogicalType::type logical_type = LogicalType::NONE,
-      int length = -1, int precision = -1, int scale = -1) {
-    return NodePtr(new PrimitiveNode(
-        name, repetition, type, logical_type, length, precision, scale));
-  }
-
-  bool Equals(const Node* other) const override;
-
-  Type::type physical_type() const { return physical_type_; }
-
-  int32_t type_length() const { return type_length_; }
-
-  const DecimalMetadata& decimal_metadata() const { return decimal_metadata_; }
-
-  void ToParquet(void* opaque_element) const override;
-  void Visit(Visitor* visitor) override;
-  void VisitConst(ConstVisitor* visitor) const override;
-
- private:
-  PrimitiveNode(const std::string& name, Repetition::type repetition, Type::type type,
-      LogicalType::type logical_type = LogicalType::NONE, int length = -1,
-      int precision = -1, int scale = -1, int id = -1);
-
-  Type::type physical_type_;
-  int32_t type_length_;
-  DecimalMetadata decimal_metadata_;
-
-  // For FIXED_LEN_BYTE_ARRAY
-  void SetTypeLength(int32_t length) { type_length_ = length; }
-
-  // For Decimal logical type: Precision and scale
-  void SetDecimalMetadata(int32_t scale, int32_t precision) {
-    decimal_metadata_.scale = scale;
-    decimal_metadata_.precision = precision;
-  }
-
-  bool EqualsInternal(const PrimitiveNode* other) const;
-
-  FRIEND_TEST(TestPrimitiveNode, Attrs);
-  FRIEND_TEST(TestPrimitiveNode, Equals);
-  FRIEND_TEST(TestPrimitiveNode, PhysicalLogicalMapping);
-  FRIEND_TEST(TestPrimitiveNode, FromParquet);
-};
-
-class PARQUET_EXPORT GroupNode : public Node {
- public:
-  // Like PrimitiveNode, GroupNode::FromParquet accepts an opaque void* to avoid exporting
-  // parquet::SchemaElement into the public API
-  static std::unique_ptr<Node> FromParquet(
-      const void* opaque_element, int id, const NodeVector& fields);
-
-  static inline NodePtr Make(const std::string& name, Repetition::type repetition,
-      const NodeVector& fields, LogicalType::type logical_type = LogicalType::NONE) {
-    return NodePtr(new GroupNode(name, repetition, fields, logical_type));
-  }
-
-  bool Equals(const Node* other) const override;
-
-  const NodePtr& field(int i) const { return fields_[i]; }
-
-  int field_count() const { return fields_.size(); }
-
-  void ToParquet(void* opaque_element) const override;
-  void Visit(Visitor* visitor) override;
-  void VisitConst(ConstVisitor* visitor) const override;
-
- private:
-  GroupNode(const std::string& name, Repetition::type repetition,
-      const NodeVector& fields, LogicalType::type logical_type = LogicalType::NONE,
-      int id = -1)
-      : Node(Node::GROUP, name, repetition, logical_type, id), fields_(fields) {
-    for (NodePtr& field : fields_) {
-      field->SetParent(this);
-    }
-  }
-
-  NodeVector fields_;
-  bool EqualsInternal(const GroupNode* other) const;
-
-  FRIEND_TEST(TestGroupNode, Attrs);
-  FRIEND_TEST(TestGroupNode, Equals);
-};
-
-// ----------------------------------------------------------------------
-// Convenience primitive type factory functions
-
-#define PRIMITIVE_FACTORY(FuncName, TYPE)                                            \
-  static inline NodePtr FuncName(                                                    \
-      const std::string& name, Repetition::type repetition = Repetition::OPTIONAL) { \
-    return PrimitiveNode::Make(name, repetition, Type::TYPE);                        \
-  }
-
-PRIMITIVE_FACTORY(Boolean, BOOLEAN);
-PRIMITIVE_FACTORY(Int32, INT32);
-PRIMITIVE_FACTORY(Int64, INT64);
-PRIMITIVE_FACTORY(Int96, INT96);
-PRIMITIVE_FACTORY(Float, FLOAT);
-PRIMITIVE_FACTORY(Double, DOUBLE);
-PRIMITIVE_FACTORY(ByteArray, BYTE_ARRAY);
-
-}  // namespace schema
-
-}  // namespace parquet
-
-#endif  // PARQUET_SCHEMA_TYPES_H

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/13da51d3/src/parquet/util/comparison-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/util/comparison-test.cc b/src/parquet/util/comparison-test.cc
index d2689ff..ec85485 100644
--- a/src/parquet/util/comparison-test.cc
+++ b/src/parquet/util/comparison-test.cc
@@ -21,7 +21,7 @@
 #include <iostream>
 #include <vector>
 
-#include "parquet/schema/descriptor.h"
+#include "parquet/schema.h"
 #include "parquet/types.h"
 #include "parquet/util/comparison.h"
 

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/13da51d3/src/parquet/util/comparison.h
----------------------------------------------------------------------
diff --git a/src/parquet/util/comparison.h b/src/parquet/util/comparison.h
index 5ca7520..103f4c5 100644
--- a/src/parquet/util/comparison.h
+++ b/src/parquet/util/comparison.h
@@ -20,7 +20,7 @@
 
 #include <algorithm>
 
-#include "parquet/schema/descriptor.h"
+#include "parquet/schema.h"
 #include "parquet/types.h"
 
 namespace parquet {


Mime
View raw message