parquet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jul...@apache.org
Subject [1/2] parquet-cpp git commit: PARQUET-446: Hide Thrift compiled headers and Boost from public API, #include scrubbing
Date Mon, 15 Feb 2016 23:55:23 GMT
Repository: parquet-cpp
Updated Branches:
  refs/heads/master 05cd4ec28 -> b71e826f0


http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b71e826f/src/parquet/schema/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/parquet/schema/CMakeLists.txt b/src/parquet/schema/CMakeLists.txt
index 0902ccf..8aa9969 100644
--- a/src/parquet/schema/CMakeLists.txt
+++ b/src/parquet/schema/CMakeLists.txt
@@ -17,9 +17,8 @@
 
 # Headers: top level
 install(FILES
-  builder.h
-  converter.h
   descriptor.h
+  printer.h
   types.h
   DESTINATION include/parquet/schema)
 

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b71e826f/src/parquet/schema/converter.cc
----------------------------------------------------------------------
diff --git a/src/parquet/schema/converter.cc b/src/parquet/schema/converter.cc
index 9b45cc9..9eb59b0 100644
--- a/src/parquet/schema/converter.cc
+++ b/src/parquet/schema/converter.cc
@@ -17,9 +17,10 @@
 
 #include "parquet/schema/converter.h"
 
-#include <string>
-
 #include "parquet/exception.h"
+#include "parquet/schema/descriptor.h"
+#include "parquet/schema/types.h"
+#include "parquet/thrift/parquet_types.h"
 
 using parquet::SchemaElement;
 
@@ -46,7 +47,7 @@ std::unique_ptr<Node> FlatSchemaConverter::Convert() {
 std::unique_ptr<Node> FlatSchemaConverter::NextNode() {
   const SchemaElement& element = Next();
 
-  size_t node_id = next_id();
+  int node_id = next_id();
 
   const void* opaque_element = static_cast<const void*>(&element);
 
@@ -56,7 +57,7 @@ std::unique_ptr<Node> FlatSchemaConverter::NextNode() {
   } else {
     // Group
     NodeVector fields;
-    for (size_t i = 0; i < element.num_children; ++i) {
+    for (int i = 0; i < element.num_children; ++i) {
       std::unique_ptr<Node> field = NextNode();
       fields.push_back(NodePtr(field.release()));
     }
@@ -82,25 +83,6 @@ std::shared_ptr<SchemaDescriptor> FromParquet(const std::vector<SchemaElement>&
   return descr;
 }
 
-// ----------------------------------------------------------------------
-// Conversion back to Parquet metadata
-
-// TODO: decide later what to do with these. When converting back only need to
-// write into a parquet::SchemaElement
-
-// FieldRepetitionType::type ToParquet(Repetition::type type) {
-//   return static_cast<FieldRepetitionType::type>(type);
-// }
-
-// parquet::ConvertedType::type ToParquet(LogicalType::type type) {
-//   // item 0 is NONE
-//   return static_cast<parquet::ConvertedType::type>(static_cast<int>(type) - 1);
-// }
-
-// parquet::Type::type ToParquet(Type::type type) {
-//   return static_cast<parquet::Type::type>(type);
-// }
-
 } // namespace schema
 
 } // namespace parquet_cpp

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b71e826f/src/parquet/schema/converter.h
----------------------------------------------------------------------
diff --git a/src/parquet/schema/converter.h b/src/parquet/schema/converter.h
index cde48c9..055eb69 100644
--- a/src/parquet/schema/converter.h
+++ b/src/parquet/schema/converter.h
@@ -25,20 +25,20 @@
 #ifndef PARQUET_SCHEMA_CONVERTER_H
 #define PARQUET_SCHEMA_CONVERTER_H
 
-#include <cstdint>
 #include <memory>
-#include <unordered_map>
 #include <vector>
 
-#include "parquet/schema/descriptor.h"
-#include "parquet/schema/types.h"
-
-#include "parquet/thrift/parquet_types.h"
+namespace parquet { class SchemaElement;}
 
 namespace parquet_cpp {
 
+class SchemaDescriptor;
+
 namespace schema {
 
+class GroupNode;
+class Node;
+
 // ----------------------------------------------------------------------
 // Conversion from Parquet Thrift metadata
 
@@ -47,7 +47,7 @@ std::shared_ptr<SchemaDescriptor> FromParquet(
 
 class FlatSchemaConverter {
  public:
-  FlatSchemaConverter(const parquet::SchemaElement* elements, size_t length) :
+  FlatSchemaConverter(const parquet::SchemaElement* elements, int length) :
       elements_(elements),
       length_(length),
       pos_(0),
@@ -57,11 +57,11 @@ class FlatSchemaConverter {
 
  private:
   const parquet::SchemaElement* elements_;
-  size_t length_;
-  size_t pos_;
-  size_t current_id_;
+  int length_;
+  int pos_;
+  int current_id_;
 
-  size_t next_id() {
+  int next_id() {
     return current_id_++;
   }
 

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b71e826f/src/parquet/schema/descriptor.h
----------------------------------------------------------------------
diff --git a/src/parquet/schema/descriptor.h b/src/parquet/schema/descriptor.h
index d27dcc1..7991dea 100644
--- a/src/parquet/schema/descriptor.h
+++ b/src/parquet/schema/descriptor.h
@@ -19,12 +19,14 @@
 #define PARQUET_SCHEMA_DESCRIPTOR_H
 
 #include <cstdint>
+#include <cstdlib>
 #include <memory>
 #include <string>
 #include <unordered_map>
 #include <vector>
 
 #include "parquet/schema/types.h"
+#include "parquet/types.h"
 
 namespace parquet_cpp {
 

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b71e826f/src/parquet/schema/printer.cc
----------------------------------------------------------------------
diff --git a/src/parquet/schema/printer.cc b/src/parquet/schema/printer.cc
index 9c43e8e..2aa2940 100644
--- a/src/parquet/schema/printer.cc
+++ b/src/parquet/schema/printer.cc
@@ -17,15 +17,19 @@
 
 #include "parquet/schema/printer.h"
 
+#include <memory>
 #include <string>
 
+#include "parquet/schema/types.h"
+#include "parquet/types.h"
+
 namespace parquet_cpp {
 
 namespace schema {
 
 class SchemaPrinter : public Node::Visitor {
  public:
-  explicit SchemaPrinter(std::ostream& stream, size_t indent_width) :
+  explicit SchemaPrinter(std::ostream& stream, int indent_width) :
       stream_(stream),
       indent_(0),
       indent_width_(2) {}
@@ -40,8 +44,8 @@ class SchemaPrinter : public Node::Visitor {
 
   std::ostream& stream_;
 
-  size_t indent_;
-  size_t indent_width_;
+  int indent_;
+  int indent_width_;
 };
 
 static void PrintRepLevel(Repetition::type repetition, std::ostream& stream) {
@@ -103,7 +107,7 @@ void SchemaPrinter::Visit(const GroupNode* node) {
   stream_ << " group " << node->name() << " {" << std::endl;
 
   indent_ += indent_width_;
-  for (size_t i = 0; i < node->field_count(); ++i) {
+  for (int i = 0; i < node->field_count(); ++i) {
     node->field(i)->Visit(this);
   }
   indent_ -= indent_width_;
@@ -129,7 +133,7 @@ void SchemaPrinter::Visit(const Node* node) {
 }
 
 void PrintSchema(const Node* schema, std::ostream& stream,
-    size_t indent_width) {
+    int indent_width) {
   SchemaPrinter printer(stream, indent_width);
   printer.Visit(schema);
 }

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b71e826f/src/parquet/schema/printer.h
----------------------------------------------------------------------
diff --git a/src/parquet/schema/printer.h b/src/parquet/schema/printer.h
index 535262f..6df78d0 100644
--- a/src/parquet/schema/printer.h
+++ b/src/parquet/schema/printer.h
@@ -20,16 +20,16 @@
 #ifndef PARQUET_SCHEMA_PRINTER_H
 #define PARQUET_SCHEMA_PRINTER_H
 
-#include "parquet/schema/types.h"
-
 #include <ostream>
 
 namespace parquet_cpp {
 
 namespace schema {
 
+class Node;
+
 void PrintSchema(const Node* schema, std::ostream& stream,
-    size_t indent_width = 2);
+    int indent_width = 2);
 
 } // namespace schema
 

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b71e826f/src/parquet/schema/schema-converter-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/schema/schema-converter-test.cc b/src/parquet/schema/schema-converter-test.cc
index f2dadf2..93cfd24 100644
--- a/src/parquet/schema/schema-converter-test.cc
+++ b/src/parquet/schema/schema-converter-test.cc
@@ -15,17 +15,19 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include <cstdint>
+#include <cstdlib>
+#include <memory>
 #include <string>
 #include <vector>
 
 #include <gtest/gtest.h>
-#include "parquet/util/test-common.h"
 
+#include "parquet/exception.h"
 #include "parquet/schema/converter.h"
-#include "parquet/thrift/parquet_types.h"
-
 #include "parquet/schema/test-util.h"
+#include "parquet/schema/types.h"
+#include "parquet/thrift/parquet_types.h"
+#include "parquet/types.h"
 
 using std::string;
 using std::vector;

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b71e826f/src/parquet/schema/schema-descriptor-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/schema/schema-descriptor-test.cc b/src/parquet/schema/schema-descriptor-test.cc
index 1328bed..c63df54 100644
--- a/src/parquet/schema/schema-descriptor-test.cc
+++ b/src/parquet/schema/schema-descriptor-test.cc
@@ -18,16 +18,15 @@
 // Schema / column descriptor correctness tests (from flat Parquet schemas)
 
 #include <cstdint>
+#include <cstdlib>
 #include <string>
 #include <vector>
 
 #include <gtest/gtest.h>
-#include "parquet/util/test-common.h"
 
-#include "parquet/schema/converter.h"
+#include "parquet/exception.h"
 #include "parquet/schema/descriptor.h"
-
-#include "parquet/thrift/parquet_types.h"
+#include "parquet/schema/types.h"
 
 using std::string;
 using std::vector;

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b71e826f/src/parquet/schema/schema-printer-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/schema/schema-printer-test.cc b/src/parquet/schema/schema-printer-test.cc
index c21429a..094829b 100644
--- a/src/parquet/schema/schema-printer-test.cc
+++ b/src/parquet/schema/schema-printer-test.cc
@@ -15,15 +15,15 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include <cstdint>
+#include <gtest/gtest.h>
+
+#include <iosfwd>
 #include <string>
 #include <vector>
 
-#include <gtest/gtest.h>
-#include "parquet/util/test-common.h"
-
 #include "parquet/schema/printer.h"
-#include "parquet/schema/test-util.h"
+#include "parquet/schema/types.h"
+#include "parquet/types.h"
 
 using std::string;
 using std::vector;

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b71e826f/src/parquet/schema/schema-types-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/schema/schema-types-test.cc b/src/parquet/schema/schema-types-test.cc
index 72d38c0..cac7dc5 100644
--- a/src/parquet/schema/schema-types-test.cc
+++ b/src/parquet/schema/schema-types-test.cc
@@ -15,15 +15,16 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include <cstdint>
+#include <gtest/gtest.h>
+
+#include <memory>
 #include <string>
 #include <vector>
 
-#include <gtest/gtest.h>
-#include "parquet/util/test-common.h"
-
-#include "parquet/schema/types.h"
 #include "parquet/schema/test-util.h"
+#include "parquet/schema/types.h"
+#include "parquet/thrift/parquet_types.h"
+#include "parquet/types.h"
 
 using std::string;
 using std::vector;

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b71e826f/src/parquet/schema/types.cc
----------------------------------------------------------------------
diff --git a/src/parquet/schema/types.cc b/src/parquet/schema/types.cc
index e088eed..fae7c84 100644
--- a/src/parquet/schema/types.cc
+++ b/src/parquet/schema/types.cc
@@ -19,7 +19,9 @@
 
 #include <memory>
 
+#include "parquet/exception.h"
 #include "parquet/thrift/parquet_types.h"
+#include "parquet/thrift/util.h"
 
 namespace parquet_cpp {
 
@@ -72,7 +74,7 @@ bool GroupNode::EqualsInternal(const GroupNode* other) const {
   if (this->field_count() != other->field_count()) {
     return false;
   }
-  for (size_t i = 0; i < this->field_count(); ++i) {
+  for (int i = 0; i < this->field_count(); ++i) {
     if (!this->field(i)->Equals(other->field(i).get())) {
       return false;
     }
@@ -94,19 +96,6 @@ void GroupNode::Visit(Node::Visitor* visitor) {
 // ----------------------------------------------------------------------
 // Node construction from Parquet metadata
 
-static Type::type ConvertEnum(parquet::Type::type type) {
-  return static_cast<Type::type>(type);
-}
-
-static LogicalType::type ConvertEnum(parquet::ConvertedType::type type) {
-  // item 0 is NONE
-  return static_cast<LogicalType::type>(static_cast<int>(type) + 1);
-}
-
-static Repetition::type ConvertEnum(parquet::FieldRepetitionType::type type) {
-  return static_cast<Repetition::type>(type);
-}
-
 struct NodeParams {
   explicit NodeParams(const std::string& name) :
       name(name) {}
@@ -119,9 +108,9 @@ struct NodeParams {
 static inline NodeParams GetNodeParams(const parquet::SchemaElement* element) {
   NodeParams params(element->name);
 
-  params.repetition = ConvertEnum(element->repetition_type);
+  params.repetition = FromThrift(element->repetition_type);
   if (element->__isset.converted_type) {
-    params.logical_type = ConvertEnum(element->converted_type);
+    params.logical_type = FromThrift(element->converted_type);
   } else {
     params.logical_type = LogicalType::NONE;
   }
@@ -145,7 +134,7 @@ std::unique_ptr<Node> PrimitiveNode::FromParquet(const void* opaque_element,
 
   std::unique_ptr<PrimitiveNode> result = std::unique_ptr<PrimitiveNode>(
       new PrimitiveNode(params.name, params.repetition,
-          ConvertEnum(element->type), params.logical_type, node_id));
+          FromThrift(element->type), params.logical_type, node_id));
 
   if (element->type == parquet::Type::FIXED_LEN_BYTE_ARRAY) {
     result->SetTypeLength(element->type_length);

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b71e826f/src/parquet/schema/types.h
----------------------------------------------------------------------
diff --git a/src/parquet/schema/types.h b/src/parquet/schema/types.h
index 82db233..83b9fd2 100644
--- a/src/parquet/schema/types.h
+++ b/src/parquet/schema/types.h
@@ -26,7 +26,6 @@
 #include <string>
 #include <vector>
 
-#include "parquet/exception.h"
 #include "parquet/types.h"
 #include "parquet/util/macros.h"
 
@@ -254,11 +253,11 @@ class GroupNode : public Node {
 
   virtual bool Equals(const Node* other) const;
 
-  const NodePtr& field(size_t i) const {
+  const NodePtr& field(int i) const {
     return fields_[i];
   }
 
-  size_t field_count() const {
+  int field_count() const {
     return fields_.size();
   }
 

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b71e826f/src/parquet/thrift/serializer-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/thrift/serializer-test.cc b/src/parquet/thrift/serializer-test.cc
index e89b108..756fd10 100644
--- a/src/parquet/thrift/serializer-test.cc
+++ b/src/parquet/thrift/serializer-test.cc
@@ -15,18 +15,15 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include <cstdlib>
-#include <iostream>
-#include <sstream>
-#include <string>
-
 #include <gtest/gtest.h>
 
+#include <cstdint>
+#include <exception>
+#include <string>
+
+#include "parquet/column/test-util.h"
 #include "parquet/thrift/parquet_types.h"
 #include "parquet/thrift/util.h"
-#include "parquet/column/page.h"
-#include "parquet/column/reader.h"
-#include "parquet/column/test-util.h"
 
 using std::string;
 
@@ -59,12 +56,12 @@ TEST_F(TestThrift, TestSerializerDeserializer) {
   uint32_t header_size = 1024;
   // Deserialize the serialized page buffer
   ASSERT_NO_THROW(DeserializeThriftMsg(reinterpret_cast<const uint8_t*>(serialized_buffer.c_str()),
-      &header_size, &out_page_header));  
+      &header_size, &out_page_header));
   ASSERT_LE(stats_size, header_size);
   ASSERT_GE(max_header_len, header_size);
 
   ASSERT_EQ(parquet::Encoding::PLAIN, out_page_header.data_page_header.encoding);
-  ASSERT_EQ(parquet::Encoding::RLE, out_page_header.data_page_header.definition_level_encoding);  
+  ASSERT_EQ(parquet::Encoding::RLE, out_page_header.data_page_header.definition_level_encoding);
   ASSERT_EQ(parquet::Encoding::RLE, out_page_header.data_page_header.repetition_level_encoding);
   for(int i = 0; i < stats_size; i++){
     EXPECT_EQ(i % 255, (reinterpret_cast<const uint8_t*>

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b71e826f/src/parquet/thrift/util.h
----------------------------------------------------------------------
diff --git a/src/parquet/thrift/util.h b/src/parquet/thrift/util.h
index a472dc2..8c34197 100644
--- a/src/parquet/thrift/util.h
+++ b/src/parquet/thrift/util.h
@@ -17,11 +17,39 @@
 #include <thrift/transport/TBufferTransports.h>
 #include <sstream>
 
-#include "parquet/util/logging.h"
 #include "parquet/exception.h"
+#include "parquet/util/logging.h"
+#include "parquet/thrift/parquet_types.h"
 
 namespace parquet_cpp {
 
+// ----------------------------------------------------------------------
+// Convert Thrift enums to / from parquet_cpp enums
+
+static inline Type::type FromThrift(parquet::Type::type type) {
+  return static_cast<Type::type>(type);
+}
+
+static inline LogicalType::type FromThrift(parquet::ConvertedType::type type) {
+  // item 0 is NONE
+  return static_cast<LogicalType::type>(static_cast<int>(type) + 1);
+}
+
+static inline Repetition::type FromThrift(parquet::FieldRepetitionType::type type) {
+  return static_cast<Repetition::type>(type);
+}
+
+static inline Encoding::type FromThrift(parquet::Encoding::type type) {
+  return static_cast<Encoding::type>(type);
+}
+
+static inline Compression::type FromThrift(parquet::CompressionCodec::type type) {
+  return static_cast<Compression::type>(type);
+}
+
+// ----------------------------------------------------------------------
+// Thrift struct serialization / deserialization utilities
+
 // Deserialize a thrift message from buf/len.  buf/len must at least contain
 // all the bytes needed to store the thrift message.  On return, len will be
 // set to the actual length of the header.

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b71e826f/src/parquet/types.h
----------------------------------------------------------------------
diff --git a/src/parquet/types.h b/src/parquet/types.h
index 2d15cad..8c5e123 100644
--- a/src/parquet/types.h
+++ b/src/parquet/types.h
@@ -108,7 +108,6 @@ struct Encoding {
 // Compression, mirrors parquet::CompressionCodec
 struct Compression {
   enum type {
-    NONE,
     UNCOMPRESSED,
     SNAPPY,
     GZIP,

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b71e826f/src/parquet/util/bit-util.h
----------------------------------------------------------------------
diff --git a/src/parquet/util/bit-util.h b/src/parquet/util/bit-util.h
index eac5346..714911c 100644
--- a/src/parquet/util/bit-util.h
+++ b/src/parquet/util/bit-util.h
@@ -26,7 +26,7 @@
 #include <endian.h>
 #endif
 
-#include <boost/type_traits/make_unsigned.hpp>
+#include <cstdint>
 
 #include "parquet/util/compiler-util.h"
 #include "parquet/util/cpu-info.h"
@@ -34,10 +34,35 @@
 
 namespace parquet_cpp {
 
-using boost::make_unsigned;
+// TODO(wesm): The source from Impala was depending on boost::make_unsigned
+//
+// We add a partial stub implementation here
+
+template <typename T>
+struct make_unsigned {
+};
+
+template <>
+struct make_unsigned<int8_t> {
+  typedef uint8_t type;
+};
+
+template <>
+struct make_unsigned<int16_t> {
+  typedef uint16_t type;
+};
+
+template <>
+struct make_unsigned<int32_t> {
+  typedef uint32_t type;
+};
+
+template <>
+struct make_unsigned<int64_t> {
+  typedef uint64_t type;
+};
 
 /// Utility class to do standard bit tricks
-/// TODO: is this in boost or something else like that?
 class BitUtil {
  public:
   /// Returns the ceil of value/divisor

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b71e826f/src/parquet/util/input.h
----------------------------------------------------------------------
diff --git a/src/parquet/util/input.h b/src/parquet/util/input.h
index 4fd9cd7..d9b07fd 100644
--- a/src/parquet/util/input.h
+++ b/src/parquet/util/input.h
@@ -18,6 +18,7 @@
 #ifndef PARQUET_UTIL_INPUT_H
 #define PARQUET_UTIL_INPUT_H
 
+#include <stdio.h>
 #include <cstdint>
 #include <memory>
 #include <string>

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b71e826f/src/parquet/util/output-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/util/output-test.cc b/src/parquet/util/output-test.cc
index 84f5b57..5fbca4a 100644
--- a/src/parquet/util/output-test.cc
+++ b/src/parquet/util/output-test.cc
@@ -15,10 +15,12 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include <memory>
-
 #include <gtest/gtest.h>
 
+#include <cstdint>
+#include <memory>
+#include <vector>
+
 #include "parquet/util/output.h"
 #include "parquet/util/test-common.h"
 

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b71e826f/src/parquet/util/output.cc
----------------------------------------------------------------------
diff --git a/src/parquet/util/output.cc b/src/parquet/util/output.cc
index 9748a69..f0c8989 100644
--- a/src/parquet/util/output.cc
+++ b/src/parquet/util/output.cc
@@ -17,9 +17,7 @@
 
 #include "parquet/util/output.h"
 
-#include <algorithm>
 #include <cstring>
-#include <sstream>
 
 #include "parquet/exception.h"
 

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/b71e826f/src/parquet/util/output.h
----------------------------------------------------------------------
diff --git a/src/parquet/util/output.h b/src/parquet/util/output.h
index e83b261..be25abd 100644
--- a/src/parquet/util/output.h
+++ b/src/parquet/util/output.h
@@ -19,7 +19,6 @@
 #define PARQUET_UTIL_OUTPUT_H
 
 #include <cstdint>
-#include <memory>
 #include <vector>
 
 namespace parquet_cpp {


Mime
View raw message