parquet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From w...@apache.org
Subject [1/2] parquet-cpp git commit: PARQUET-593: Add API for writing Page statistics
Date Mon, 03 Oct 2016 17:40:43 GMT
Repository: parquet-cpp
Updated Branches:
  refs/heads/master 7abb9c476 -> 176b08c30


http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/176b08c3/src/parquet/types.h
----------------------------------------------------------------------
diff --git a/src/parquet/types.h b/src/parquet/types.h
index a4285be..520326b 100644
--- a/src/parquet/types.h
+++ b/src/parquet/types.h
@@ -117,7 +117,7 @@ struct PageType {
 // ----------------------------------------------------------------------
 
 struct ByteArray {
-  ByteArray() {}
+  ByteArray() : len(0), ptr(nullptr) {}
   ByteArray(uint32_t len, const uint8_t* ptr) : len(len), ptr(ptr) {}
   uint32_t len;
   const uint8_t* ptr;
@@ -132,7 +132,7 @@ struct ByteArray {
 };
 
 struct FixedLenByteArray {
-  FixedLenByteArray() {}
+  FixedLenByteArray() : ptr(nullptr) {}
   explicit FixedLenByteArray(const uint8_t* ptr) : ptr(ptr) {}
   const uint8_t* ptr;
 };

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/176b08c3/src/parquet/util/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/parquet/util/CMakeLists.txt b/src/parquet/util/CMakeLists.txt
index 52c4811..3a4b1c9 100644
--- a/src/parquet/util/CMakeLists.txt
+++ b/src/parquet/util/CMakeLists.txt
@@ -71,6 +71,7 @@ endif()
 
 ADD_PARQUET_TEST(bit-util-test)
 ADD_PARQUET_TEST(buffer-test)
+ADD_PARQUET_TEST(comparison-test)
 ADD_PARQUET_TEST(input-output-test)
 ADD_PARQUET_TEST(mem-allocator-test)
 ADD_PARQUET_TEST(mem-pool-test)

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/176b08c3/src/parquet/util/comparison-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/util/comparison-test.cc b/src/parquet/util/comparison-test.cc
new file mode 100644
index 0000000..d2689ff
--- /dev/null
+++ b/src/parquet/util/comparison-test.cc
@@ -0,0 +1,90 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+
+#include <cstdint>
+#include <iostream>
+#include <vector>
+
+#include "parquet/schema/descriptor.h"
+#include "parquet/types.h"
+#include "parquet/util/comparison.h"
+
+namespace parquet {
+
+namespace test {
+
+using parquet::schema::NodePtr;
+using parquet::schema::PrimitiveNode;
+
+static ByteArray ByteArrayFromString(const std::string& s) {
+  auto ptr = reinterpret_cast<const uint8_t*>(s.data());
+  return ByteArray(s.size(), ptr);
+}
+
+static FLBA FLBAFromString(const std::string& s) {
+  auto ptr = reinterpret_cast<const uint8_t*>(s.data());
+  return FLBA(ptr);
+}
+
+TEST(Comparison, ByteArray) {
+  NodePtr node = PrimitiveNode::Make("bytearray", Repetition::REQUIRED, Type::BYTE_ARRAY);
+  ColumnDescriptor descr(node, 0, 0);
+  Compare<parquet::ByteArray> less(&descr);
+
+  std::string a = "arrange";
+  std::string b = "arrangement";
+  auto arr1 = ByteArrayFromString(a);
+  auto arr2 = ByteArrayFromString(b);
+  ASSERT_TRUE(less(arr1, arr2));
+
+  a = u8"braten";
+  b = u8"bügeln";
+  auto arr3 = ByteArrayFromString(a);
+  auto arr4 = ByteArrayFromString(b);
+  // see PARQUET-686 discussion about binary comparison
+  ASSERT_TRUE(!less(arr3, arr4));
+}
+
+TEST(Comparison, FLBA) {
+  std::string a = "Antidisestablishmentarianism";
+  std::string b = "Bundesgesundheitsministerium";
+  auto arr1 = FLBAFromString(a);
+  auto arr2 = FLBAFromString(b);
+
+  NodePtr node = PrimitiveNode::Make("FLBA", Repetition::REQUIRED,
+      Type::FIXED_LEN_BYTE_ARRAY, LogicalType::NONE, a.size());
+  ColumnDescriptor descr(node, 0, 0);
+  Compare<parquet::FixedLenByteArray> less(&descr);
+  ASSERT_TRUE(less(arr1, arr2));
+}
+
+TEST(Comparison, Int96) {
+  parquet::Int96 a{{1, 41, 14}}, b{{1, 41, 42}};
+
+  NodePtr node = PrimitiveNode::Make("int96", Repetition::REQUIRED, Type::INT96);
+  ColumnDescriptor descr(node, 0, 0);
+  Compare<parquet::Int96> less(&descr);
+  ASSERT_TRUE(less(a, b));
+  b.value[2] = 14;
+  ASSERT_TRUE(!less(a, b) && !less(b, a));
+}
+
+}  // namespace test
+
+}  // namespace parquet

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/176b08c3/src/parquet/util/comparison.h
----------------------------------------------------------------------
diff --git a/src/parquet/util/comparison.h b/src/parquet/util/comparison.h
new file mode 100644
index 0000000..9d44e7e
--- /dev/null
+++ b/src/parquet/util/comparison.h
@@ -0,0 +1,60 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef PARQUET_UTIL_COMPARISON_H
+#define PARQUET_UTIL_COMPARISON_H
+
+#include <algorithm>
+
+#include "parquet/types.h"
+#include "parquet/schema/descriptor.h"
+
+namespace parquet {
+
+template <typename T>
+struct Compare {
+  explicit Compare(const ColumnDescriptor* descr) : type_length_(descr->type_length()) {}
+
+  inline bool operator()(const T& a, const T& b) { return a < b; }
+
+ private:
+  int32_t type_length_;
+};
+
+template <>
+inline bool Compare<Int96>::operator()(const Int96& a, const Int96& b) {
+  return std::lexicographical_compare(a.value, a.value + 3, b.value, b.value + 3);
+}
+
+template <>
+inline bool Compare<ByteArray>::operator()(const ByteArray& a, const ByteArray& b) {
+  auto aptr = reinterpret_cast<const int8_t*>(a.ptr);
+  auto bptr = reinterpret_cast<const int8_t*>(b.ptr);
+  return std::lexicographical_compare(aptr, aptr + a.len, bptr, bptr + b.len);
+}
+
+template <>
+inline bool Compare<FLBA>::operator()(const FLBA& a, const FLBA& b) {
+  auto aptr = reinterpret_cast<const int8_t*>(a.ptr);
+  auto bptr = reinterpret_cast<const int8_t*>(b.ptr);
+  return std::lexicographical_compare(
+      aptr, aptr + type_length_, bptr, bptr + type_length_);
+}
+
+}  // namespace parquet
+
+#endif  // PARQUET_UTIL_COMPARISON_H

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/176b08c3/src/parquet/util/test-common.h
----------------------------------------------------------------------
diff --git a/src/parquet/util/test-common.h b/src/parquet/util/test-common.h
index 2327aeb..edadb53 100644
--- a/src/parquet/util/test-common.h
+++ b/src/parquet/util/test-common.h
@@ -32,8 +32,7 @@ namespace parquet {
 namespace test {
 
 typedef ::testing::Types<BooleanType, Int32Type, Int64Type, Int96Type, FloatType,
-    DoubleType, ByteArrayType, FLBAType>
-    ParquetTypes;
+    DoubleType, ByteArrayType, FLBAType> ParquetTypes;
 
 template <typename T>
 static inline void assert_vector_equal(const vector<T>& left, const vector<T>& right) {


Mime
View raw message