parquet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From u..@apache.org
Subject parquet-cpp git commit: PARQUET-1108: Fix Int96 comparators
Date Thu, 21 Sep 2017 07:31:24 GMT
Repository: parquet-cpp
Updated Branches:
  refs/heads/master c5b694323 -> 96f868f78


PARQUET-1108: Fix Int96 comparators

Int96 must be treated as a hardware intrinsic type. Therefore, the comparator for little-endian
must start from the end.

Author: Deepak Majeti <deepak.majeti@hpe.com>

Closes #399 from majetideepak/PARQUET-1108 and squashes the following commits:

352522a [Deepak Majeti] PARQUET-1108: Fix Int96 comparators


Project: http://git-wip-us.apache.org/repos/asf/parquet-cpp/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-cpp/commit/96f868f7
Tree: http://git-wip-us.apache.org/repos/asf/parquet-cpp/tree/96f868f7
Diff: http://git-wip-us.apache.org/repos/asf/parquet-cpp/diff/96f868f7

Branch: refs/heads/master
Commit: 96f868f7817275f06ded618731c01d6f861bd8b6
Parents: c5b6943
Author: Deepak Majeti <deepak.majeti@hpe.com>
Authored: Thu Sep 21 09:31:19 2017 +0200
Committer: Uwe L. Korn <uwe@apache.org>
Committed: Thu Sep 21 09:31:19 2017 +0200

----------------------------------------------------------------------
 src/parquet/util/comparison-test.cc | 30 ++++++++++++++++++++++++++++--
 src/parquet/util/comparison.h       | 22 ++++++++++++++++------
 2 files changed, 44 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/96f868f7/src/parquet/util/comparison-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/util/comparison-test.cc b/src/parquet/util/comparison-test.cc
index dc915bf..c05f934 100644
--- a/src/parquet/util/comparison-test.cc
+++ b/src/parquet/util/comparison-test.cc
@@ -137,7 +137,7 @@ TEST(Comparison, UnsignedFLBA) {
 TEST(Comparison, SignedInt96) {
   parquet::Int96 a{{1, 41, 14}}, b{{1, 41, 42}};
   parquet::Int96 aa{{1, 41, 14}}, bb{{1, 41, 14}};
-  parquet::Int96 aaa{{static_cast<uint32_t>(-1), 41, 14}}, bbb{{1, 41, 42}};
+  parquet::Int96 aaa{{1, 41, static_cast<uint32_t>(-14)}}, bbb{{1, 41, 42}};
 
   NodePtr node = PrimitiveNode::Make("SignedInt96", Repetition::REQUIRED, Type::INT96);
   ColumnDescriptor descr(node, 0, 0);
@@ -151,7 +151,8 @@ TEST(Comparison, SignedInt96) {
 
 TEST(Comparison, UnsignedInt96) {
   parquet::Int96 a{{1, 41, 14}}, b{{1, static_cast<uint32_t>(-41), 42}};
-  parquet::Int96 aa{{1, 41, 14}}, bb{{static_cast<uint32_t>(-1), 41, 14}};
+  parquet::Int96 aa{{1, 41, 14}}, bb{{1, 41, static_cast<uint32_t>(-14)}};
+  parquet::Int96 aaa, bbb;
 
   NodePtr node = PrimitiveNode::Make("UnsignedInt96", Repetition::REQUIRED, Type::INT96);
   ColumnDescriptor descr(node, 0, 0);
@@ -160,6 +161,31 @@ TEST(Comparison, UnsignedInt96) {
 
   ASSERT_TRUE(uless(a, b));
   ASSERT_TRUE(uless(aa, bb));
+
+  // INT96 Timestamp
+  aaa.value[2] = 2451545;  // 2000-01-01
+  bbb.value[2] = 2451546;  // 2000-01-02
+  // 12 hours + 34 minutes + 56 seconds.
+  reinterpret_cast<uint64_t*>(&aaa.value[0])[0] = 45296000000000;
+  // 12 hours + 34 minutes + 50 seconds.
+  reinterpret_cast<uint64_t*>(&bbb.value[0])[0] = 45290000000000;
+  ASSERT_TRUE(uless(aaa, bbb));
+
+  aaa.value[2] = 2451545;  // 2000-01-01
+  bbb.value[2] = 2451545;  // 2000-01-01
+  // 11 hours + 34 minutes + 56 seconds.
+  reinterpret_cast<uint64_t*>(&aaa.value[0])[0] = 41696000000000;
+  // 12 hours + 34 minutes + 50 seconds.
+  reinterpret_cast<uint64_t*>(&bbb.value[0])[0] = 45290000000000;
+  ASSERT_TRUE(uless(aaa, bbb));
+
+  aaa.value[2] = 2451545;  // 2000-01-01
+  bbb.value[2] = 2451545;  // 2000-01-01
+  // 12 hours + 34 minutes + 55 seconds.
+  reinterpret_cast<uint64_t*>(&aaa.value[0])[0] = 45295000000000;
+  // 12 hours + 34 minutes + 56 seconds.
+  reinterpret_cast<uint64_t*>(&bbb.value[0])[0] = 45296000000000;
+  ASSERT_TRUE(uless(aaa, bbb));
 }
 
 TEST(Comparison, SignedInt64) {

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/96f868f7/src/parquet/util/comparison.h
----------------------------------------------------------------------
diff --git a/src/parquet/util/comparison.h b/src/parquet/util/comparison.h
index 803f0da..12be7ba 100644
--- a/src/parquet/util/comparison.h
+++ b/src/parquet/util/comparison.h
@@ -48,9 +48,16 @@ class PARQUET_EXPORT CompareDefault<Int96Type> : public Comparator
{
   CompareDefault() {}
   virtual ~CompareDefault() {}
   virtual bool operator()(const Int96& a, const Int96& b) {
-    const int32_t* aptr = reinterpret_cast<const int32_t*>(&a.value[0]);
-    const int32_t* bptr = reinterpret_cast<const int32_t*>(&b.value[0]);
-    return std::lexicographical_compare(aptr, aptr + 3, bptr, bptr + 3);
+    // Only the MSB bit is by Signed comparison
+    // For little-endian, this is the last bit of Int96 type
+    const int32_t amsb = static_cast<const int32_t>(a.value[2]);
+    const int32_t bmsb = static_cast<const int32_t>(b.value[2]);
+    if (amsb != bmsb) {
+      return (amsb < bmsb);
+    } else if (a.value[1] != b.value[1]) {
+      return (a.value[1] < b.value[1]);
+    }
+    return (a.value[0] < b.value[0]);
   }
 };
 
@@ -132,9 +139,12 @@ class PARQUET_EXPORT CompareUnsignedInt96 : public CompareDefaultInt96
{
  public:
   virtual ~CompareUnsignedInt96() {}
   bool operator()(const Int96& a, const Int96& b) override {
-    const uint32_t* aptr = reinterpret_cast<const uint32_t*>(&a.value[0]);
-    const uint32_t* bptr = reinterpret_cast<const uint32_t*>(&b.value[0]);
-    return std::lexicographical_compare(aptr, aptr + 3, bptr, bptr + 3);
+    if (a.value[2] != b.value[2]) {
+      return (a.value[2] < b.value[2]);
+    } else if (a.value[1] != b.value[1]) {
+      return (a.value[1] < b.value[1]);
+    }
+    return (a.value[0] < b.value[0]);
   }
 };
 


Mime
View raw message