parquet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From w...@apache.org
Subject parquet-cpp git commit: PARQUET-639: Do not export DCHECK in public headers
Date Sat, 25 Jun 2016 01:48:06 GMT
Repository: parquet-cpp
Updated Branches:
  refs/heads/master 5e831d65d -> 3ca5a7034


PARQUET-639: Do not export DCHECK in public headers

I added a test to ensure that DCHECK does not leak into the public headers. I prefer this to
renaming the macro.

Author: Wes McKinney <wesm@apache.org>

Closes #127 from wesm/no-export-dcheck and squashes the following commits:

52a2d22 [Wes McKinney] Remove exposure of DCHECK macros from publicly-visible headers


Project: http://git-wip-us.apache.org/repos/asf/parquet-cpp/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-cpp/commit/3ca5a703
Tree: http://git-wip-us.apache.org/repos/asf/parquet-cpp/tree/3ca5a703
Diff: http://git-wip-us.apache.org/repos/asf/parquet-cpp/diff/3ca5a703

Branch: refs/heads/master
Commit: 3ca5a70348f59db66ffa18eb08c80208a4614e2c
Parents: 5e831d6
Author: Wes McKinney <wesm@apache.org>
Authored: Fri Jun 24 18:47:59 2016 -0700
Committer: Wes McKinney <wesm@apache.org>
Committed: Fri Jun 24 18:47:59 2016 -0700

----------------------------------------------------------------------
 CMakeLists.txt                   |   1 +
 src/parquet/column/levels.cc     | 146 ++++++++++++++++++++++++++++++++++
 src/parquet/column/levels.h      | 121 ++++------------------------
 src/parquet/public-api-test.cc   |   7 ++
 src/parquet/util/input.cc        |   1 +
 src/parquet/util/mem-allocator.h |   3 +-
 src/parquet/util/output.cc       |   1 +
 7 files changed, 171 insertions(+), 109 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/3ca5a703/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8751af6..2417449 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -444,6 +444,7 @@ endif()
 set(LIBPARQUET_SRCS
   src/parquet/types.cc
 
+  src/parquet/column/levels.cc
   src/parquet/column/reader.cc
   src/parquet/column/writer.cc
   src/parquet/column/scanner.cc

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/3ca5a703/src/parquet/column/levels.cc
----------------------------------------------------------------------
diff --git a/src/parquet/column/levels.cc b/src/parquet/column/levels.cc
new file mode 100644
index 0000000..6f87ad8
--- /dev/null
+++ b/src/parquet/column/levels.cc
@@ -0,0 +1,146 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "parquet/column/levels.h"
+
+#include <cstdint>
+
+#include "parquet/util/rle-encoding.h"
+
+namespace parquet {
+
+LevelEncoder::LevelEncoder() {}
+LevelEncoder::~LevelEncoder() {}
+
+void LevelEncoder::Init(Encoding::type encoding, int16_t max_level,
+    int num_buffered_values, uint8_t* data, int data_size) {
+  bit_width_ = BitUtil::Log2(max_level + 1);
+  encoding_ = encoding;
+  switch (encoding) {
+    case Encoding::RLE: {
+      rle_encoder_.reset(new RleEncoder(data, data_size, bit_width_));
+      break;
+    }
+    case Encoding::BIT_PACKED: {
+      int num_bytes = BitUtil::Ceil(num_buffered_values * bit_width_, 8);
+      bit_packed_encoder_.reset(new BitWriter(data, num_bytes));
+      break;
+    }
+    default:
+      throw ParquetException("Unknown encoding type for levels.");
+  }
+}
+
+int LevelEncoder::MaxBufferSize(
+    Encoding::type encoding, int16_t max_level, int num_buffered_values) {
+  int bit_width = BitUtil::Log2(max_level + 1);
+  int num_bytes = 0;
+  switch (encoding) {
+    case Encoding::RLE: {
+      // TODO: Due to the way we currently check if the buffer is full enough,
+      // we need to have MinBufferSize as head room.
+      num_bytes = RleEncoder::MaxBufferSize(bit_width, num_buffered_values) +
+                  RleEncoder::MinBufferSize(bit_width);
+      break;
+    }
+    case Encoding::BIT_PACKED: {
+      num_bytes = BitUtil::Ceil(num_buffered_values * bit_width, 8);
+      break;
+    }
+    default:
+      throw ParquetException("Unknown encoding type for levels.");
+  }
+  return num_bytes;
+}
+
+int LevelEncoder::Encode(int batch_size, const int16_t* levels) {
+  int num_encoded = 0;
+  if (!rle_encoder_ && !bit_packed_encoder_) {
+    throw ParquetException("Level encoders are not initialized.");
+  }
+
+  if (encoding_ == Encoding::RLE) {
+    for (int i = 0; i < batch_size; ++i) {
+      if (!rle_encoder_->Put(*(levels + i))) { break; }
+      ++num_encoded;
+    }
+    rle_encoder_->Flush();
+    rle_length_ = rle_encoder_->len();
+  } else {
+    for (int i = 0; i < batch_size; ++i) {
+      if (!bit_packed_encoder_->PutValue(*(levels + i), bit_width_)) { break; }
+      ++num_encoded;
+    }
+    bit_packed_encoder_->Flush();
+  }
+  return num_encoded;
+}
+
+LevelDecoder::LevelDecoder()
+    : num_values_remaining_(0) {}
+
+LevelDecoder::~LevelDecoder() {}
+
+int LevelDecoder::SetData(Encoding::type encoding, int16_t max_level,
+    int num_buffered_values, const uint8_t* data) {
+  uint32_t num_bytes = 0;
+  encoding_ = encoding;
+  num_values_remaining_ = num_buffered_values;
+  bit_width_ = BitUtil::Log2(max_level + 1);
+  switch (encoding) {
+    case Encoding::RLE: {
+      num_bytes = *reinterpret_cast<const uint32_t*>(data);
+      const uint8_t* decoder_data = data + sizeof(uint32_t);
+      if (!rle_decoder_) {
+        rle_decoder_.reset(new RleDecoder(decoder_data, num_bytes, bit_width_));
+      } else {
+        rle_decoder_->Reset(decoder_data, num_bytes, bit_width_);
+      }
+      return sizeof(uint32_t) + num_bytes;
+    }
+    case Encoding::BIT_PACKED: {
+      num_bytes = BitUtil::Ceil(num_buffered_values * bit_width_, 8);
+      if (!bit_packed_decoder_) {
+        bit_packed_decoder_.reset(new BitReader(data, num_bytes));
+      } else {
+        bit_packed_decoder_->Reset(data, num_bytes);
+      }
+      return num_bytes;
+    }
+    default:
+      throw ParquetException("Unknown encoding type for levels.");
+  }
+  return -1;
+}
+
+int LevelDecoder::Decode(int batch_size, int16_t* levels) {
+  int num_decoded = 0;
+
+  int num_values = std::min(num_values_remaining_, batch_size);
+  if (encoding_ == Encoding::RLE) {
+    num_decoded = rle_decoder_->GetBatch(levels, num_values);
+  } else {
+    for (int i = 0; i < num_values; ++i) {
+      if (!bit_packed_decoder_->GetValue(bit_width_, levels + i)) { break; }
+      ++num_decoded;
+    }
+  }
+  num_values_remaining_ -= num_decoded;
+  return num_decoded;
+}
+
+}  // namespace parquet

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/3ca5a703/src/parquet/column/levels.h
----------------------------------------------------------------------
diff --git a/src/parquet/column/levels.h b/src/parquet/column/levels.h
index ce751d0..c57ca2f 100644
--- a/src/parquet/column/levels.h
+++ b/src/parquet/column/levels.h
@@ -23,79 +23,28 @@
 
 #include "parquet/exception.h"
 #include "parquet/types.h"
-#include "parquet/util/rle-encoding.h"
 
 namespace parquet {
 
+class BitReader;
+class BitWriter;
+class RleDecoder;
+class RleEncoder;
+
 class LevelEncoder {
  public:
-  LevelEncoder() {}
+  LevelEncoder();
+  ~LevelEncoder();
 
   static int MaxBufferSize(
-      Encoding::type encoding, int16_t max_level, int num_buffered_values) {
-    int bit_width = BitUtil::Log2(max_level + 1);
-    int num_bytes = 0;
-    switch (encoding) {
-      case Encoding::RLE: {
-        // TODO: Due to the way we currently check if the buffer is full enough,
-        // we need to have MinBufferSize as head room.
-        num_bytes = RleEncoder::MaxBufferSize(bit_width, num_buffered_values) +
-                    RleEncoder::MinBufferSize(bit_width);
-        break;
-      }
-      case Encoding::BIT_PACKED: {
-        num_bytes = BitUtil::Ceil(num_buffered_values * bit_width, 8);
-        break;
-      }
-      default:
-        throw ParquetException("Unknown encoding type for levels.");
-    }
-    return num_bytes;
-  }
+      Encoding::type encoding, int16_t max_level, int num_buffered_values);
 
   // Initialize the LevelEncoder.
   void Init(Encoding::type encoding, int16_t max_level, int num_buffered_values,
-      uint8_t* data, int data_size) {
-    bit_width_ = BitUtil::Log2(max_level + 1);
-    encoding_ = encoding;
-    switch (encoding) {
-      case Encoding::RLE: {
-        rle_encoder_.reset(new RleEncoder(data, data_size, bit_width_));
-        break;
-      }
-      case Encoding::BIT_PACKED: {
-        int num_bytes = BitUtil::Ceil(num_buffered_values * bit_width_, 8);
-        bit_packed_encoder_.reset(new BitWriter(data, num_bytes));
-        break;
-      }
-      default:
-        throw ParquetException("Unknown encoding type for levels.");
-    }
-  }
+      uint8_t* data, int data_size);
 
   // Encodes a batch of levels from an array and returns the number of levels encoded
-  int Encode(int batch_size, const int16_t* levels) {
-    int num_encoded = 0;
-    if (!rle_encoder_ && !bit_packed_encoder_) {
-      throw ParquetException("Level encoders are not initialized.");
-    }
-
-    if (encoding_ == Encoding::RLE) {
-      for (int i = 0; i < batch_size; ++i) {
-        if (!rle_encoder_->Put(*(levels + i))) { break; }
-        ++num_encoded;
-      }
-      rle_encoder_->Flush();
-      rle_length_ = rle_encoder_->len();
-    } else {
-      for (int i = 0; i < batch_size; ++i) {
-        if (!bit_packed_encoder_->PutValue(*(levels + i), bit_width_)) { break; }
-        ++num_encoded;
-      }
-      bit_packed_encoder_->Flush();
-    }
-    return num_encoded;
-  }
+  int Encode(int batch_size, const int16_t* levels);
 
   int32_t len() {
     if (encoding_ != Encoding::RLE) {
@@ -114,58 +63,16 @@ class LevelEncoder {
 
 class LevelDecoder {
  public:
-  LevelDecoder() : num_values_remaining_(0) {}
+  LevelDecoder();
+  ~LevelDecoder();
 
   // Initialize the LevelDecoder state with new data
   // and return the number of bytes consumed
   int SetData(Encoding::type encoding, int16_t max_level, int num_buffered_values,
-      const uint8_t* data) {
-    uint32_t num_bytes = 0;
-    encoding_ = encoding;
-    num_values_remaining_ = num_buffered_values;
-    bit_width_ = BitUtil::Log2(max_level + 1);
-    switch (encoding) {
-      case Encoding::RLE: {
-        num_bytes = *reinterpret_cast<const uint32_t*>(data);
-        const uint8_t* decoder_data = data + sizeof(uint32_t);
-        if (!rle_decoder_) {
-          rle_decoder_.reset(new RleDecoder(decoder_data, num_bytes, bit_width_));
-        } else {
-          rle_decoder_->Reset(decoder_data, num_bytes, bit_width_);
-        }
-        return sizeof(uint32_t) + num_bytes;
-      }
-      case Encoding::BIT_PACKED: {
-        num_bytes = BitUtil::Ceil(num_buffered_values * bit_width_, 8);
-        if (!bit_packed_decoder_) {
-          bit_packed_decoder_.reset(new BitReader(data, num_bytes));
-        } else {
-          bit_packed_decoder_->Reset(data, num_bytes);
-        }
-        return num_bytes;
-      }
-      default:
-        throw ParquetException("Unknown encoding type for levels.");
-    }
-    return -1;
-  }
+      const uint8_t* data);
 
   // Decodes a batch of levels into an array and returns the number of levels decoded
-  int Decode(int batch_size, int16_t* levels) {
-    int num_decoded = 0;
-
-    int num_values = std::min(num_values_remaining_, batch_size);
-    if (encoding_ == Encoding::RLE) {
-      num_decoded = rle_decoder_->GetBatch(levels, num_values);
-    } else {
-      for (int i = 0; i < num_values; ++i) {
-        if (!bit_packed_decoder_->GetValue(bit_width_, levels + i)) { break; }
-        ++num_decoded;
-      }
-    }
-    num_values_remaining_ -= num_decoded;
-    return num_decoded;
-  }
+  int Decode(int batch_size, int16_t* levels);
 
  private:
   int bit_width_;

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/3ca5a703/src/parquet/public-api-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/public-api-test.cc b/src/parquet/public-api-test.cc
index 1dc7621..e307f52 100644
--- a/src/parquet/public-api-test.cc
+++ b/src/parquet/public-api-test.cc
@@ -20,6 +20,7 @@
 #include "parquet/api/io.h"
 #include "parquet/api/reader.h"
 #include "parquet/api/schema.h"
+#include "parquet/api/writer.h"
 
 namespace parquet {
 
@@ -29,4 +30,10 @@ TEST(TestPublicAPI, DoesNotIncludeThrift) {
 #endif
 }
 
+TEST(TestPublicAPI, DoesNotExportDCHECK) {
+#ifdef DCHECK
+  FAIL() << "parquet/util/logging.h should not be transitively included";
+#endif
+}
+
 }  // namespace parquet

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/3ca5a703/src/parquet/util/input.cc
----------------------------------------------------------------------
diff --git a/src/parquet/util/input.cc b/src/parquet/util/input.cc
index 73ca8a5..e1659f7 100644
--- a/src/parquet/util/input.cc
+++ b/src/parquet/util/input.cc
@@ -24,6 +24,7 @@
 
 #include "parquet/exception.h"
 #include "parquet/util/buffer.h"
+#include "parquet/util/logging.h"
 
 namespace parquet {
 

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/3ca5a703/src/parquet/util/mem-allocator.h
----------------------------------------------------------------------
diff --git a/src/parquet/util/mem-allocator.h b/src/parquet/util/mem-allocator.h
index eb68f02..3f387c3 100644
--- a/src/parquet/util/mem-allocator.h
+++ b/src/parquet/util/mem-allocator.h
@@ -18,8 +18,7 @@
 #ifndef PARQUET_UTIL_MEMORY_POOL_H
 #define PARQUET_UTIL_MEMORY_POOL_H
 
-#include "parquet/util/logging.h"
-#include "parquet/util/bit-util.h"
+#include <cstdint>
 
 namespace parquet {
 

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/3ca5a703/src/parquet/util/output.cc
----------------------------------------------------------------------
diff --git a/src/parquet/util/output.cc b/src/parquet/util/output.cc
index 4f024a5..6fc4ed8 100644
--- a/src/parquet/util/output.cc
+++ b/src/parquet/util/output.cc
@@ -22,6 +22,7 @@
 
 #include "parquet/exception.h"
 #include "parquet/util/buffer.h"
+#include "parquet/util/logging.h"
 
 namespace parquet {
 


Mime
View raw message