Return-Path: X-Original-To: apmail-parquet-commits-archive@minotaur.apache.org Delivered-To: apmail-parquet-commits-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id DE19018FB6 for ; Fri, 8 Jan 2016 23:51:52 +0000 (UTC) Received: (qmail 31300 invoked by uid 500); 8 Jan 2016 23:51:52 -0000 Delivered-To: apmail-parquet-commits-archive@parquet.apache.org Received: (qmail 31251 invoked by uid 500); 8 Jan 2016 23:51:52 -0000 Mailing-List: contact commits-help@parquet.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@parquet.apache.org Delivered-To: mailing list commits@parquet.apache.org Received: (qmail 31139 invoked by uid 99); 8 Jan 2016 23:51:52 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 08 Jan 2016 23:51:52 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 86E9CE05D9; Fri, 8 Jan 2016 23:51:52 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: nong@apache.org To: commits@parquet.apache.org Date: Fri, 08 Jan 2016 23:51:55 -0000 Message-Id: <3aa328ae373445cd9ae05355dd48c4e5@git.apache.org> In-Reply-To: References: X-Mailer: ASF-Git Admin Mailer Subject: [4/7] parquet-cpp git commit: PARQUET-416: C++11 compilation, code reorg, libparquet and installation targets http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/generated/gen-cpp/parquet_types.h ---------------------------------------------------------------------- diff --git a/generated/gen-cpp/parquet_types.h b/generated/gen-cpp/parquet_types.h deleted file mode 100644 index 4360d02..0000000 --- a/generated/gen-cpp/parquet_types.h +++ /dev/null @@ -1,1123 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -#ifndef parquet_TYPES_H -#define parquet_TYPES_H - -#include -#include -#include -#include - - - -namespace parquet { - -struct Type { - enum type { - BOOLEAN = 0, - INT32 = 1, - INT64 = 2, - INT96 = 3, - FLOAT = 4, - DOUBLE = 5, - BYTE_ARRAY = 6, - FIXED_LEN_BYTE_ARRAY = 7 - }; -}; - -extern const std::map _Type_VALUES_TO_NAMES; - -struct ConvertedType { - enum type { - UTF8 = 0, - MAP = 1, - MAP_KEY_VALUE = 2, - LIST = 3, - ENUM = 4, - DECIMAL = 5 - }; -}; - -extern const std::map _ConvertedType_VALUES_TO_NAMES; - -struct FieldRepetitionType { - enum type { - REQUIRED = 0, - OPTIONAL = 1, - REPEATED = 2 - }; -}; - -extern const std::map _FieldRepetitionType_VALUES_TO_NAMES; - -struct Encoding { - enum type { - PLAIN = 0, - PLAIN_DICTIONARY = 2, - RLE = 3, - BIT_PACKED = 4, - DELTA_BINARY_PACKED = 5, - DELTA_LENGTH_BYTE_ARRAY = 6, - DELTA_BYTE_ARRAY = 7, - RLE_DICTIONARY = 8 - }; -}; - -extern const std::map _Encoding_VALUES_TO_NAMES; - -struct CompressionCodec { - enum type { - UNCOMPRESSED = 0, - SNAPPY = 1, - GZIP = 2, - LZO = 3 - }; -}; - -extern const std::map _CompressionCodec_VALUES_TO_NAMES; - -struct PageType { - enum type { - DATA_PAGE = 0, - INDEX_PAGE = 1, - DICTIONARY_PAGE = 2, - DATA_PAGE_V2 = 3 - }; -}; - -extern const std::map _PageType_VALUES_TO_NAMES; - -typedef struct _Statistics__isset { - _Statistics__isset() : max(false), min(false), null_count(false), distinct_count(false) {} - bool max; - bool min; - bool null_count; - bool distinct_count; -} _Statistics__isset; - -class Statistics { - public: - - static const char* ascii_fingerprint; // = "CE004821871820DD79A8FD98BB101F6D"; - static const uint8_t binary_fingerprint[16]; // = {0xCE,0x00,0x48,0x21,0x87,0x18,0x20,0xDD,0x79,0xA8,0xFD,0x98,0xBB,0x10,0x1F,0x6D}; - - Statistics() : max(), min(), null_count(0), distinct_count(0) { - } - - virtual ~Statistics() throw() {} - - std::string max; - std::string min; - int64_t null_count; - int64_t distinct_count; - - _Statistics__isset __isset; - - void __set_max(const std::string& val) { - max = val; - __isset.max = true; - } - - void __set_min(const std::string& val) { - min = val; - __isset.min = true; - } - - void __set_null_count(const int64_t val) { - null_count = val; - __isset.null_count = true; - } - - void __set_distinct_count(const int64_t val) { - distinct_count = val; - __isset.distinct_count = true; - } - - bool operator == (const Statistics & rhs) const - { - if (__isset.max != rhs.__isset.max) - return false; - else if (__isset.max && !(max == rhs.max)) - return false; - if (__isset.min != rhs.__isset.min) - return false; - else if (__isset.min && !(min == rhs.min)) - return false; - if (__isset.null_count != rhs.__isset.null_count) - return false; - else if (__isset.null_count && !(null_count == rhs.null_count)) - return false; - if (__isset.distinct_count != rhs.__isset.distinct_count) - return false; - else if (__isset.distinct_count && !(distinct_count == rhs.distinct_count)) - return false; - return true; - } - bool operator != (const Statistics &rhs) const { - return !(*this == rhs); - } - - bool operator < (const Statistics & ) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - -}; - -void swap(Statistics &a, Statistics &b); - -typedef struct _SchemaElement__isset { - _SchemaElement__isset() : type(false), type_length(false), repetition_type(false), num_children(false), converted_type(false), scale(false), precision(false) {} - bool type; - bool type_length; - bool repetition_type; - bool num_children; - bool converted_type; - bool scale; - bool precision; -} _SchemaElement__isset; - -class SchemaElement { - public: - - static const char* ascii_fingerprint; // = "388A784401753800444CFEAC8BC1B1A1"; - static const uint8_t binary_fingerprint[16]; // = {0x38,0x8A,0x78,0x44,0x01,0x75,0x38,0x00,0x44,0x4C,0xFE,0xAC,0x8B,0xC1,0xB1,0xA1}; - - SchemaElement() : type((Type::type)0), type_length(0), repetition_type((FieldRepetitionType::type)0), name(), num_children(0), converted_type((ConvertedType::type)0), scale(0), precision(0) { - } - - virtual ~SchemaElement() throw() {} - - Type::type type; - int32_t type_length; - FieldRepetitionType::type repetition_type; - std::string name; - int32_t num_children; - ConvertedType::type converted_type; - int32_t scale; - int32_t precision; - - _SchemaElement__isset __isset; - - void __set_type(const Type::type val) { - type = val; - __isset.type = true; - } - - void __set_type_length(const int32_t val) { - type_length = val; - __isset.type_length = true; - } - - void __set_repetition_type(const FieldRepetitionType::type val) { - repetition_type = val; - __isset.repetition_type = true; - } - - void __set_name(const std::string& val) { - name = val; - } - - void __set_num_children(const int32_t val) { - num_children = val; - __isset.num_children = true; - } - - void __set_converted_type(const ConvertedType::type val) { - converted_type = val; - __isset.converted_type = true; - } - - void __set_scale(const int32_t val) { - scale = val; - __isset.scale = true; - } - - void __set_precision(const int32_t val) { - precision = val; - __isset.precision = true; - } - - bool operator == (const SchemaElement & rhs) const - { - if (__isset.type != rhs.__isset.type) - return false; - else if (__isset.type && !(type == rhs.type)) - return false; - if (__isset.type_length != rhs.__isset.type_length) - return false; - else if (__isset.type_length && !(type_length == rhs.type_length)) - return false; - if (__isset.repetition_type != rhs.__isset.repetition_type) - return false; - else if (__isset.repetition_type && !(repetition_type == rhs.repetition_type)) - return false; - if (!(name == rhs.name)) - return false; - if (__isset.num_children != rhs.__isset.num_children) - return false; - else if (__isset.num_children && !(num_children == rhs.num_children)) - return false; - if (__isset.converted_type != rhs.__isset.converted_type) - return false; - else if (__isset.converted_type && !(converted_type == rhs.converted_type)) - return false; - if (__isset.scale != rhs.__isset.scale) - return false; - else if (__isset.scale && !(scale == rhs.scale)) - return false; - if (__isset.precision != rhs.__isset.precision) - return false; - else if (__isset.precision && !(precision == rhs.precision)) - return false; - return true; - } - bool operator != (const SchemaElement &rhs) const { - return !(*this == rhs); - } - - bool operator < (const SchemaElement & ) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - -}; - -void swap(SchemaElement &a, SchemaElement &b); - -typedef struct _DataPageHeader__isset { - _DataPageHeader__isset() : statistics(false) {} - bool statistics; -} _DataPageHeader__isset; - -class DataPageHeader { - public: - - static const char* ascii_fingerprint; // = "5FC1792B0483E9C984475384165040B1"; - static const uint8_t binary_fingerprint[16]; // = {0x5F,0xC1,0x79,0x2B,0x04,0x83,0xE9,0xC9,0x84,0x47,0x53,0x84,0x16,0x50,0x40,0xB1}; - - DataPageHeader() : num_values(0), encoding((Encoding::type)0), definition_level_encoding((Encoding::type)0), repetition_level_encoding((Encoding::type)0) { - } - - virtual ~DataPageHeader() throw() {} - - int32_t num_values; - Encoding::type encoding; - Encoding::type definition_level_encoding; - Encoding::type repetition_level_encoding; - Statistics statistics; - - _DataPageHeader__isset __isset; - - void __set_num_values(const int32_t val) { - num_values = val; - } - - void __set_encoding(const Encoding::type val) { - encoding = val; - } - - void __set_definition_level_encoding(const Encoding::type val) { - definition_level_encoding = val; - } - - void __set_repetition_level_encoding(const Encoding::type val) { - repetition_level_encoding = val; - } - - void __set_statistics(const Statistics& val) { - statistics = val; - __isset.statistics = true; - } - - bool operator == (const DataPageHeader & rhs) const - { - if (!(num_values == rhs.num_values)) - return false; - if (!(encoding == rhs.encoding)) - return false; - if (!(definition_level_encoding == rhs.definition_level_encoding)) - return false; - if (!(repetition_level_encoding == rhs.repetition_level_encoding)) - return false; - if (__isset.statistics != rhs.__isset.statistics) - return false; - else if (__isset.statistics && !(statistics == rhs.statistics)) - return false; - return true; - } - bool operator != (const DataPageHeader &rhs) const { - return !(*this == rhs); - } - - bool operator < (const DataPageHeader & ) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - -}; - -void swap(DataPageHeader &a, DataPageHeader &b); - - -class IndexPageHeader { - public: - - static const char* ascii_fingerprint; // = "99914B932BD37A50B983C5E7C90AE93B"; - static const uint8_t binary_fingerprint[16]; // = {0x99,0x91,0x4B,0x93,0x2B,0xD3,0x7A,0x50,0xB9,0x83,0xC5,0xE7,0xC9,0x0A,0xE9,0x3B}; - - IndexPageHeader() { - } - - virtual ~IndexPageHeader() throw() {} - - - bool operator == (const IndexPageHeader & /* rhs */) const - { - return true; - } - bool operator != (const IndexPageHeader &rhs) const { - return !(*this == rhs); - } - - bool operator < (const IndexPageHeader & ) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - -}; - -void swap(IndexPageHeader &a, IndexPageHeader &b); - -typedef struct _DictionaryPageHeader__isset { - _DictionaryPageHeader__isset() : is_sorted(false) {} - bool is_sorted; -} _DictionaryPageHeader__isset; - -class DictionaryPageHeader { - public: - - static const char* ascii_fingerprint; // = "B149E4528254D495610C22AE4BD539C5"; - static const uint8_t binary_fingerprint[16]; // = {0xB1,0x49,0xE4,0x52,0x82,0x54,0xD4,0x95,0x61,0x0C,0x22,0xAE,0x4B,0xD5,0x39,0xC5}; - - DictionaryPageHeader() : num_values(0), encoding((Encoding::type)0), is_sorted(0) { - } - - virtual ~DictionaryPageHeader() throw() {} - - int32_t num_values; - Encoding::type encoding; - bool is_sorted; - - _DictionaryPageHeader__isset __isset; - - void __set_num_values(const int32_t val) { - num_values = val; - } - - void __set_encoding(const Encoding::type val) { - encoding = val; - } - - void __set_is_sorted(const bool val) { - is_sorted = val; - __isset.is_sorted = true; - } - - bool operator == (const DictionaryPageHeader & rhs) const - { - if (!(num_values == rhs.num_values)) - return false; - if (!(encoding == rhs.encoding)) - return false; - if (__isset.is_sorted != rhs.__isset.is_sorted) - return false; - else if (__isset.is_sorted && !(is_sorted == rhs.is_sorted)) - return false; - return true; - } - bool operator != (const DictionaryPageHeader &rhs) const { - return !(*this == rhs); - } - - bool operator < (const DictionaryPageHeader & ) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - -}; - -void swap(DictionaryPageHeader &a, DictionaryPageHeader &b); - -typedef struct _DataPageHeaderV2__isset { - _DataPageHeaderV2__isset() : is_compressed(true), statistics(false) {} - bool is_compressed; - bool statistics; -} _DataPageHeaderV2__isset; - -class DataPageHeaderV2 { - public: - - static const char* ascii_fingerprint; // = "69FF2F6BD1A443440D5E46ABA5A3A919"; - static const uint8_t binary_fingerprint[16]; // = {0x69,0xFF,0x2F,0x6B,0xD1,0xA4,0x43,0x44,0x0D,0x5E,0x46,0xAB,0xA5,0xA3,0xA9,0x19}; - - DataPageHeaderV2() : num_values(0), num_nulls(0), num_rows(0), encoding((Encoding::type)0), definition_levels_byte_length(0), repetition_levels_byte_length(0), is_compressed(true) { - } - - virtual ~DataPageHeaderV2() throw() {} - - int32_t num_values; - int32_t num_nulls; - int32_t num_rows; - Encoding::type encoding; - int32_t definition_levels_byte_length; - int32_t repetition_levels_byte_length; - bool is_compressed; - Statistics statistics; - - _DataPageHeaderV2__isset __isset; - - void __set_num_values(const int32_t val) { - num_values = val; - } - - void __set_num_nulls(const int32_t val) { - num_nulls = val; - } - - void __set_num_rows(const int32_t val) { - num_rows = val; - } - - void __set_encoding(const Encoding::type val) { - encoding = val; - } - - void __set_definition_levels_byte_length(const int32_t val) { - definition_levels_byte_length = val; - } - - void __set_repetition_levels_byte_length(const int32_t val) { - repetition_levels_byte_length = val; - } - - void __set_is_compressed(const bool val) { - is_compressed = val; - __isset.is_compressed = true; - } - - void __set_statistics(const Statistics& val) { - statistics = val; - __isset.statistics = true; - } - - bool operator == (const DataPageHeaderV2 & rhs) const - { - if (!(num_values == rhs.num_values)) - return false; - if (!(num_nulls == rhs.num_nulls)) - return false; - if (!(num_rows == rhs.num_rows)) - return false; - if (!(encoding == rhs.encoding)) - return false; - if (!(definition_levels_byte_length == rhs.definition_levels_byte_length)) - return false; - if (!(repetition_levels_byte_length == rhs.repetition_levels_byte_length)) - return false; - if (__isset.is_compressed != rhs.__isset.is_compressed) - return false; - else if (__isset.is_compressed && !(is_compressed == rhs.is_compressed)) - return false; - if (__isset.statistics != rhs.__isset.statistics) - return false; - else if (__isset.statistics && !(statistics == rhs.statistics)) - return false; - return true; - } - bool operator != (const DataPageHeaderV2 &rhs) const { - return !(*this == rhs); - } - - bool operator < (const DataPageHeaderV2 & ) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - -}; - -void swap(DataPageHeaderV2 &a, DataPageHeaderV2 &b); - -typedef struct _PageHeader__isset { - _PageHeader__isset() : crc(false), data_page_header(false), index_page_header(false), dictionary_page_header(false), data_page_header_v2(false) {} - bool crc; - bool data_page_header; - bool index_page_header; - bool dictionary_page_header; - bool data_page_header_v2; -} _PageHeader__isset; - -class PageHeader { - public: - - static const char* ascii_fingerprint; // = "B5BD2BDF3756C883A58B30B9C9F204A0"; - static const uint8_t binary_fingerprint[16]; // = {0xB5,0xBD,0x2B,0xDF,0x37,0x56,0xC8,0x83,0xA5,0x8B,0x30,0xB9,0xC9,0xF2,0x04,0xA0}; - - PageHeader() : type((PageType::type)0), uncompressed_page_size(0), compressed_page_size(0), crc(0) { - } - - virtual ~PageHeader() throw() {} - - PageType::type type; - int32_t uncompressed_page_size; - int32_t compressed_page_size; - int32_t crc; - DataPageHeader data_page_header; - IndexPageHeader index_page_header; - DictionaryPageHeader dictionary_page_header; - DataPageHeaderV2 data_page_header_v2; - - _PageHeader__isset __isset; - - void __set_type(const PageType::type val) { - type = val; - } - - void __set_uncompressed_page_size(const int32_t val) { - uncompressed_page_size = val; - } - - void __set_compressed_page_size(const int32_t val) { - compressed_page_size = val; - } - - void __set_crc(const int32_t val) { - crc = val; - __isset.crc = true; - } - - void __set_data_page_header(const DataPageHeader& val) { - data_page_header = val; - __isset.data_page_header = true; - } - - void __set_index_page_header(const IndexPageHeader& val) { - index_page_header = val; - __isset.index_page_header = true; - } - - void __set_dictionary_page_header(const DictionaryPageHeader& val) { - dictionary_page_header = val; - __isset.dictionary_page_header = true; - } - - void __set_data_page_header_v2(const DataPageHeaderV2& val) { - data_page_header_v2 = val; - __isset.data_page_header_v2 = true; - } - - bool operator == (const PageHeader & rhs) const - { - if (!(type == rhs.type)) - return false; - if (!(uncompressed_page_size == rhs.uncompressed_page_size)) - return false; - if (!(compressed_page_size == rhs.compressed_page_size)) - return false; - if (__isset.crc != rhs.__isset.crc) - return false; - else if (__isset.crc && !(crc == rhs.crc)) - return false; - if (__isset.data_page_header != rhs.__isset.data_page_header) - return false; - else if (__isset.data_page_header && !(data_page_header == rhs.data_page_header)) - return false; - if (__isset.index_page_header != rhs.__isset.index_page_header) - return false; - else if (__isset.index_page_header && !(index_page_header == rhs.index_page_header)) - return false; - if (__isset.dictionary_page_header != rhs.__isset.dictionary_page_header) - return false; - else if (__isset.dictionary_page_header && !(dictionary_page_header == rhs.dictionary_page_header)) - return false; - if (__isset.data_page_header_v2 != rhs.__isset.data_page_header_v2) - return false; - else if (__isset.data_page_header_v2 && !(data_page_header_v2 == rhs.data_page_header_v2)) - return false; - return true; - } - bool operator != (const PageHeader &rhs) const { - return !(*this == rhs); - } - - bool operator < (const PageHeader & ) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - -}; - -void swap(PageHeader &a, PageHeader &b); - -typedef struct _KeyValue__isset { - _KeyValue__isset() : value(false) {} - bool value; -} _KeyValue__isset; - -class KeyValue { - public: - - static const char* ascii_fingerprint; // = "5B708A954C550ECA9C1A49D3C5CAFAB9"; - static const uint8_t binary_fingerprint[16]; // = {0x5B,0x70,0x8A,0x95,0x4C,0x55,0x0E,0xCA,0x9C,0x1A,0x49,0xD3,0xC5,0xCA,0xFA,0xB9}; - - KeyValue() : key(), value() { - } - - virtual ~KeyValue() throw() {} - - std::string key; - std::string value; - - _KeyValue__isset __isset; - - void __set_key(const std::string& val) { - key = val; - } - - void __set_value(const std::string& val) { - value = val; - __isset.value = true; - } - - bool operator == (const KeyValue & rhs) const - { - if (!(key == rhs.key)) - return false; - if (__isset.value != rhs.__isset.value) - return false; - else if (__isset.value && !(value == rhs.value)) - return false; - return true; - } - bool operator != (const KeyValue &rhs) const { - return !(*this == rhs); - } - - bool operator < (const KeyValue & ) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - -}; - -void swap(KeyValue &a, KeyValue &b); - - -class SortingColumn { - public: - - static const char* ascii_fingerprint; // = "F079C2D58A783AD90F9BE05D10DBBC6F"; - static const uint8_t binary_fingerprint[16]; // = {0xF0,0x79,0xC2,0xD5,0x8A,0x78,0x3A,0xD9,0x0F,0x9B,0xE0,0x5D,0x10,0xDB,0xBC,0x6F}; - - SortingColumn() : column_idx(0), descending(0), nulls_first(0) { - } - - virtual ~SortingColumn() throw() {} - - int32_t column_idx; - bool descending; - bool nulls_first; - - void __set_column_idx(const int32_t val) { - column_idx = val; - } - - void __set_descending(const bool val) { - descending = val; - } - - void __set_nulls_first(const bool val) { - nulls_first = val; - } - - bool operator == (const SortingColumn & rhs) const - { - if (!(column_idx == rhs.column_idx)) - return false; - if (!(descending == rhs.descending)) - return false; - if (!(nulls_first == rhs.nulls_first)) - return false; - return true; - } - bool operator != (const SortingColumn &rhs) const { - return !(*this == rhs); - } - - bool operator < (const SortingColumn & ) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - -}; - -void swap(SortingColumn &a, SortingColumn &b); - -typedef struct _ColumnMetaData__isset { - _ColumnMetaData__isset() : key_value_metadata(false), index_page_offset(false), dictionary_page_offset(false), statistics(false) {} - bool key_value_metadata; - bool index_page_offset; - bool dictionary_page_offset; - bool statistics; -} _ColumnMetaData__isset; - -class ColumnMetaData { - public: - - static const char* ascii_fingerprint; // = "1AF797732BCB4465C6314FB29B86638D"; - static const uint8_t binary_fingerprint[16]; // = {0x1A,0xF7,0x97,0x73,0x2B,0xCB,0x44,0x65,0xC6,0x31,0x4F,0xB2,0x9B,0x86,0x63,0x8D}; - - ColumnMetaData() : type((Type::type)0), codec((CompressionCodec::type)0), num_values(0), total_uncompressed_size(0), total_compressed_size(0), data_page_offset(0), index_page_offset(0), dictionary_page_offset(0) { - } - - virtual ~ColumnMetaData() throw() {} - - Type::type type; - std::vector encodings; - std::vector path_in_schema; - CompressionCodec::type codec; - int64_t num_values; - int64_t total_uncompressed_size; - int64_t total_compressed_size; - std::vector key_value_metadata; - int64_t data_page_offset; - int64_t index_page_offset; - int64_t dictionary_page_offset; - Statistics statistics; - - _ColumnMetaData__isset __isset; - - void __set_type(const Type::type val) { - type = val; - } - - void __set_encodings(const std::vector & val) { - encodings = val; - } - - void __set_path_in_schema(const std::vector & val) { - path_in_schema = val; - } - - void __set_codec(const CompressionCodec::type val) { - codec = val; - } - - void __set_num_values(const int64_t val) { - num_values = val; - } - - void __set_total_uncompressed_size(const int64_t val) { - total_uncompressed_size = val; - } - - void __set_total_compressed_size(const int64_t val) { - total_compressed_size = val; - } - - void __set_key_value_metadata(const std::vector & val) { - key_value_metadata = val; - __isset.key_value_metadata = true; - } - - void __set_data_page_offset(const int64_t val) { - data_page_offset = val; - } - - void __set_index_page_offset(const int64_t val) { - index_page_offset = val; - __isset.index_page_offset = true; - } - - void __set_dictionary_page_offset(const int64_t val) { - dictionary_page_offset = val; - __isset.dictionary_page_offset = true; - } - - void __set_statistics(const Statistics& val) { - statistics = val; - __isset.statistics = true; - } - - bool operator == (const ColumnMetaData & rhs) const - { - if (!(type == rhs.type)) - return false; - if (!(encodings == rhs.encodings)) - return false; - if (!(path_in_schema == rhs.path_in_schema)) - return false; - if (!(codec == rhs.codec)) - return false; - if (!(num_values == rhs.num_values)) - return false; - if (!(total_uncompressed_size == rhs.total_uncompressed_size)) - return false; - if (!(total_compressed_size == rhs.total_compressed_size)) - return false; - if (__isset.key_value_metadata != rhs.__isset.key_value_metadata) - return false; - else if (__isset.key_value_metadata && !(key_value_metadata == rhs.key_value_metadata)) - return false; - if (!(data_page_offset == rhs.data_page_offset)) - return false; - if (__isset.index_page_offset != rhs.__isset.index_page_offset) - return false; - else if (__isset.index_page_offset && !(index_page_offset == rhs.index_page_offset)) - return false; - if (__isset.dictionary_page_offset != rhs.__isset.dictionary_page_offset) - return false; - else if (__isset.dictionary_page_offset && !(dictionary_page_offset == rhs.dictionary_page_offset)) - return false; - if (__isset.statistics != rhs.__isset.statistics) - return false; - else if (__isset.statistics && !(statistics == rhs.statistics)) - return false; - return true; - } - bool operator != (const ColumnMetaData &rhs) const { - return !(*this == rhs); - } - - bool operator < (const ColumnMetaData & ) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - -}; - -void swap(ColumnMetaData &a, ColumnMetaData &b); - -typedef struct _ColumnChunk__isset { - _ColumnChunk__isset() : file_path(false), meta_data(false) {} - bool file_path; - bool meta_data; -} _ColumnChunk__isset; - -class ColumnChunk { - public: - - static const char* ascii_fingerprint; // = "169FC47057EF3D82E2FACDDEC2641AE8"; - static const uint8_t binary_fingerprint[16]; // = {0x16,0x9F,0xC4,0x70,0x57,0xEF,0x3D,0x82,0xE2,0xFA,0xCD,0xDE,0xC2,0x64,0x1A,0xE8}; - - ColumnChunk() : file_path(), file_offset(0) { - } - - virtual ~ColumnChunk() throw() {} - - std::string file_path; - int64_t file_offset; - ColumnMetaData meta_data; - - _ColumnChunk__isset __isset; - - void __set_file_path(const std::string& val) { - file_path = val; - __isset.file_path = true; - } - - void __set_file_offset(const int64_t val) { - file_offset = val; - } - - void __set_meta_data(const ColumnMetaData& val) { - meta_data = val; - __isset.meta_data = true; - } - - bool operator == (const ColumnChunk & rhs) const - { - if (__isset.file_path != rhs.__isset.file_path) - return false; - else if (__isset.file_path && !(file_path == rhs.file_path)) - return false; - if (!(file_offset == rhs.file_offset)) - return false; - if (__isset.meta_data != rhs.__isset.meta_data) - return false; - else if (__isset.meta_data && !(meta_data == rhs.meta_data)) - return false; - return true; - } - bool operator != (const ColumnChunk &rhs) const { - return !(*this == rhs); - } - - bool operator < (const ColumnChunk & ) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - -}; - -void swap(ColumnChunk &a, ColumnChunk &b); - -typedef struct _RowGroup__isset { - _RowGroup__isset() : sorting_columns(false) {} - bool sorting_columns; -} _RowGroup__isset; - -class RowGroup { - public: - - static const char* ascii_fingerprint; // = "DC7968627FA826DDC4C6C9BE773586C9"; - static const uint8_t binary_fingerprint[16]; // = {0xDC,0x79,0x68,0x62,0x7F,0xA8,0x26,0xDD,0xC4,0xC6,0xC9,0xBE,0x77,0x35,0x86,0xC9}; - - RowGroup() : total_byte_size(0), num_rows(0) { - } - - virtual ~RowGroup() throw() {} - - std::vector columns; - int64_t total_byte_size; - int64_t num_rows; - std::vector sorting_columns; - - _RowGroup__isset __isset; - - void __set_columns(const std::vector & val) { - columns = val; - } - - void __set_total_byte_size(const int64_t val) { - total_byte_size = val; - } - - void __set_num_rows(const int64_t val) { - num_rows = val; - } - - void __set_sorting_columns(const std::vector & val) { - sorting_columns = val; - __isset.sorting_columns = true; - } - - bool operator == (const RowGroup & rhs) const - { - if (!(columns == rhs.columns)) - return false; - if (!(total_byte_size == rhs.total_byte_size)) - return false; - if (!(num_rows == rhs.num_rows)) - return false; - if (__isset.sorting_columns != rhs.__isset.sorting_columns) - return false; - else if (__isset.sorting_columns && !(sorting_columns == rhs.sorting_columns)) - return false; - return true; - } - bool operator != (const RowGroup &rhs) const { - return !(*this == rhs); - } - - bool operator < (const RowGroup & ) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - -}; - -void swap(RowGroup &a, RowGroup &b); - -typedef struct _FileMetaData__isset { - _FileMetaData__isset() : key_value_metadata(false), created_by(false) {} - bool key_value_metadata; - bool created_by; -} _FileMetaData__isset; - -class FileMetaData { - public: - - static const char* ascii_fingerprint; // = "44DC7D83A66D54A7B7892A985C4125C9"; - static const uint8_t binary_fingerprint[16]; // = {0x44,0xDC,0x7D,0x83,0xA6,0x6D,0x54,0xA7,0xB7,0x89,0x2A,0x98,0x5C,0x41,0x25,0xC9}; - - FileMetaData() : version(0), num_rows(0), created_by() { - } - - virtual ~FileMetaData() throw() {} - - int32_t version; - std::vector schema; - int64_t num_rows; - std::vector row_groups; - std::vector key_value_metadata; - std::string created_by; - - _FileMetaData__isset __isset; - - void __set_version(const int32_t val) { - version = val; - } - - void __set_schema(const std::vector & val) { - schema = val; - } - - void __set_num_rows(const int64_t val) { - num_rows = val; - } - - void __set_row_groups(const std::vector & val) { - row_groups = val; - } - - void __set_key_value_metadata(const std::vector & val) { - key_value_metadata = val; - __isset.key_value_metadata = true; - } - - void __set_created_by(const std::string& val) { - created_by = val; - __isset.created_by = true; - } - - bool operator == (const FileMetaData & rhs) const - { - if (!(version == rhs.version)) - return false; - if (!(schema == rhs.schema)) - return false; - if (!(num_rows == rhs.num_rows)) - return false; - if (!(row_groups == rhs.row_groups)) - return false; - if (__isset.key_value_metadata != rhs.__isset.key_value_metadata) - return false; - else if (__isset.key_value_metadata && !(key_value_metadata == rhs.key_value_metadata)) - return false; - if (__isset.created_by != rhs.__isset.created_by) - return false; - else if (__isset.created_by && !(created_by == rhs.created_by)) - return false; - return true; - } - bool operator != (const FileMetaData &rhs) const { - return !(*this == rhs); - } - - bool operator < (const FileMetaData & ) const; - - uint32_t read(::apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; - -}; - -void swap(FileMetaData &a, FileMetaData &b); - -} // namespace - -#endif http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt deleted file mode 100644 index 82725d7..0000000 --- a/src/CMakeLists.txt +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright 2012 Cloudera Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -add_library(Parquet STATIC - parquet.cc -) - -add_subdirectory(compression) http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/compression/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/src/compression/CMakeLists.txt b/src/compression/CMakeLists.txt deleted file mode 100644 index c8f0d2b..0000000 --- a/src/compression/CMakeLists.txt +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright 2012 Cloudera Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -add_library(ParquetCompression STATIC - lz4-codec.cc - snappy-codec.cc -) http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/compression/codec.h ---------------------------------------------------------------------- diff --git a/src/compression/codec.h b/src/compression/codec.h deleted file mode 100644 index 4ce0139..0000000 --- a/src/compression/codec.h +++ /dev/null @@ -1,72 +0,0 @@ -// Copyright 2012 Cloudera Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef PARQUET_COMPRESSION_CODEC_H -#define PARQUET_COMPRESSION_CODEC_H - -#include "parquet/parquet.h" - -#include -#include "gen-cpp/parquet_constants.h" -#include "gen-cpp/parquet_types.h" - -namespace parquet_cpp { - -class Codec { - public: - virtual ~Codec() {} - virtual void Decompress(int input_len, const uint8_t* input, - int output_len, uint8_t* output_buffer) = 0; - - virtual int Compress(int input_len, const uint8_t* input, - int output_buffer_len, uint8_t* output_buffer) = 0; - - virtual int MaxCompressedLen(int input_len, const uint8_t* input) = 0; - - virtual const char* name() const = 0; -}; - - -// Snappy codec. -class SnappyCodec : public Codec { - public: - virtual void Decompress(int input_len, const uint8_t* input, - int output_len, uint8_t* output_buffer); - - virtual int Compress(int input_len, const uint8_t* input, - int output_buffer_len, uint8_t* output_buffer); - - virtual int MaxCompressedLen(int input_len, const uint8_t* input); - - virtual const char* name() const { return "snappy"; } -}; - -// Lz4 codec. -class Lz4Codec : public Codec { - public: - virtual void Decompress(int input_len, const uint8_t* input, - int output_len, uint8_t* output_buffer); - - virtual int Compress(int input_len, const uint8_t* input, - int output_buffer_len, uint8_t* output_buffer); - - virtual int MaxCompressedLen(int input_len, const uint8_t* input); - - virtual const char* name() const { return "lz4"; } -}; - -} - -#endif - http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/compression/lz4-codec.cc ---------------------------------------------------------------------- diff --git a/src/compression/lz4-codec.cc b/src/compression/lz4-codec.cc deleted file mode 100644 index 8b8588c..0000000 --- a/src/compression/lz4-codec.cc +++ /dev/null @@ -1,38 +0,0 @@ -// Copyright 2012 Cloudera Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "codec.h" - -#include - -using namespace parquet_cpp; - -void Lz4Codec::Decompress(int input_len, const uint8_t* input, - int output_len, uint8_t* output_buffer) { - int n = LZ4_uncompress(reinterpret_cast(input), - reinterpret_cast(output_buffer), output_len); - if (n != input_len) { - throw ParquetException("Corrupt lz4 compressed data."); - } -} - -int Lz4Codec::MaxCompressedLen(int input_len, const uint8_t* input) { - return LZ4_compressBound(input_len); -} - -int Lz4Codec::Compress(int input_len, const uint8_t* input, - int output_buffer_len, uint8_t* output_buffer) { - return LZ4_compress(reinterpret_cast(input), - reinterpret_cast(output_buffer), input_len); -} http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/compression/snappy-codec.cc ---------------------------------------------------------------------- diff --git a/src/compression/snappy-codec.cc b/src/compression/snappy-codec.cc deleted file mode 100644 index 96d6559..0000000 --- a/src/compression/snappy-codec.cc +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright 2012 Cloudera Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "codec.h" - -#include - -using namespace parquet_cpp; - -void SnappyCodec::Decompress(int input_len, const uint8_t* input, - int output_len, uint8_t* output_buffer) { - if (!snappy::RawUncompress(reinterpret_cast(input), - static_cast(input_len), reinterpret_cast(output_buffer))) { - throw ParquetException("Corrupt snappy compressed data."); - } -} - -int SnappyCodec::MaxCompressedLen(int input_len, const uint8_t* input) { - return snappy::MaxCompressedLength(input_len); -} - -int SnappyCodec::Compress(int input_len, const uint8_t* input, - int output_buffer_len, uint8_t* output_buffer) { - size_t output_len; - snappy::RawCompress(reinterpret_cast(input), - static_cast(input_len), reinterpret_cast(output_buffer), - &output_len); - return output_len; -} http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/encodings/bool-encoding.h ---------------------------------------------------------------------- diff --git a/src/encodings/bool-encoding.h b/src/encodings/bool-encoding.h deleted file mode 100644 index 1cccd1d..0000000 --- a/src/encodings/bool-encoding.h +++ /dev/null @@ -1,47 +0,0 @@ -// Copyright 2012 Cloudera Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef PARQUET_BOOL_ENCODING_H -#define PARQUET_BOOL_ENCODING_H - -#include "encodings.h" - -namespace parquet_cpp { - -class BoolDecoder : public Decoder { - public: - BoolDecoder() : Decoder(parquet::Type::BOOLEAN, parquet::Encoding::PLAIN) { } - - virtual void SetData(int num_values, const uint8_t* data, int len) { - num_values_ = num_values; - decoder_ = impala::RleDecoder(data, len, 1); - } - - virtual int GetBool(bool* buffer, int max_values) { - max_values = std::min(max_values, num_values_); - for (int i = 0; i < max_values; ++i) { - if (!decoder_.Get(&buffer[i])) ParquetException::EofException(); - } - num_values_ -= max_values; - return max_values; - } - - private: - impala::RleDecoder decoder_; -}; - -} - -#endif - http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/encodings/delta-bit-pack-encoding.h ---------------------------------------------------------------------- diff --git a/src/encodings/delta-bit-pack-encoding.h b/src/encodings/delta-bit-pack-encoding.h deleted file mode 100644 index 12de07a..0000000 --- a/src/encodings/delta-bit-pack-encoding.h +++ /dev/null @@ -1,114 +0,0 @@ -// Copyright 2012 Cloudera Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef PARQUET_DELTA_BIT_PACK_ENCODING_H -#define PARQUET_DELTA_BIT_PACK_ENCODING_H - -#include "encodings.h" - -namespace parquet_cpp { - -class DeltaBitPackDecoder : public Decoder { - public: - DeltaBitPackDecoder(const parquet::Type::type& type) - : Decoder(type, parquet::Encoding::DELTA_BINARY_PACKED) { - if (type != parquet::Type::INT32 && type != parquet::Type::INT64) { - throw ParquetException("Delta bit pack encoding should only be for integer data."); - } - } - - virtual void SetData(int num_values, const uint8_t* data, int len) { - num_values_ = num_values; - decoder_ = impala::BitReader(data, len); - values_current_block_ = 0; - values_current_mini_block_ = 0; - } - - virtual int GetInt32(int32_t* buffer, int max_values) { - return GetInternal(buffer, max_values); - } - - virtual int GetInt64(int64_t* buffer, int max_values) { - return GetInternal(buffer, max_values); - } - - private: - void InitBlock() { - uint64_t block_size; - if (!decoder_.GetVlqInt(&block_size)) ParquetException::EofException(); - if (!decoder_.GetVlqInt(&num_mini_blocks_)) ParquetException::EofException(); - if (!decoder_.GetVlqInt(&values_current_block_)) { - ParquetException::EofException(); - } - if (!decoder_.GetZigZagVlqInt(&last_value_)) ParquetException::EofException(); - delta_bit_widths_.resize(num_mini_blocks_); - - if (!decoder_.GetZigZagVlqInt(&min_delta_)) ParquetException::EofException(); - for (int i = 0; i < num_mini_blocks_; ++i) { - if (!decoder_.GetAligned(1, &delta_bit_widths_[i])) { - ParquetException::EofException(); - } - } - values_per_mini_block_ = block_size / num_mini_blocks_; - mini_block_idx_ = 0; - delta_bit_width_ = delta_bit_widths_[0]; - values_current_mini_block_ = values_per_mini_block_; - } - - template - int GetInternal(T* buffer, int max_values) { - max_values = std::min(max_values, num_values_); - for (int i = 0; i < max_values; ++i) { - if (UNLIKELY(values_current_mini_block_ == 0)) { - ++mini_block_idx_; - if (mini_block_idx_ < delta_bit_widths_.size()) { - delta_bit_width_ = delta_bit_widths_[mini_block_idx_]; - values_current_mini_block_ = values_per_mini_block_; - } else { - InitBlock(); - buffer[i] = last_value_; - continue; - } - } - - // TODO: the key to this algorithm is to decode the entire miniblock at once. - int64_t delta; - if (!decoder_.GetValue(delta_bit_width_, &delta)) ParquetException::EofException(); - delta += min_delta_; - last_value_ += delta; - buffer[i] = last_value_; - --values_current_mini_block_; - } - num_values_ -= max_values; - return max_values; - } - - impala::BitReader decoder_; - uint64_t values_current_block_; - uint64_t num_mini_blocks_; - uint64_t values_per_mini_block_; - uint64_t values_current_mini_block_; - - int64_t min_delta_; - int mini_block_idx_; - std::vector delta_bit_widths_; - int delta_bit_width_; - - int64_t last_value_; -}; - -} - -#endif - http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/encodings/delta-byte-array-encoding.h ---------------------------------------------------------------------- diff --git a/src/encodings/delta-byte-array-encoding.h b/src/encodings/delta-byte-array-encoding.h deleted file mode 100644 index cdbbfde..0000000 --- a/src/encodings/delta-byte-array-encoding.h +++ /dev/null @@ -1,73 +0,0 @@ -// Copyright 2012 Cloudera Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef PARQUET_DELTA_BYTE_ARRAY_ENCODING_H -#define PARQUET_DELTA_BYTE_ARRAY_ENCODING_H - -#include "encodings.h" - -namespace parquet_cpp { - -class DeltaByteArrayDecoder : public Decoder { - public: - DeltaByteArrayDecoder() - : Decoder(parquet::Type::BYTE_ARRAY, parquet::Encoding::DELTA_BYTE_ARRAY), - prefix_len_decoder_(parquet::Type::INT32), - suffix_decoder_() { - } - - virtual void SetData(int num_values, const uint8_t* data, int len) { - num_values_ = num_values; - if (len == 0) return; - int prefix_len_length = *reinterpret_cast(data); - data += 4; - len -= 4; - prefix_len_decoder_.SetData(num_values, data, prefix_len_length); - data += prefix_len_length; - len -= prefix_len_length; - suffix_decoder_.SetData(num_values, data, len); - } - - // TODO: this doesn't work and requires memory management. We need to allocate - // new strings to store the results. - virtual int GetByteArray(ByteArray* buffer, int max_values) { - max_values = std::min(max_values, num_values_); - for (int i = 0; i < max_values; ++i) { - int prefix_len = 0; - prefix_len_decoder_.GetInt32(&prefix_len, 1); - ByteArray suffix; - suffix_decoder_.GetByteArray(&suffix, 1); - buffer[i].len = prefix_len + suffix.len; - - uint8_t* result = reinterpret_cast(malloc(buffer[i].len)); - memcpy(result, last_value_.ptr, prefix_len); - memcpy(result + prefix_len, suffix.ptr, suffix.len); - - buffer[i].ptr = result; - last_value_ = buffer[i]; - } - num_values_ -= max_values; - return max_values; - } - - private: - DeltaBitPackDecoder prefix_len_decoder_; - DeltaLengthByteArrayDecoder suffix_decoder_; - ByteArray last_value_; -}; - -} - -#endif - http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/encodings/delta-length-byte-array-encoding.h ---------------------------------------------------------------------- diff --git a/src/encodings/delta-length-byte-array-encoding.h b/src/encodings/delta-length-byte-array-encoding.h deleted file mode 100644 index d6f018c..0000000 --- a/src/encodings/delta-length-byte-array-encoding.h +++ /dev/null @@ -1,62 +0,0 @@ -// Copyright 2012 Cloudera Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef PARQUET_DELTA_LENGTH_BYTE_ARRAY_ENCODING_H -#define PARQUET_DELTA_LENGTH_BYTE_ARRAY_ENCODING_H - -#include "encodings.h" - -namespace parquet_cpp { - -class DeltaLengthByteArrayDecoder : public Decoder { - public: - DeltaLengthByteArrayDecoder() - : Decoder(parquet::Type::BYTE_ARRAY, parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY), - len_decoder_(parquet::Type::INT32) { - } - - virtual void SetData(int num_values, const uint8_t* data, int len) { - num_values_ = num_values; - if (len == 0) return; - int total_lengths_len = *reinterpret_cast(data); - data += 4; - len_decoder_.SetData(num_values, data, total_lengths_len); - data_ = data + total_lengths_len; - len_ = len - 4 - total_lengths_len; - } - - virtual int GetByteArray(ByteArray* buffer, int max_values) { - max_values = std::min(max_values, num_values_); - int lengths[max_values]; - len_decoder_.GetInt32(lengths, max_values); - for (int i = 0; i < max_values; ++i) { - buffer[i].len = lengths[i]; - buffer[i].ptr = data_; - data_ += lengths[i]; - len_ -= lengths[i]; - } - num_values_ -= max_values; - return max_values; - } - - private: - DeltaBitPackDecoder len_decoder_; - const uint8_t* data_; - int len_; -}; - -} - -#endif - http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/encodings/dictionary-encoding.h ---------------------------------------------------------------------- diff --git a/src/encodings/dictionary-encoding.h b/src/encodings/dictionary-encoding.h deleted file mode 100644 index ba1e302..0000000 --- a/src/encodings/dictionary-encoding.h +++ /dev/null @@ -1,146 +0,0 @@ -// Copyright 2012 Cloudera Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef PARQUET_DICTIONARY_ENCODING_H -#define PARQUET_DICTIONARY_ENCODING_H - -#include "encodings.h" - -namespace parquet_cpp { - -class DictionaryDecoder : public Decoder { - public: - // Initializes the dictionary with values from 'dictionary'. The data in dictionary - // is not guaranteed to persist in memory after this call so the dictionary decoder - // needs to copy the data out if necessary. - DictionaryDecoder(const parquet::Type::type& type, Decoder* dictionary) - : Decoder(type, parquet::Encoding::RLE_DICTIONARY) { - int num_dictionary_values = dictionary->values_left(); - switch (type) { - case parquet::Type::BOOLEAN: - throw ParquetException("Boolean cols should not be dictionary encoded."); - - case parquet::Type::INT32: - int32_dictionary_.resize(num_dictionary_values); - dictionary->GetInt32(&int32_dictionary_[0], num_dictionary_values); - break; - case parquet::Type::INT64: - int64_dictionary_.resize(num_dictionary_values); - dictionary->GetInt64(&int64_dictionary_[0], num_dictionary_values); - break; - case parquet::Type::FLOAT: - float_dictionary_.resize(num_dictionary_values); - dictionary->GetFloat(&float_dictionary_[0], num_dictionary_values); - break; - case parquet::Type::DOUBLE: - double_dictionary_.resize(num_dictionary_values); - dictionary->GetDouble(&double_dictionary_[0], num_dictionary_values); - break; - case parquet::Type::BYTE_ARRAY: { - byte_array_dictionary_.resize(num_dictionary_values); - dictionary->GetByteArray(&byte_array_dictionary_[0], num_dictionary_values); - int total_size = 0; - for (int i = 0; i < num_dictionary_values; ++i) { - total_size += byte_array_dictionary_[i].len; - } - byte_array_data_.resize(total_size); - int offset = 0; - for (int i = 0; i < num_dictionary_values; ++i) { - memcpy(&byte_array_data_[offset], - byte_array_dictionary_[i].ptr, byte_array_dictionary_[i].len); - byte_array_dictionary_[i].ptr = &byte_array_data_[offset]; - offset += byte_array_dictionary_[i].len; - } - break; - } - default: - ParquetException::NYI("Unsupported dictionary type"); - } - } - - virtual void SetData(int num_values, const uint8_t* data, int len) { - num_values_ = num_values; - if (len == 0) return; - uint8_t bit_width = *data; - ++data; - --len; - idx_decoder_ = impala::RleDecoder(data, len, bit_width); - } - - virtual int GetInt32(int32_t* buffer, int max_values) { - max_values = std::min(max_values, num_values_); - for (int i = 0; i < max_values; ++i) { - buffer[i] = int32_dictionary_[index()]; - } - return max_values; - } - - virtual int GetInt64(int64_t* buffer, int max_values) { - max_values = std::min(max_values, num_values_); - for (int i = 0; i < max_values; ++i) { - buffer[i] = int64_dictionary_[index()]; - } - return max_values; - } - - virtual int GetFloat(float* buffer, int max_values) { - max_values = std::min(max_values, num_values_); - for (int i = 0; i < max_values; ++i) { - buffer[i] = float_dictionary_[index()]; - } - return max_values; - } - - virtual int GetDouble(double* buffer, int max_values) { - max_values = std::min(max_values, num_values_); - for (int i = 0; i < max_values; ++i) { - buffer[i] = double_dictionary_[index()]; - } - return max_values; - } - - virtual int GetByteArray(ByteArray* buffer, int max_values) { - max_values = std::min(max_values, num_values_); - for (int i = 0; i < max_values; ++i) { - buffer[i] = byte_array_dictionary_[index()]; - } - return max_values; - } - - private: - int index() { - int idx = 0; - if (!idx_decoder_.Get(&idx)) ParquetException::EofException(); - --num_values_; - return idx; - } - - // Only one is set. - std::vector int32_dictionary_; - std::vector int64_dictionary_; - std::vector float_dictionary_; - std::vector double_dictionary_; - std::vector byte_array_dictionary_; - - // Data that contains the byte array data (byte_array_dictionary_ just has the - // pointers). - std::vector byte_array_data_; - - impala::RleDecoder idx_decoder_; -}; - -} - -#endif - http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/encodings/encodings.h ---------------------------------------------------------------------- diff --git a/src/encodings/encodings.h b/src/encodings/encodings.h deleted file mode 100644 index e888c1f..0000000 --- a/src/encodings/encodings.h +++ /dev/null @@ -1,83 +0,0 @@ -// Copyright 2012 Cloudera Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef PARQUET_ENCODINGS_H -#define PARQUET_ENCODINGS_H - -#include -#include "gen-cpp/parquet_constants.h" -#include "gen-cpp/parquet_types.h" - -#include "impala/rle-encoding.h" -#include "impala/bit-stream-utils.inline.h" - -namespace parquet_cpp { - -class Decoder { - public: - virtual ~Decoder() {} - - // Sets the data for a new page. This will be called multiple times on the same - // decoder and should reset all internal state. - virtual void SetData(int num_values, const uint8_t* data, int len) = 0; - - // Subclasses should override the ones they support. In each of these functions, - // the decoder would decode put to 'max_values', storing the result in 'buffer'. - // The function returns the number of values decoded, which should be max_values - // except for end of the current data page. - virtual int GetBool(bool* buffer, int max_values) { - throw ParquetException("Decoder does not implement this type."); - } - virtual int GetInt32(int32_t* buffer, int max_values) { - throw ParquetException("Decoder does not implement this type."); - } - virtual int GetInt64(int64_t* buffer, int max_values) { - throw ParquetException("Decoder does not implement this type."); - } - virtual int GetFloat(float* buffer, int max_values) { - throw ParquetException("Decoder does not implement this type."); - } - virtual int GetDouble(double* buffer, int max_values) { - throw ParquetException("Decoder does not implement this type."); - } - virtual int GetByteArray(ByteArray* buffer, int max_values) { - throw ParquetException("Decoder does not implement this type."); - } - - // Returns the number of values left (for the last call to SetData()). This is - // the number of values left in this page. - int values_left() const { return num_values_; } - - const parquet::Encoding::type encoding() const { return encoding_; } - - protected: - Decoder(const parquet::Type::type& type, const parquet::Encoding::type& encoding) - : type_(type), encoding_(encoding), num_values_(0) {} - - const parquet::Type::type type_; - const parquet::Encoding::type encoding_; - int num_values_; -}; - -} - -#include "bool-encoding.h" -#include "plain-encoding.h" -#include "dictionary-encoding.h" -#include "delta-bit-pack-encoding.h" -#include "delta-length-byte-array-encoding.h" -#include "delta-byte-array-encoding.h" - -#endif - http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/encodings/plain-encoding.h ---------------------------------------------------------------------- diff --git a/src/encodings/plain-encoding.h b/src/encodings/plain-encoding.h deleted file mode 100644 index 511dbdd..0000000 --- a/src/encodings/plain-encoding.h +++ /dev/null @@ -1,82 +0,0 @@ -// Copyright 2012 Cloudera Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef PARQUET_PLAIN_ENCODING_H -#define PARQUET_PLAIN_ENCODING_H - -#include "encodings.h" - -namespace parquet_cpp { - -class PlainDecoder : public Decoder { - public: - PlainDecoder(const parquet::Type::type& type) - : Decoder(type, parquet::Encoding::PLAIN), data_(NULL), len_(0) { - } - - virtual void SetData(int num_values, const uint8_t* data, int len) { - num_values_ = num_values; - data_ = data; - len_ = len; - } - - int GetValues(void* buffer, int max_values, int byte_size) { - max_values = std::min(max_values, num_values_); - int size = max_values * byte_size; - if (len_ < size) ParquetException::EofException(); - memcpy(buffer, data_, size); - data_ += size; - len_ -= size; - num_values_ -= max_values; - return max_values; - } - - virtual int GetInt32(int32_t* buffer, int max_values) { - return GetValues(buffer, max_values, sizeof(int32_t)); - } - - virtual int GetInt64(int64_t* buffer, int max_values) { - return GetValues(buffer, max_values, sizeof(int64_t)); - } - - virtual int GetFloat(float* buffer, int max_values) { - return GetValues(buffer, max_values, sizeof(float)); - } - - virtual int GetDouble(double* buffer, int max_values) { - return GetValues(buffer, max_values, sizeof(double)); - } - - virtual int GetByteArray(ByteArray* buffer, int max_values) { - max_values = std::min(max_values, num_values_); - for (int i = 0; i < max_values; ++i) { - buffer[i].len = *reinterpret_cast(data_); - if (len_ < sizeof(uint32_t) + buffer[i].len) ParquetException::EofException(); - buffer[i].ptr = data_ + sizeof(uint32_t); - data_ += sizeof(uint32_t) + buffer[i].len; - len_ -= sizeof(uint32_t) + buffer[i].len; - } - num_values_ -= max_values; - return max_values; - } - - private: - const uint8_t* data_; - int len_; -}; - -} - -#endif - http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/impala/bit-stream-utils.h ---------------------------------------------------------------------- diff --git a/src/impala/bit-stream-utils.h b/src/impala/bit-stream-utils.h deleted file mode 100644 index 5eba254..0000000 --- a/src/impala/bit-stream-utils.h +++ /dev/null @@ -1,145 +0,0 @@ -// Copyright 2012 Cloudera Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#ifndef IMPALA_UTIL_BIT_STREAM_UTILS_H -#define IMPALA_UTIL_BIT_STREAM_UTILS_H - -#include -#include -#include "impala/compiler-util.h" -#include "impala/bit-util.h" -#include "impala/logging.h" - -namespace impala { - -// Utility class to write bit/byte streams. This class can write data to either be -// bit packed or byte aligned (and a single stream that has a mix of both). -// This class does not allocate memory. -class BitWriter { - public: - // buffer: buffer to write bits to. Buffer should be preallocated with - // 'buffer_len' bytes. - BitWriter(uint8_t* buffer, int buffer_len) : - buffer_(buffer), - max_bytes_(buffer_len) { - Clear(); - } - - void Clear() { - buffered_values_ = 0; - byte_offset_ = 0; - bit_offset_ = 0; - } - - // The number of current bytes written, including the current byte (i.e. may include a - // fraction of a byte). Includes buffered values. - int bytes_written() const { return byte_offset_ + BitUtil::Ceil(bit_offset_, 8); } - uint8_t* buffer() const { return buffer_; } - int buffer_len() const { return max_bytes_; } - - // Writes a value to buffered_values_, flushing to buffer_ if necessary. This is bit - // packed. Returns false if there was not enough space. num_bits must be <= 32. - bool PutValue(uint64_t v, int num_bits); - - // Writes v to the next aligned byte using num_bytes. If T is larger than num_bytes, the - // extra high-order bytes will be ignored. Returns false if there was not enough space. - template - bool PutAligned(T v, int num_bytes); - - // Write a Vlq encoded int to the buffer. Returns false if there was not enough - // room. The value is written byte aligned. - // For more details on vlq: - // en.wikipedia.org/wiki/Variable-length_quantity - bool PutVlqInt(uint32_t v); - bool PutZigZagVlqInt(int32_t v); - - // Get a pointer to the next aligned byte and advance the underlying buffer - // by num_bytes. - // Returns NULL if there was not enough space. - uint8_t* GetNextBytePtr(int num_bytes = 1); - - // Flushes all buffered values to the buffer. Call this when done writing to the buffer. - // If 'align' is true, buffered_values_ is reset and any future writes will be written - // to the next byte boundary. - void Flush(bool align=false); - - private: - uint8_t* buffer_; - int max_bytes_; - - // Bit-packed values are initially written to this variable before being memcpy'd to - // buffer_. This is faster than writing values byte by byte directly to buffer_. - uint64_t buffered_values_; - - int byte_offset_; // Offset in buffer_ - int bit_offset_; // Offset in buffered_values_ -}; - -// Utility class to read bit/byte stream. This class can read bits or bytes -// that are either byte aligned or not. It also has utilities to read multiple -// bytes in one read (e.g. encoded int). -class BitReader { - public: - // 'buffer' is the buffer to read from. The buffer's length is 'buffer_len'. - BitReader(const uint8_t* buffer, int buffer_len) : - buffer_(buffer), - max_bytes_(buffer_len), - byte_offset_(0), - bit_offset_(0) { - int num_bytes = std::min(8, max_bytes_ - byte_offset_); - memcpy(&buffered_values_, buffer_ + byte_offset_, num_bytes); - } - - BitReader() : buffer_(NULL), max_bytes_(0) {} - - // Gets the next value from the buffer. Returns true if 'v' could be read or false if - // there are not enough bytes left. num_bits must be <= 32. - template - bool GetValue(int num_bits, T* v); - - // Reads a 'num_bytes'-sized value from the buffer and stores it in 'v'. T needs to be a - // little-endian native type and big enough to store 'num_bytes'. The value is assumed - // to be byte-aligned so the stream will be advanced to the start of the next byte - // before 'v' is read. Returns false if there are not enough bytes left. - template - bool GetAligned(int num_bytes, T* v); - - // Reads a vlq encoded int from the stream. The encoded int must start at the - // beginning of a byte. Return false if there were not enough bytes in the buffer. - bool GetVlqInt(uint64_t* v); - bool GetZigZagVlqInt(int64_t* v); - - // Returns the number of bytes left in the stream, not including the current byte (i.e., - // there may be an additional fraction of a byte). - int bytes_left() { return max_bytes_ - (byte_offset_ + BitUtil::Ceil(bit_offset_, 8)); } - - // Maximum byte length of a vlq encoded int - static const int MAX_VLQ_BYTE_LEN = 5; - - private: - const uint8_t* buffer_; - int max_bytes_; - - // Bytes are memcpy'd from buffer_ and values are read from this variable. This is - // faster than reading values byte by byte directly from buffer_. - uint64_t buffered_values_; - - int byte_offset_; // Offset in buffer_ - int bit_offset_; // Offset in buffered_values_ -}; - -} - -#endif http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/impala/bit-stream-utils.inline.h ---------------------------------------------------------------------- diff --git a/src/impala/bit-stream-utils.inline.h b/src/impala/bit-stream-utils.inline.h deleted file mode 100644 index d84ef4d..0000000 --- a/src/impala/bit-stream-utils.inline.h +++ /dev/null @@ -1,164 +0,0 @@ -// Copyright 2012 Cloudera Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#ifndef IMPALA_UTIL_BIT_STREAM_UTILS_INLINE_H -#define IMPALA_UTIL_BIT_STREAM_UTILS_INLINE_H - -#include "impala/bit-stream-utils.h" - -namespace impala { - -inline bool BitWriter::PutValue(uint64_t v, int num_bits) { - // TODO: revisit this limit if necessary (can be raised to 64 by fixing some edge cases) - DCHECK_LE(num_bits, 32); - DCHECK_EQ(v >> num_bits, 0) << "v = " << v << ", num_bits = " << num_bits; - - if (UNLIKELY(byte_offset_ * 8 + bit_offset_ + num_bits > max_bytes_ * 8)) return false; - - buffered_values_ |= v << bit_offset_; - bit_offset_ += num_bits; - - if (UNLIKELY(bit_offset_ >= 64)) { - // Flush buffered_values_ and write out bits of v that did not fit - memcpy(buffer_ + byte_offset_, &buffered_values_, 8); - buffered_values_ = 0; - byte_offset_ += 8; - bit_offset_ -= 64; - buffered_values_ = v >> (num_bits - bit_offset_); - } - DCHECK_LT(bit_offset_, 64); - return true; -} - -inline void BitWriter::Flush(bool align) { - int num_bytes = BitUtil::Ceil(bit_offset_, 8); - DCHECK_LE(byte_offset_ + num_bytes, max_bytes_); - memcpy(buffer_ + byte_offset_, &buffered_values_, num_bytes); - - if (align) { - buffered_values_ = 0; - byte_offset_ += num_bytes; - bit_offset_ = 0; - } -} - -inline uint8_t* BitWriter::GetNextBytePtr(int num_bytes) { - Flush(/* align */ true); - DCHECK_LE(byte_offset_, max_bytes_); - if (byte_offset_ + num_bytes > max_bytes_) return NULL; - uint8_t* ptr = buffer_ + byte_offset_; - byte_offset_ += num_bytes; - return ptr; -} - -template -inline bool BitWriter::PutAligned(T val, int num_bytes) { - uint8_t* ptr = GetNextBytePtr(num_bytes); - if (ptr == NULL) return false; - memcpy(ptr, &val, num_bytes); - return true; -} - -inline bool BitWriter::PutVlqInt(uint32_t v) { - bool result = true; - while ((v & 0xFFFFFF80) != 0L) { - result &= PutAligned((v & 0x7F) | 0x80, 1); - v >>= 7; - } - result &= PutAligned(v & 0x7F, 1); - return result; -} - -inline bool BitWriter::PutZigZagVlqInt(int32_t v) { - uint32_t u = (v << 1) ^ (v >> 31); - return PutVlqInt(u); -} - -template -inline bool BitReader::GetValue(int num_bits, T* v) { - // TODO: revisit this limit if necessary - DCHECK_LE(num_bits, 32); - DCHECK_LE(num_bits, sizeof(T) * 8); - - if (UNLIKELY(byte_offset_ * 8 + bit_offset_ + num_bits > max_bytes_ * 8)) return false; - - *v = BitUtil::TrailingBits(buffered_values_, bit_offset_ + num_bits) >> bit_offset_; - - bit_offset_ += num_bits; - if (bit_offset_ >= 64) { - byte_offset_ += 8; - bit_offset_ -= 64; - - int bytes_remaining = max_bytes_ - byte_offset_; - if (LIKELY(bytes_remaining >= 8)) { - memcpy(&buffered_values_, buffer_ + byte_offset_, 8); - } else { - memcpy(&buffered_values_, buffer_ + byte_offset_, bytes_remaining); - } - - // Read bits of v that crossed into new buffered_values_ - *v |= BitUtil::TrailingBits(buffered_values_, bit_offset_) - << (num_bits - bit_offset_); - } - DCHECK_LE(bit_offset_, 64); - return true; -} - -template -inline bool BitReader::GetAligned(int num_bytes, T* v) { - DCHECK_LE(num_bytes, sizeof(T)); - int bytes_read = BitUtil::Ceil(bit_offset_, 8); - if (UNLIKELY(byte_offset_ + bytes_read + num_bytes > max_bytes_)) return false; - - // Advance byte_offset to next unread byte and read num_bytes - byte_offset_ += bytes_read; - memcpy(v, buffer_ + byte_offset_, num_bytes); - byte_offset_ += num_bytes; - - // Reset buffered_values_ - bit_offset_ = 0; - int bytes_remaining = max_bytes_ - byte_offset_; - if (LIKELY(bytes_remaining >= 8)) { - memcpy(&buffered_values_, buffer_ + byte_offset_, 8); - } else { - memcpy(&buffered_values_, buffer_ + byte_offset_, bytes_remaining); - } - return true; -} - -inline bool BitReader::GetVlqInt(uint64_t* v) { - *v = 0; - int shift = 0; - int num_bytes = 0; - uint8_t byte = 0; - do { - if (!GetAligned(1, &byte)) return false; - *v |= (byte & 0x7F) << shift; - shift += 7; - DCHECK_LE(++num_bytes, MAX_VLQ_BYTE_LEN); - } while ((byte & 0x80) != 0); - return true; -} - -inline bool BitReader::GetZigZagVlqInt(int64_t* v) { - uint64_t u; - if (!GetVlqInt(&u)) return false; - *reinterpret_cast(v) = (u >> 1) ^ -(u & 1); - return true; -} - -} - -#endif http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/impala/bit-util.h ---------------------------------------------------------------------- diff --git a/src/impala/bit-util.h b/src/impala/bit-util.h deleted file mode 100644 index c2b6055..0000000 --- a/src/impala/bit-util.h +++ /dev/null @@ -1,174 +0,0 @@ -// Copyright 2012 Cloudera Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#ifndef IMPALA_BIT_UTIL_H -#define IMPALA_BIT_UTIL_H - -#if defined(__APPLE__) - #include -#else - #include -#endif - -#include "impala/compiler-util.h" -#include "impala/logging.h" - -namespace impala { - -// Utility class to do standard bit tricks -// TODO: is this in boost or something else like that? -class BitUtil { - public: - // Returns the ceil of value/divisor - static inline int Ceil(int value, int divisor) { - return value / divisor + (value % divisor != 0); - } - - // Returns 'value' rounded up to the nearest multiple of 'factor' - static inline int RoundUp(int value, int factor) { - return (value + (factor - 1)) / factor * factor; - } - - // Returns 'value' rounded down to the nearest multiple of 'factor' - static inline int RoundDown(int value, int factor) { - return (value / factor) * factor; - } - - // Returns the number of set bits in x - static inline int Popcount(uint64_t x) { - int count = 0; - for (; x != 0; ++count) x &= x-1; - return count; - } - - // Returns the 'num_bits' least-significant bits of 'v'. - static inline uint64_t TrailingBits(uint64_t v, int num_bits) { - if (UNLIKELY(num_bits == 0)) return 0; - if (UNLIKELY(num_bits >= 64)) return v; - int n = 64 - num_bits; - return (v << n) >> n; - } - - // Returns ceil(log2(x)). - // TODO: this could be faster if we use __builtin_clz. Fix this if this ever shows up - // in a hot path. - static inline int Log2(uint64_t x) { - if (x == 0) return 0; - // Compute result = ceil(log2(x)) - // = floor(log2(x - 1)) + 1, for x > 1 - // by finding the position of the most significant bit (1-indexed) of x - 1 - // (floor(log2(n)) = MSB(n) (0-indexed)) - --x; - int result = 1; - while (x >>= 1) ++result; - return result; - } - - // Returns the minimum number of bits needed to represent the value of 'x' - static inline int NumRequiredBits(uint64_t x) { - for (int i = 63; i >= 0; --i) { - if (x & 1L << i) return i + 1; - } - return 0; - } - - // Swaps the byte order (i.e. endianess) - static inline int64_t ByteSwap(int64_t value) { - return __builtin_bswap64(value); - } - static inline uint64_t ByteSwap(uint64_t value) { - return static_cast(__builtin_bswap64(value)); - } - static inline int32_t ByteSwap(int32_t value) { - return __builtin_bswap32(value); - } - static inline uint32_t ByteSwap(uint32_t value) { - return static_cast(__builtin_bswap32(value)); - } - static inline int16_t ByteSwap(int16_t value) { - return (((value >> 8) & 0xff) | ((value & 0xff) << 8)); - } - static inline uint16_t ByteSwap(uint16_t value) { - return static_cast(ByteSwap(static_cast(value))); - } - - // Write the swapped bytes into dst. Src and st cannot overlap. - static inline void ByteSwap(void* dst, const void* src, int len) { - switch (len) { - case 1: - *reinterpret_cast(dst) = *reinterpret_cast(src); - return; - case 2: - *reinterpret_cast(dst) = - ByteSwap(*reinterpret_cast(src)); - return; - case 4: - *reinterpret_cast(dst) = - ByteSwap(*reinterpret_cast(src)); - return; - case 8: - *reinterpret_cast(dst) = - ByteSwap(*reinterpret_cast(src)); - return; - default: break; - } - - uint8_t* d = reinterpret_cast(dst); - const uint8_t* s = reinterpret_cast(src); - for (int i = 0; i < len; ++i) { - d[i] = s[len - i - 1]; - } - } - - // Converts to big endian format (if not already in big endian) from the - // machine's native endian format. -#if __BYTE_ORDER == __LITTLE_ENDIAN - static inline int64_t ToBigEndian(int64_t value) { return ByteSwap(value); } - static inline uint64_t ToBigEndian(uint64_t value) { return ByteSwap(value); } - static inline int32_t ToBigEndian(int32_t value) { return ByteSwap(value); } - static inline uint32_t ToBigEndian(uint32_t value) { return ByteSwap(value); } - static inline int16_t ToBigEndian(int16_t value) { return ByteSwap(value); } - static inline uint16_t ToBigEndian(uint16_t value) { return ByteSwap(value); } -#else - static inline int64_t ToBigEndian(int64_t val) { return val; } - static inline uint64_t ToBigEndian(uint64_t val) { return val; } - static inline int32_t ToBigEndian(int32_t val) { return val; } - static inline uint32_t ToBigEndian(uint32_t val) { return val; } - static inline int16_t ToBigEndian(int16_t val) { return val; } - static inline uint16_t ToBigEndian(uint16_t val) { return val; } -#endif - - // Converts from big endian format to the machine's native endian format. -#if __BYTE_ORDER == __LITTLE_ENDIAN - static inline int64_t FromBigEndian(int64_t value) { return ByteSwap(value); } - static inline uint64_t FromBigEndian(uint64_t value) { return ByteSwap(value); } - static inline int32_t FromBigEndian(int32_t value) { return ByteSwap(value); } - static inline uint32_t FromBigEndian(uint32_t value) { return ByteSwap(value); } - static inline int16_t FromBigEndian(int16_t value) { return ByteSwap(value); } - static inline uint16_t FromBigEndian(uint16_t value) { return ByteSwap(value); } -#else - static inline int64_t FromBigEndian(int64_t val) { return val; } - static inline uint64_t FromBigEndian(uint64_t val) { return val; } - static inline int32_t FromBigEndian(int32_t val) { return val; } - static inline uint32_t FromBigEndian(uint32_t val) { return val; } - static inline int16_t FromBigEndian(int16_t val) { return val; } - static inline uint16_t FromBigEndian(uint16_t val) { return val; } -#endif - -}; - -} - -#endif http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/impala/compiler-util.h ---------------------------------------------------------------------- diff --git a/src/impala/compiler-util.h b/src/impala/compiler-util.h deleted file mode 100644 index 6b25cdf..0000000 --- a/src/impala/compiler-util.h +++ /dev/null @@ -1,38 +0,0 @@ -// Copyright 2012 Cloudera Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#ifndef IMPALA_COMMON_COMPILER_UTIL_H -#define IMPALA_COMMON_COMPILER_UTIL_H - -// Compiler hint that this branch is likely or unlikely to -// be taken. Take from the "What all programmers should know -// about memory" paper. -// example: if (LIKELY(size > 0)) { ... } -// example: if (UNLIKELY(!status.ok())) { ... } -#ifdef LIKELY -#undef LIKELY -#endif - -#ifdef UNLIKELY -#undef UNLIKELY -#endif - -#define LIKELY(expr) __builtin_expect(!!(expr), 1) -#define UNLIKELY(expr) __builtin_expect(!!(expr), 0) - -#define PREFETCH(addr) __builtin_prefetch(addr) - -#endif - http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/337cf584/src/impala/logging.h ---------------------------------------------------------------------- diff --git a/src/impala/logging.h b/src/impala/logging.h deleted file mode 100644 index fc130f3..0000000 --- a/src/impala/logging.h +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright 2012 Cloudera Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#ifndef IMPALA_COMMON_LOGGING_H -#define IMPALA_COMMON_LOGGING_H - -#include - -#define DCHECK(condition) while(false) std::cout -#define DCHECK_EQ(a, b) while(false) std::cout -#define DCHECK_NE(a, b) while(false) std::cout -#define DCHECK_GT(a, b) while(false) std::cout -#define DCHECK_LT(a, b) while(false) std::cout -#define DCHECK_GE(a, b) while(false) std::cout -#define DCHECK_LE(a, b) while(false) std::cout -// Similar to how glog defines DCHECK for release. -#define LOG(level) while(false) std::cout - -#endif