parquet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From u..@apache.org
Subject [parquet-cpp] branch master updated: PARQUET-1358: index_page_offset should be unset as it is not supported
Date Thu, 26 Jul 2018 21:12:19 GMT
This is an automated email from the ASF dual-hosted git repository.

uwe pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-cpp.git


The following commit(s) were added to refs/heads/master by this push:
     new c246da9  PARQUET-1358: index_page_offset should be unset as it is not supported
c246da9 is described below

commit c246da995edf50de5349525996ea321c15facbf5
Author: Korn, Uwe <Uwe.Korn@blue-yonder.com>
AuthorDate: Thu Jul 26 23:12:11 2018 +0200

    PARQUET-1358: index_page_offset should be unset as it is not supported
    
    Author: Korn, Uwe <Uwe.Korn@blue-yonder.com>
    
    Closes #480 from xhochy/PARQUET-1358 and squashes the following commits:
    
    dcf9a94 [Korn, Uwe] PARQUET-1358: index_page_offset should be unset as it is not supported
---
 src/parquet/column_writer.cc       |  4 ++--
 src/parquet/file-serialize-test.cc |  1 +
 src/parquet/metadata.cc            | 12 +++++++++++-
 src/parquet/metadata.h             |  1 +
 4 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/src/parquet/column_writer.cc b/src/parquet/column_writer.cc
index b3ff8c3..7d47d3f 100644
--- a/src/parquet/column_writer.cc
+++ b/src/parquet/column_writer.cc
@@ -180,8 +180,8 @@ class SerializedPageWriter : public PageWriter {
   }
 
   void Close(bool has_dictionary, bool fallback) override {
-    // index_page_offset = 0 since they are not supported
-    metadata_->Finish(num_values_, dictionary_page_offset_, 0, data_page_offset_,
+    // index_page_offset = -1 since they are not supported
+    metadata_->Finish(num_values_, dictionary_page_offset_, -1, data_page_offset_,
                       total_compressed_size_, total_uncompressed_size_, has_dictionary,
                       fallback);
 
diff --git a/src/parquet/file-serialize-test.cc b/src/parquet/file-serialize-test.cc
index 16a7c4f..31d2bd4 100644
--- a/src/parquet/file-serialize-test.cc
+++ b/src/parquet/file-serialize-test.cc
@@ -98,6 +98,7 @@ class TestSerialize : public PrimitiveTypedTest<TestType> {
       int64_t values_read;
 
       for (int i = 0; i < num_columns_; ++i) {
+        ASSERT_FALSE(rg_reader->metadata()->ColumnChunk(i)->has_index_page());
         std::vector<int16_t> def_levels_out(rows_per_rowgroup_);
         std::vector<int16_t> rep_levels_out(rows_per_rowgroup_);
         auto col_reader =
diff --git a/src/parquet/metadata.cc b/src/parquet/metadata.cc
index 49999a4..d9c5d29 100644
--- a/src/parquet/metadata.cc
+++ b/src/parquet/metadata.cc
@@ -151,6 +151,10 @@ class ColumnChunkMetaData::ColumnChunkMetaDataImpl {
 
   inline int64_t data_page_offset() const { return column_->meta_data.data_page_offset; }
 
+  inline bool has_index_page() const {
+    return column_->meta_data.__isset.index_page_offset;
+  }
+
   inline int64_t index_page_offset() const {
     return column_->meta_data.index_page_offset;
   }
@@ -218,6 +222,10 @@ int64_t ColumnChunkMetaData::data_page_offset() const {
   return impl_->data_page_offset();
 }
 
+bool ColumnChunkMetaData::has_index_page() const {
+  return impl_->has_index_page();
+}
+
 int64_t ColumnChunkMetaData::index_page_offset() const {
   return impl_->index_page_offset();
 }
@@ -607,7 +615,9 @@ class ColumnChunkMetaDataBuilder::ColumnChunkMetaDataBuilderImpl {
     }
     column_chunk_->__isset.meta_data = true;
     column_chunk_->meta_data.__set_num_values(num_values);
-    column_chunk_->meta_data.__set_index_page_offset(index_page_offset);
+    if (index_page_offset >= 0) {
+      column_chunk_->meta_data.__set_index_page_offset(index_page_offset);
+    }
     column_chunk_->meta_data.__set_data_page_offset(data_page_offset);
     column_chunk_->meta_data.__set_total_uncompressed_size(uncompressed_size);
     column_chunk_->meta_data.__set_total_compressed_size(compressed_size);
diff --git a/src/parquet/metadata.h b/src/parquet/metadata.h
index 84c1470..a9739ce 100644
--- a/src/parquet/metadata.h
+++ b/src/parquet/metadata.h
@@ -111,6 +111,7 @@ class PARQUET_EXPORT ColumnChunkMetaData {
   bool has_dictionary_page() const;
   int64_t dictionary_page_offset() const;
   int64_t data_page_offset() const;
+  bool has_index_page() const;
   int64_t index_page_offset() const;
   int64_t total_compressed_size() const;
   int64_t total_uncompressed_size() const;


Mime
View raw message