Repository: parquet-cpp
Updated Branches:
refs/heads/master fb9c1166c -> 585469c7f
PARQUET-828: Do not implicitly cast ParquetVersion enum to int
See https://github.com/apache/parquet-mr/blob/df9d8e415436292ae33e1ca0b8da256640de9710/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileWriter.java#L86,
this number should be 1 for Parquet 1.0 files, I believe.
Author: Wes McKinney <wes.mckinney@twosigma.com>
Closes #216 from wesm/PARQUET-828 and squashes the following commits:
ab6773c [Wes McKinney] Do not implicitly cast ParquetVersion enum to int. Set 1.0 to 1, 2.0
to 2
Project: http://git-wip-us.apache.org/repos/asf/parquet-cpp/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-cpp/commit/585469c7
Tree: http://git-wip-us.apache.org/repos/asf/parquet-cpp/tree/585469c7
Diff: http://git-wip-us.apache.org/repos/asf/parquet-cpp/diff/585469c7
Branch: refs/heads/master
Commit: 585469c7f4e9421b4abf4ea419bb0bdaaef9ce3f
Parents: fb9c116
Author: Wes McKinney <wes.mckinney@twosigma.com>
Authored: Tue Jan 10 08:48:05 2017 +0100
Committer: Uwe L. Korn <uwelk@xhochy.com>
Committed: Tue Jan 10 08:48:05 2017 +0100
----------------------------------------------------------------------
src/parquet/file/file-metadata-test.cc | 15 +++++++++------
src/parquet/file/metadata.cc | 25 ++++++++++++++++++++++---
src/parquet/file/metadata.h | 2 +-
3 files changed, 32 insertions(+), 10 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/585469c7/src/parquet/file/file-metadata-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/file/file-metadata-test.cc b/src/parquet/file/file-metadata-test.cc
index 0c9d376..79e2bb1 100644
--- a/src/parquet/file/file-metadata-test.cc
+++ b/src/parquet/file/file-metadata-test.cc
@@ -31,7 +31,10 @@ TEST(Metadata, TestBuildAccess) {
parquet::schema::NodePtr root;
parquet::SchemaDescriptor schema;
- std::shared_ptr<WriterProperties> props = WriterProperties::Builder().build();
+ WriterProperties::Builder prop_builder;
+
+ std::shared_ptr<WriterProperties> props =
+ prop_builder.version(ParquetVersion::PARQUET_2_0)->build();
fields.push_back(parquet::schema::Int32("int_col", Repetition::REQUIRED));
fields.push_back(parquet::schema::Float("float_col", Repetition::REQUIRED));
@@ -84,7 +87,7 @@ TEST(Metadata, TestBuildAccess) {
ASSERT_EQ(nrows, f_accessor->num_rows());
ASSERT_LE(0, f_accessor->size());
ASSERT_EQ(2, f_accessor->num_row_groups());
- ASSERT_EQ(DEFAULT_WRITER_VERSION, f_accessor->version());
+ ASSERT_EQ(ParquetVersion::PARQUET_2_0, f_accessor->version());
ASSERT_EQ(DEFAULT_CREATED_BY, f_accessor->created_by());
ASSERT_EQ(3, f_accessor->num_schema_elements());
@@ -110,8 +113,8 @@ TEST(Metadata, TestBuildAccess) {
ASSERT_EQ(DEFAULT_COMPRESSION_TYPE, rg1_column2->compression());
ASSERT_EQ(nrows / 2, rg1_column1->num_values());
ASSERT_EQ(nrows / 2, rg1_column2->num_values());
- ASSERT_EQ(2, rg1_column1->encodings().size());
- ASSERT_EQ(2, rg1_column2->encodings().size());
+ ASSERT_EQ(3, rg1_column1->encodings().size());
+ ASSERT_EQ(3, rg1_column2->encodings().size());
ASSERT_EQ(512, rg1_column1->total_compressed_size());
ASSERT_EQ(512, rg1_column2->total_compressed_size());
ASSERT_EQ(600, rg1_column1->total_uncompressed_size());
@@ -142,8 +145,8 @@ TEST(Metadata, TestBuildAccess) {
ASSERT_EQ(nrows / 2, rg2_column2->num_values());
ASSERT_EQ(DEFAULT_COMPRESSION_TYPE, rg2_column1->compression());
ASSERT_EQ(DEFAULT_COMPRESSION_TYPE, rg2_column2->compression());
- ASSERT_EQ(2, rg2_column1->encodings().size());
- ASSERT_EQ(2, rg2_column2->encodings().size());
+ ASSERT_EQ(3, rg2_column1->encodings().size());
+ ASSERT_EQ(3, rg2_column2->encodings().size());
ASSERT_EQ(512, rg2_column1->total_compressed_size());
ASSERT_EQ(512, rg2_column2->total_compressed_size());
ASSERT_EQ(600, rg2_column1->total_uncompressed_size());
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/585469c7/src/parquet/file/metadata.cc
----------------------------------------------------------------------
diff --git a/src/parquet/file/metadata.cc b/src/parquet/file/metadata.cc
index 2e22649..a262b63 100644
--- a/src/parquet/file/metadata.cc
+++ b/src/parquet/file/metadata.cc
@@ -358,8 +358,17 @@ int FileMetaData::num_row_groups() const {
return impl_->num_row_groups();
}
-int32_t FileMetaData::version() const {
- return impl_->version();
+ParquetVersion::type FileMetaData::version() const {  // Translates the integer version reported by impl_ into the ParquetVersion enum.
+ switch (impl_->version()) {
+ case 1:
+ return ParquetVersion::PARQUET_1_0;
+ case 2:
+ return ParquetVersion::PARQUET_2_0;
+ default:
+ // Any value other than 1 or 2 is treated as improperly set; break out to the Parquet 1.0 return below.
+ break;
+ }
+ return ParquetVersion::PARQUET_1_0;  // Fallback used for unrecognized version values.
}
const FileMetaData::Version& FileMetaData::writer_version() const {
@@ -656,7 +665,17 @@ class FileMetaDataBuilder::FileMetaDataBuilderImpl {
}
metadata_->__set_num_rows(total_rows);
metadata_->__set_row_groups(row_groups);
- metadata_->__set_version(properties_->version());
+
+ int32_t file_version = 0;  // Integer stored in the file metadata; stays 0 if the enum value matches no case below.
+ switch (properties_->version()) {
+ case ParquetVersion::PARQUET_1_0:
+ file_version = 1;
+ break;  // Required: without it 1.0 falls through and is written as 2.
+ case ParquetVersion::PARQUET_2_0:
+ file_version = 2;
+ break;
+ }
+ metadata_->__set_version(file_version);
metadata_->__set_created_by(properties_->created_by());
parquet::schema::SchemaFlattener flattener(
static_cast<parquet::schema::GroupNode*>(schema_->schema_root().get()),
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/585469c7/src/parquet/file/metadata.h
----------------------------------------------------------------------
diff --git a/src/parquet/file/metadata.h b/src/parquet/file/metadata.h
index 43419d2..942aa39 100644
--- a/src/parquet/file/metadata.h
+++ b/src/parquet/file/metadata.h
@@ -130,7 +130,7 @@ class PARQUET_EXPORT FileMetaData {
int num_columns() const;
int64_t num_rows() const;
int num_row_groups() const;
- int32_t version() const;
+ ParquetVersion::type version() const;
const std::string& created_by() const;
int num_schema_elements() const;
std::unique_ptr<RowGroupMetaData> RowGroup(int i) const;
|