parquet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From b...@apache.org
Subject [parquet-format] branch master updated: PARQUET-1630: add empty compression union for Bloom filter (#149)
Date Tue, 13 Aug 2019 15:49:17 GMT
This is an automated email from the ASF dual-hosted git repository.

blue pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-format.git


The following commit(s) were added to refs/heads/master by this push:
     new 556ebee  PARQUET-1630: add empty compression union for Bloom filter (#149)
556ebee is described below

commit 556ebee2107e4223aad40573e27e9f62075dddd7
Author: Jim Apple <jbapple@apache.org>
AuthorDate: Tue Aug 13 08:49:12 2019 -0700

    PARQUET-1630: add empty compression union for Bloom filter (#149)
    
    No compression methods are supported yet: the new union has a single
    UNCOMPRESSED member. For background on Bloom filter compression, see
    Michael Mitzenmacher's "Compressed Bloom Filters":
    https://www.eecs.harvard.edu/~michaelm/NEWWORK/postscripts/cbf2.pdf
---
 BloomFilter.md                 | 10 ++++++++++
 src/main/thrift/parquet.thrift | 11 +++++++++++
 2 files changed, 21 insertions(+)

diff --git a/BloomFilter.md b/BloomFilter.md
index 8ce22ae..e5ec30c 100644
--- a/BloomFilter.md
+++ b/BloomFilter.md
@@ -264,6 +264,14 @@ union BloomFilterHash {
 }
 
 /**
+ * The compression used in the Bloom filter.
+ **/
+struct Uncompressed {}
+union BloomFilterCompression {
+  1: Uncompressed UNCOMPRESSED;
+}
+
+/**
   * Bloom filter header is stored at beginning of Bloom filter data of each column
   * and followed by its bitset.
   **/
@@ -274,6 +282,8 @@ struct BloomFilterPageHeader {
   2: required BloomFilterAlgorithm algorithm;
   /** The hash function used for Bloom filter. **/
   3: required BloomFilterHash hash;
+  /** The compression used in the Bloom filter **/
+  4: required BloomFilterCompression compression;
 }
 
 struct ColumnMetaData {
diff --git a/src/main/thrift/parquet.thrift b/src/main/thrift/parquet.thrift
index da90acd..a062b4f 100644
--- a/src/main/thrift/parquet.thrift
+++ b/src/main/thrift/parquet.thrift
@@ -582,6 +582,15 @@ union BloomFilterHash {
   /** xxHash Strategy. **/
   1: XxHash XXHASH;
 }
+
+/**
+ * The compression used in the Bloom filter.
+ **/
+struct Uncompressed {}
+union BloomFilterCompression {
+  1: Uncompressed UNCOMPRESSED;
+}
+
 /**
   * Bloom filter header is stored at beginning of Bloom filter data of each column
   * and followed by its bitset.
@@ -593,6 +602,8 @@ struct BloomFilterHeader {
   2: required BloomFilterAlgorithm algorithm;
   /** The hash function used for Bloom filter. **/
   3: required BloomFilterHash hash;
+  /** The compression used in the Bloom filter **/
+  4: required BloomFilterCompression compression;
 }
 
 struct PageHeader {


Mime
View raw message