parquet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ziva...@apache.org
Subject [parquet-format] branch encryption updated: PARQUET-1419: enable old readers to access unencrypted columns in files with plaint… (#109)
Date Fri, 26 Oct 2018 09:11:50 GMT
This is an automated email from the ASF dual-hosted git repository.

zivanfi pushed a commit to branch encryption
in repository https://gitbox.apache.org/repos/asf/parquet-format.git


The following commit(s) were added to refs/heads/encryption by this push:
     new 411d99e  PARQUET-1419: enable old readers to access unencrypted columns in files with plaint… (#109)
411d99e is described below

commit 411d99e580bb52a61c0b3e4f1a791cbf21bc0f58
Author: ggershinsky <ggershinsky@users.noreply.github.com>
AuthorDate: Fri Oct 26 12:11:45 2018 +0300

    PARQUET-1419: enable old readers to access unencrypted columns in files with plaint… (#109)
---
 src/main/thrift/parquet.thrift | 83 ++++++++++++++++++++++++------------------
 1 file changed, 47 insertions(+), 36 deletions(-)

diff --git a/src/main/thrift/parquet.thrift b/src/main/thrift/parquet.thrift
index c05e871..9d67a54 100644
--- a/src/main/thrift/parquet.thrift
+++ b/src/main/thrift/parquet.thrift
@@ -561,7 +561,7 @@ struct PageHeader {
   /** Uncompressed page size in bytes (not including this header) **/
   2: required i32 uncompressed_page_size
 
-  /** Compressed page size in bytes (not including this header) **/
+  /** Compressed (and potentially encrypted) page size in bytes, not including this header **/
   3: required i32 compressed_page_size
 
   /** 32bit crc for the data below. This allows for disabling checksumming in HDFS
@@ -638,7 +638,8 @@ struct ColumnMetaData {
   /** total byte size of all uncompressed pages in this column chunk (including the headers) **/
   6: required i64 total_uncompressed_size
 
-  /** total byte size of all compressed pages in this column chunk (including the headers) **/
+  /** total byte size of all compressed, and potentially encrypted, pages 
+   *  in this column chunk (including the headers) **/
   7: required i64 total_compressed_size
 
   /** Optional key/value metadata **/
@@ -730,7 +731,8 @@ struct RowGroup {
    * in this row group **/
   5: optional i64 file_offset
 
-  /** Total byte size of all compressed column data in this row group **/
+  /** Total byte size of all compressed (and potentially encrypted) column data 
+   *  in this row group **/
   6: optional i64 total_compressed_size
 }
 
@@ -860,6 +862,31 @@ struct ColumnIndex {
   5: optional list<i64> null_counts
 }
 
+struct AesGcmV1 {
+  /** Retrieval metadata of AAD used for encryption of pages and structures **/
+  1: optional binary aad_metadata
+
+  /** If file IVs are comprised of a fixed part, and variable parts
+   *  (e.g. counter), keep the fixed part here **/
+  2: optional binary iv_prefix
+}
+
+struct AesGcmCtrV1 {
+  /** Retrieval metadata of AAD used for encryption of structures **/
+  1: optional binary aad_metadata
+
+  /** If file IVs are comprised of a fixed part, and variable parts
+   *  (e.g. counter), keep the fixed part here **/
+  2: optional binary gcm_iv_prefix
+
+  3: optional binary ctr_iv_prefix
+}
+
+union EncryptionAlgorithm {
+  1: AesGcmV1 AES_GCM_V1
+  2: AesGcmCtrV1 AES_GCM_CTR_V1
+}
+
 /**
  * Description for file metadata
  */
@@ -902,46 +929,30 @@ struct FileMetaData {
    * The obsolete min and max fields are always sorted by signed comparison
    * regardless of column_orders.
    */
-  7: optional list<ColumnOrder> column_orders;
-}
-
-struct AesGcmV1 {
-  /** Retrieval metadata of AAD used for encryption of pages and structures **/
-  1: optional binary aad_metadata
-
-  /** If file IVs are comprised of a fixed part, and variable parts
-   *  (e.g. counter), keep the fixed part here **/
-  2: optional binary iv_prefix
- 
-}
-
-struct AesGcmCtrV1 {
-  /** Retrieval metadata of AAD used for encryption of structures **/
-  1: optional binary aad_metadata
-
-  /** If file IVs are comprised of a fixed part, and variable parts
-   *  (e.g. counter), keep the fixed part here **/
-  2: optional binary gcm_iv_prefix
-
-  3: optional binary ctr_iv_prefix
-}
-
-union EncryptionAlgorithm {
-  1: AesGcmV1 AES_GCM_V1
-  2: AesGcmCtrV1 AES_GCM_CTR_V1
+  7: optional list<ColumnOrder> column_orders
+  
+  /** 
+   * Encryption algorithm. Note that this field is only used for files
+   * with plaintext footer. Files with encrypted footer store the algorithm id
+   * in FileCryptoMetaData structure.
+   */
+  8: optional EncryptionAlgorithm encryption_algorithm
 }
 
+/** Crypto metadata for files with encrypted footer **/
 struct FileCryptoMetaData {
+  /** 
+   * Encryption algorithm. Note that this field is only used for files
+   * with encrypted footer. Files with plaintext footer store the algorithm id
+   * inside footer (FileMetaData structure).
+   */
   1: required EncryptionAlgorithm encryption_algorithm
-  
-  /** Parquet footer can be encrypted, or left as plaintext **/
-  2: required bool encrypted_footer
     
   /** Retrieval metadata of key used for encryption of footer, 
    *  and (possibly) columns **/
-  3: optional binary footer_key_metadata
+  2: optional binary footer_key_metadata
 
-  /** Offset of Parquet footer (encrypted, or plaintext) **/
-  4: required i64 footer_offset
+  /** Offset of encrypted Parquet footer **/
+  3: required i64 footer_offset
 }
 


Mime
View raw message