From commits-return-1448-archive-asf-public=cust-asf.ponee.io@parquet.apache.org Fri Oct 26 11:11:52 2018 Return-Path: X-Original-To: archive-asf-public@cust-asf.ponee.io Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by mx-eu-01.ponee.io (Postfix) with SMTP id DBA80180647 for ; Fri, 26 Oct 2018 11:11:51 +0200 (CEST) Received: (qmail 49633 invoked by uid 500); 26 Oct 2018 09:11:51 -0000 Mailing-List: contact commits-help@parquet.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@parquet.apache.org Delivered-To: mailing list commits@parquet.apache.org Received: (qmail 49623 invoked by uid 99); 26 Oct 2018 09:11:50 -0000 Received: from ec2-52-202-80-70.compute-1.amazonaws.com (HELO gitbox.apache.org) (52.202.80.70) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 26 Oct 2018 09:11:50 +0000 Received: by gitbox.apache.org (ASF Mail Server at gitbox.apache.org, from userid 33) id 3CE0B8516B; Fri, 26 Oct 2018 09:11:50 +0000 (UTC) Date: Fri, 26 Oct 2018 09:11:50 +0000 To: "commits@parquet.apache.org" Subject: =?utf-8?q?=5Bparquet-format=5D_branch_encryption_updated=3A_PARQ?= =?utf-8?q?UET-1419=3A_enable_old_readers_to_access_unencrypted_columns_in?= =?utf-8?q?_files_with_plaint=E2=80=A6_=28=23109=29?= MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8bit Message-ID: <154054511019.21895.18367418311222936644@gitbox.apache.org> From: zivanfi@apache.org X-Git-Host: gitbox.apache.org X-Git-Repo: parquet-format X-Git-Refname: refs/heads/encryption X-Git-Reftype: branch X-Git-Oldrev: 4bd026caff8816c4d2a4f0f7d6c75818896579d9 X-Git-Newrev: 411d99e580bb52a61c0b3e4f1a791cbf21bc0f58 X-Git-Rev: 411d99e580bb52a61c0b3e4f1a791cbf21bc0f58 X-Git-NotificationType: ref_changed_plus_diff X-Git-Multimail-Version: 1.5.dev Auto-Submitted: auto-generated This is an automated email from the ASF dual-hosted git repository. 
zivanfi pushed a commit to branch encryption in repository https://gitbox.apache.org/repos/asf/parquet-format.git The following commit(s) were added to refs/heads/encryption by this push: new 411d99e PARQUET-1419: enable old readers to access unencrypted columns in files with plaint… (#109) 411d99e is described below commit 411d99e580bb52a61c0b3e4f1a791cbf21bc0f58 Author: ggershinsky AuthorDate: Fri Oct 26 12:11:45 2018 +0300 PARQUET-1419: enable old readers to access unencrypted columns in files with plaint… (#109) --- src/main/thrift/parquet.thrift | 83 ++++++++++++++++++++++++------------------ 1 file changed, 47 insertions(+), 36 deletions(-) diff --git a/src/main/thrift/parquet.thrift b/src/main/thrift/parquet.thrift index c05e871..9d67a54 100644 --- a/src/main/thrift/parquet.thrift +++ b/src/main/thrift/parquet.thrift @@ -561,7 +561,7 @@ struct PageHeader { /** Uncompressed page size in bytes (not including this header) **/ 2: required i32 uncompressed_page_size - /** Compressed page size in bytes (not including this header) **/ + /** Compressed (and potentially encrypted) page size in bytes, not including this header **/ 3: required i32 compressed_page_size /** 32bit crc for the data below. 
This allows for disabling checksumming in HDFS @@ -638,7 +638,8 @@ struct ColumnMetaData { /** total byte size of all uncompressed pages in this column chunk (including the headers) **/ 6: required i64 total_uncompressed_size - /** total byte size of all compressed pages in this column chunk (including the headers) **/ + /** total byte size of all compressed, and potentially encrypted, pages + * in this column chunk (including the headers) **/ 7: required i64 total_compressed_size /** Optional key/value metadata **/ @@ -730,7 +731,8 @@ struct RowGroup { * in this row group **/ 5: optional i64 file_offset - /** Total byte size of all compressed column data in this row group **/ + /** Total byte size of all compressed (and potentially encrypted) column data + * in this row group **/ 6: optional i64 total_compressed_size } @@ -860,6 +862,31 @@ struct ColumnIndex { 5: optional list<i64> null_counts } +struct AesGcmV1 { + /** Retrieval metadata of AAD used for encryption of pages and structures **/ + 1: optional binary aad_metadata + + /** If file IVs are comprised of a fixed part, and variable parts + * (e.g. counter), keep the fixed part here **/ + 2: optional binary iv_prefix +} + +struct AesGcmCtrV1 { + /** Retrieval metadata of AAD used for encryption of structures **/ + 1: optional binary aad_metadata + + /** If file IVs are comprised of a fixed part, and variable parts + * (e.g. counter), keep the fixed part here **/ + 2: optional binary gcm_iv_prefix + + 3: optional binary ctr_iv_prefix +} + +union EncryptionAlgorithm { + 1: AesGcmV1 AES_GCM_V1 + 2: AesGcmCtrV1 AES_GCM_CTR_V1 +} + /** * Description for file metadata */ @@ -902,46 +929,30 @@ struct FileMetaData { * The obsolete min and max fields are always sorted by signed comparison * regardless of column_orders. 
*/ - 7: optional list<ColumnOrder> column_orders; -} - -struct AesGcmV1 { - /** Retrieval metadata of AAD used for encryption of pages and structures **/ - 1: optional binary aad_metadata - - /** If file IVs are comprised of a fixed part, and variable parts - * (e.g. counter), keep the fixed part here **/ - 2: optional binary iv_prefix - -} - -struct AesGcmCtrV1 { - /** Retrieval metadata of AAD used for encryption of structures **/ - 1: optional binary aad_metadata - - /** If file IVs are comprised of a fixed part, and variable parts - * (e.g. counter), keep the fixed part here **/ - 2: optional binary gcm_iv_prefix - - 3: optional binary ctr_iv_prefix -} - -union EncryptionAlgorithm { - 1: AesGcmV1 AES_GCM_V1 - 2: AesGcmCtrV1 AES_GCM_CTR_V1 + 7: optional list<ColumnOrder> column_orders + + /** + * Encryption algorithm. Note that this field is only used for files + * with plaintext footer. Files with encrypted footer store the algorithm id + * in FileCryptoMetaData structure. + */ + 8: optional EncryptionAlgorithm encryption_algorithm } +/** Crypto metadata for files with encrypted footer **/ struct FileCryptoMetaData { + /** + * Encryption algorithm. Note that this field is only used for files + * with encrypted footer. Files with plaintext footer store the algorithm id + * inside footer (FileMetaData structure). + */ 1: required EncryptionAlgorithm encryption_algorithm - - /** Parquet footer can be encrypted, or left as plaintext **/ - 2: required bool encrypted_footer /** Retrieval metadata of key used for encryption of footer, * and (possibly) columns **/ - 3: optional binary footer_key_metadata + 2: optional binary footer_key_metadata - /** Offset of Parquet footer (encrypted, or plaintext) **/ - 4: required i64 footer_offset + /** Offset of encrypted Parquet footer **/ + 3: required i64 footer_offset }