hive-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "Xiaomeng Huang (JIRA)" <j...@apache.org>
Subject [jira] [Updated] (HIVE-7934) Improve column level encryption with key management
Date Wed, 10 Dec 2014 07:14:13 GMT

     [ https://issues.apache.org/jira/browse/HIVE-7934?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]

Xiaomeng Huang updated HIVE-7934:
---------------------------------
    Description: 
Currently, HIVE-6329 provides a framework for column-level encryption/decryption. However, the implementation
in HIVE-6329 just uses Base64, which is not secure and has some problems:
- With Base64WriteOnly, every user can only get the ciphertext from the client.
- With Base64Rewriter, every user can get the plaintext from the client.

I have an improvement based on HIVE-6329 using key management via kms.
This patch implements transparent column-level encryption. Users don't need to set anything
when they query tables.
# set up kms and configure kms-acls.xml (e.g. user1 and root have permission to get the key)
{code}
 <property>
    <name>hadoop.kms.acl.GET</name>
    <value>user1 root</value>
    <description>
      ACL for get-key-version and get-current-key operations.
    </description>
  </property>
{code}
# set hive-site.xml 
{code}
 <property>  
    <name>hadoop.security.key.provider.path</name>  
    <value>kms://http@localhost:16000/kms</value>  
 </property> 
{code}
# create an encrypted table
{code}
drop table student_column_encrypt;
create table student_column_encrypt (s_key INT, s_name STRING, s_country STRING, s_age INT)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
  WITH SERDEPROPERTIES ('column.encode.columns'='s_country,s_age', 'column.encode.classname'='org.apache.hadoop.hive.serde2.crypto.CryptoRewriter')

  STORED AS TEXTFILE TBLPROPERTIES('hive.encrypt.keynames'='hive.k1');
insert overwrite table student_column_encrypt 
select 
  s_key, s_name, s_country, s_age
from student;
             
select * from student_column_encrypt; 
{code}
# query the table as different users; this is transparent to users. It is very convenient: users don't
need to set anything.
{code}
[root@huang1 hive_data]# hive
hive> select * from student_column_encrypt;       
OK
0	Armon	China	20
1	Jack	USA	21
2	Lucy	England	22
3	Lily	France	23
4	Yom	Spain	24
Time taken: 0.759 seconds, Fetched: 5 row(s)

[root@huang1 hive_data]# su user2
[user2@huang1 hive_data]$ hive
hive> select * from student_column_encrypt;
OK
0	Armon	dqyb188=	NULL
1	Jack	YJez	NULL
2	Lucy	cKqV1c8MTw==	NULL
3	Lily	c7aT180H	NULL
4	Yom	ZrST0MA=	NULL
Time taken: 0.77 seconds, Fetched: 5 row(s)
{code}

  was:
Now HIVE-6329 is a framework of column level encryption/decryption. But the implementation
in HIVE-6329 is just use Base64, it is not safe and have some problems:
- Base64WriteOnly just be able to get the ciphertext from client for any users. 
- Base64Rewriter just be able to get plaintext from client for any users.

I have an improvement based on HIVE-6329 using key management via kms.
# setup kms and set kms-acls.xml (e.g. user1 and root has permission to get key)
{code}
 <property>
    <name>hadoop.kms.acl.GET</name>
    <value>user1 root</value>
    <description>
      ACL for get-key-version and get-current-key operations.
    </description>
  </property>
{code}
# set hive-site.xml 
{code}
 <property>  
    <name>hadoop.security.kms.uri</name>  
    <value>http://localhost:16000/kms</value>  
 </property> 
{code}
# create an encrypted table
{code}
-- region-aes-column.q
drop table region_aes_column;
create table region_aes_column (r_regionkey int, r_name string) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
  WITH SERDEPROPERTIES ('column.encode.columns'='r_name', 'column.encode.classname'='org.apache.hadoop.hive.serde2.aes.AESRewriter')
  STORED AS TEXTFILE TBLPROPERTIES("hive.encrypt.keynames"="hive.k1");
insert overwrite table region_aes_column
select
  r_regionkey, r_name
from region;
{code}
# query table by different user, this is transparent to users. It is very convenient and don't
need to set anything.
{code}
[root@huang1 hive_data]# hive
hive> select * from region_aes_column;
OK
0	AFRICA
1	AMERICA
2	ASIA
3	EUROPE
4	MIDDLE EAST
Time taken: 0.9 seconds, Fetched: 5 row(s)

[root@huang1 hive_data]# su user1
[user1@huang1 hive_data]$ hive
hive> select * from region_aes_column;
OK
0	AFRICA
1	AMERICA
2	ASIA
3	EUROPE
4	MIDDLE EAST
Time taken: 0.899 seconds, Fetched: 5 row(s)

[root@huang1 hive_data]# su user2
[user2@huang1 hive_data]$ hive
hive> select * from region_aes_column;
OK
0	RcQycWVD
1	Rc8lam9Bxg==
2	RdEpeQ==
3	Qdcyd3ZH
4	ScskfGpHp8KIIuY=
Time taken: 0.749 seconds, Fetched: 5 row(s)
{code}


> Improve column level encryption with key management
> ---------------------------------------------------
>
>                 Key: HIVE-7934
>                 URL: https://issues.apache.org/jira/browse/HIVE-7934
>             Project: Hive
>          Issue Type: Improvement
>            Reporter: Xiaomeng Huang
>            Assignee: Xiaomeng Huang
>            Priority: Minor
>
> Currently, HIVE-6329 provides a framework for column-level encryption/decryption. However, the implementation
in HIVE-6329 just uses Base64, which is not secure and has some problems:
> - With Base64WriteOnly, every user can only get the ciphertext from the client.
> - With Base64Rewriter, every user can get the plaintext from the client.
> I have an improvement based on HIVE-6329 using key management via kms.
> This patch implements transparent column-level encryption. Users don't need to set anything
when they query tables.
> # setup kms and set kms-acls.xml (e.g. user1 and root has permission to get key)
> {code}
>  <property>
>     <name>hadoop.kms.acl.GET</name>
>     <value>user1 root</value>
>     <description>
>       ACL for get-key-version and get-current-key operations.
>     </description>
>   </property>
> {code}
> # set hive-site.xml 
> {code}
>  <property>  
>     <name>hadoop.security.key.provider.path</name>  
>     <value>kms://http@localhost:16000/kms</value>  
>  </property> 
> {code}
> # create an encrypted table
> {code}
> drop table student_column_encrypt;
> create table student_column_encrypt (s_key INT, s_name STRING, s_country STRING, s_age
INT) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
>   WITH SERDEPROPERTIES ('column.encode.columns'='s_country,s_age', 'column.encode.classname'='org.apache.hadoop.hive.serde2.crypto.CryptoRewriter')

>   STORED AS TEXTFILE TBLPROPERTIES('hive.encrypt.keynames'='hive.k1');
> insert overwrite table student_column_encrypt 
> select 
>   s_key, s_name, s_country, s_age
> from student;
>              
> select * from student_column_encrypt; 
> {code}
> # query the table as different users; this is transparent to users. It is very convenient:
users don't need to set anything.
> {code}
> [root@huang1 hive_data]# hive
> hive> select * from student_column_encrypt;       
> OK
> 0	Armon	China	20
> 1	Jack	USA	21
> 2	Lucy	England	22
> 3	Lily	France	23
> 4	Yom	Spain	24
> Time taken: 0.759 seconds, Fetched: 5 row(s)
> [root@huang1 hive_data]# su user2
> [user2@huang1 hive_data]$ hive
> hive> select * from student_column_encrypt;
> OK
> 0	Armon	dqyb188=	NULL
> 1	Jack	YJez	NULL
> 2	Lucy	cKqV1c8MTw==	NULL
> 3	Lily	c7aT180H	NULL
> 4	Yom	ZrST0MA=	NULL
> Time taken: 0.77 seconds, Fetched: 5 row(s)
> {code}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Mime
View raw message