hive-commits mailing list archives

From the...@apache.org
Subject hive git commit: HIVE-10910 : Alter table drop partition queries in encrypted zone failing to remove data from HDFS (Eugene Koifman, reviewed by Gunther)
Date Mon, 08 Jun 2015 22:00:08 GMT
Repository: hive
Updated Branches:
  refs/heads/master a80210434 -> 7ae1d0b73


HIVE-10910 : Alter table drop partition queries in encrypted zone failing to remove data from HDFS (Eugene Koifman, reviewed by Gunther)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/7ae1d0b7
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/7ae1d0b7
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/7ae1d0b7

Branch: refs/heads/master
Commit: 7ae1d0b736143c3f2abcf972cac087aabfea5331
Parents: a802104
Author: Thejas Nair <thejas@hortonworks.com>
Authored: Mon Jun 8 14:59:18 2015 -0700
Committer: Thejas Nair <thejas@hortonworks.com>
Committed: Mon Jun 8 15:00:04 2015 -0700

----------------------------------------------------------------------
 .../test/resources/testconfiguration.properties |   3 +-
 .../hadoop/hive/metastore/HiveMetaStore.java    | 101 +++++++++++--------
 .../clientpositive/encryption_drop_partition.q  |  18 ++++
 .../encrypted/encryption_drop_partition.q.out   |  81 +++++++++++++++
 .../encrypted/encryption_drop_table.q.out       |   2 +-
 5 files changed, 159 insertions(+), 46 deletions(-)
----------------------------------------------------------------------
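
For readers skimming the diff: the user-visible behavior this patch establishes can be sketched with the HiveQL below. This is an illustration distilled from the new encryption_drop_partition.q test included in this commit, not part of the commit itself; it assumes a table whose location sits inside an HDFS encryption zone and that trash is enabled (fs.trash.interval > 0).

  -- Without PURGE the drop is rejected, since files cannot be moved out of an
  -- encryption zone into the trash directory:
  ALTER TABLE encrypted_table_dp DROP PARTITION (p='2014-09-23');
  -- FAILED: ... Unable to drop default.encrypted_table_dp.[2014-09-23] because
  -- it is in an encryption zone and trash is enabled.  Use PURGE option to skip trash.

  -- With PURGE the partition data is deleted directly, bypassing trash:
  ALTER TABLE encrypted_table_dp DROP PARTITION (p='2014-09-23') PURGE;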


http://git-wip-us.apache.org/repos/asf/hive/blob/7ae1d0b7/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 784b502..b9f39fb 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -358,7 +358,8 @@ encrypted.query.files=encryption_join_unencrypted_tbl.q,\
   encryption_move_tbl.q \
   encryption_drop_table.q \
   encryption_insert_values.q \
-  encryption_drop_view.q
+  encryption_drop_view.q \
+  encryption_drop_partition.q
 
 beeline.positive.exclude=add_part_exist.q,\
   alter1.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/7ae1d0b7/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
index 85a734c..914f2e7 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
@@ -1501,17 +1501,7 @@ public class HiveMetaStore extends ThriftHiveMetastore {
         if (tbl.getSd() == null) {
           throw new MetaException("Table metadata is corrupted");
         }
-
-        /**
-         * Trash may be skipped iff:
-         * 1. deleteData == true, obviously.
-         * 2. tbl is external.
-         * 3. Either
-         *  3.1. User has specified PURGE from the commandline, and if not,
-         *  3.2. User has set the table to auto-purge.
-         */
-        ifPurge = ((envContext != null) && Boolean.parseBoolean(envContext.getProperties().get("ifPurge")))
-          || (tbl.isSetParameters() && "true".equalsIgnoreCase(tbl.getParameters().get("auto.purge")));
+        ifPurge = isMustPurge(envContext, tbl);
 
         firePreEvent(new PreDropTableEvent(tbl, deleteData, this));
 
@@ -1546,19 +1536,7 @@ public class HiveMetaStore extends ThriftHiveMetastore {
         }
 
        // tblPath will be null when tbl is a view. We skip the following if block in that case.
-        if(tblPath != null && !ifPurge) {
-          String trashInterval = hiveConf.get("fs.trash.interval");
-          boolean trashEnabled = trashInterval != null && trashInterval.length() > 0
-            && Float.parseFloat(trashInterval) > 0;
-          if (trashEnabled) {
-            HadoopShims.HdfsEncryptionShim shim =
-              ShimLoader.getHadoopShims().createHdfsEncryptionShim(FileSystem.get(hiveConf), hiveConf);
-            if (shim.isPathEncrypted(tblPath)) {
-              throw new MetaException("Unable to drop table because it is in an encryption zone" +
-                " and trash is enabled.  Use PURGE option to skip trash.");
-            }
-          }
-        }
+        checkTrashPurgeCombination(tblPath, dbname + "." + name, ifPurge);
         // Drop the partitions and get a list of locations which need to be deleted
         partPaths = dropPartitionsAndGetLocations(ms, dbname, name, tblPath,
             tbl.getPartitionKeys(), deleteData && !isExternal);
@@ -1590,6 +1568,41 @@ public class HiveMetaStore extends ThriftHiveMetastore {
     }
 
     /**
+     * Will throw MetaException if the combination of trash policy and purge can't be satisfied
+     * @param pathToData path to data which may potentially be moved to trash
+     * @param objectName db.table, or db.table.part
+     * @param ifPurge if the PURGE option is specified
+     */
+    private void checkTrashPurgeCombination(Path pathToData, String objectName, boolean ifPurge)
+      throws MetaException {
+      if (!(pathToData != null && !ifPurge)) {//pathToData may be NULL for a view
+        return;
+      }
+
+      boolean trashEnabled = false;
+      try {
+	trashEnabled = 0 < hiveConf.getFloat("fs.trash.interval", -1);
+      } catch(NumberFormatException ex) {
+	// nothing to do
+      }
+
+      if (trashEnabled) {
+        try {
+          HadoopShims.HdfsEncryptionShim shim =
+            ShimLoader.getHadoopShims().createHdfsEncryptionShim(FileSystem.get(hiveConf), hiveConf);
+          if (shim.isPathEncrypted(pathToData)) {
+            throw new MetaException("Unable to drop " + objectName + " because it is in an encryption zone" +
+              " and trash is enabled.  Use PURGE option to skip trash.");
+          }
+        } catch (IOException ex) {
+          MetaException e = new MetaException(ex.getMessage());
+          e.initCause(ex);
+          throw e;
+        }
+      }
+    }
+
+    /**
      * Deletes the data in a table's location, if it fails logs an error
      *
      * @param tablePath
@@ -2591,12 +2604,14 @@ public class HiveMetaStore extends ThriftHiveMetastore {
       Partition part = null;
       boolean isArchived = false;
       Path archiveParentDir = null;
+      boolean mustPurge = false;
 
       try {
         ms.openTransaction();
         part = ms.getPartition(db_name, tbl_name, part_vals);
         tbl = get_table_core(db_name, tbl_name);
         firePreEvent(new PreDropPartitionEvent(tbl, part, deleteData, this));
+        mustPurge = isMustPurge(envContext, tbl);
 
         if (part == null) {
           throw new NoSuchObjectException("Partition doesn't exist. "
@@ -2607,6 +2622,7 @@ public class HiveMetaStore extends ThriftHiveMetastore {
         if (isArchived) {
           archiveParentDir = MetaStoreUtils.getOriginalLocation(part);
           verifyIsWritablePath(archiveParentDir);
+          checkTrashPurgeCombination(archiveParentDir, db_name + "." + tbl_name + "." + part_vals, mustPurge);
         }
         if (!ms.dropPartition(db_name, tbl_name, part_vals)) {
           throw new MetaException("Unable to drop partition");
@@ -2615,22 +2631,13 @@ public class HiveMetaStore extends ThriftHiveMetastore {
         if ((part.getSd() != null) && (part.getSd().getLocation() != null)) {
           partPath = new Path(part.getSd().getLocation());
           verifyIsWritablePath(partPath);
+          checkTrashPurgeCombination(partPath, db_name + "." + tbl_name + "." + part_vals, mustPurge);
         }
       } finally {
         if (!success) {
           ms.rollbackTransaction();
        } else if (deleteData && ((partPath != null) || (archiveParentDir != null))) {
           if (tbl != null && !isExternal(tbl)) {
-            // Data needs deletion. Check if trash may be skipped.
-            // Trash may be skipped iff:
-            //  1. deleteData == true, obviously.
-            //  2. tbl is external.
-            //  3. Either
-            //    3.1. User has specified PURGE from the commandline, and if not,
-            //    3.2. User has set the table to auto-purge.
-            boolean mustPurge = ((envContext != null) && Boolean.parseBoolean(envContext.getProperties().get("ifPurge")))
-                                ||
-                                 (tbl.isSetParameters() && "true".equalsIgnoreCase(tbl.getParameters().get("auto.purge")));
             if (mustPurge) {
               LOG.info("dropPartition() will purge " + partPath + " directly, skipping trash.");
             }
@@ -2660,6 +2667,18 @@ public class HiveMetaStore extends ThriftHiveMetastore {
       return true;
     }
 
+    private static boolean isMustPurge(EnvironmentContext envContext, Table tbl) {
+      // Data needs deletion. Check if trash may be skipped.
+      // Trash may be skipped iff:
+      //  1. deleteData == true, obviously.
+      //  2. tbl is external.
+      //  3. Either
+      //    3.1. User has specified PURGE from the commandline, and if not,
+      //    3.2. User has set the table to auto-purge.
+      return ((envContext != null) && Boolean.parseBoolean(envContext.getProperties().get("ifPurge")))
+        || (tbl.isSetParameters() && "true".equalsIgnoreCase(tbl.getParameters().get("auto.purge")));
+
+    }
    private void deleteParentRecursive(Path parent, int depth, boolean mustPurge) throws IOException, MetaException {
      if (depth > 0 && parent != null && wh.isWritable(parent) && wh.isEmpty(parent)) {
         wh.deleteDir(parent, true, mustPurge);
@@ -2702,10 +2721,12 @@ public class HiveMetaStore extends ThriftHiveMetastore {
       ms.openTransaction();
       Table tbl = null;
       List<Partition> parts = null;
+      boolean mustPurge = false;
       try {
        // We need Partition-s for firing events and for result; DN needs MPartition-s to drop.
         // Great... Maybe we could bypass fetching MPartitions by issuing direct SQL deletes.
         tbl = get_table_core(dbName, tblName);
+        mustPurge = isMustPurge(envContext, tbl);
         int minCount = 0;
         RequestPartsSpec spec = request.getParts();
         List<String> partNames = null;
@@ -2770,11 +2791,13 @@ public class HiveMetaStore extends ThriftHiveMetastore {
           if (MetaStoreUtils.isArchived(part)) {
             Path archiveParentDir = MetaStoreUtils.getOriginalLocation(part);
             verifyIsWritablePath(archiveParentDir);
+            checkTrashPurgeCombination(archiveParentDir, dbName + "." + tblName + "." + part.getValues(), mustPurge);
             archToDelete.add(archiveParentDir);
           }
           if ((part.getSd() != null) && (part.getSd().getLocation() != null)) {
             Path partPath = new Path(part.getSd().getLocation());
             verifyIsWritablePath(partPath);
+            checkTrashPurgeCombination(partPath, dbName + "." + tblName + "." + part.getValues(), mustPurge);
             dirsToDelete.add(new PathAndPartValSize(partPath, part.getValues().size()));
           }
         }
@@ -2790,16 +2813,6 @@ public class HiveMetaStore extends ThriftHiveMetastore {
         if (!success) {
           ms.rollbackTransaction();
         } else if (deleteData && !isExternal(tbl)) {
-          // Data needs deletion. Check if trash may be skipped.
-          // Trash may be skipped iff:
-          //  1. deleteData == true, obviously.
-          //  2. tbl is external.
-          //  3. Either
-          //    3.1. User has specified PURGE from the commandline, and if not,
-          //    3.2. User has set the table to auto-purge.
-          boolean mustPurge = ((envContext != null) && Boolean.parseBoolean(envContext.getProperties().get("ifPurge")))
-                              ||
-                              (tbl.isSetParameters() && "true".equalsIgnoreCase(tbl.getParameters().get("auto.purge")));
           LOG.info( mustPurge?
                       "dropPartition() will purge partition-directories directly, skipping
trash."
                     :  "dropPartition() will move partition-directories to trash-directory.");

http://git-wip-us.apache.org/repos/asf/hive/blob/7ae1d0b7/ql/src/test/queries/clientpositive/encryption_drop_partition.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/encryption_drop_partition.q b/ql/src/test/queries/clientpositive/encryption_drop_partition.q
new file mode 100644
index 0000000..e1c1796
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/encryption_drop_partition.q
@@ -0,0 +1,18 @@
+-- SORT_QUERY_RESULTS;
+
+-- we're setting this so that TestNegativeCliDriver.vm doesn't stop processing after DROP TABLE fails;
+
+set hive.cli.errors.ignore=true;
+set hive.exec.dynamic.partition.mode=nonstrict;
+
+DROP TABLE IF EXISTS encrypted_table_dp PURGE;
+CREATE TABLE encrypted_table_dp (key INT, value STRING) partitioned by (p STRING) LOCATION '${hiveconf:hive.metastore.warehouse.dir}/default/encrypted_table_dp';
+CRYPTO CREATE_KEY --keyName key_128 --bitLength 128;
+CRYPTO CREATE_ZONE --keyName key_128 --path ${hiveconf:hive.metastore.warehouse.dir}/default/encrypted_table_dp;
+
+INSERT INTO encrypted_table_dp PARTITION(p)(p,key,value) values('2014-09-23', 1, 'foo'),('2014-09-24', 2, 'bar');
+SELECT * FROM encrypted_table_dp;
+ALTER TABLE encrypted_table_dp DROP PARTITION (p='2014-09-23');
+SELECT * FROM encrypted_table_dp;
+ALTER TABLE encrypted_table_dp DROP PARTITION (p='2014-09-23') PURGE;
+SELECT * FROM encrypted_table_dp;

http://git-wip-us.apache.org/repos/asf/hive/blob/7ae1d0b7/ql/src/test/results/clientpositive/encrypted/encryption_drop_partition.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/encrypted/encryption_drop_partition.q.out b/ql/src/test/results/clientpositive/encrypted/encryption_drop_partition.q.out
new file mode 100644
index 0000000..067bf82
--- /dev/null
+++ b/ql/src/test/results/clientpositive/encrypted/encryption_drop_partition.q.out
@@ -0,0 +1,81 @@
+PREHOOK: query: DROP TABLE IF EXISTS encrypted_table_dp PURGE
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS encrypted_table_dp PURGE
+POSTHOOK: type: DROPTABLE
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+PREHOOK: Output: database:default
+PREHOOK: Output: default@encrypted_table_dp
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@encrypted_table_dp
+Encryption key created: 'key_128'
+Encryption zone created: '/build/ql/test/data/warehouse/default/encrypted_table_dp' using key: 'key_128'
+PREHOOK: query: INSERT INTO encrypted_table_dp PARTITION(p)(p,key,value) values('2014-09-23', 1, 'foo'),('2014-09-24', 2, 'bar')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@encrypted_table_dp
+POSTHOOK: query: INSERT INTO encrypted_table_dp PARTITION(p)(p,key,value) values('2014-09-23', 1, 'foo'),('2014-09-24', 2, 'bar')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@encrypted_table_dp@p=2014-09-23
+POSTHOOK: Output: default@encrypted_table_dp@p=2014-09-24
+POSTHOOK: Lineage: encrypted_table_dp PARTITION(p=2014-09-23).key EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: encrypted_table_dp PARTITION(p=2014-09-23).value SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: encrypted_table_dp PARTITION(p=2014-09-24).key EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: encrypted_table_dp PARTITION(p=2014-09-24).value SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+PREHOOK: query: SELECT * FROM encrypted_table_dp
+PREHOOK: type: QUERY
+PREHOOK: Input: default@encrypted_table_dp
+PREHOOK: Input: default@encrypted_table_dp@p=2014-09-23
+PREHOOK: Input: default@encrypted_table_dp@p=2014-09-24
+#### A PARTIAL masked pattern was here #### data/warehouse/default/encrypted_table_dp/.hive-staging
+POSTHOOK: query: SELECT * FROM encrypted_table_dp
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@encrypted_table_dp
+POSTHOOK: Input: default@encrypted_table_dp@p=2014-09-23
+POSTHOOK: Input: default@encrypted_table_dp@p=2014-09-24
+#### A PARTIAL masked pattern was here #### data/warehouse/default/encrypted_table_dp/.hive-staging
+1	foo	2014-09-23
+2	bar	2014-09-24
+PREHOOK: query: ALTER TABLE encrypted_table_dp DROP PARTITION (p='2014-09-23')
+PREHOOK: type: ALTERTABLE_DROPPARTS
+PREHOOK: Input: default@encrypted_table_dp
+PREHOOK: Output: default@encrypted_table_dp@p=2014-09-23
+FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask. Unable to drop default.encrypted_table_dp.[2014-09-23] because it is in an encryption zone and trash is enabled.  Use PURGE option to skip trash.
+PREHOOK: query: SELECT * FROM encrypted_table_dp
+PREHOOK: type: QUERY
+PREHOOK: Input: default@encrypted_table_dp
+PREHOOK: Input: default@encrypted_table_dp@p=2014-09-23
+PREHOOK: Input: default@encrypted_table_dp@p=2014-09-24
+#### A PARTIAL masked pattern was here #### data/warehouse/default/encrypted_table_dp/.hive-staging
+POSTHOOK: query: SELECT * FROM encrypted_table_dp
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@encrypted_table_dp
+POSTHOOK: Input: default@encrypted_table_dp@p=2014-09-23
+POSTHOOK: Input: default@encrypted_table_dp@p=2014-09-24
+#### A PARTIAL masked pattern was here #### data/warehouse/default/encrypted_table_dp/.hive-staging
+1	foo	2014-09-23
+2	bar	2014-09-24
+PREHOOK: query: ALTER TABLE encrypted_table_dp DROP PARTITION (p='2014-09-23') PURGE
+PREHOOK: type: ALTERTABLE_DROPPARTS
+PREHOOK: Input: default@encrypted_table_dp
+PREHOOK: Output: default@encrypted_table_dp@p=2014-09-23
+POSTHOOK: query: ALTER TABLE encrypted_table_dp DROP PARTITION (p='2014-09-23') PURGE
+POSTHOOK: type: ALTERTABLE_DROPPARTS
+POSTHOOK: Input: default@encrypted_table_dp
+POSTHOOK: Output: default@encrypted_table_dp@p=2014-09-23
+PREHOOK: query: SELECT * FROM encrypted_table_dp
+PREHOOK: type: QUERY
+PREHOOK: Input: default@encrypted_table_dp
+PREHOOK: Input: default@encrypted_table_dp@p=2014-09-24
+#### A PARTIAL masked pattern was here #### data/warehouse/default/encrypted_table_dp/.hive-staging
+POSTHOOK: query: SELECT * FROM encrypted_table_dp
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@encrypted_table_dp
+POSTHOOK: Input: default@encrypted_table_dp@p=2014-09-24
+#### A PARTIAL masked pattern was here #### data/warehouse/default/encrypted_table_dp/.hive-staging
+2	bar	2014-09-24

http://git-wip-us.apache.org/repos/asf/hive/blob/7ae1d0b7/ql/src/test/results/clientpositive/encrypted/encryption_drop_table.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/encrypted/encryption_drop_table.q.out b/ql/src/test/results/clientpositive/encrypted/encryption_drop_table.q.out
index 9171e1b..55eefa0 100644
--- a/ql/src/test/results/clientpositive/encrypted/encryption_drop_table.q.out
+++ b/ql/src/test/results/clientpositive/encrypted/encryption_drop_table.q.out
@@ -36,7 +36,7 @@ PREHOOK: query: DROP TABLE default.encrypted_table
 PREHOOK: type: DROPTABLE
 PREHOOK: Input: default@encrypted_table
 PREHOOK: Output: default@encrypted_table
-FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask. MetaException(message:Unable to drop table because it is in an encryption zone and trash is enabled.  Use PURGE option to skip trash.)
+FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask. MetaException(message:Unable to drop default.encrypted_table because it is in an encryption zone and trash is enabled.  Use PURGE option to skip trash.)
 PREHOOK: query: SHOW TABLES
 PREHOOK: type: SHOWTABLES
 PREHOOK: Input: database:default

