From: gunther@apache.org
To: commits@hive.apache.org
Reply-To: hive-dev@hive.apache.org
Subject: svn commit: r1628282 - in /hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql: exec/MoveTask.java metadata/Hive.java
Date: Mon, 29 Sep 2014 19:54:32 -0000
Message-Id: <20140929195432.3A1882388A2C@eris.apache.org>

Author: gunther
Date: Mon Sep 29 19:54:31 2014
New Revision: 1628282

URL: http://svn.apache.org/r1628282
Log:
HIVE-7389: Reduce number of metastore calls in MoveTask (when loading dynamic partitions) (Rajesh Balamohan via Gunther Hagleitner)

Modified:
    hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
    hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java

Modified: hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java?rev=1628282&r1=1628281&r2=1628282&view=diff
==============================================================================
--- hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java (original)
+++ hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java Mon Sep 29 19:54:31 2014
@@ -353,6 +353,7 @@ public class MoveTask extends Task<MoveWork> implements Serializable {
-          ArrayList<LinkedHashMap<String, String>> dp =
+          Map<Map<String, String>, Partition> dp =
             db.loadDynamicPartitions(
               tbd.getSourcePath(),
               tbd.getTable().getTableName(),
@@ -370,16 +371,19 @@ public class MoveTask extends Task<MoveWork> implements Serializable {
         }
 
-        for (LinkedHashMap<String, String> partSpec: dp) {
-          Partition partn = db.getPartition(table, partSpec, false);
+        for(Map.Entry<Map<String, String>, Partition> entry : dp.entrySet()) {
+          Partition partn = entry.getValue();
 
           if (bucketCols != null || sortCols != null) {
             updatePartitionBucketSortColumns(table, partn, bucketCols, numBuckets, sortCols);
@@ -412,8 +416,10 @@ public class MoveTask extends Task<MoveWork> implements Serializable {
             List<String> partVals = MetaStoreUtils.getPvals(table.getPartCols(),
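The MoveTask half of the change is the consumer side of the optimization: instead of getting back only the dynamic partition specs and then re-fetching each Partition with db.getPartition(table, partSpec, false), one metastore round-trip per partition, the task now receives the Partition objects that loadDynamicPartitions already created and simply iterates the map entries. Below is a minimal, self-contained sketch of that pattern; the class and method names here are stand-ins for illustration, not Hive's actual API.

    import java.util.LinkedHashMap;
    import java.util.List;
    import java.util.Map;

    class BatchLoadSketch {
      // Stand-in for Hive's Partition metadata object (hypothetical).
      static class Partition {
        final Map<String, String> spec;
        Partition(Map<String, String> spec) { this.spec = spec; }
      }

      // The batch loader returns spec -> Partition, built while loading,
      // so the caller never needs a per-partition lookup afterwards.
      static Map<Map<String, String>, Partition> loadDynamicPartitions(
          List<Map<String, String>> specs) {
        Map<Map<String, String>, Partition> result =
            new LinkedHashMap<Map<String, String>, Partition>();
        for (Map<String, String> spec : specs) {
          result.put(spec, new Partition(spec)); // created during the load itself
        }
        return result;
      }

      public static void main(String[] args) {
        Map<Map<String, String>, Partition> dp =
            loadDynamicPartitions(List.of(Map.of("ds", "2014-09-29", "hr", "00")));
        // Consumer side, as in MoveTask: iterate entries, no re-fetch per partition.
        for (Map.Entry<Map<String, String>, Partition> entry : dp.entrySet()) {
          System.out.println(entry.getKey() + " -> " + entry.getValue().spec);
        }
      }
    }

A LinkedHashMap keeps the partitions in load order, so downstream logging and per-partition processing stay deterministic.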
Modified: hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java?rev=1628282&r1=1628281&r2=1628282&view=diff
==============================================================================
--- hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java (original)
+++ hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java Mon Sep 29 19:54:31 2014
@@ -1237,6 +1237,15 @@ public class Hive {
     return getDatabase(currentDb);
   }
 
+  public void loadPartition(Path loadPath, String tableName,
+      Map<String, String> partSpec, boolean replace, boolean holdDDLTime,
+      boolean inheritTableSpecs, boolean isSkewedStoreAsSubdir,
+      boolean isSrcLocal, boolean isAcid) throws HiveException {
+    Table tbl = getTable(tableName);
+    loadPartition(loadPath, tbl, partSpec, replace, holdDDLTime, inheritTableSpecs,
+        isSkewedStoreAsSubdir, isSrcLocal, isAcid);
+  }
+
   /**
    * Load a directory into a Hive Table Partition - Alters existing content of
    * the partition with the contents of loadPath. - If the partition does not
@@ -1245,7 +1254,7 @@ public class Hive {
    *
    * @param loadPath
    *          Directory containing files to load into Table
-   * @param tableName
+   * @param tbl
    *          name of table to be loaded.
    * @param partSpec
    *          defines which partition needs to be loaded
@@ -1258,12 +1267,12 @@ public class Hive {
    * @param isSrcLocal
    *          If the source directory is LOCAL
    */
-  public void loadPartition(Path loadPath, String tableName,
+  public Partition loadPartition(Path loadPath, Table tbl,
       Map<String, String> partSpec, boolean replace, boolean holdDDLTime,
       boolean inheritTableSpecs, boolean isSkewedStoreAsSubdir,
       boolean isSrcLocal, boolean isAcid) throws HiveException {
-    Table tbl = getTable(tableName);
     Path tblDataLocationPath = tbl.getDataLocation();
+    Partition newTPart = null;
     try {
       /**
        * Move files before creating the partition since down stream processes
@@ -1312,10 +1321,10 @@ public class Hive {
         Hive.copyFiles(conf, loadPath, newPartPath, fs, isSrcLocal, isAcid);
       }
 
+      boolean forceCreate = (!holdDDLTime) ? true : false;
+      newTPart = getPartition(tbl, partSpec, forceCreate, newPartPath.toString(), inheritTableSpecs);
       // recreate the partition if it existed before
       if (!holdDDLTime) {
-        Partition newTPart = getPartition(tbl, partSpec, true, newPartPath.toString(),
-            inheritTableSpecs);
         if (isSkewedStoreAsSubdir) {
           org.apache.hadoop.hive.metastore.api.Partition newCreatedTpart = newTPart.getTPartition();
           SkewedInfo skewedInfo = newCreatedTpart.getSd().getSkewedInfo();
@@ -1325,9 +1334,9 @@ public class Hive {
           /* Add list bucketing location mappings. */
           skewedInfo.setSkewedColValueLocationMaps(skewedColValueLocationMaps);
           newCreatedTpart.getSd().setSkewedInfo(skewedInfo);
-          alterPartition(tbl.getTableName(), new Partition(tbl, newCreatedTpart));
+          alterPartition(tbl.getDbName(), tbl.getTableName(), new Partition(tbl, newCreatedTpart));
           newTPart = getPartition(tbl, partSpec, true, newPartPath.toString(), inheritTableSpecs);
-          newCreatedTpart = newTPart.getTPartition();
+          return new Partition(tbl, newCreatedTpart);
         }
       }
     } catch (IOException e) {
@@ -1340,7 +1349,7 @@ public class Hive {
       LOG.error(StringUtils.stringifyException(e));
       throw new HiveException(e);
     }
-
+    return newTPart;
   }
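Two things are going on in the Hive.java half. First, loadPartition is split: a thin overload keeps the old String-based signature for existing callers, while the real work moves to a Table-based method that returns the Partition it created or fetched, so a caller loading many partitions can resolve the Table once instead of once per call. Second, the getPartition call is hoisted above the holdDDLTime check; note that when holdDDLTime is true the new code performs a getPartition with forceCreate == false where the old code made no call at all, and that the committed expression (!holdDDLTime) ? true : false is just a verbose spelling of !holdDDLTime. A minimal sketch of the overload-delegation shape, using hypothetical stand-in classes rather than Hive's own:

    import java.util.Map;

    class OverloadSketch {
      static class Table {
        final String name;
        Table(String name) { this.name = name; }
      }

      static class Partition {
        final Table table;
        final Map<String, String> spec;
        Partition(Table table, Map<String, String> spec) {
          this.table = table;
          this.spec = spec;
        }
      }

      // Stand-in for the metastore lookup the old path repeated on every call.
      Table getTable(String tableName) {
        return new Table(tableName);
      }

      // Old entry point, preserved as a thin delegate so callers keep compiling.
      void loadPartition(String tableName, Map<String, String> spec) {
        loadPartition(getTable(tableName), spec);
      }

      // New entry point: takes an already-resolved Table and returns the
      // Partition instead of discarding it, so batch callers can collect it.
      Partition loadPartition(Table tbl, Map<String, String> spec) {
        return new Partition(tbl, spec);
      }
    }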
@@ -1436,18 +1445,18 @@ private void constructOneLBLocationMap(F
    * @param replace
    * @param numDP number of dynamic partitions
    * @param holdDDLTime
-   * @return a list of strings with the dynamic partition paths
+   * @return partition map details (PartitionSpec and Partition)
    * @throws HiveException
    */
-  public ArrayList<LinkedHashMap<String, String>> loadDynamicPartitions(Path loadPath,
+  public Map<Map<String, String>, Partition> loadDynamicPartitions(Path loadPath,
       String tableName, Map<String, String> partSpec, boolean replace,
       int numDP, boolean holdDDLTime, boolean listBucketingEnabled, boolean isAcid)
       throws HiveException {
 
     Set<Path> validPartitions = new HashSet<Path>();
     try {
-      ArrayList<LinkedHashMap<String, String>> fullPartSpecs =
-        new ArrayList<LinkedHashMap<String, String>>();
+      Map<Map<String, String>, Partition> partitionsMap = new
+          LinkedHashMap<Map<String, String>, Partition>();
 
       FileSystem fs = loadPath.getFileSystem(conf);
       FileStatus[] leafStatus = HiveStatsUtils.getFileStatusRecurse(loadPath, numDP+1, fs);
@@ -1481,6 +1490,7 @@ private void constructOneLBLocationMap(F
             + " to at least " + validPartitions.size() + '.');
       }
 
+      Table tbl = getTable(tableName);
       // for each dynamically created DP directory, construct a full partition spec
       // and load the partition based on that
       Iterator<Path> iter = validPartitions.iterator();
@@ -1493,14 +1503,12 @@ private void constructOneLBLocationMap(F
         // generate a full partition specification
         LinkedHashMap<String, String> fullPartSpec = new LinkedHashMap<String, String>(partSpec);
         Warehouse.makeSpecFromName(fullPartSpec, partPath);
-        fullPartSpecs.add(fullPartSpec);
-
-        // finally load the partition -- move the file to the final table address
-        loadPartition(partPath, tableName, fullPartSpec, replace, holdDDLTime, true,
-            listBucketingEnabled, false, isAcid);
+        Partition newPartition = loadPartition(partPath, tbl, fullPartSpec, replace,
+            holdDDLTime, true, listBucketingEnabled, false, isAcid);
+        partitionsMap.put(fullPartSpec, newPartition);
         LOG.info("New loading path = " + partPath + " with partSpec " + fullPartSpec);
       }
-      return fullPartSpecs;
+      return partitionsMap;
     } catch (IOException e) {
       throw new HiveException(e);
     }
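Taken together, the two halves change the per-partition cost of a dynamic-partition load. Before: each loadPartition call did its own getTable, and MoveTask then issued one more getPartition per partition, roughly two metastore lookups for every partition created. After: loadDynamicPartitions resolves the Table once up front and hands the created Partitions back in the returned map, so those per-partition lookups disappear (the create/alter traffic inside the load itself is unchanged). A back-of-envelope model of just the lookups this patch targets; this is hypothetical code, not anything in Hive:

    class CallCountModel {
      // Old flow: a getTable() inside every loadPartition call, plus a
      // getPartition() per partition back in MoveTask's loop.
      static long oldFlowLookups(long n) {
        return 2 * n;
      }

      // New flow: a single getTable() before the loop; the Partition objects
      // come back from loadDynamicPartitions itself, independent of n.
      static long newFlowLookups(long n) {
        return 1;
      }

      public static void main(String[] args) {
        long n = 1000; // e.g. a query that creates 1000 dynamic partitions
        System.out.println("old: ~" + oldFlowLookups(n)
            + " lookups, new: ~" + newFlowLookups(n));
      }
    }

For a 1000-partition load that is on the order of 2000 avoided round-trips, which is the saving HIVE-7389 is after.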