hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From na...@apache.org
Subject svn commit: r1294942 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/parse/ test/org/apache/hadoop/hive/ql/hooks/ test/queries/clientpositive/ test/results/clientpositive/
Date Wed, 29 Feb 2012 02:14:10 GMT
Author: namit
Date: Wed Feb 29 02:14:10 2012
New Revision: 1294942

URL: http://svn.apache.org/viewvc?rev=1294942&view=rev
Log:
HIVE-2825 Concatenating a partition does not inherit location from table
(Kevin Wilfong via namit)


Added:
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/hooks/VerifyPartitionIsSubdirectoryOfTableHook.java
    hive/trunk/ql/src/test/queries/clientpositive/concatenate_inherit_table_location.q
    hive/trunk/ql/src/test/results/clientpositive/concatenate_inherit_table_location.q.out
Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java?rev=1294942&r1=1294941&r2=1294942&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java Wed Feb 29 02:14:10 2012
@@ -25,7 +25,6 @@ import static org.apache.hadoop.hive.ql.
 import static org.apache.hadoop.hive.ql.parse.HiveParser.TOK_SHOWDATABASES;
 
 import java.io.Serializable;
-import java.net.URI;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.HashSet;
@@ -33,9 +32,9 @@ import java.util.Iterator;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
-import java.util.Map.Entry;
 import java.util.Properties;
 import java.util.Set;
+import java.util.Map.Entry;
 
 import org.antlr.runtime.tree.CommonTree;
 import org.antlr.runtime.tree.Tree;
@@ -59,8 +58,8 @@ import org.apache.hadoop.hive.ql.exec.Ut
 import org.apache.hadoop.hive.ql.hooks.ReadEntity;
 import org.apache.hadoop.hive.ql.hooks.WriteEntity;
 import org.apache.hadoop.hive.ql.index.HiveIndex;
-import org.apache.hadoop.hive.ql.index.HiveIndex.IndexType;
 import org.apache.hadoop.hive.ql.index.HiveIndexHandler;
+import org.apache.hadoop.hive.ql.index.HiveIndex.IndexType;
 import org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat;
 import org.apache.hadoop.hive.ql.io.RCFileInputFormat;
 import org.apache.hadoop.hive.ql.metadata.Hive;
@@ -71,9 +70,7 @@ import org.apache.hadoop.hive.ql.metadat
 import org.apache.hadoop.hive.ql.plan.AddPartitionDesc;
 import org.apache.hadoop.hive.ql.plan.AlterDatabaseDesc;
 import org.apache.hadoop.hive.ql.plan.AlterIndexDesc;
-import org.apache.hadoop.hive.ql.plan.AlterIndexDesc.AlterIndexTypes;
 import org.apache.hadoop.hive.ql.plan.AlterTableDesc;
-import org.apache.hadoop.hive.ql.plan.AlterTableDesc.AlterTableTypes;
 import org.apache.hadoop.hive.ql.plan.AlterTableSimpleDesc;
 import org.apache.hadoop.hive.ql.plan.CreateDatabaseDesc;
 import org.apache.hadoop.hive.ql.plan.CreateIndexDesc;
@@ -109,6 +106,8 @@ import org.apache.hadoop.hive.ql.plan.St
 import org.apache.hadoop.hive.ql.plan.SwitchDatabaseDesc;
 import org.apache.hadoop.hive.ql.plan.TableDesc;
 import org.apache.hadoop.hive.ql.plan.UnlockTableDesc;
+import org.apache.hadoop.hive.ql.plan.AlterIndexDesc.AlterIndexTypes;
+import org.apache.hadoop.hive.ql.plan.AlterTableDesc.AlterTableTypes;
 import org.apache.hadoop.hive.ql.security.authorization.Privilege;
 import org.apache.hadoop.hive.ql.security.authorization.PrivilegeRegistry;
 import org.apache.hadoop.hive.ql.session.SessionState;
@@ -1133,7 +1132,8 @@ public class DDLSemanticAnalyzer extends
         tableName, partSpec);
 
     List<String> inputDir = new ArrayList<String>();
-    String tblPartLoc = null;
+    Path oldTblPartLoc = null;
+    Path newTblPartLoc = null;
     Table tblObj = null;
 
     try {
@@ -1166,12 +1166,24 @@ public class DDLSemanticAnalyzer extends
           bucketCols = part.getBucketCols();
           inputFormatClass = part.getInputFormatClass();
           isArchived = ArchiveUtils.isArchived(part);
-          tblPartLoc = part.getPartitionPath().toString();
+
+          Path tabPath = tblObj.getPath();
+          Path partPath = part.getPartitionPath();
+
+          // if the table is in a different dfs than the partition,
+          // replace the partition's dfs with the table's dfs.
+          newTblPartLoc = new Path(tabPath.toUri().getScheme(), tabPath.toUri()
+              .getAuthority(), partPath.toUri().getPath());
+
+          oldTblPartLoc = partPath;
         }
       } else {
         inputFormatClass = tblObj.getInputFormatClass();
         bucketCols = tblObj.getBucketCols();
-        tblPartLoc = tblObj.getPath().toString();
+
+        // input and output are the same
+        oldTblPartLoc = tblObj.getPath();
+        newTblPartLoc = tblObj.getPath();
       }
 
       // throw a HiveException for non-rcfile.
@@ -1192,8 +1204,7 @@ public class DDLSemanticAnalyzer extends
             "Merge can not perform on archived partitions.");
       }
 
-      // input and output are the same
-      inputDir.add(tblPartLoc);
+      inputDir.add(oldTblPartLoc.toString());
 
       mergeDesc.setInputDir(inputDir);
 
@@ -1202,18 +1213,27 @@ public class DDLSemanticAnalyzer extends
       ddlWork.setNeedLock(true);
       Task<? extends Serializable> mergeTask = TaskFactory.get(ddlWork, conf);
       TableDesc tblDesc = Utilities.getTableDesc(tblObj);
-      String queryTmpdir = ctx.getExternalTmpFileURI((new Path(tblPartLoc)).toUri());
+      String queryTmpdir = ctx.getExternalTmpFileURI(newTblPartLoc.toUri());
       mergeDesc.setOutputDir(queryTmpdir);
       LoadTableDesc ltd = new LoadTableDesc(queryTmpdir, queryTmpdir, tblDesc,
           partSpec == null ? new HashMap<String, String>() : partSpec);
      Task<MoveWork> moveTsk = TaskFactory.get(new MoveWork(null, null, ltd, null, false),
          conf);
       mergeTask.addDependentTask(moveTsk);
-      tableSpec tablepart = new tableSpec(this.db, conf, tablePartAST);
-      StatsWork statDesc = new StatsWork(tablepart);
-      statDesc.setNoStatsAggregator(true);
-      Task<? extends Serializable> statTask = TaskFactory.get(statDesc, conf);
-      moveTsk.addDependentTask(statTask);
+
+      if (conf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
+        StatsWork statDesc;
+        if (oldTblPartLoc.equals(newTblPartLoc)) {
+          // If we're merging to the same location, we can avoid some metastore calls
+          tableSpec tablepart = new tableSpec(this.db, conf, tablePartAST);
+          statDesc = new StatsWork(tablepart);
+        } else {
+          statDesc = new StatsWork(ltd);
+        }
+        statDesc.setNoStatsAggregator(true);
+        Task<? extends Serializable> statTask = TaskFactory.get(statDesc, conf);
+        moveTsk.addDependentTask(statTask);
+      }
 
       rootTasks.add(mergeTask);
     } catch (Exception e) {

Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/hooks/VerifyPartitionIsSubdirectoryOfTableHook.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/hooks/VerifyPartitionIsSubdirectoryOfTableHook.java?rev=1294942&view=auto
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/hooks/VerifyPartitionIsSubdirectoryOfTableHook.java (added)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/hooks/VerifyPartitionIsSubdirectoryOfTableHook.java Wed Feb 29 02:14:10 2012
@@ -0,0 +1,48 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.hooks;
+
+import junit.framework.Assert;
+
+import org.apache.hadoop.hive.ql.metadata.Partition;
+import org.apache.hadoop.hive.ql.metadata.Table;
+
+// This hook verifies that the location of every partition in the inputs and outputs starts with
+// the location of the table.  It is a very simple check to make sure it is a subdirectory.
+public class VerifyPartitionIsSubdirectoryOfTableHook implements ExecuteWithHookContext {
+
+  public void run(HookContext hookContext) {
+    for (WriteEntity output : hookContext.getOutputs()) {
+      if (output.getType() == WriteEntity.Type.PARTITION) {
+        verify (output.getPartition(), output.getTable());
+      }
+    }
+
+    for (ReadEntity input : hookContext.getInputs()) {
+      if (input.getType() == ReadEntity.Type.PARTITION) {
+        verify (input.getPartition(), input.getTable());
+      }
+    }
+  }
+
+  private void verify(Partition partition, Table table) {
+    Assert.assertTrue("The location of the partition: " + partition.getName() + " was not a " +
+        "subdirectory of the location of the table: " + table.getTableName(),
+        partition.getPartitionPath().toString().startsWith(table.getPath().toString()));
+  }
+}

Added: hive/trunk/ql/src/test/queries/clientpositive/concatenate_inherit_table_location.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/concatenate_inherit_table_location.q?rev=1294942&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/concatenate_inherit_table_location.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/concatenate_inherit_table_location.q Wed Feb 29 02:14:10 2012
@@ -0,0 +1,21 @@
+CREATE TABLE citl_table (key STRING, value STRING) PARTITIONED BY (part STRING)
+STORED AS RCFILE
+LOCATION 'pfile:${system:test.tmp.dir}/citl_table';
+
+SET hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.VerifyPartitionIsSubdirectoryOfTableHook;
+
+INSERT OVERWRITE TABLE citl_table PARTITION (part = '1') SELECT * FROM src;
+
+SET hive.exec.post.hooks=;
+
+ALTER TABLE citl_table SET LOCATION 'file:${system:test.tmp.dir}/citl_table';
+
+ALTER TABLE citl_table PARTITION (part = '1') CONCATENATE;
+
+SET hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.VerifyPartitionIsSubdirectoryOfTableHook;
+
+SELECT count(*) FROM citl_table where part = '1';
+
+SET hive.exec.post.hooks=;
+
+DROP TABLE citl_table;

Added: hive/trunk/ql/src/test/results/clientpositive/concatenate_inherit_table_location.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/concatenate_inherit_table_location.q.out?rev=1294942&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/concatenate_inherit_table_location.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/concatenate_inherit_table_location.q.out Wed Feb 29 02:14:10 2012
@@ -0,0 +1,30 @@
+PREHOOK: query: CREATE TABLE citl_table (key STRING, value STRING) PARTITIONED BY (part STRING)
+STORED AS RCFILE
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE citl_table (key STRING, value STRING) PARTITIONED BY (part STRING)
+STORED AS RCFILE
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@citl_table
+PREHOOK: query: INSERT OVERWRITE TABLE citl_table PARTITION (part = '1') SELECT * FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@citl_table@part=1
+#### A masked pattern was here ####
+PREHOOK: type: ALTERTABLE_LOCATION
+PREHOOK: Input: default@citl_table
+PREHOOK: Output: default@citl_table
+PREHOOK: query: ALTER TABLE citl_table PARTITION (part = '1') CONCATENATE
+PREHOOK: type: ALTER_PARTITION_MERGE
+PREHOOK: Input: default@citl_table
+PREHOOK: Output: default@citl_table@part=1
+PREHOOK: query: SELECT count(*) FROM citl_table where part = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@citl_table@part=1
+#### A masked pattern was here ####
+500
+PREHOOK: query: DROP TABLE citl_table
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@citl_table
+PREHOOK: Output: default@citl_table



Mime
View raw message