hadoop-hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From zs...@apache.org
Subject svn commit: r733875 - in /hadoop/hive/trunk: ./ metastore/src/java/org/apache/hadoop/hive/metastore/ ql/src/java/org/apache/hadoop/hive/ql/exec/ ql/src/java/org/apache/hadoop/hive/ql/metadata/ ql/src/java/org/apache/hadoop/hive/ql/parse/ ql/src/java/or...
Date Mon, 12 Jan 2009 19:42:40 GMT
Author: zshao
Date: Mon Jan 12 11:42:24 2009
New Revision: 733875

URL: http://svn.apache.org/viewvc?rev=733875&view=rev
Log:
HIVE-142. Add a metastore check command. (Johan Oskarsson via zshao)

Added:
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/CheckResult.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMetaStoreChecker.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MsckDesc.java
    hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHiveMetaStoreChecker.java
Modified:
    hadoop/hive/trunk/CHANGES.txt
    hadoop/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/DDLWork.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/dropTableDesc.java

Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=733875&r1=733874&r2=733875&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Mon Jan 12 11:42:24 2009
@@ -9,6 +9,8 @@
 
   NEW FEATURES
 
+    HIVE-142. Add a metastore check command. (Johan Oskarsson via zshao)
+
     HIVE-148. Extend bin/hive to include the lineage tool.
     (Suresh Antony via zshao)
 

Modified: hadoop/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java?rev=733875&r1=733874&r2=733875&view=diff
==============================================================================
--- hadoop/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java (original)
+++ hadoop/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java Mon Jan 12 11:42:24 2009
@@ -22,6 +22,7 @@
 import java.io.IOException;
 import java.util.LinkedHashMap;
 import java.util.List;
+import java.util.Map;
 import java.util.Map.Entry;
 
 import org.apache.commons.lang.StringUtils;
@@ -109,7 +110,7 @@
     return false;
   }
 
-  public static String makePartName(LinkedHashMap<String, String> spec) throws MetaException {
+  public static String makePartName(Map<String, String> spec) throws MetaException {
     StringBuffer suffixBuf = new StringBuffer();
     for(Entry<String, String> e: spec.entrySet()) {
       if(e.getValue() == null  || e.getValue().length() == 0) {

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java?rev=733875&r1=733874&r2=733875&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java Mon Jan 12 11:42:24 2009
@@ -18,12 +18,14 @@
 
 package org.apache.hadoop.hive.ql.exec;
 
+import java.io.BufferedWriter;
 import java.io.DataOutput;
 import java.io.FileNotFoundException;
 import java.io.IOException;
+import java.io.OutputStreamWriter;
 import java.io.Serializable;
+import java.io.Writer;
 import java.util.ArrayList;
-import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
@@ -41,11 +43,14 @@
 import org.apache.hadoop.hive.metastore.api.MetaException;
 import org.apache.hadoop.hive.metastore.api.Order;
 import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
+import org.apache.hadoop.hive.ql.metadata.CheckResult;
 import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.HiveMetaStoreChecker;
 import org.apache.hadoop.hive.ql.metadata.InvalidTableException;
 import org.apache.hadoop.hive.ql.metadata.Partition;
 import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.plan.MsckDesc;
 import org.apache.hadoop.hive.ql.plan.DDLWork;
 import org.apache.hadoop.hive.ql.plan.alterTableDesc;
 import org.apache.hadoop.hive.ql.plan.createTableDesc;
@@ -102,6 +107,11 @@
       if (alterTbl != null) {
         return alterTable(db, alterTbl);
       }
+      
+      MsckDesc msckDesc = work.getMsckDesc();
+      if (msckDesc != null) {
+        return msck(db, fs, msckDesc);
+      }      
 
       descTableDesc descTbl = work.getDescTblDesc();
       if (descTbl != null) {
@@ -135,6 +145,95 @@
   }
 
   /**
+   * MetastoreCheck, see if the data in the metastore matches
+   * what is on the dfs.
+   * Current version checks for tables and partitions that
+   * are either missing on disk or in the metastore.
+   * 
+   * @param db The database in question.
+   * @param fs FileSystem that will contain the file written.
+   * @param msckDesc Information about the tables and partitions
+   * we want to check for.
+   * @return Returns 0 when execution succeeds and above 0 if it fails.
+   */
+  private int msck(Hive db, FileSystem fs, MsckDesc msckDesc) {
+    
+    CheckResult result = new CheckResult();
+    try {
+      HiveMetaStoreChecker checker = new HiveMetaStoreChecker(db, fs);
+      checker.checkMetastore(
+        MetaStoreUtils.DEFAULT_DATABASE_NAME, msckDesc.getTableName(), 
+        msckDesc.getPartitionSpec(),
+        result);
+    } catch (HiveException e) {
+      LOG.warn("Failed to run metacheck: ", e);
+      return 1;
+    } catch (IOException e) {
+      LOG.warn("Failed to run metacheck: ", e);
+      return 1;
+    } finally {
+            
+      BufferedWriter resultOut = null;
+      try {
+        resultOut = new BufferedWriter(
+            new OutputStreamWriter(fs.create(msckDesc.getResFile())));
+        
+        boolean firstWritten = false;
+        firstWritten |= writeMsckResult(result.getTablesNotInMs(), 
+            "Tables not in metastore:", resultOut, firstWritten);
+        firstWritten |= writeMsckResult(result.getTablesNotOnFs(), 
+            "Tables missing on filesystem:", resultOut, firstWritten);      
+        firstWritten |= writeMsckResult(result.getPartitionsNotInMs(), 
+            "Partitions not in metastore:", resultOut, firstWritten);
+        firstWritten |= writeMsckResult(result.getPartitionsNotOnFs(), 
+            "Partitions missing from filesystem:", resultOut, firstWritten);      
+      } catch (IOException e) {
+        LOG.warn("Failed to save metacheck output: ", e);
+        return 1;
+      } finally {
+        if(resultOut != null) {
+          try {
+            resultOut.close();
+          } catch (IOException e) {
+            LOG.warn("Failed to close output file: ", e);
+            return 1;
+          }
+        }
+      }
+    }
+    
+    return 0;
+  }
+
+  /**
+   * Write the result of msck to a writer.
+   * @param result The result we're going to write
+   * @param msg Message to write.
+   * @param out Writer to write to
+   * @param wrote if any previous call wrote data
+   * @return true if something was written
+   * @throws IOException In case the writing fails
+   */
+  private boolean writeMsckResult(List<? extends Object> result, String msg, 
+      Writer out, boolean wrote) throws IOException {
+    
+    if(!result.isEmpty()) { 
+      if(wrote) {
+        out.write(terminator);
+      }
+      
+      out.write(msg);
+      for (Object entry : result) {
+        out.write(separator);
+        out.write(entry.toString());
+      }
+      return true;
+    }
+    
+    return false;
+  }
+
+  /**
    * Write a list of partitions to a file.
    * 
    * @param db The database in question.
@@ -455,7 +554,7 @@
       // drop partitions in the list
       Table tbl = db.getTable(dropTbl.getTableName());
       List<Partition> parts = new ArrayList<Partition>();
-      for (HashMap<String, String> partSpec : dropTbl.getPartSpecs()) {
+      for (Map<String, String> partSpec : dropTbl.getPartSpecs()) {
         Partition part = db.getPartition(tbl, partSpec, false);
         if (part == null) {
           console.printInfo("Partition " + partSpec + " does not exist.");

Added: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/CheckResult.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/CheckResult.java?rev=733875&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/CheckResult.java (added)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/CheckResult.java Mon Jan 12 11:42:24 2009
@@ -0,0 +1,113 @@
+package org.apache.hadoop.hive.ql.metadata;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Result class used by the HiveMetaStoreChecker.
+ */
+public class CheckResult {
+
+  private List<String> tablesNotOnFs = new ArrayList<String>();
+  private List<String> tablesNotInMs = new ArrayList<String>();
+  private List<PartitionResult> partitionsNotOnFs = new ArrayList<PartitionResult>();
+  private List<PartitionResult> partitionsNotInMs = new ArrayList<PartitionResult>();
+  
+  /**
+   * @return a list of tables not found on the filesystem.
+   */
+  public List<String> getTablesNotOnFs() {
+    return tablesNotOnFs;
+  }
+  
+  /**
+   * @param tablesNotOnFs a list of tables not found on the filesystem.
+   */
+  public void setTablesNotOnFs(List<String> tablesNotOnFs) {
+    this.tablesNotOnFs = tablesNotOnFs;
+  }
+  
+  /**
+   * @return a list of tables not found in the metastore.
+   */
+  public List<String> getTablesNotInMs() {
+    return tablesNotInMs;
+  }
+  
+  /**
+   * @param tablesNotInMs a list of tables not found in the metastore.
+   */
+  public void setTablesNotInMs(List<String> tablesNotInMs) {
+    this.tablesNotInMs = tablesNotInMs;
+  }
+  
+  /**
+   * @return a list of partitions not found on the fs
+   */
+  public List<PartitionResult> getPartitionsNotOnFs() {
+    return partitionsNotOnFs;
+  }
+  
+  /**
+   * @param partitionsNotOnFs a list of partitions not found on the fs
+   */
+  public void setPartitionsNotOnFs(List<PartitionResult> partitionsNotOnFs) {
+    this.partitionsNotOnFs = partitionsNotOnFs;
+  }
+  
+  /**
+   * @return a list of partitions not found in the metastore
+   */
+  public List<PartitionResult> getPartitionsNotInMs() {
+    return partitionsNotInMs;
+  }
+  
+  /**
+   * @param partitionsNotInMs a list of partitions not found in the metastore
+   */
+  public void setPartitionsNotInMs(List<PartitionResult> partitionsNotInMs) {
+    this.partitionsNotInMs = partitionsNotInMs;
+  } 
+  
+  /**
+   * A basic description of a partition that is 
+   * missing from either the fs or the ms.
+   */
+  static class PartitionResult {
+    private String partitionName;
+    private String tableName;
+    
+    /**
+     * @return name of partition
+     */
+    public String getPartitionName() {
+      return partitionName;
+    }
+    
+    /**
+     * @param partitionName name of partition
+     */
+    public void setPartitionName(String partitionName) {
+      this.partitionName = partitionName;
+    }
+    
+    /**
+     * @return table name
+     */
+    public String getTableName() {
+      return tableName;
+    }
+    
+    /**
+     * @param tableName table name
+     */
+    public void setTableName(String tableName) {
+      this.tableName = tableName;
+    }
+    
+    public String toString() {
+      return tableName + ":" + partitionName;
+    }
+  }
+  
+}

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java?rev=733875&r1=733874&r2=733875&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java Mon Jan 12 11:42:24 2009
@@ -23,6 +23,7 @@
 import java.util.AbstractMap;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Map;
 import java.util.Properties;
 import java.util.Random;
 
@@ -507,14 +508,14 @@
    * @return created partition object
    * @throws HiveException if table doesn't exist or partition already exists
    */
-  public Partition createPartition(Table tbl, AbstractMap<String, String> partSpec) throws HiveException {
+  public Partition createPartition(Table tbl, Map<String, String> partSpec) throws HiveException {
     org.apache.hadoop.hive.metastore.api.Partition tpart = null;
     List<String> pvals = new ArrayList<String>();
     for (FieldSchema field : tbl.getPartCols()) {
       pvals.add(partSpec.get(field.getName()));
     }
     try {
-      tpart = getMSC().appendPartition(MetaStoreUtils.DEFAULT_DATABASE_NAME, tbl.getName(), pvals);;
+      tpart = getMSC().appendPartition(tbl.getDbName(), tbl.getName(), pvals);
     } catch (Exception e) {
       LOG.error(StringUtils.stringifyException(e));
       throw new HiveException(e);
@@ -530,7 +531,7 @@
    * @return result partition object or null if there is no partition
    * @throws HiveException
    */
-  public Partition getPartition(Table tbl, AbstractMap<String, String> partSpec, boolean forceCreate)
+  public Partition getPartition(Table tbl, Map<String, String> partSpec, boolean forceCreate)
       throws HiveException {
     if(!tbl.isValidSpec(partSpec)) {
       throw new HiveException("Invalid partition: " + partSpec);
@@ -545,10 +546,10 @@
     }
     org.apache.hadoop.hive.metastore.api.Partition tpart = null;
     try {
-      tpart = getMSC().getPartition(MetaStoreUtils.DEFAULT_DATABASE_NAME, tbl.getName(), pvals);
+      tpart = getMSC().getPartition(tbl.getDbName(), tbl.getName(), pvals);
       if(tpart == null && forceCreate) {
        LOG.debug("creating partition for table "  + tbl.getName() + " with partition spec : " + partSpec);
-        tpart = getMSC().appendPartition(MetaStoreUtils.DEFAULT_DATABASE_NAME, tbl.getName(), pvals);;
+        tpart = getMSC().appendPartition(tbl.getDbName(), tbl.getName(), pvals);
       }
       if(tpart == null){
         return null;

Added: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMetaStoreChecker.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMetaStoreChecker.java?rev=733875&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMetaStoreChecker.java (added)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMetaStoreChecker.java Mon Jan 12 11:42:24 2009
@@ -0,0 +1,323 @@
+package org.apache.hadoop.hive.ql.metadata;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.metastore.MetaStoreUtils;
+import org.apache.hadoop.hive.metastore.Warehouse;
+import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
+import org.apache.hadoop.hive.ql.metadata.CheckResult.PartitionResult;
+
+import com.facebook.thrift.TException;
+
+/**
+ * Verify that the information in the metastore matches what
+ * is on the filesystem. Return a CheckResult object
+ * containing lists of missing and any unexpected tables and partitions.
+ */
+public class HiveMetaStoreChecker {
+
+  public static final Log LOG = LogFactory.getLog(HiveMetaStoreChecker.class);
+
+  private Hive hive;
+  private FileSystem fs;
+
+  public HiveMetaStoreChecker(Hive hive, FileSystem fs) {
+    super();
+    this.hive = hive;
+    this.fs = fs;
+  }
+
+  /**
+   * Check the metastore for inconsistencies, data missing in either the
+   * metastore or on the dfs.
+   * 
+   * @param dbName
+   *          name of the database, if not specified the default will be used.
+   * @param tableName
+   *          Table we want to run the check for. If null we'll check all the
+   *          tables in the database.
+   * @param partitions List of partition name value pairs, 
+   * if null or empty check all partitions
+   * @param result Fill this with the results of the check 
+   * @throws HiveException Failed to get required information 
+   * from the metastore.
+   * @throws IOException Most likely filesystem related
+   */
+  public void checkMetastore(String dbName, String tableName,
+      List<Map<String, String>> partitions, CheckResult result) 
+    throws HiveException, IOException {
+
+    if (dbName == null || "".equalsIgnoreCase(dbName)) {
+      dbName = MetaStoreUtils.DEFAULT_DATABASE_NAME;
+    }
+
+    try {
+      if (tableName == null || "".equals(tableName)) {
+        // no table specified, check all tables and all partitions.
+        List<String> tables = hive.getTablesForDb(dbName, ".*");
+        for (String currentTableName : tables) {
+          checkTable(dbName, currentTableName, null, result);
+        }
+  
+        findUnknownTables(dbName, tables, result);
+      } else if (partitions == null || partitions.isEmpty()) {
+        // only one table, let's check all partitions
+        checkTable(dbName, tableName, null, result);
+      } else {
+        // check the specified partitions
+        checkTable(dbName, tableName, partitions, result);
+      }
+    } catch (MetaException e) {
+      throw new HiveException(e);
+    } catch (TException e) {
+      throw new HiveException(e);
+    }
+  }
+
+  /**
+   * Check for table directories that aren't in the metastore.
+   * @param dbName Name of the database
+   * @param tables List of table names
+   * @param result Add any found tables to this
+   * @throws HiveException Failed to get required information 
+   * from the metastore.
+   * @throws IOException Most likely filesystem related
+   * @throws MetaException Failed to get required information 
+   * from the metastore.
+   * @throws NoSuchObjectException Failed to get required information 
+   * from the metastore.
+   * @throws TException Thrift communication error.
+   */
+  void findUnknownTables(String dbName, List<String> tables,
+      CheckResult result) throws IOException, MetaException, TException,
+      HiveException {
+
+    Set<Path> dbPaths = new HashSet<Path>();
+    Set<String> tableNames = new HashSet<String>(tables);
+
+    for (String tableName : tables) {
+      Table table = hive.getTable(dbName, tableName);
+      // hack, instead figure out a way to get the db paths
+      String isExternal = table.getParameters().get("EXTERNAL");
+      if (isExternal == null || "TRUE".equalsIgnoreCase(isExternal)) {
+        dbPaths.add(table.getPath().getParent());
+      }
+    }
+
+    for (Path dbPath : dbPaths) {
+      FileStatus[] statuses = fs.listStatus(dbPath);
+      for (FileStatus status : statuses) {
+        
+        if (status.isDir() 
+            && !tableNames.contains(status.getPath().getName())) {
+          
+          result.getTablesNotInMs().add(status.getPath().getName());
+        }
+      }
+    }
+  }
+
+  /**
+   * Check the metastore for inconsistencies, data missing in either the
+   * metastore or on the dfs.
+   * 
+   * @param dbName Name of the database
+   * @param tableName Name of the table
+   * @param partitions Partitions to check, if null or empty
+   * get all the partitions.
+   * @param result Result object
+   * @throws HiveException Failed to get required information 
+   * from the metastore.
+   * @throws IOException Most likely filesystem related
+   * @throws MetaException Failed to get required information 
+   * from the metastore.
+   */
+  void checkTable(String dbName, String tableName,
+      List<Map<String, String>> partitions, CheckResult result)
+      throws MetaException, IOException, HiveException {
+
+    Table table = null;
+
+    try {
+      table = hive.getTable(dbName, tableName);
+    } catch (HiveException e) {
+      result.getTablesNotInMs().add(tableName);
+      return;
+    }
+
+    List<Partition> parts = new ArrayList<Partition>();
+    boolean findUnknownPartitions = true;
+    
+    if (table.isPartitioned()) {
+      if (partitions == null || partitions.isEmpty()) {
+        // no partitions specified, let's get all
+        parts = hive.getPartitions(table);
+      } else {
+        //we're interested in specific partitions,
+        //don't check for any others
+        findUnknownPartitions = false;
+        for (Map<String, String> map : partitions) {
+          Partition part = hive.getPartition(table, map, false);
+          if(part == null) {
+            PartitionResult pr = new PartitionResult();
+            pr.setTableName(tableName);
+            pr.setPartitionName(Warehouse.makePartName(map));
+            result.getPartitionsNotInMs().add(pr);
+          } else {
+            parts.add(part);
+          }
+        }
+      }
+    }
+
+    checkTable(table, parts, findUnknownPartitions, result);
+  }
+
+  /**
+   * Check the metastore for inconsistencies, data missing in either the
+   * metastore or on the dfs.
+   * 
+   * @param table Table to check
+   * @param parts Partitions to check
+   * @param result Result object
+   * @param findUnknownPartitions Should we try to find unknown partitions?
+   * @throws IOException Could not get information from filesystem
+   * @throws HiveException Could not create Partition object
+   */
+  void checkTable(Table table, List<Partition> parts, 
+      boolean findUnknownPartitions, CheckResult result) 
+    throws IOException, HiveException {
+
+    Path tablePath = table.getPath();
+    if (!fs.exists(tablePath)) {
+      result.getTablesNotOnFs().add(table.getName());
+      return;
+    }
+
+    Set<Path> partPaths = new HashSet<Path>();
+
+    // check that the partition folders exist on disk
+    for (Partition partition : parts) {
+      if(partition == null) {
+        //most likely the user specified an invalid partition
+        continue;
+      }
+      Path partPath = partition.getPartitionPath();
+      if (!fs.exists(partPath)) {
+        PartitionResult pr = new PartitionResult();
+        pr.setPartitionName(partition.getName());
+        pr.setTableName(partition.getTable().getName());
+        result.getPartitionsNotOnFs().add(pr);
+      }
+
+      for (int i = 0; i < partition.getSpec().size(); i++) {
+        partPaths.add(partPath.makeQualified(fs));
+        partPath = partPath.getParent();
+      }
+    }
+
+    if(findUnknownPartitions) {
+      findUnknownPartitions(table, partPaths, result);
+    }
+  }
+
+  /**
+   * Find partitions on the fs that are
+   * unknown to the metastore
+   * @param table Table where the partitions would be located
+   * @param partPaths Paths of the partitions the ms knows about
+   * @param result Result object 
+   * @throws IOException Thrown if we fail at fetching listings from
+   * the fs.
+   */
+  void findUnknownPartitions(Table table, Set<Path> partPaths, 
+      CheckResult result) throws IOException {
+    
+    Path tablePath = table.getPath();
+    // now check the table folder and see if we find anything
+    // that isn't in the metastore
+    Set<Path> allPartDirs = new HashSet<Path>();
+    getAllLeafDirs(tablePath, allPartDirs);
+    // don't want the table dir
+    allPartDirs.remove(tablePath);
+
+    // remove the partition paths we know about
+    allPartDirs.removeAll(partPaths);
+    
+    // we should now only have the unexpected folders left
+    for (Path partPath : allPartDirs) {
+      String partitionName = getPartitionName(fs.makeQualified(tablePath), 
+          partPath);
+      
+      if (partitionName != null) {
+        PartitionResult pr = new PartitionResult();
+        pr.setPartitionName(partitionName);
+        pr.setTableName(table.getName());
+        
+        result.getPartitionsNotInMs().add(pr);
+      }
+    }
+  }
+
+  /**
+   * Get the partition name from the path.
+   * 
+   * @param tablePath Path of the table.
+   * @param partitionPath Path of the partition.
+   * @return Partition name, for example partitiondate=2008-01-01
+   */
+  private String getPartitionName(Path tablePath, Path partitionPath) {
+    String result = null;
+    Path currPath = partitionPath;
+    while (currPath != null && !tablePath.equals(currPath)) {
+      if(result == null) {
+        result = currPath.getName();
+      } else {
+        result = currPath.getName() + Path.SEPARATOR + result;
+      }
+      
+      currPath = currPath.getParent();
+    }
+    return result;
+  }
+
+  /**
+   * Recursive method to get the leaf directories of a base path.
+   * Example:
+   * base/dir1/dir2
+   * base/dir3
+   * 
+   * This will return dir2 and dir3 but not dir1.
+   * 
+   * @param basePath Start directory
+   * @param allDirs This set will contain the leaf paths at the end.
+   * @throws IOException Thrown if we can't get lists from the fs.
+   */
+  private void getAllLeafDirs(Path basePath, Set<Path> allDirs)
+      throws IOException {
+    
+    FileStatus[] statuses = fs.listStatus(basePath);
+
+    if (statuses.length == 0) {
+      allDirs.add(basePath);
+    }
+
+    for (FileStatus status : statuses) {
+      if (status.isDir()) {
+        getAllLeafDirs(status.getPath(), allDirs);
+      }
+    }
+  }
+
+}

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java?rev=733875&r1=733874&r2=733875&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java Mon Jan 12 11:42:24 2009
@@ -241,7 +241,7 @@
     return outputFormatClass;
   }
 
-  final public boolean isValidSpec(AbstractMap<String, String> spec) throws HiveException {
+  final public boolean isValidSpec(Map<String, String> spec) throws HiveException {
 
     // TODO - types need to be checked.
     List<FieldSchema> partCols = getTTable().getPartitionKeys();

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java?rev=733875&r1=733874&r2=733875&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java Mon Jan 12 11:42:24 2009
@@ -25,6 +25,7 @@
 import java.util.List;
 import java.util.Map;
 
+import org.antlr.runtime.tree.CommonTree;
 import org.apache.commons.lang.StringUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -35,6 +36,7 @@
 import org.apache.hadoop.hive.metastore.api.Order;
 import org.apache.hadoop.hive.ql.Context;
 import org.apache.hadoop.hive.ql.exec.TaskFactory;
+import org.apache.hadoop.hive.ql.plan.MsckDesc;
 import org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat;
 import org.apache.hadoop.hive.ql.plan.DDLWork;
 import org.apache.hadoop.hive.ql.plan.alterTableDesc;
@@ -97,8 +99,10 @@
     {
       ctx.setResFile(new Path(getTmpFileName()));
       analyzeShowTables(ast);
-    }
-    else if (ast.getToken().getType() == HiveParser.TOK_ALTERTABLE_RENAME)
+    } else if (ast.getToken().getType() == HiveParser.TOK_MSCK) {
+      ctx.setResFile(new Path(getTmpFileName()));
+      analyzeMetastoreCheck(ast);    
+    } else if (ast.getToken().getType() == HiveParser.TOK_ALTERTABLE_RENAME)
       analyzeAlterTableRename(ast);
     else if (ast.getToken().getType() == HiveParser.TOK_ALTERTABLE_ADDCOLS)
       analyzeAlterTableModifyCols(ast, alterTableTypes.ADDCOLS);
@@ -495,13 +499,40 @@
     alterTableDesc alterTblDesc = new alterTableDesc(tblName, newCols, alterType);
     rootTasks.add(TaskFactory.get(new DDLWork(alterTblDesc), conf));
   }
-
+  
   private void analyzeAlterTableDropParts(ASTNode ast) throws SemanticException {
-    String tblName = null;
-    List<HashMap<String, String>> partSpecs = new ArrayList<HashMap<String, String>>();
-    int childIndex = 0;
+    String tblName = unescapeIdentifier(ast.getChild(0).getText());
     // get table metadata
-    tblName = unescapeIdentifier(ast.getChild(0).getText());
+    List<Map<String, String>> partSpecs = getPartitionSpecs(ast);
+    dropTableDesc dropTblDesc = new dropTableDesc(tblName, partSpecs);
+    rootTasks.add(TaskFactory.get(new DDLWork(dropTblDesc), conf));
+  }
+  
+  /**
+   * Verify that the information in the metastore matches up
+   * with the data on the fs.
+   * @param ast Query tree.
+   * @throws SemanticException
+   */
+  private void analyzeMetastoreCheck(CommonTree ast) throws SemanticException {
+    String tableName = null;
+    if(ast.getChildCount() > 0) {
+      tableName = unescapeIdentifier(ast.getChild(0).getText());
+    }
+    List<Map<String, String>> specs = getPartitionSpecs(ast);
+    MsckDesc checkDesc = new MsckDesc(tableName, specs, ctx.getResFile());
+    rootTasks.add(TaskFactory.get(new DDLWork(checkDesc), conf));   
+  }
+
+  /**
+   * Get the partition specs from the tree.
+   * @param ast Tree to extract partitions from.
+   * @return A list of partition name to value mappings.
+   * @throws SemanticException
+   */
+  private List<Map<String, String>> getPartitionSpecs(CommonTree ast) throws SemanticException {
+    List<Map<String, String>> partSpecs = new ArrayList<Map<String, String>>();
+    int childIndex = 0;
     // get partition metadata if partition specified
     for( childIndex = 1; childIndex < ast.getChildCount(); childIndex++) {
       ASTNode partspec = (ASTNode) ast.getChild(childIndex);
@@ -513,7 +544,6 @@
       }
       partSpecs.add(partSpec);
     }
-    dropTableDesc dropTblDesc = new dropTableDesc(tblName, partSpecs);
-    rootTasks.add(TaskFactory.get(new DDLWork(dropTblDesc), conf));
+    return partSpecs;
   }
 }

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g?rev=733875&r1=733874&r2=733875&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g Mon Jan 12 11:42:24 2009
@@ -88,6 +88,7 @@
 TOK_ALTERTABLE_SERDEPROPERTIES;
 TOK_ALTERTABLE_SERIALIZER;
 TOK_ALTERTABLE_PROPERTIES;
+TOK_MSCK;
 TOK_SHOWTABLES;
 TOK_SHOWPARTITIONS;
 TOK_CREATEEXTTABLE;
@@ -163,6 +164,7 @@
     | alterStatement
     | descStatement
     | showStatement
+    | metastoreCheck
     | createFunctionStatement
     ;
 
@@ -229,6 +231,11 @@
     | KW_SHOW KW_PARTITIONS Identifier -> ^(TOK_SHOWPARTITIONS Identifier)
     ;
     
+// Metastore consistency check:
+//   MSCK [TABLE <name> [<partitionSpec> (, <partitionSpec>)*]]
+// Everything after the MSCK keyword is optional; with no table the whole
+// database is checked.
+metastoreCheck
+    : KW_MSCK (KW_TABLE table=Identifier partitionSpec? (COMMA partitionSpec)*)?
+    -> ^(TOK_MSCK ($table partitionSpec*)?)
+    ;     
+    
 createFunctionStatement
     : KW_CREATE KW_TEMPORARY KW_FUNCTION Identifier KW_AS StringLiteral
     -> ^(TOK_CREATEFUNCTION Identifier StringLiteral)
@@ -850,6 +857,7 @@
 KW_TABLE: 'TABLE';
 KW_TABLES: 'TABLES';
 KW_SHOW: 'SHOW';
+KW_MSCK: 'MSCK';
 KW_DIRECTORY: 'DIRECTORY';
 KW_LOCAL: 'LOCAL';
 KW_TRANSFORM : 'TRANSFORM';

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java?rev=733875&r1=733874&r2=733875&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java
(original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java
Mon Jan 12 11:42:24 2009
@@ -34,6 +34,7 @@
       case HiveParser.TOK_CREATEEXTTABLE: 
       case HiveParser.TOK_DROPTABLE: 
       case HiveParser.TOK_DESCTABLE: 
+      case HiveParser.TOK_MSCK:
       case HiveParser.TOK_ALTERTABLE_ADDCOLS:
       case HiveParser.TOK_ALTERTABLE_REPLACECOLS:
       case HiveParser.TOK_ALTERTABLE_RENAME:

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/DDLWork.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/DDLWork.java?rev=733875&r1=733874&r2=733875&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/DDLWork.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/DDLWork.java Mon Jan 12 11:42:24
2009
@@ -28,6 +28,7 @@
   private showTablesDesc       showTblsDesc;
   private showPartitionsDesc   showPartsDesc;
   private descTableDesc        descTblDesc;
+  private MsckDesc msckDesc;
 
   public DDLWork() { }
 
@@ -73,6 +74,10 @@
     this.showPartsDesc = showPartsDesc;
   }
 
+  /**
+   * @param checkDesc metastore check (MSCK) description
+   */
+  public DDLWork(MsckDesc checkDesc) {
+    this.msckDesc = checkDesc;
+  }
+
   /**
    * @return the createTblDesc
    */
@@ -163,5 +168,19 @@
   public void setDescTblDesc(descTableDesc descTblDesc) {
     this.descTblDesc = descTblDesc;
   }
+
+  /**
+   * @return Metastore check description, or null if this DDL work is not
+   * a metastore check.
+   */
+  public MsckDesc getMsckDesc() {
+    return msckDesc;
+  }
+
+  /**
+   * @param msckDesc metastore check description
+   */
+  public void setMsckDesc(MsckDesc msckDesc) {
+    this.msckDesc = msckDesc;
+  }
   
 }

Added: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MsckDesc.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MsckDesc.java?rev=733875&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MsckDesc.java (added)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MsckDesc.java Mon Jan 12
11:42:24 2009
@@ -0,0 +1,69 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.plan;
+
+import java.io.Serializable;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.fs.Path;
+
+/**
+ * Description of an MSCK (metastore check) command: which table and
+ * partitions to verify against the file system, and where to write the
+ * output. Extends ddlDesc and implements Serializable for consistency
+ * with the other DDL descriptors carried by DDLWork.
+ */
+public class MsckDesc extends ddlDesc implements Serializable {
+
+  private static final long serialVersionUID = 1L;
+
+  private String tableName;
+  private List<Map<String, String>> partitionSpec;
+  private Path resFile;
+
+  /**
+   * No-arg constructor for plan serialization.
+   */
+  public MsckDesc() {
+  }
+
+  /**
+   * Description of a msck command.
+   * @param tableName Table to check, can be null.
+   * @param partSpecs Partition specification, can be null.
+   * @param resFile Where to save the output of the command
+   */
+  public MsckDesc(String tableName, List<Map<String, String>> partSpecs,
+      Path resFile) {
+    this.tableName = tableName;
+    this.partitionSpec = partSpecs;
+    this.resFile = resFile;
+  }
+
+  /**
+   * @return the table to check
+   */
+  public String getTableName() {
+    return tableName;
+  }
+
+  /**
+   * @param tableName the table to check
+   */
+  public void setTableName(String tableName) {
+    this.tableName = tableName;
+  }
+
+  /**
+   * @return partitions to check.
+   */
+  public List<Map<String, String>> getPartitionSpec() {
+    return partitionSpec;
+  }
+
+  /**
+   * @param partitionSpec partitions to check.
+   */
+  public void setPartitionSpec(List<Map<String, String>> partitionSpec) {
+    this.partitionSpec = partitionSpec;
+  }
+
+  /**
+   * @return file to save command output to
+   */
+  public Path getResFile() {
+    return resFile;
+  }
+
+  /**
+   * @param resFile file to save command output to
+   */
+  public void setResFile(Path resFile) {
+    this.resFile = resFile;
+  }
+
+}

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/dropTableDesc.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/dropTableDesc.java?rev=733875&r1=733874&r2=733875&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/dropTableDesc.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/dropTableDesc.java Mon Jan
12 11:42:24 2009
@@ -21,6 +21,7 @@
 import java.io.Serializable;
 import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 
 @explain(displayName="Drop Table")
 public class dropTableDesc extends ddlDesc implements Serializable 
@@ -28,7 +29,7 @@
   private static final long serialVersionUID = 1L;
   
   String            tableName;
-  List<HashMap<String, String>> partSpecs;
+  List<Map<String, String>> partSpecs;
 
   /**
    * @param tableName
@@ -38,7 +39,7 @@
     this.partSpecs = null;
   }
 
-  public dropTableDesc(String tableName, List<HashMap<String, String>> partSpecs)
{
+  public dropTableDesc(String tableName, List<Map<String, String>> partSpecs)
{
     this.tableName = tableName;
     this.partSpecs = partSpecs;
   }
@@ -61,14 +62,14 @@
   /**
    * @return the partSpecs
    */
-  public List<HashMap<String, String>> getPartSpecs() {
+  public List<Map<String, String>> getPartSpecs() {
     return partSpecs;
   }
 
   /**
    * @param partSpecs the partSpecs to set
    */
-  public void setPartSpecs(List<HashMap<String, String>> partSpecs) {
+  public void setPartSpecs(List<Map<String, String>> partSpecs) {
     this.partSpecs = partSpecs;
   }
 }

Added: hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHiveMetaStoreChecker.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHiveMetaStoreChecker.java?rev=733875&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHiveMetaStoreChecker.java
(added)
+++ hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHiveMetaStoreChecker.java
Mon Jan 12 11:42:24 2009
@@ -0,0 +1,216 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.metadata;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import junit.framework.TestCase;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.metastore.api.AlreadyExistsException;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.hive.serde.Constants;
+import org.apache.hadoop.mapred.TextInputFormat;
+import org.apache.hadoop.mapred.TextOutputFormat;
+
+import com.facebook.thrift.TException;
+
+/**
+ * Tests HiveMetaStoreChecker: verifies that tables and partitions present
+ * in the metastore but missing on the file system (and vice versa) are
+ * reported in the CheckResult.
+ */
+public class TestHiveMetaStoreChecker extends TestCase {
+
+  private Hive hive;
+  private FileSystem fs;
+  private HiveMetaStoreChecker checker = null;
+
+  private String dbName = "dbname";
+  private String tableName = "tablename";
+
+  private String partDateName = "partdate";
+  private String partCityName = "partcity";
+
+  private List<FieldSchema> partCols;
+  private List<Map<String, String>> parts;
+
+  @Override
+  protected void setUp() throws Exception {
+    super.setUp();
+    hive = Hive.get();
+    fs = FileSystem.getLocal(hive.getConf());
+    checker = new HiveMetaStoreChecker(hive, fs);
+
+    partCols = new ArrayList<FieldSchema>();
+    partCols.add(new FieldSchema(partDateName, Constants.STRING_TYPE_NAME, 
+        ""));
+    partCols.add(new FieldSchema(partCityName, Constants.STRING_TYPE_NAME, 
+        ""));
+
+    parts = new ArrayList<Map<String, String>>();
+    Map<String, String> part1 = new HashMap<String, String>();
+    part1.put(partDateName, "2008-01-01");
+    part1.put(partCityName, "london");
+    parts.add(part1);
+    Map<String, String> part2 = new HashMap<String, String>();
+    part2.put(partDateName, "2008-01-02");
+    part2.put(partCityName, "stockholm");
+    parts.add(part2);
+
+    // start from a clean slate: remove anything left by a previous run
+    hive.dropTable(dbName, tableName, true, true);
+    hive.dropDatabase(dbName);
+  }
+
+  @Override
+  protected void tearDown() throws Exception {
+    super.tearDown();
+    Hive.closeCurrent();
+  }
+
+  public void testTableCheck() throws HiveException, MetaException,
+      IOException, TException, AlreadyExistsException {
+
+    CheckResult result = new CheckResult();
+    checker.checkMetastore(dbName, null, null, result);
+    // we haven't added anything so should return an all ok
+    assertTrue(result.getTablesNotInMs().isEmpty());
+    assertTrue(result.getTablesNotOnFs().isEmpty());
+    assertTrue(result.getPartitionsNotOnFs().isEmpty());
+    assertTrue(result.getPartitionsNotInMs().isEmpty());
+
+    // check table only, should not exist in ms
+    result = new CheckResult();
+    checker.checkMetastore(dbName, tableName, null, result);
+    assertEquals(1, result.getTablesNotInMs().size());
+    assertEquals(tableName, result.getTablesNotInMs().get(0));
+    assertTrue(result.getTablesNotOnFs().isEmpty());
+    assertTrue(result.getPartitionsNotOnFs().isEmpty());
+    assertTrue(result.getPartitionsNotInMs().isEmpty());
+
+    hive.createDatabase(dbName, "");
+
+    Table table = new Table(tableName);
+    table.getTTable().setDbName(dbName);
+    table.setInputFormatClass(TextInputFormat.class);
+    table.setOutputFormatClass(TextOutputFormat.class);
+
+    hive.createTable(table);
+    // now we've got a table, check that it works
+    // first check all (1) tables
+    result = new CheckResult();
+    checker.checkMetastore(dbName, null, null, result);
+    assertTrue(result.getTablesNotInMs().isEmpty());
+    assertTrue(result.getTablesNotOnFs().isEmpty());
+    assertTrue(result.getPartitionsNotOnFs().isEmpty());
+    assertTrue(result.getPartitionsNotInMs().isEmpty());
+
+    // then let's check the one we know about
+    result = new CheckResult();
+    checker.checkMetastore(dbName, tableName, null, result);
+    assertTrue(result.getTablesNotInMs().isEmpty());
+    assertTrue(result.getTablesNotOnFs().isEmpty());
+    assertTrue(result.getPartitionsNotOnFs().isEmpty());
+    assertTrue(result.getPartitionsNotInMs().isEmpty());
+
+    // remove the table folder
+    fs.delete(table.getPath(), true);
+
+    // now this shouldn't find the path on the fs
+    result = new CheckResult();
+    checker.checkMetastore(dbName, tableName, null, result);
+    assertTrue(result.getTablesNotInMs().isEmpty());
+    assertEquals(1, result.getTablesNotOnFs().size());
+    assertEquals(tableName, result.getTablesNotOnFs().get(0));
+    assertTrue(result.getPartitionsNotOnFs().isEmpty());
+    assertTrue(result.getPartitionsNotInMs().isEmpty());
+
+    // put it back and one additional table
+    fs.mkdirs(table.getPath());
+    Path fakeTable = table.getPath().getParent().suffix(
+        Path.SEPARATOR + "faketable");
+    fs.mkdirs(fakeTable);
+
+    // find the extra table
+    result = new CheckResult();
+    checker.checkMetastore(dbName, null, null, result);
+    assertEquals(1, result.getTablesNotInMs().size());
+    assertEquals(fakeTable.getName(), result.getTablesNotInMs().get(0));
+    assertTrue(result.getTablesNotOnFs().isEmpty());
+    assertTrue(result.getPartitionsNotOnFs().isEmpty());
+    assertTrue(result.getPartitionsNotInMs().isEmpty());
+  }
+
+  public void testPartitionsCheck() throws HiveException, MetaException,
+      IOException, TException, AlreadyExistsException {
+
+    hive.createDatabase(dbName, "");
+
+    Table table = new Table(tableName);
+    table.getTTable().setDbName(dbName);
+    table.setInputFormatClass(TextInputFormat.class);
+    table.setOutputFormatClass(TextOutputFormat.class);
+    table.setPartCols(partCols);
+
+    hive.createTable(table);
+    table = hive.getTable(dbName, tableName);
+
+    for (Map<String, String> partSpec : parts) {
+      hive.createPartition(table, partSpec);
+    }
+
+    CheckResult result = new CheckResult();
+    checker.checkMetastore(dbName, tableName, null, result);
+    // all is well
+    assertTrue(result.getTablesNotInMs().isEmpty());
+    assertTrue(result.getTablesNotOnFs().isEmpty());
+    assertTrue(result.getPartitionsNotOnFs().isEmpty());
+    assertTrue(result.getPartitionsNotInMs().isEmpty());
+
+    List<Partition> partitions = hive.getPartitions(table);
+    assertEquals(2, partitions.size());
+    Partition partToRemove = partitions.get(0);
+    Path partToRemovePath = new Path(partToRemove.getDataLocation().toString());
+    fs.delete(partToRemovePath, true);
+
+    result = new CheckResult();
+    checker.checkMetastore(dbName, tableName, null, result);
+    // missing one partition on fs
+    assertTrue(result.getTablesNotInMs().isEmpty());
+    assertTrue(result.getTablesNotOnFs().isEmpty());
+    assertEquals(1, result.getPartitionsNotOnFs().size());
+    assertEquals(partToRemove.getName(), result.getPartitionsNotOnFs().get(0)
+        .getPartitionName());
+    assertEquals(partToRemove.getTable().getName(), result
+        .getPartitionsNotOnFs().get(0).getTableName());
+    assertTrue(result.getPartitionsNotInMs().isEmpty());
+
+    List<Map<String, String>> partsCopy = new ArrayList<Map<String, String>>();
+    partsCopy.add(partitions.get(1).getSpec());
+    // check only the partition that exists, all should be well
+    result = new CheckResult();
+    checker.checkMetastore(dbName, tableName, partsCopy, result);
+    assertTrue(result.getTablesNotInMs().isEmpty());
+    assertTrue(result.getTablesNotOnFs().isEmpty());
+    assertTrue(result.getPartitionsNotOnFs().isEmpty());
+    assertTrue(result.getPartitionsNotInMs().isEmpty());
+
+    // put the other one back
+    fs.mkdirs(partToRemovePath);
+
+    // add a partition dir on fs
+    Path fakePart = new Path(table.getDataLocation().toString(),
+        "fakepartition=fakevalue");
+    fs.mkdirs(fakePart);
+
+    // use a fresh result, like every other check in this test, so the
+    // assertions don't depend on leftovers from the previous check
+    result = new CheckResult();
+    checker.checkMetastore(dbName, tableName, null, result);
+    // one extra partition
+    assertTrue(result.getTablesNotInMs().isEmpty());
+    assertTrue(result.getTablesNotOnFs().isEmpty());
+    assertTrue(result.getPartitionsNotOnFs().isEmpty());
+    assertEquals(1, result.getPartitionsNotInMs().size());
+    assertEquals(fakePart.getName(), result.getPartitionsNotInMs().get(0)
+        .getPartitionName());
+  }
+
+}



Mime
View raw message