From: zs...@apache.org
Subject: svn commit: r768451 - in /hadoop/hive/branches/branch-0.3: ./ metastore/src/java/org/apache/hadoop/hive/metastore/ ql/src/java/org/apache/hadoop/hive/ql/metadata/ ql/src/test/results/clientnegative/
Date: Sat, 25 Apr 2009 02:53:55 GMT
Author: zshao
Date: Sat Apr 25 02:53:54 2009
New Revision: 768451

URL: http://svn.apache.org/viewvc?rev=768451&view=rev
Log:
HIVE-442. Move the data before creating the partition. (Prasad Chakka via zshao)

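In outline, the patch inverts the old create-partition-then-move-data order in Hive.loadPartition: files are moved into the partition directory first, and the partition is registered in the metastore only afterwards, so downstream readers that poll the metastore never see a partition with partial data. A condensed sketch of the new ordering (method names and signatures are taken from the Hive.java diff below; exception handling elided):

    Table tbl = getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, tableName);
    FileSystem fs = FileSystem.get(tbl.getDataLocation(), getConf());
    Path partPath = new Path(tbl.getDataLocation().getPath(),
                             Warehouse.makePartName(partSpec));
    if (replace) {
      Hive.replaceFiles(loadPath, partPath, fs, tmpDirPath);  // move data first
    } else {
      Hive.copyFiles(loadPath, partPath, fs);
    }
    getPartition(tbl, partSpec, true);  // then create the partition if missing
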
Modified:
    hadoop/hive/branches/branch-0.3/CHANGES.txt
    hadoop/hive/branches/branch-0.3/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
    hadoop/hive/branches/branch-0.3/metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java
    hadoop/hive/branches/branch-0.3/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
    hadoop/hive/branches/branch-0.3/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java
    hadoop/hive/branches/branch-0.3/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
    hadoop/hive/branches/branch-0.3/ql/src/test/results/clientnegative/invalid_create_tbl1.q.out

Modified: hadoop/hive/branches/branch-0.3/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/branches/branch-0.3/CHANGES.txt?rev=768451&r1=768450&r2=768451&view=diff
==============================================================================
--- hadoop/hive/branches/branch-0.3/CHANGES.txt (original)
+++ hadoop/hive/branches/branch-0.3/CHANGES.txt Sat Apr 25 02:53:54 2009
@@ -172,6 +172,9 @@
     HIVE-416. Fix backtrack in Hive.g
     (Zheng Shao via namit)
 
+    HIVE-442. Move the data before creating the partition.
+    (Prasad Chakka via zshao)
+
 Release 0.2.0 - Unreleased
 
   INCOMPATIBLE CHANGES

Modified: hadoop/hive/branches/branch-0.3/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
URL: http://svn.apache.org/viewvc/hadoop/hive/branches/branch-0.3/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java?rev=768451&r1=768450&r2=768451&view=diff
==============================================================================
--- hadoop/hive/branches/branch-0.3/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java (original)
+++ hadoop/hive/branches/branch-0.3/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java Sat Apr 25 02:53:54 2009
@@ -462,6 +462,10 @@
           if(tbl == null) {
             throw new InvalidObjectException("Unable to add partition because table or database do not exist");
           }
+          if (part.getSd().getLocation() == null) {
+            // set default location if not specified
+            part.getSd().setLocation(Warehouse.makePartName(tbl.getPartitionKeys(), part.getValues()));
+          }
           // add partition
           success = getMS().addPartition(part);
           if(success) {

Modified: hadoop/hive/branches/branch-0.3/metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java
URL: http://svn.apache.org/viewvc/hadoop/hive/branches/branch-0.3/metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java?rev=768451&r1=768450&r2=768451&view=diff
==============================================================================
--- hadoop/hive/branches/branch-0.3/metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java (original)
+++ hadoop/hive/branches/branch-0.3/metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java Sat Apr 25 02:53:54 2009
@@ -157,7 +157,7 @@
     StringBuilder name = new StringBuilder();
     for(int i=0; i< partCols.size(); i++) {
       if(i > 0) {
-        name.append('/');
+        name.append(Path.SEPARATOR);
       }
       name.append((partCols.get(i)).getName());
       name.append('=');

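For reference, the name this loop builds is a relative path of key=value components joined by Path.SEPARATOR. A standalone sketch of the format (plain strings stand in for FieldSchema here; real values may additionally be escaped):

    import java.util.Arrays;
    import java.util.List;

    public class PartNameSketch {
      public static void main(String[] args) {
        List<String> cols = Arrays.asList("ds", "hr");
        List<String> vals = Arrays.asList("2009-04-25", "12");
        StringBuilder name = new StringBuilder();
        for (int i = 0; i < cols.size(); i++) {
          if (i > 0) {
            name.append("/");                    // Path.SEPARATOR on Hadoop
          }
          name.append(cols.get(i)).append('=').append(vals.get(i));
        }
        System.out.println(name);                // prints: ds=2009-04-25/hr=12
      }
    }
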
Modified: hadoop/hive/branches/branch-0.3/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
URL: http://svn.apache.org/viewvc/hadoop/hive/branches/branch-0.3/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java?rev=768451&r1=768450&r2=768451&view=diff
==============================================================================
--- hadoop/hive/branches/branch-0.3/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java (original)
+++ hadoop/hive/branches/branch-0.3/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java Sat Apr 25 02:53:54 2009
@@ -25,7 +25,6 @@
 import java.util.List;
 import java.util.Map;
 import java.util.Properties;
-import java.util.Random;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -258,8 +257,6 @@
       if (!ifNotExists) {
         throw new HiveException(e);
       }
-    } catch (HiveException e) {
-      throw e;
     } catch (Exception e) {
       throw new HiveException(e);
     }
@@ -272,7 +269,7 @@
    * @deprecated Use {@link #dropTable(String, String)} instead
    */
   public void dropTable(String tableName) throws HiveException {
-    dropTable(tableName, true, true);
+    dropTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, tableName, true, true);
   }
 
   
@@ -510,13 +507,29 @@
       AbstractMap<String, String> partSpec, boolean replace,
       Path tmpDirPath)
   throws HiveException {
-    Table tbl = getTable(tableName);
-    Partition part = getPartition(tbl, partSpec, true);
-    if(replace) {
-      part.replaceFiles(loadPath, tmpDirPath);
-    } else {
-      part.copyFiles(loadPath);
-    }
+    Table tbl = getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, tableName);
+    try {
+      FileSystem fs = FileSystem.get(tbl.getDataLocation(), getConf());
+      Path partPath = new Path(tbl.getDataLocation().getPath(), Warehouse.makePartName(partSpec));
+      /** Move files before creating the partition since down stream processes check 
+       *  for existence of partition in metadata before accessing the data. If partition
+       *  is created before data is moved, downstream waiting processes might move forward
+       *  with partial data
+       */
+      if(replace) {
+        Hive.replaceFiles(loadPath, partPath, fs, tmpDirPath);
+      } else {
+        Hive.copyFiles(loadPath, partPath, fs);
+      }
+      // create a partition if it doesn't exist
+      getPartition(tbl, partSpec, true);
+    } catch (IOException e) {
+      LOG.error(StringUtils.stringifyException(e));
+      throw new HiveException(e);
+    } catch (MetaException e) {
+      LOG.error(StringUtils.stringifyException(e));
+      throw new HiveException(e);
+    } 
   }
 
   /**
@@ -533,7 +546,7 @@
   public void loadTable(Path loadPath, String tableName, 
        boolean replace,
        Path tmpDirPath) throws HiveException {
-    Table tbl = getTable(tableName);
+    Table tbl = getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, tableName);
     if(replace) {
       tbl.replaceFiles(loadPath, tmpDirPath);
     } else {
@@ -550,14 +563,7 @@
    */
   public Partition createPartition(Table tbl, Map<String, String> partSpec)
     throws HiveException {
-    
-    try {
-      String loc = tbl.getTTable().getSd().getLocation() +
-        Path.SEPARATOR + Warehouse.makePartName(partSpec);
-      return createPartition(tbl, partSpec, new Path(loc));
-    } catch (MetaException e) {
-      throw new HiveException("Could not create partition location");
-    }
+      return createPartition(tbl, partSpec, null);
   }
   
   /**
@@ -676,7 +682,7 @@
     }
   }
 
-  private void checkPaths(FileSystem fs, FileStatus [] srcs, Path destf, boolean replace) throws HiveException {
+  static private void checkPaths(FileSystem fs, FileStatus [] srcs, Path destf, boolean replace) throws HiveException {
     try {
         for(int i=0; i<srcs.length; i++) {
             FileStatus [] items = fs.listStatus(srcs[i].getPath());
@@ -703,7 +709,7 @@
     }
 }
 
-  protected void copyFiles(Path srcf, Path destf, FileSystem fs) throws HiveException {
+  static protected void copyFiles(Path srcf, Path destf, FileSystem fs) throws HiveException {
     FileStatus[] srcs;
     try {
       srcs = fs.globStatus(srcf);
@@ -740,7 +746,7 @@
    * @param fs The filesystem handle
    * @param tmppath Temporary directory
    */
-  protected void replaceFiles(Path srcf, Path destf, FileSystem fs,
+  static protected void replaceFiles(Path srcf, Path destf, FileSystem fs,
       Path tmppath) throws HiveException {
       FileStatus [] srcs;
       try {

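Making checkPaths, copyFiles, and replaceFiles static lets callers move files without holding a Hive instance; Table.java below is updated accordingly, e.g.:

    Hive.copyFiles(srcf, new Path(getDataLocation().getPath()), fs);
    Hive.replaceFiles(srcf, new Path(getDataLocation().getPath()), fs, tmpd);
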
Modified: hadoop/hive/branches/branch-0.3/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java
URL: http://svn.apache.org/viewvc/hadoop/hive/branches/branch-0.3/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java?rev=768451&r1=768450&r2=768451&view=diff
==============================================================================
--- hadoop/hive/branches/branch-0.3/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java (original)
+++ hadoop/hive/branches/branch-0.3/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java Sat Apr 25 02:53:54 2009
@@ -18,7 +18,6 @@
 
 package org.apache.hadoop.hive.ql.metadata;
 
-import java.io.IOException;
 import java.net.URI;
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -90,7 +89,7 @@
     for (FieldSchema field : tbl.getPartCols()) {
       pvals.add(partSpec.get(field.getName()));
     }
-
+    
     org.apache.hadoop.hive.metastore.api.Partition tpart = 
       new org.apache.hadoop.hive.metastore.api.Partition();
     tpart.setDbName(tbl.getDbName());
@@ -111,7 +110,11 @@
     } 
 
     tpart.setSd(sd);
-    tpart.getSd().setLocation(location.toString());
+    if (location != null) {
+      tpart.getSd().setLocation(location.toString());
+    } else {
+      tpart.getSd().setLocation(null);
+    }
 
     initialize(tbl, tpart);
   }
@@ -126,28 +129,34 @@
       org.apache.hadoop.hive.metastore.api.Partition tp) 
   throws HiveException {
 
-    this.table = tbl;
-    this.tPartition = tp;
-    this.partName = "";
+    table = tbl;
+    tPartition = tp;
+    partName = "";
 
     if(tbl.isPartitioned()) {
       try {
-        this.partName = Warehouse.makePartName(tbl.getPartCols(), 
+        partName = Warehouse.makePartName(tbl.getPartCols(), 
             tp.getValues());
+        if (tp.getSd().getLocation() == null) {
+          // set default if location is not set
+          partPath = new Path(tbl.getDataLocation().toString(), partName);
+          tp.getSd().setLocation(partPath.toString());
+        } else {
+          partPath = new Path(tp.getSd().getLocation());
+        }
       } catch (MetaException e) {
         throw new HiveException("Invalid partition for table " + tbl.getName(),
             e);
       }
-      this.partPath = new Path(tp.getSd().getLocation());
     } else {
       // We are in the HACK territory. 
       // SemanticAnalyzer expects a single partition whose schema
       // is same as the table partition. 
-      this.partPath = table.getPath();
+      partPath = table.getPath();
     }
 
-    this.spec = new LinkedHashMap<String, String>(tbl.createSpec(tp));
-    this.partURI = partPath.toUri();
+    spec = tbl.createSpec(tp);
+    partURI = partPath.toUri();
   }
 
   public String getName() {
@@ -155,21 +164,21 @@
   }
 
   public Table getTable() {
-    return (this.table);
+    return table;
   }
 
   public Path [] getPath() {
     Path [] ret = new Path [1];
-    ret[0] = this.partPath;
+    ret[0] = partPath;
     return(ret);
   }
 
   public Path getPartitionPath() {
-    return this.partPath;
+    return partPath;
   }
 
   final public URI getDataLocation() {
-    return this.partURI;
+    return partURI;
   }
 
   /**
@@ -177,7 +186,7 @@
    * storing it as a property of the table as a short term measure.
    */
   public int getBucketCount() {
-    return this.table.getNumBuckets();
+    return table.getNumBuckets();
     /*
       TODO: Keeping this code around for later use when we will support
       sampling on tables which are not created with CLUSTERED INTO clause
@@ -202,7 +211,7 @@
   }
 
   public List<String> getBucketCols() {
-    return this.table.getBucketCols();
+    return table.getBucketCols();
   }
 
   /**
@@ -212,8 +221,8 @@
   @SuppressWarnings("nls")
   public Path getBucketPath(int bucketNum) {
     try {
-      FileSystem fs = FileSystem.get(this.table.getDataLocation(), Hive.get().getConf());
-      String pathPattern = this.partPath.toString();
+      FileSystem fs = FileSystem.get(table.getDataLocation(), Hive.get().getConf());
+      String pathPattern = partPath.toString();
       if (getBucketCount() > 0) {
         pathPattern = pathPattern + "/*";
       }
@@ -228,7 +237,6 @@
     catch (Exception e) {
       throw new RuntimeException("Cannot get bucket path for bucket " + bucketNum, e);
     }
-    // return new Path(this.partPath, String.format("part-%1$05d", bucketNum));
   }
 
   /**
@@ -257,13 +265,13 @@
     if(s == null) {
       return getPath();
     } else {
-      int bcount = this.getBucketCount();
+      int bcount = getBucketCount();
       if(bcount == 0) {
         return getPath();
       }
 
       Dimension d = s.getSampleDimension();
-      if(!d.getDimensionId().equals(this.table.getBucketingDimensionId())) {
+      if(!d.getDimensionId().equals(table.getBucketingDimensionId())) {
         // if the bucket dimension is not the same as the sampling dimension
         // we must scan all the data
         return getPath();
@@ -277,14 +285,14 @@
       } else if (bcount < scount) {
         if((scount/bcount)*bcount != scount) {
           throw new HiveException("Sample Count"+scount+" is not a multiple of bucket count
" +
-              bcount + " for table " + this.table.getName());
+              bcount + " for table " + table.getName());
         }
         // undersampling a bucket
         ret.add(getBucketPath((s.getSampleNum()-1)%bcount));
       } else if (bcount > scount) {
         if((bcount/scount)*scount != bcount) {
           throw new HiveException("Sample Count"+scount+" is not a divisor of bucket count
" +
-              bcount + " for table " + this.table.getName());
+              bcount + " for table " + table.getName());
         }
         // sampling multiple buckets
         for(int i=0; i<bcount/scount; i++) {
@@ -296,41 +304,9 @@
   }
 
   public LinkedHashMap<String, String> getSpec() {
-    return this.spec;
+    return spec;
   }
 
-  /**
-   * Replaces files in the partition with new data set specified by srcf. Works by moving
files
-   *
-   * @param srcf Files to be moved. Leaf Directories or Globbed File Paths
-   * @param tmpd Temporary directory
-   */
-  @SuppressWarnings("nls")
-  protected void replaceFiles(Path srcf, Path tmpd) throws HiveException {
-    FileSystem fs;
-    try {
-      fs = FileSystem.get(table.getDataLocation(), Hive.get().getConf());
-      Hive.get().replaceFiles(srcf, partPath, fs, tmpd);
-    } catch (IOException e) {
-      throw new HiveException("addFiles: filesystem error in check phase", e);
-    }
-  }
-
-  /**
-   * Inserts files specified into the partition. Works by moving files
-   *
-   * @param srcf Files to be moved. Leaf Directories or Globbed File Paths
-   */
-  @SuppressWarnings("nls")
-  protected void copyFiles(Path srcf) throws HiveException {
-    FileSystem fs;
-    try {
-      fs = FileSystem.get(table.getDataLocation(), Hive.get().getConf());
-      Hive.get().copyFiles(srcf, partPath, fs);
-    } catch (IOException e) {
-      throw new HiveException("addFiles: filesystem error in check phase", e);
-    }
-  }
 
   @SuppressWarnings("nls")
   @Override

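With this change a Partition may be constructed with a null location; initialize() then derives the default path by appending the partition name to the table's data location. For illustration (a hypothetical table stored at hdfs://nn/warehouse/t):

    // tp.getSd().getLocation() == null, partName == "ds=2009-04-25/hr=12"
    partPath = new Path("hdfs://nn/warehouse/t", "ds=2009-04-25/hr=12");
    // -> hdfs://nn/warehouse/t/ds=2009-04-25/hr=12
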
Modified: hadoop/hive/branches/branch-0.3/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
URL: http://svn.apache.org/viewvc/hadoop/hive/branches/branch-0.3/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java?rev=768451&r1=768450&r2=768451&view=diff
==============================================================================
--- hadoop/hive/branches/branch-0.3/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java (original)
+++ hadoop/hive/branches/branch-0.3/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java Sat Apr 25 02:53:54 2009
@@ -20,7 +20,6 @@
 
 import java.io.IOException;
 import java.net.URI;
-import java.util.AbstractMap;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.Iterator;
@@ -41,11 +40,10 @@
 import org.apache.hadoop.hive.metastore.api.SerDeInfo;
 import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
 import org.apache.hadoop.hive.serde.Constants;
-import org.apache.hadoop.hive.serde2.SerDeUtils;
 import org.apache.hadoop.hive.serde2.Deserializer;
 import org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe;
 import org.apache.hadoop.hive.serde2.SerDeException;
-import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
+import org.apache.hadoop.hive.serde2.SerDeUtils;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.io.Writable;
@@ -449,7 +447,7 @@
     FileSystem fs;
     try {
       fs = FileSystem.get(getDataLocation(), Hive.get().getConf());
-      Hive.get().replaceFiles(srcf, new Path(getDataLocation().getPath()), fs, tmpd);
+      Hive.replaceFiles(srcf, new Path(getDataLocation().getPath()), fs, tmpd);
     } catch (IOException e) {
       throw new HiveException("addFiles: filesystem error in check phase", e);
     }
@@ -463,7 +461,7 @@
     FileSystem fs;
     try {
       fs = FileSystem.get(getDataLocation(), Hive.get().getConf());
-      Hive.get().copyFiles(srcf, new Path(getDataLocation().getPath()), fs);
+      Hive.copyFiles(srcf, new Path(getDataLocation().getPath()), fs);
     } catch (IOException e) {
       throw new HiveException("addFiles: filesystem error in check phase", e);
     }
@@ -574,7 +572,7 @@
    * @param tp Use the information from this partition.
    * @return Partition name to value mapping.
    */
-  public Map<String, String> createSpec(
+  public LinkedHashMap<String, String> createSpec(
       org.apache.hadoop.hive.metastore.api.Partition tp) {
     
     List<FieldSchema> fsl = getPartCols();

Modified: hadoop/hive/branches/branch-0.3/ql/src/test/results/clientnegative/invalid_create_tbl1.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/branches/branch-0.3/ql/src/test/results/clientnegative/invalid_create_tbl1.q.out?rev=768451&r1=768450&r2=768451&view=diff
==============================================================================
--- hadoop/hive/branches/branch-0.3/ql/src/test/results/clientnegative/invalid_create_tbl1.q.out (original)
+++ hadoop/hive/branches/branch-0.3/ql/src/test/results/clientnegative/invalid_create_tbl1.q.out Sat Apr 25 02:53:54 2009
@@ -1,2 +1,2 @@
-FAILED: Error in metadata: Partition column name aint conflicts with table columns.
+FAILED: Error in metadata: org.apache.hadoop.hive.ql.metadata.HiveException: Partition column name aint conflicts with table columns.
 FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask


