hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From pa...@apache.org
Subject svn commit: r1155573 - in /hive/trunk/metastore: scripts/upgrade/derby/ scripts/upgrade/mysql/ src/java/org/apache/hadoop/hive/metastore/ src/model/ src/model/org/apache/hadoop/hive/metastore/model/
Date Tue, 09 Aug 2011 22:04:22 GMT
Author: pauly
Date: Tue Aug  9 22:04:20 2011
New Revision: 1155573

URL: http://svn.apache.org/viewvc?rev=1155573&view=rev
Log:
HIVE-2246. Dedupe tables' column schemas from partitions in the metastore db (Sohan Jain via
pauly)

Added:
    hive/trunk/metastore/scripts/upgrade/derby/008-HIVE-2246.derby.sql
    hive/trunk/metastore/scripts/upgrade/derby/008-REVERT-HIVE-2246.derby.sql
    hive/trunk/metastore/scripts/upgrade/mysql/008-HIVE-2246.mysql.sql
    hive/trunk/metastore/src/model/org/apache/hadoop/hive/metastore/model/MColumnDescriptor.java
Modified:
    hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
    hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
    hive/trunk/metastore/src/model/org/apache/hadoop/hive/metastore/model/MStorageDescriptor.java
    hive/trunk/metastore/src/model/package.jdo

Added: hive/trunk/metastore/scripts/upgrade/derby/008-HIVE-2246.derby.sql
URL: http://svn.apache.org/viewvc/hive/trunk/metastore/scripts/upgrade/derby/008-HIVE-2246.derby.sql?rev=1155573&view=auto
==============================================================================
--- hive/trunk/metastore/scripts/upgrade/derby/008-HIVE-2246.derby.sql (added)
+++ hive/trunk/metastore/scripts/upgrade/derby/008-HIVE-2246.derby.sql Tue Aug  9 22:04:20
2011
@@ -0,0 +1,93 @@
+/*
+ * Creates the following tables:
+ *  - CDS: one row per column descriptor, keyed by CD_ID
+ *  - COLUMNS_V2: the columns of each descriptor, keyed by (CD_ID, COLUMN_NAME)
+ * The new columns table is called COLUMNS_V2 because many column entries are
+ * removed and the schema is changed (it is keyed by CD_ID instead of SD_ID);
+ * it would take too long to migrate in place and keep the same table.
+ */
+CREATE TABLE "CDS" (
+  "CD_ID" bigint NOT NULL,
+  PRIMARY KEY ("CD_ID")
+);
+
+CREATE TABLE "COLUMNS_V2" (
+  "CD_ID" bigint NOT NULL,
+  "COMMENT" varchar(4000),
+  "COLUMN_NAME" varchar(128) NOT NULL,
+  "TYPE_NAME" varchar(4000),
+  "INTEGER_IDX" INTEGER NOT NULL,
+  PRIMARY KEY ("CD_ID", "COLUMN_NAME")
+);
+
+ALTER TABLE "COLUMNS_V2" 
+  ADD CONSTRAINT "COLUMNS_V2_FK1"
+  FOREIGN KEY ("CD_ID") REFERENCES "CDS" ("CD_ID")
+  ON DELETE NO ACTION ON UPDATE NO ACTION
+;
+
+/* Alter the SDS table to:
+ *  - add the nullable column CD_ID
+ *  - add the foreign key SDS_FK2 from SDS.CD_ID to CDS.CD_ID
+ *  - no explicit index is created here; Derby backs a foreign key with an index
+ */
+ALTER TABLE SDS
+  ADD COLUMN "CD_ID" bigint
+;
+ALTER TABLE SDS
+  ADD CONSTRAINT "SDS_FK2"
+  FOREIGN KEY ("CD_ID") REFERENCES "CDS" ("CD_ID")
+;
+
+/*
+ * Migrate the TBLS table
+ * Add entries into CDS.
+ * Populate the CD_ID field in SDS for tables
+ * Add entries to COLUMNS_V2 based on this table's sd's columns
+ */
+
+/* In the migration, there is a 1:1 mapping between CD_ID and SD_ID for tables.
+ * For speed, just let CD_ID = SD_ID for tables. The IN subquery is left
+ * unordered on purpose: row order cannot change which SD_IDs match. */
+INSERT INTO CDS (CD_ID)
+SELECT t.SD_ID FROM TBLS t WHERE t.SD_ID IS NOT NULL ORDER BY t.SD_ID;
+
+UPDATE SDS
+  SET CD_ID = SD_ID
+WHERE SD_ID IN
+(SELECT t.SD_ID FROM TBLS t WHERE t.SD_ID IS NOT NULL);
+
+INSERT INTO COLUMNS_V2
+  (CD_ID, COMMENT, COLUMN_NAME, TYPE_NAME, INTEGER_IDX)
+SELECT
+  c.SD_ID, c.COMMENT, c.COLUMN_NAME, c.TYPE_NAME, c.INTEGER_IDX
+FROM
+  COLUMNS c
+JOIN
+  TBLS t
+ON
+  t.SD_ID = c.SD_ID
+;
+
+/*
+ * Migrate the partitions.
+ * Update the partitions' SDS rows to use the parent tables' CD_ID.
+ * Derby does not allow joins in update statements, so the (SD_ID, CD_ID)
+ * pairs are staged in a session-scoped temporary table first.
+ */
+DECLARE GLOBAL TEMPORARY TABLE "TMP_TBL" (
+  "SD_ID" bigint not null,
+  "CD_ID" bigint not null
+) ON COMMIT PRESERVE ROWS NOT LOGGED;
+
+INSERT INTO "SESSION"."TMP_TBL" SELECT
+  p.SD_ID, sds.CD_ID
+  FROM PARTITIONS p
+  JOIN TBLS t ON t.TBL_ID = p.TBL_ID
+  JOIN SDS sds on t.SD_ID = sds.SD_ID
+  WHERE p.SD_ID IS NOT NULL;
+
+UPDATE SDS sd
+  SET sd.CD_ID = 
+    (SELECT tt.CD_ID FROM SESSION.TMP_TBL tt WHERE tt.SD_ID = sd.SD_ID)
+  WHERE sd.SD_ID IN (SELECT SD_ID FROM SESSION.TMP_TBL);

Added: hive/trunk/metastore/scripts/upgrade/derby/008-REVERT-HIVE-2246.derby.sql
URL: http://svn.apache.org/viewvc/hive/trunk/metastore/scripts/upgrade/derby/008-REVERT-HIVE-2246.derby.sql?rev=1155573&view=auto
==============================================================================
--- hive/trunk/metastore/scripts/upgrade/derby/008-REVERT-HIVE-2246.derby.sql (added)
+++ hive/trunk/metastore/scripts/upgrade/derby/008-REVERT-HIVE-2246.derby.sql Tue Aug  9 22:04:20
2011
@@ -0,0 +1,13 @@
+/*
+ * Reverts 008-HIVE-2246.derby.sql:
+ *  - drop the SDS_FK2 constraint, then the CD_ID column, from SDS
+ *  - drop COLUMNS_V2 before CDS (COLUMNS_V2_FK1 references CDS)
+ */
+
+ALTER TABLE SDS DROP CONSTRAINT SDS_FK2;
+
+ALTER TABLE SDS DROP COLUMN CD_ID;
+
+DROP TABLE COLUMNS_V2;
+
+DROP TABLE CDS;

Added: hive/trunk/metastore/scripts/upgrade/mysql/008-HIVE-2246.mysql.sql
URL: http://svn.apache.org/viewvc/hive/trunk/metastore/scripts/upgrade/mysql/008-HIVE-2246.mysql.sql?rev=1155573&view=auto
==============================================================================
--- hive/trunk/metastore/scripts/upgrade/mysql/008-HIVE-2246.mysql.sql (added)
+++ hive/trunk/metastore/scripts/upgrade/mysql/008-HIVE-2246.mysql.sql Tue Aug  9 22:04:20
2011
@@ -0,0 +1,206 @@
+DELIMITER $$
+DROP PROCEDURE IF EXISTS REVERT $$
+DROP PROCEDURE IF EXISTS ALTER_SDS $$
+DROP PROCEDURE IF EXISTS CREATE_SDS $$
+DROP PROCEDURE IF EXISTS CREATE_TABLES $$
+DROP PROCEDURE IF EXISTS MIGRATE_TABLES $$
+DROP PROCEDURE IF EXISTS MIGRATE_PARTITIONS $$
+DROP PROCEDURE IF EXISTS MIGRATE $$
+DROP PROCEDURE IF EXISTS PRE_MIGRATE $$
+DROP PROCEDURE IF EXISTS RENAME_TMP_COLUMNS $$
+DROP PROCEDURE IF EXISTS CREATE_TABLE_SDS $$
+
+/* Call this procedure to revert all changes by this script: drops SDS_FK2 and CD_ID from SDS, then COLUMNS_V2, TABLE_SDS and CDS */
+CREATE PROCEDURE REVERT()
+  BEGIN
+    ALTER TABLE SDS
+      DROP FOREIGN KEY `SDS_FK2`
+    ;
+    ALTER TABLE SDS
+      DROP COLUMN CD_ID
+    ; 
+    DROP TABLE IF EXISTS COLUMNS_V2;
+    DROP TABLE IF EXISTS TABLE_SDS;
+    DROP TABLE IF EXISTS CDS;
+
+  END $$
+
+/* Alter the SDS table to:
+ *  - add the nullable column CD_ID (placed after SD_ID)
+ *  - add the foreign key SDS_FK2 from SDS.CD_ID to CDS.CD_ID (CDS must already exist)
+ *  - create the index SDS_N50 on CD_ID
+ */
+CREATE PROCEDURE ALTER_SDS()
+  BEGIN
+    ALTER TABLE SDS
+      ADD COLUMN CD_ID bigint(20) NULL
+      AFTER SD_ID
+    ;
+    SELECT 'Added the column CD_ID to SDS';
+    ALTER TABLE SDS
+      ADD CONSTRAINT `SDS_FK2`
+      FOREIGN KEY (`CD_ID`) REFERENCES `CDS` (`CD_ID`)
+    ;
+    SELECT 'Created a FK Constraint on CD_ID in SDS';
+    CREATE INDEX `SDS_N50` ON SDS
+      (CD_ID)
+    ;
+    SELECT 'Added an index on CD_ID in SDS';
+  END $$
+
+/*
+ * Creates the following tables (skipped if they already exist):
+ *  - CDS: one row per column descriptor, keyed by CD_ID
+ *  - COLUMNS_V2: the columns of each descriptor, keyed by (CD_ID, COLUMN_NAME)
+ * The new columns table is called COLUMNS_V2 because many column entries are
+ * removed and the schema is changed (it is keyed by CD_ID instead of SD_ID);
+ * it would take too long to migrate in place and keep the same table.
+ */
+CREATE PROCEDURE CREATE_TABLES()
+  BEGIN
+    CREATE TABLE IF NOT EXISTS `CDS` (
+      `CD_ID` bigint(20) NOT NULL,
+      PRIMARY KEY (`CD_ID`)
+    ) ENGINE=InnoDB DEFAULT CHARSET=latin1
+    ;
+
+    CREATE TABLE IF NOT EXISTS `COLUMNS_V2` (
+      `CD_ID` bigint(20) NOT NULL,
+      `COMMENT` varchar(256) CHARACTER SET latin1 COLLATE latin1_bin DEFAULT NULL,
+      `COLUMN_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
+      `TYPE_NAME` varchar(4000) DEFAULT NULL,
+      `INTEGER_IDX` int(11) NOT NULL,
+      PRIMARY KEY (`CD_ID`,`COLUMN_NAME`),
+      KEY `COLUMNS_V2_N49` (`CD_ID`),
+      CONSTRAINT `COLUMNS_V2_FK1` FOREIGN KEY (`CD_ID`) REFERENCES `CDS` (`CD_ID`)
+    ) ENGINE=InnoDB DEFAULT CHARSET=latin1
+    ;
+  END $$
+
+/*
+ * Pre-migration setup: creates CDS/COLUMNS_V2, stages TABLE_SDS, then alters SDS
+ */
+CREATE PROCEDURE PRE_MIGRATE()
+  BEGIN
+    call CREATE_TABLES();
+    SELECT 'Created tables';
+    call CREATE_TABLE_SDS();
+    SELECT 'Created the temp table TABLE_SDS';
+    call ALTER_SDS();
+    SELECT 'Altered the SDS table';
+  END $$
+
+/*
+ * Migrate the TBLS table
+ * Add entries into CDS (one per SD_ID staged in TABLE_SDS).
+ * Populate the CD_ID field in SDS for tables
+ * Add entries to COLUMNS_V2 based on this table's sd's columns
+ */
+CREATE PROCEDURE MIGRATE_TABLES()
+  BEGIN
+    /* In the migration, there is a 1:1 mapping between CD_ID and SD_ID
+     * for tables. For speed, just let CD_ID = SD_ID for tables
+     */
+    INSERT INTO CDS (CD_ID)
+    SELECT SD_ID FROM TABLE_SDS;
+    SELECT 'Inserted into CDS';
+    
+    UPDATE SDS
+      SET CD_ID = SD_ID
+    WHERE SD_ID in 
+    (select SD_ID from TABLE_SDS);
+    SELECT 'Updated CD_ID in SDS';
+
+    INSERT INTO COLUMNS_V2
+      (CD_ID, COMMENT, COLUMN_NAME, TYPE_NAME, INTEGER_IDX)
+    SELECT 
+      c.SD_ID, c.COMMENT, c.COLUMN_NAME, c.TYPE_NAME, c.INTEGER_IDX
+    FROM
+      COLUMNS c
+    JOIN
+      TBLS t
+    ON
+      t.SD_ID = c.SD_ID
+    ;
+    SELECT 'Inserted table columns into COLUMNS_V2';
+  END $$
+
+/*
+ * Migrate the partitions: point each partition's SDS row at the parent table's
+ * CD_ID. MIGRATE_TABLES sets CD_ID = SD_ID for tables, hence the use of t.SD_ID.
+ */
+CREATE PROCEDURE MIGRATE_PARTITIONS()
+  BEGIN
+    UPDATE SDS sd
+    JOIN PARTITIONS p on p.SD_ID = sd.SD_ID
+    JOIN TBLS t on t.TBL_ID = p.TBL_ID
+    SET sd.CD_ID = t.SD_ID
+    where p.SD_ID is not null
+    ;
+    SELECT 'Updated CD_IDs in SDS for partitions';
+
+  END $$
+
+/*
+ * Create a temp table (TABLE_SDS) that holds the non-null SD_IDs referenced by TBLS
+ */
+CREATE PROCEDURE CREATE_TABLE_SDS()
+  BEGIN
+    CREATE TEMPORARY TABLE `TABLE_SDS` (
+      `SD_ID` bigint(20) NOT NULL,
+      PRIMARY KEY (`SD_ID`)
+    ) ENGINE=InnoDB DEFAULT CHARSET=latin1
+    ;
+    INSERT INTO TABLE_SDS 
+      (SD_ID)
+    SELECT
+      t.SD_ID
+    FROM
+      TBLS t
+    WHERE
+      t.SD_ID IS NOT NULL
+    ORDER BY 
+      t.SD_ID
+    ;
+ END $$
+
+/*
+ * A currently unused procedure to migrate the COLUMNS_V2 table to have the name
+ * COLUMNS, renaming its FK and index to match. The old COLUMNS table is not dropped here.
+ */
+CREATE PROCEDURE RENAME_TMP_COLUMNS()
+  BEGIN
+    /*DROP TABLE `COLUMNS`;*/
+    RENAME TABLE `COLUMNS_V2` TO `COLUMNS`;
+    SELECT 'Renamed COLUMNS_V2 to COLUMNS';
+    ALTER TABLE `COLUMNS`
+      DROP FOREIGN KEY `COLUMNS_V2_FK1`;
+    SELECT 'Dropped FK on Columns';
+    DROP INDEX `COLUMNS_V2_N49` ON COLUMNS;
+    SELECT 'Dropped Index on Columns';
+    CREATE INDEX `COLUMNS_N49` ON COLUMNS
+      (CD_ID)
+    ;
+    SELECT 'Added index on Columns';
+    ALTER TABLE `COLUMNS`
+    ADD CONSTRAINT `COLUMNS_FK1`
+      FOREIGN KEY (`CD_ID`) REFERENCES `CDS` (`CD_ID`)
+    ;
+    SELECT 'Added FK on Columns';
+  END $$
+
+/*
+ * Main call for migration: runs PRE_MIGRATE, MIGRATE_TABLES, MIGRATE_PARTITIONS in order
+ */
+CREATE PROCEDURE MIGRATE()
+  BEGIN
+    call PRE_MIGRATE();
+    SELECT 'Completed pre migration';
+    call MIGRATE_TABLES();
+    SELECT 'Completed migrating tables';
+    call MIGRATE_PARTITIONS();
+    SELECT 'Completed migrating partitions';
+    /* Migrate indexes? */
+  END $$
+
+DELIMITER ;

Modified: hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
URL: http://svn.apache.org/viewvc/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java?rev=1155573&r1=1155572&r2=1155573&view=diff
==============================================================================
--- hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java (original)
+++ hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java Tue
Aug  9 22:04:20 2011
@@ -3465,7 +3465,6 @@ public class HiveMetaStore extends Thrif
    */
   public static void main(String[] args) throws Throwable {
     HiveMetastoreCli cli = new HiveMetastoreCli();
-
     cli.parse(args);
 
     // NOTE: It is critical to do this prior to initializing log4j, otherwise

Modified: hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
URL: http://svn.apache.org/viewvc/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java?rev=1155573&r1=1155572&r2=1155573&view=diff
==============================================================================
--- hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java (original)
+++ hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java Tue Aug
 9 22:04:20 2011
@@ -79,6 +79,7 @@ import org.apache.hadoop.hive.metastore.
 import org.apache.hadoop.hive.metastore.api.UnknownDBException;
 import org.apache.hadoop.hive.metastore.api.UnknownPartitionException;
 import org.apache.hadoop.hive.metastore.api.UnknownTableException;
+import org.apache.hadoop.hive.metastore.model.MColumnDescriptor;
 import org.apache.hadoop.hive.metastore.model.MDBPrivilege;
 import org.apache.hadoop.hive.metastore.model.MDatabase;
 import org.apache.hadoop.hive.metastore.model.MFieldSchema;
@@ -701,7 +702,17 @@ public class ObjectStore implements RawS
         if (partColGrants != null && partColGrants.size() > 0) {
           pm.deletePersistentAll(partColGrants);
         }
-        pm.deletePersistentAll(listMPartitions(dbName, tableName, -1));
+
+        // call dropPartition on each of the table's partitions to follow the
+        // procedure for cleanly dropping partitions.
+        List<MPartition> partsToDelete = listMPartitions(dbName, tableName, -1);
+        if (partsToDelete != null) {
+          for (MPartition mpart : partsToDelete) {
+            dropPartitionCommon(mpart);
+          }
+        }
+
+        preDropStorageDescriptor(tbl.getSd());
         // then remove the table
         pm.deletePersistentAll(tbl);
       }
@@ -885,6 +896,7 @@ public class ObjectStore implements RawS
       }
     }
 
+    // A new table is always created with a new column descriptor
     return new MTable(tbl.getTableName().toLowerCase(), mdb,
         convertToMStorageDescriptor(tbl.getSd()), tbl.getOwner(), tbl
             .getCreateTime(), tbl.getLastAccessTime(), tbl.getRetention(),
@@ -955,6 +967,18 @@ public class ObjectStore implements RawS
         .getParameters());
   }
 
+  /**
+   * Wraps a list of model field schemas in a newly created model column descriptor.
+   * A null list is passed through as a null descriptor.
+   * @param cols the columns the column descriptor contains
+   * @return a new column descriptor db-backed object, or null if cols is null
+   */
+  private MColumnDescriptor createNewMColumnDescriptor(List<MFieldSchema> cols) {
+    MColumnDescriptor descriptor =
+        (cols == null) ? null : new MColumnDescriptor(cols);
+    return descriptor;
+  }
+
   // MSD and SD should be same objects. Not sure how to make then same right now
   // MSerdeInfo *& SerdeInfo should be same as well
   private StorageDescriptor convertToStorageDescriptor(MStorageDescriptor msd,
@@ -963,7 +987,8 @@ public class ObjectStore implements RawS
     if (msd == null) {
       return null;
     }
-    return new StorageDescriptor(noFS ? null: convertToFieldSchemas(msd.getCols()),
+    List<MFieldSchema> mFieldSchemas = msd.getCD() == null ? null : msd.getCD().getCols();
+    return new StorageDescriptor(noFS ? null: convertToFieldSchemas(mFieldSchemas),
         msd.getLocation(), msd.getInputFormat(), msd.getOutputFormat(), msd
         .isCompressed(), msd.getNumBuckets(), converToSerDeInfo(msd
         .getSerDeInfo()), msd.getBucketCols(), convertToOrders(msd
@@ -975,12 +1000,37 @@ public class ObjectStore implements RawS
     return convertToStorageDescriptor(msd, false);
   }
 
+  /**
+   * Converts a storage descriptor to a db-backed storage descriptor.  Creates a
+   *   new db-backed column descriptor object for this SD.
+   * @param sd the storage descriptor to wrap in a db-backed object
+   * @return the storage descriptor db-backed object
+   * @throws MetaException
+   */
   private MStorageDescriptor convertToMStorageDescriptor(StorageDescriptor sd)
       throws MetaException {
     if (sd == null) {
       return null;
     }
-    return new MStorageDescriptor(convertToMFieldSchemas(sd.getCols()), sd
+    MColumnDescriptor mcd = createNewMColumnDescriptor(convertToMFieldSchemas(sd.getCols()));
+    return convertToMStorageDescriptor(sd, mcd);
+  }
+
+  /**
+   * Converts a storage descriptor to a db-backed storage descriptor.  It points the
+   * storage descriptor's column descriptor to the one passed as an argument,
+   * so it does not create a new mcolumn descriptor object.
+   * @param sd the storage descriptor to wrap in a db-backed object
+   * @param mcd the db-backed column descriptor
+   * @return the db-backed storage descriptor object
+   * @throws MetaException
+   */
+  private MStorageDescriptor convertToMStorageDescriptor(StorageDescriptor sd,
+      MColumnDescriptor mcd) throws MetaException {
+    if (sd == null) {
+      return null;
+    }
+    return new MStorageDescriptor(mcd, sd
         .getLocation(), sd.getInputFormat(), sd.getOutputFormat(), sd
         .isCompressed(), sd.getNumBuckets(), converToMSerDeInfo(sd
         .getSerdeInfo()), sd.getBucketCols(),
@@ -1002,7 +1052,7 @@ public class ObjectStore implements RawS
             part.getDbName(), part.getTableName());
       }
       openTransaction();
-      MPartition mpart = convertToMPart(part);
+      MPartition mpart = convertToMPart(part, true);
       pm.makePersistent(mpart);
 
       int now = (int)(System.currentTimeMillis()/1000);
@@ -1086,7 +1136,18 @@ public class ObjectStore implements RawS
     return mpart;
   }
 
-  private MPartition convertToMPart(Partition part)
+  /**
+   * Convert a Partition object into an MPartition, which is an object backed by the db
+   * If the Partition's set of columns is the same as the parent table's AND useTableCD
+   * is true, then this partition's storage descriptor's column descriptor will point
+   * to the same one as the table's storage descriptor.
+   * @param part the partition to convert
+   * @param useTableCD whether to try to use the parent table's column descriptor.
+   * @return the model partition object
+   * @throws InvalidObjectException
+   * @throws MetaException
+   */
+  private MPartition convertToMPart(Partition part, boolean useTableCD)
       throws InvalidObjectException, MetaException {
     if (part == null) {
       return null;
@@ -1096,10 +1157,26 @@ public class ObjectStore implements RawS
       throw new InvalidObjectException(
           "Partition doesn't have a valid table or database name");
     }
+
+    // If this partition's set of columns is the same as the parent table's,
+    // use the parent table's, so we do not create a duplicate column descriptor,
+    // thereby saving space
+    MStorageDescriptor msd;
+    if (useTableCD &&
+        mt.getSd() != null && mt.getSd().getCD() != null &&
+        mt.getSd().getCD().getCols() != null &&
+        part.getSd() != null &&
+        convertToFieldSchemas(mt.getSd().getCD().getCols()).
+        equals(part.getSd().getCols())) {
+      msd = convertToMStorageDescriptor(part.getSd(), mt.getSd().getCD());
+    } else {
+      msd = convertToMStorageDescriptor(part.getSd());
+    }
+
     return new MPartition(Warehouse.makePartName(convertToFieldSchemas(mt
         .getPartitionKeys()), part.getValues()), mt, part.getValues(), part
         .getCreateTime(), part.getLastAccessTime(),
-        convertToMStorageDescriptor(part.getSd()), part.getParameters());
+        msd, part.getParameters());
   }
 
   private Partition convertToPart(MPartition mpart) throws MetaException {
@@ -1122,33 +1199,58 @@ public class ObjectStore implements RawS
         mpart.getParameters());
   }
 
+  @Override
   public boolean dropPartition(String dbName, String tableName,
       List<String> part_vals) throws MetaException {
     boolean success = false;
     try {
       openTransaction();
       MPartition part = getMPartition(dbName, tableName, part_vals);
+      dropPartitionCommon(part);
+      success = commitTransaction();
+    } finally {
+      if (!success) {
+        rollbackTransaction();
+      }
+    }
+    return success;
+  }
+
+  /**
+   * Drop an MPartition and cascade deletes (e.g., delete partition privilege grants,
+   *   drop the storage descriptor cleanly, etc.)
+   * @param part - the MPartition to drop
+   * @return whether the transaction committed successfully
+   */
+  private boolean dropPartitionCommon(MPartition part) {
+    boolean success = false;
+    try {
+      openTransaction();
       if (part != null) {
         List<MFieldSchema> schemas = part.getTable().getPartitionKeys();
         List<String> colNames = new ArrayList<String>();
         for (MFieldSchema col: schemas) {
           colNames.add(col.getName());
         }
-        String partName = FileUtils.makePartName(colNames, part_vals);
+        String partName = FileUtils.makePartName(colNames, part.getValues());
 
         List<MPartitionPrivilege> partGrants = listPartitionGrants(
-            dbName, tableName, partName);
+            part.getTable().getDatabase().getName(),
+            part.getTable().getTableName(),
+            partName);
 
         if (partGrants != null && partGrants.size() > 0) {
           pm.deletePersistentAll(partGrants);
         }
 
         List<MPartitionColumnPrivilege> partColumnGrants = listPartitionAllColumnGrants(
-            dbName, tableName, partName);
+            part.getTable().getDatabase().getName(),
+            part.getTable().getTableName(),
+            partName);
         if (partColumnGrants != null && partColumnGrants.size() > 0) {
           pm.deletePersistentAll(partColumnGrants);
         }
-
+        preDropStorageDescriptor(part.getSd());
         pm.deletePersistent(part);
       }
       success = commitTransaction();
@@ -1740,7 +1842,9 @@ public class ObjectStore implements RawS
       oldt.setTableName(newt.getTableName().toLowerCase());
       oldt.setParameters(newt.getParameters());
       oldt.setOwner(newt.getOwner());
-      oldt.setSd(newt.getSd());
+      // Fully copy over the contents of the new SD into the old SD,
+      // so we don't create an extra SD in the metastore db that has no references.
+      fullCopyMSD(newt.getSd(), oldt.getSd());
       oldt.setDatabase(newt.getDatabase());
       oldt.setRetention(newt.getRetention());
       oldt.setPartitionKeys(newt.getPartitionKeys());
@@ -1796,7 +1900,7 @@ public class ObjectStore implements RawS
       name = name.toLowerCase();
       dbname = dbname.toLowerCase();
       MPartition oldp = getMPartition(dbname, name, newPart.getValues());
-      MPartition newp = convertToMPart(newPart);
+      MPartition newp = convertToMPart(newPart, false);
       if (oldp == null || newp == null) {
         throw new InvalidObjectException("partition does not exist.");
       }
@@ -1821,7 +1925,25 @@ public class ObjectStore implements RawS
 
   private void copyMSD(MStorageDescriptor newSd, MStorageDescriptor oldSd) {
     oldSd.setLocation(newSd.getLocation());
-    oldSd.setCols(newSd.getCols());
+    MColumnDescriptor oldCD = oldSd.getCD();
+    // If the columns of the old column descriptor != the columns of the new one,
+    // then change the old storage descriptor's column descriptor.
+    // Convert the MFieldSchema's to their thrift object counterparts, because we maintain
+    // datastore identity (i.e., identity of the model objects are managed by JDO,
+    // not the application).
+    if (!(oldSd != null && oldSd.getCD() != null &&
+         oldSd.getCD().getCols() != null &&
+         newSd != null && newSd.getCD() != null &&
+         newSd.getCD().getCols() != null &&
+         convertToFieldSchemas(newSd.getCD().getCols()).
+         equals(convertToFieldSchemas(oldSd.getCD().getCols()))
+       )) {
+        oldSd.setCD(newSd.getCD());
+    }
+
+    //If oldCd does not have any more references, then we should delete it
+    // from the backend db
+    removeUnusedColumnDescriptor(oldCD);
     oldSd.setBucketCols(newSd.getBucketCols());
     oldSd.setCompressed(newSd.isCompressed());
     oldSd.setInputFormat(newSd.getInputFormat());
@@ -1833,6 +1955,92 @@ public class ObjectStore implements RawS
     oldSd.getSerDeInfo().setParameters(newSd.getSerDeInfo().getParameters());
   }
 
+  /**
+   * Copies all fields from newSd to oldSd: copyMSD's fields, then sort cols and parameters.
+   * @param newSd the new storage descriptor (source of the values)
+   * @param oldSd the old descriptor that gets overwritten in place
+   */
+  private void fullCopyMSD(MStorageDescriptor newSd, MStorageDescriptor oldSd) {
+    copyMSD(newSd, oldSd);
+    oldSd.setSortCols(newSd.getSortCols());
+    oldSd.setParameters(newSd.getParameters());
+  }
+
+  /**
+   * Checks if a column descriptor has any remaining references by storage descriptors
+   * in the db.  If it does not, then delete the CD.  If it does (or oldCD is null), do nothing.
+   * @param oldCD the column descriptor to delete if it is no longer referenced anywhere
+   */
+  private void removeUnusedColumnDescriptor(MColumnDescriptor oldCD) {
+    if (oldCD == null) {
+      return;
+    }
+
+    boolean success = false;
+    try {
+      openTransaction();
+      LOG.debug("execute removeUnusedColumnDescriptor");
+      List<MStorageDescriptor> referencedSDs = listStorageDescriptorsWithCD(oldCD);
+      //if no other SD references this CD, we can throw it out.
+      if (referencedSDs != null && referencedSDs.isEmpty()) {
+        pm.retrieve(oldCD);
+        pm.deletePersistent(oldCD);
+        LOG.debug("successfully deleted a CD in removeUnusedColumnDescriptor");
+      }
+      success = commitTransaction();
+    } finally {
+      if (!success) {
+        rollbackTransaction();
+      }
+    }
+  }
+
+  /**
+   * Hook to invoke right before an action that would drop a storage descriptor.
+   * Detaches the SD from its column descriptor (sets the reference to null) and then
+   * deletes the CD if no storage descriptor references it any longer.
+   * @param msd the storage descriptor to drop; a null SD or an SD without a CD is ignored
+   */
+  private void preDropStorageDescriptor(MStorageDescriptor msd) {
+    MColumnDescriptor columnDescriptor = (msd == null) ? null : msd.getCD();
+    if (columnDescriptor == null) {
+      return;
+    }
+
+    // CDs and SDs are in a 1-N relationship, so the SD's reference to its CD
+    // has to be cleared before the storage descriptor itself is dropped,
+    // to satisfy foreign key constraints.
+    msd.setCD(null);
+    removeUnusedColumnDescriptor(columnDescriptor);
+  }
+
+  /**
+   * Queries for the storage descriptors whose cd field equals the given Column Descriptor.
+   * @param oldCD the column descriptor to find referencing storage descriptors for
+   * @return the matching storage descriptors, retrieved within this call's transaction
+   */
+  private List<MStorageDescriptor> listStorageDescriptorsWithCD(MColumnDescriptor oldCD)
{
+    boolean success = false;
+    List<MStorageDescriptor> sds = null;
+    try {
+      openTransaction();
+      LOG.debug("Executing listStorageDescriptorsWithCD");
+      Query query = pm.newQuery(MStorageDescriptor.class,
+          "this.cd == inCD");
+      query.declareParameters("MColumnDescriptor inCD");
+      sds = (List<MStorageDescriptor>) query.execute(oldCD);
+      LOG.debug("Done executing query for listStorageDescriptorsWithCD");
+      pm.retrieveAll(sds);
+      success = commitTransaction();
+      LOG.debug("Done retrieving all objects for listStorageDescriptorsWithCD");
+    } finally {
+      if (!success) {
+        rollbackTransaction();
+      }
+    }
+    return sds;
+  }
+
   @Override
   public boolean addIndex(Index index) throws InvalidObjectException,
       MetaException {

Added: hive/trunk/metastore/src/model/org/apache/hadoop/hive/metastore/model/MColumnDescriptor.java
URL: http://svn.apache.org/viewvc/hive/trunk/metastore/src/model/org/apache/hadoop/hive/metastore/model/MColumnDescriptor.java?rev=1155573&view=auto
==============================================================================
--- hive/trunk/metastore/src/model/org/apache/hadoop/hive/metastore/model/MColumnDescriptor.java
(added)
+++ hive/trunk/metastore/src/model/org/apache/hadoop/hive/metastore/model/MColumnDescriptor.java
Tue Aug  9 22:04:20 2011
@@ -0,0 +1,51 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ *
+ */
+package org.apache.hadoop.hive.metastore.model;
+
+import java.util.List;
+
+/**
+ * MColumnDescriptor.
+ * A wrapper around a list of columns (MFieldSchema objects), so that the same
+ * list can be referenced by more than one MStorageDescriptor.
+ */
+public class MColumnDescriptor {
+  private List<MFieldSchema> cols;
+
+  public MColumnDescriptor() {}
+
+  /**
+   * Creates a column descriptor wrapping the given columns.
+   * @param cols the field schemas this descriptor holds (stored by reference, not copied)
+   */
+  public MColumnDescriptor(List<MFieldSchema> cols) {
+    this.cols = cols;
+  }
+
+  public List<MFieldSchema> getCols() {
+    return cols;
+  }
+
+  public void setCols(List<MFieldSchema> cols) {
+    this.cols = cols;
+  }
+}

Modified: hive/trunk/metastore/src/model/org/apache/hadoop/hive/metastore/model/MStorageDescriptor.java
URL: http://svn.apache.org/viewvc/hive/trunk/metastore/src/model/org/apache/hadoop/hive/metastore/model/MStorageDescriptor.java?rev=1155573&r1=1155572&r2=1155573&view=diff
==============================================================================
--- hive/trunk/metastore/src/model/org/apache/hadoop/hive/metastore/model/MStorageDescriptor.java
(original)
+++ hive/trunk/metastore/src/model/org/apache/hadoop/hive/metastore/model/MStorageDescriptor.java
Tue Aug  9 22:04:20 2011
@@ -22,7 +22,7 @@ import java.util.List;
 import java.util.Map;
 
 public class MStorageDescriptor {
-  private List<MFieldSchema> cols;
+  private MColumnDescriptor cd;
   private String location;
   private String inputFormat;
   private String outputFormat;
@@ -32,12 +32,12 @@ public class MStorageDescriptor {
   private List<String> bucketCols;
   private List<MOrder> sortCols;
   private Map<String, String> parameters;
-  
+
   public MStorageDescriptor() {}
 
-  
+
   /**
-   * @param cols
+   * @param cd
    * @param location
    * @param inputFormat
    * @param outputFormat
@@ -48,10 +48,10 @@ public class MStorageDescriptor {
    * @param sortOrder
    * @param parameters
    */
-  public MStorageDescriptor(List<MFieldSchema> cols, String location, String inputFormat,
+  public MStorageDescriptor(MColumnDescriptor cd, String location, String inputFormat,
       String outputFormat, boolean isCompressed, int numBuckets, MSerDeInfo serDeInfo,
       List<String> bucketCols, List<MOrder> sortOrder, Map<String, String>
parameters) {
-    this.cols = cols;
+    this.cd = cd;
     this.location = location;
     this.inputFormat = inputFormat;
     this.outputFormat = outputFormat;
@@ -163,17 +163,17 @@ public class MStorageDescriptor {
   }
 
   /**
-   * @return the cols
+   * @return the column descriptor
    */
-  public List<MFieldSchema> getCols() {
-    return cols;
+  public MColumnDescriptor getCD() {
+    return cd;
   }
 
   /**
-   * @param cols the cols to set
+   * @param cd the Column Descriptor to set
    */
-  public void setCols(List<MFieldSchema> cols) {
-    this.cols = cols;
+  public void setCD(MColumnDescriptor cd) {
+    this.cd = cd;
   }
 
   /**

Modified: hive/trunk/metastore/src/model/package.jdo
URL: http://svn.apache.org/viewvc/hive/trunk/metastore/src/model/package.jdo?rev=1155573&r1=1155572&r2=1155573&view=diff
==============================================================================
--- hive/trunk/metastore/src/model/package.jdo (original)
+++ hive/trunk/metastore/src/model/package.jdo Tue Aug  9 22:04:20 2011
@@ -191,17 +191,17 @@
       </field>
     </class>
 
-    <class name="MStorageDescriptor" identity-type="datastore" table="SDS" detachable="true">
+    <class name="MColumnDescriptor" identity-type="datastore" table="CDS" detachable="true">
       <datastore-identity>
-        <column name="SD_ID"/>
+        <column name="CD_ID"/>
       </datastore-identity>
-      <field name="cols" table="COLUMNS" >
+      <field name="cols" table="COLUMNS_V2" >
         <collection element-type="MFieldSchema"/>
         <join>
           <primary-key name="COLUMNS_PK">
             <column name="COLUMN_NAME"/>
           </primary-key>
-          <column name="SD_ID"/>
+          <column name="CD_ID"/>
         </join>
         <element>
           <embedded>
@@ -214,6 +214,15 @@
           </embedded>
         </element>
       </field>
+	</class>
+
+    <class name="MStorageDescriptor" identity-type="datastore" table="SDS" detachable="true">
+      <datastore-identity>
+        <column name="SD_ID"/>
+      </datastore-identity>
+      <field name="cd">
+      	<column name="CD_ID"/>
+      </field>
       <field name="location">
         <column name="LOCATION" length="4000" jdbc-type="VARCHAR"/>
       </field>



Mime
View raw message