hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ct...@apache.org
Subject hive git commit: HIVE-15530: Optimize the column stats update logic in table alteration (Yibing Shi via Chaoyu Tang)
Date Wed, 11 Jan 2017 14:37:17 GMT
Repository: hive
Updated Branches:
  refs/heads/master 886978db5 -> 51769dca0


HIVE-15530: Optimize the column stats update logic in table alteration (Yibing Shi via Chaoyu Tang)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/51769dca
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/51769dca
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/51769dca

Branch: refs/heads/master
Commit: 51769dca0392af35e0fcf57628876a071b92e746
Parents: 886978d
Author: Chaoyu Tang <ctang@cloudera.com>
Authored: Wed Jan 11 09:33:52 2017 -0500
Committer: Chaoyu Tang <ctang@cloudera.com>
Committed: Wed Jan 11 09:33:52 2017 -0500

----------------------------------------------------------------------
 .../hadoop/hive/metastore/HiveAlterHandler.java |   8 +-
 .../hadoop/hive/metastore/MetaStoreUtils.java   |  15 +++
 .../hadoop/hive/metastore/ObjectStore.java      |   1 -
 .../hive/metastore/TestHiveAlterHandler.java    | 108 +++++++++++++++++++
 .../hive/metastore/TestMetaStoreUtils.java      |  41 +++++++
 5 files changed, 169 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/51769dca/metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java
index 86565a4..64d9fc1 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java
@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.hive.metastore;
 
+import com.google.common.annotations.VisibleForTesting;
 import com.google.common.collect.Lists;
 
 import org.apache.commons.lang.StringUtils;
@@ -777,7 +778,7 @@ public class HiveAlterHandler implements AlterHandler {
         }
         if (oldPartition.getSd() != null && newPart.getSd() != null) {
         List<FieldSchema> oldCols = oldPartition.getSd().getCols();
-          if (!MetaStoreUtils.areSameColumns(oldCols, newPart.getSd().getCols())) {
+          if (!MetaStoreUtils.columnsIncluded(oldCols, newPart.getSd().getCols())) {
             updatePartColumnStatsForAlterColumns(msdb, oldPartition, oldPartName, partVals, oldCols, newPart);
           }
         }
@@ -790,7 +791,8 @@ public class HiveAlterHandler implements AlterHandler {
     }
   }
 
-  private void alterTableUpdateTableColumnStats(RawStore msdb,
+  @VisibleForTesting
+  void alterTableUpdateTableColumnStats(RawStore msdb,
       Table oldTable, Table newTable)
       throws MetaException, InvalidObjectException {
     String dbName = oldTable.getDbName().toLowerCase();
@@ -808,7 +810,7 @@ public class HiveAlterHandler implements AlterHandler {
       // Nothing to update if everything is the same
         if (newDbName.equals(dbName) &&
             newTableName.equals(tableName) &&
-            MetaStoreUtils.areSameColumns(oldCols, newCols)) {
+            MetaStoreUtils.columnsIncluded(oldCols, newCols)) {
           updateColumnStats = false;
         }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/51769dca/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
index 05ee3ac..4aea152 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
@@ -655,6 +655,21 @@ public class MetaStoreUtils {
     return true;
   }
 
+  static boolean columnsIncluded(List<FieldSchema> oldCols, List<FieldSchema> newCols) {
+    if (oldCols.size() > newCols.size()) {
+      return false;
+    }
+
+    Set<FieldSchema> newColsSet = new HashSet<FieldSchema>(newCols);
+    for (final FieldSchema oldCol : oldCols) {
+      if (!newColsSet.contains(oldCol)) {
+        return false;
+      }
+    }
+
+    return true;
+  }
+
   /**
    * @return true if oldType and newType are compatible.
    * Two types are compatible if we have internal functions to cast one to another.

http://git-wip-us.apache.org/repos/asf/hive/blob/51769dca/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java b/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
index a308970..778615d 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
@@ -7218,7 +7218,6 @@ public class ObjectStore implements RawStore, Configurable {
     } finally {
       if (!committed) {
         rollbackTransaction();
-        return Lists.newArrayList();
       }
     }
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/51769dca/metastore/src/test/org/apache/hadoop/hive/metastore/TestHiveAlterHandler.java
----------------------------------------------------------------------
diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/TestHiveAlterHandler.java b/metastore/src/test/org/apache/hadoop/hive/metastore/TestHiveAlterHandler.java
new file mode 100644
index 0000000..03ea7fc
--- /dev/null
+++ b/metastore/src/test/org/apache/hadoop/hive/metastore/TestHiveAlterHandler.java
@@ -0,0 +1,108 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.metastore;
+
+import org.apache.hadoop.hive.metastore.api.*;
+import org.junit.Test;
+import org.mockito.Mockito;
+
+import java.util.Arrays;
+
+public class TestHiveAlterHandler {
+
+  @Test
+  public void testAlterTableAddColNotUpdateStats() throws MetaException, InvalidObjectException, NoSuchObjectException {
+    FieldSchema col1 = new FieldSchema("col1", "string", "col1 comment");
+    FieldSchema col2 = new FieldSchema("col2", "string", "col2 comment");
+    FieldSchema col3 = new FieldSchema("col3", "string", "col3 comment");
+    FieldSchema col4 = new FieldSchema("col4", "string", "col4 comment");
+
+    StorageDescriptor oldSd = new StorageDescriptor();
+    oldSd.setCols(Arrays.asList(col1, col2, col3));
+    Table oldTable = new Table();
+    oldTable.setDbName("default");
+    oldTable.setTableName("test_table");
+    oldTable.setSd(oldSd);
+
+    StorageDescriptor newSd = new StorageDescriptor(oldSd);
+    newSd.setCols(Arrays.asList(col1, col2, col3, col4));
+    Table newTable = new Table(oldTable);
+    newTable.setSd(newSd);
+
+    RawStore msdb = Mockito.mock(RawStore.class);
+    Mockito.doThrow(new RuntimeException("shouldn't be called")).when(msdb).getTableColumnStatistics(
+        oldTable.getDbName(), oldTable.getTableName(), Arrays.asList("col1", "col2", "col3"));
+    HiveAlterHandler handler = new HiveAlterHandler();
+    handler.alterTableUpdateTableColumnStats(msdb, oldTable, newTable);
+  }
+
+  @Test
+  public void testAlterTableDelColUpdateStats() throws MetaException, InvalidObjectException, NoSuchObjectException {
+    FieldSchema col1 = new FieldSchema("col1", "string", "col1 comment");
+    FieldSchema col2 = new FieldSchema("col2", "string", "col2 comment");
+    FieldSchema col3 = new FieldSchema("col3", "string", "col3 comment");
+    FieldSchema col4 = new FieldSchema("col4", "string", "col4 comment");
+
+    StorageDescriptor oldSd = new StorageDescriptor();
+    oldSd.setCols(Arrays.asList(col1, col2, col3, col4));
+    Table oldTable = new Table();
+    oldTable.setDbName("default");
+    oldTable.setTableName("test_table");
+    oldTable.setSd(oldSd);
+
+    StorageDescriptor newSd = new StorageDescriptor(oldSd);
+    newSd.setCols(Arrays.asList(col1, col2, col3));
+    Table newTable = new Table(oldTable);
+    newTable.setSd(newSd);
+
+    RawStore msdb = Mockito.mock(RawStore.class);
+    HiveAlterHandler handler = new HiveAlterHandler();
+    handler.alterTableUpdateTableColumnStats(msdb, oldTable, newTable);
+    Mockito.verify(msdb, Mockito.times(1)).getTableColumnStatistics(
+        oldTable.getDbName(), oldTable.getTableName(), Arrays.asList("col1", "col2", "col3", "col4")
+    );
+  }
+
+  @Test
+  public void testAlterTableChangePosNotUpdateStats() throws MetaException, InvalidObjectException, NoSuchObjectException {
+    FieldSchema col1 = new FieldSchema("col1", "string", "col1 comment");
+    FieldSchema col2 = new FieldSchema("col2", "string", "col2 comment");
+    FieldSchema col3 = new FieldSchema("col3", "string", "col3 comment");
+    FieldSchema col4 = new FieldSchema("col4", "string", "col4 comment");
+
+    StorageDescriptor oldSd = new StorageDescriptor();
+    oldSd.setCols(Arrays.asList(col1, col2, col3, col4));
+    Table oldTable = new Table();
+    oldTable.setDbName("default");
+    oldTable.setTableName("test_table");
+    oldTable.setSd(oldSd);
+
+    StorageDescriptor newSd = new StorageDescriptor(oldSd);
+    newSd.setCols(Arrays.asList(col1, col4, col2, col3));
+    Table newTable = new Table(oldTable);
+    newTable.setSd(newSd);
+
+    RawStore msdb = Mockito.mock(RawStore.class);
+    Mockito.doThrow(new RuntimeException("shouldn't be called")).when(msdb).getTableColumnStatistics(
+        oldTable.getDbName(), oldTable.getTableName(), Arrays.asList("col1", "col2", "col3", "col4"));
+    HiveAlterHandler handler = new HiveAlterHandler();
+    handler.alterTableUpdateTableColumnStats(msdb, oldTable, newTable);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/51769dca/metastore/src/test/org/apache/hadoop/hive/metastore/TestMetaStoreUtils.java
----------------------------------------------------------------------
diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/TestMetaStoreUtils.java b/metastore/src/test/org/apache/hadoop/hive/metastore/TestMetaStoreUtils.java
new file mode 100644
index 0000000..21f9054
--- /dev/null
+++ b/metastore/src/test/org/apache/hadoop/hive/metastore/TestMetaStoreUtils.java
@@ -0,0 +1,41 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.metastore;
+
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.Arrays;
+
+public class TestMetaStoreUtils {
+
+  @Test
+  public void testColumnsIncluded() {
+    FieldSchema col1 = new FieldSchema("col1", "string", "col1 comment");
+    FieldSchema col2 = new FieldSchema("col2", "string", "col2 comment");
+    FieldSchema col3 = new FieldSchema("col3", "string", "col3 comment");
+    Assert.assertTrue(MetaStoreUtils.columnsIncluded(Arrays.asList(col1), Arrays.asList(col1)));
+    Assert.assertTrue(MetaStoreUtils.columnsIncluded(Arrays.asList(col1, col2), Arrays.asList(col1, col2)));
+    Assert.assertTrue(MetaStoreUtils.columnsIncluded(Arrays.asList(col1, col2), Arrays.asList(col2, col1)));
+    Assert.assertTrue(MetaStoreUtils.columnsIncluded(Arrays.asList(col1, col2), Arrays.asList(col1, col2, col3)));
+    Assert.assertTrue(MetaStoreUtils.columnsIncluded(Arrays.asList(col1, col2), Arrays.asList(col3, col2, col1)));
+    Assert.assertFalse(MetaStoreUtils.columnsIncluded(Arrays.asList(col1, col2), Arrays.asList(col1)));
+  }
+}


Mime
View raw message