hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From aihu...@apache.org
Subject hive git commit: HIVE-15263: Detect the values for incorrect NULL values (Aihua Xu, reviewed by Yongzhi Chen)
Date Tue, 29 Nov 2016 14:29:40 GMT
Repository: hive
Updated Branches:
  refs/heads/master 77b26d5a2 -> fb3b81e3d


HIVE-15263: Detect the values for incorrect NULL values (Aihua Xu, reviewed by Yongzhi Chen)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/fb3b81e3
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/fb3b81e3
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/fb3b81e3

Branch: refs/heads/master
Commit: fb3b81e3da8703dc3020cc3b455db65356ba30da
Parents: 77b26d5
Author: Aihua Xu <aihuaxu@apache.org>
Authored: Tue Nov 22 09:38:10 2016 -0500
Committer: Aihua Xu <aihuaxu@apache.org>
Committed: Tue Nov 29 09:28:59 2016 -0500

----------------------------------------------------------------------
 .../org/apache/hive/beeline/HiveSchemaTool.java | 79 +++++++++++++++-----
 .../org/apache/hive/beeline/TestSchemaTool.java | 43 ++++++++++-
 .../hadoop/hive/metastore/ObjectStore.java      |  4 +-
 3 files changed, 105 insertions(+), 21 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/fb3b81e3/beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java
----------------------------------------------------------------------
diff --git a/beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java b/beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java
index a2ab3e0..23f3cf2 100644
--- a/beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java
+++ b/beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java
@@ -34,6 +34,7 @@ import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
 import org.apache.hadoop.hive.metastore.HiveMetaException;
 import org.apache.hadoop.hive.metastore.MetaStoreSchemaInfo;
+import org.apache.hadoop.hive.metastore.TableType;
 import org.apache.hadoop.hive.metastore.api.MetaException;
 import org.apache.hadoop.hive.shims.ShimLoader;
 import org.apache.hive.beeline.HiveSchemaHelper.NestedScriptParser;
@@ -577,6 +578,7 @@ public class HiveSchemaTool {
     validateSequences();
     validateSchemaTables();
     validateLocations(null);
+    validateColumnNullValues();
     System.out.print("Done with metastore validation");
   }
 
@@ -677,6 +679,7 @@ public class HiveSchemaTool {
       if (hmsConn != null) {
         try {
           hmsConn.close();
+
         } catch (SQLException e) {
           throw new HiveMetaException("Failed to close metastore connection", e);
         }
@@ -746,6 +749,37 @@ public class HiveSchemaTool {
     return subs;
   }
 
+  boolean validateColumnNullValues() throws HiveMetaException {
+    System.out.println("Validating columns for incorrect NULL values");
+    Connection conn = getConnectionToMetastore(true);
+    boolean isValid = true;
+    try {
+      Statement stmt = conn.createStatement();
+      String tblQuery = getDbCommandParser(dbType).needsQuotedIdentifier() ?
+          ("select t.* from \"TBLS\" t WHERE t.\"SD_ID\" IS NULL and (t.\"TBL_TYPE\"='" + TableType.EXTERNAL_TABLE + "' or t.\"TBL_TYPE\"='" + TableType.MANAGED_TABLE + "')")
+          : ("select t.* from TBLS t WHERE t.SD_ID IS NULL and (t.TBL_TYPE='" + TableType.EXTERNAL_TABLE + "' or t.TBL_TYPE='" + TableType.MANAGED_TABLE + "')");
+
+      ResultSet res = stmt.executeQuery(tblQuery);
+      while (res.next()) {
+         long tableId = res.getLong("TBL_ID");
+         String tableName = res.getString("TBL_NAME");
+         String tableType = res.getString("TBL_TYPE");
+         isValid = false;
+         System.err.println("Value of SD_ID in TBLS should not be NULL: hive table - " + tableName + " tableId - " + tableId + " tableType - " + tableType);
+      }
+
+      return isValid;
+    } catch(SQLException e) {
+        throw new HiveMetaException("Failed to validate columns for incorrect NULL values", e);
+    } finally {
+      try {
+        conn.close();
+      } catch (SQLException e) {
+        throw new HiveMetaException("Failed to close metastore connection", e);
+      }
+    }
+  }
+
   /**
    *  Run pre-upgrade scripts corresponding to a given upgrade script,
    *  if any exist. The errors from pre-upgrade are ignored.
@@ -818,29 +852,38 @@ public class HiveSchemaTool {
 
     if (LOG.isDebugEnabled()) {
       LOG.debug("Going to invoke file that contains:");
-      FileReader fr = new FileReader(sqlScriptFile);
-      BufferedReader reader = new BufferedReader(fr);
-      String line;
-      while ((line = reader.readLine()) != null) {
-        LOG.debug("script: " + line);
+      BufferedReader reader = new BufferedReader(new FileReader(sqlScriptFile));
+      try {
+        String line;
+        while ((line = reader.readLine()) != null) {
+          LOG.debug("script: " + line);
+        }
+      } finally {
+        if (reader != null) {
+          reader.close();
+        }
       }
     }
 
     // run the script using Beeline
     BeeLine beeLine = new BeeLine();
-    if (!verbose) {
-      beeLine.setOutputStream(new PrintStream(new NullOutputStream()));
-      beeLine.getOpts().setSilent(true);
-    }
-    beeLine.getOpts().setAllowMultiLineCommand(false);
-    beeLine.getOpts().setIsolation("TRANSACTION_READ_COMMITTED");
-    // We can be pretty sure that an entire line can be processed as a single command since
-    // we always add a line separator at the end while calling dbCommandParser.buildCommand.
-    beeLine.getOpts().setEntireLineAsCommand(true);
-    LOG.debug("Going to run command <" + StringUtils.join(argList, " ") + ">");
-    int status = beeLine.begin(argList.toArray(new String[0]), null);
-    if (status != 0) {
-      throw new IOException("Schema script failed, errorcode " + status);
+    try {
+      if (!verbose) {
+        beeLine.setOutputStream(new PrintStream(new NullOutputStream()));
+        beeLine.getOpts().setSilent(true);
+      }
+      beeLine.getOpts().setAllowMultiLineCommand(false);
+      beeLine.getOpts().setIsolation("TRANSACTION_READ_COMMITTED");
+      // We can be pretty sure that an entire line can be processed as a single command since
+      // we always add a line separator at the end while calling dbCommandParser.buildCommand.
+      beeLine.getOpts().setEntireLineAsCommand(true);
+      LOG.debug("Going to run command <" + StringUtils.join(argList, " ") + ">");
+      int status = beeLine.begin(argList.toArray(new String[0]), null);
+      if (status != 0) {
+        throw new IOException("Schema script failed, errorcode " + status);
+      }
+    } finally {
+      beeLine.close();
     }
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/fb3b81e3/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestSchemaTool.java
----------------------------------------------------------------------
diff --git a/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestSchemaTool.java b/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestSchemaTool.java
index 5dc17b9..3b5c6c0 100644
--- a/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestSchemaTool.java
+++ b/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestSchemaTool.java
@@ -77,7 +77,7 @@ public class TestSchemaTool extends TestCase {
     schemaTool.doInit();
 
     // Test empty database
-    boolean isValid = (boolean)schemaTool.validateSequences();
+    boolean isValid = schemaTool.validateSequences();
     assertTrue(isValid);
 
     // Test valid case
@@ -141,6 +141,31 @@ public class TestSchemaTool extends TestCase {
     assertTrue(isValid);
    }
 
+  /*
+   * Test the validation of incorrect NULL values in the tables
+   * @throws Exception
+   */
+  public void testValidateNullValues() throws Exception {
+    schemaTool.doInit();
+
+    // Test empty database
+    boolean isValid = schemaTool.validateColumnNullValues();
+    assertTrue(isValid);
+
+    // Test valid case
+    createTestHiveTableSchemas();
+    isValid = schemaTool.validateColumnNullValues();
+
+    // Test invalid case
+    String[] scripts = new String[] {
+        "update TBLS set SD_ID=null"
+    };
+    File scriptFile = generateTestScript(scripts);
+    schemaTool.runBeeLine(scriptFile.getPath());
+    isValid = schemaTool.validateColumnNullValues();
+    assertFalse(isValid);
+  }
+
   /**
    * Test dryrun of schema initialization
    * @throws Exception
@@ -610,4 +635,20 @@ public class TestSchemaTool extends TestCase {
     out.close();
     return preUpgradeScript;
   }
+
+  /**
+   * Insert the records in DB to simulate a hive table
+   * @throws IOException
+   */
+  private void createTestHiveTableSchemas() throws IOException {
+     String[] scripts = new String[] {
+          "insert into DBS values(2, 'my db', 'hdfs://myhost.com:8020/user/hive/warehouse/mydb', 'mydb', 'public', 'role')",
+          "insert into SDS(SD_ID,CD_ID,INPUT_FORMAT,IS_COMPRESSED,IS_STOREDASSUBDIRECTORIES,LOCATION,NUM_BUCKETS,OUTPUT_FORMAT,SERDE_ID) values (1,null,'org.apache.hadoop.mapred.TextInputFormat','N','N','hdfs://myhost.com:8020/user/hive/warehouse/mydb',-1,'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',null)",
+          "insert into SDS(SD_ID,CD_ID,INPUT_FORMAT,IS_COMPRESSED,IS_STOREDASSUBDIRECTORIES,LOCATION,NUM_BUCKETS,OUTPUT_FORMAT,SERDE_ID) values (2,null,'org.apache.hadoop.mapred.TextInputFormat','N','N','hdfs://myhost.com:8020/user/admin/2015_11_18',-1,'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',null)",
+          "insert into TBLS(TBL_ID,CREATE_TIME,DB_ID,LAST_ACCESS_TIME,OWNER,RETENTION,SD_ID,TBL_NAME,TBL_TYPE,VIEW_EXPANDED_TEXT,VIEW_ORIGINAL_TEXT) values (2 ,1435255431,2,0 ,'hive',0,1,'mytal','MANAGED_TABLE',NULL,NULL)",
+          "insert into PARTITIONS(PART_ID,CREATE_TIME,LAST_ACCESS_TIME, PART_NAME,SD_ID,TBL_ID) values(1, 1441402388,0, 'd1=1/d2=1',2,2)"
+        };
+     File scriptFile = generateTestScript(scripts);
+     schemaTool.runBeeLine(scriptFile.getPath());
+  }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/fb3b81e3/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java b/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
index 90ea641..d4024d2 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
@@ -3615,7 +3615,7 @@ public class ObjectStore implements RawStore, Configurable {
       }
 
       MColumnDescriptor parentCD = retrieveCD ? nParentTable.mcd : parentTable.getSd().getCD();
-      List<MFieldSchema> parentCols = parentCD.getCols();
+      List<MFieldSchema> parentCols = parentCD == null ? null : parentCD.getCols();
       int parentIntegerIndex =
         getColumnIndexFromTableColumns(parentCols, fks.get(i).getPkcolumn_name());
       if (parentIntegerIndex == -1) {
@@ -3690,7 +3690,7 @@ public class ObjectStore implements RawStore, Configurable {
 
       MColumnDescriptor parentCD = retrieveCD ? nParentTable.mcd : parentTable.getSd().getCD();
       int parentIntegerIndex =
-        getColumnIndexFromTableColumns(parentCD.getCols(), pks.get(i).getColumn_name());
+        getColumnIndexFromTableColumns(parentCD == null ? null : parentCD.getCols(), pks.get(i).getColumn_name());
 
       if (parentIntegerIndex == -1) {
         throw new InvalidObjectException("Parent column not found: " + pks.get(i).getColumn_name());


Mime
View raw message