hudi-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From GitBox <...@apache.org>
Subject [GitHub] [incubator-hudi] umehrot2 commented on a change in pull request #1416: [HUDI-717] Fixed usage of HiveDriver for DDL statements for Hive 2.x
Date Tue, 24 Mar 2020 09:26:26 GMT
umehrot2 commented on a change in pull request #1416: [HUDI-717] Fixed usage of HiveDriver
for DDL statements for Hive 2.x
URL: https://github.com/apache/incubator-hudi/pull/1416#discussion_r397009308
 
 

 ##########
 File path: hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestHiveSyncTool.java
 ##########
 @@ -363,4 +404,51 @@ public void testMultiPartitionKeySync() throws Exception {
     assertEquals("The last commit that was sycned should be updated in the TBLPROPERTIES",
commitTime,
         hiveClient.getLastCommitTimeSynced(hiveSyncConfig.tableName).get());
   }
+
+  @Test
+  public void testSchemeFromMOR() throws Exception {
+    TestUtil.hiveSyncConfig.useJdbc = this.useJdbc;
+    String commitTime = "100";
+    String snapshotTableName = TestUtil.hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE;
+    TestUtil.createMORTable(commitTime, "", 5, false);
+    HoodieHiveClient hiveClientRT =
+        new HoodieHiveClient(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem);
+
+    assertFalse("Table " + TestUtil.hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE
+        + " should not exist initially", hiveClientRT.doesTableExist(snapshotTableName));
+
+    // Lets do the sync
+    HiveSyncTool tool = new HiveSyncTool(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(),
TestUtil.fileSystem);
+    tool.syncHoodieTable();
+
+    assertTrue("Table " + TestUtil.hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_SNAPSHOT_TABLE
+        + " should exist after sync completes", hiveClientRT.doesTableExist(snapshotTableName));
+
+    // Schema being read from compacted base files
+    assertEquals("Hive Schema should match the table schema + partition field", hiveClientRT.getTableSchema(snapshotTableName).size(),
+        SchemaTestUtil.getSimpleSchema().getFields().size() + 1);
+    assertEquals("Table partitions should match the number of partitions we wrote", 5,
+        hiveClientRT.scanTablePartitions(snapshotTableName).size());
+
+    // Now lets create more partitions and these are the only ones which needs to be synced
+    DateTime dateTime = DateTime.now().plusDays(6);
+    String commitTime2 = "102";
+    String deltaCommitTime2 = "103";
+
+    TestUtil.addCOWPartitions(1, true, dateTime, commitTime2);
+    TestUtil.addMORPartitions(1, true, false, dateTime, commitTime2, deltaCommitTime2);
+    // Lets do the sync
+    tool = new HiveSyncTool(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(), TestUtil.fileSystem);
+    tool.syncHoodieTable();
+    hiveClientRT = new HoodieHiveClient(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(),
TestUtil.fileSystem);
+
+    // Schema being read from the log files
+    assertEquals("Hive Schema should match the evolved table schema + partition field",
+        hiveClientRT.getTableSchema(snapshotTableName).size(), SchemaTestUtil.getEvolvedSchema().getFields().size()
+ 1);
+    // Sync should add the one partition
+    assertEquals("The 2 partitions we wrote should be added to hive", 6, hiveClientRT.scanTablePartitions(snapshotTableName).size());
 
 Review comment:
   Is this message correct ? We only added `1 partition` right and thats why count went from
`5 => 6` ?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

Mime
View raw message