hudi-commits mailing list archives

From GitBox <...@apache.org>
Subject [GitHub] [incubator-hudi] xushiyan commented on a change in pull request #1404: [HUDI-344] Improve exporter tests
Date Sat, 14 Mar 2020 00:41:36 GMT
xushiyan commented on a change in pull request #1404: [HUDI-344] Improve exporter tests
URL: https://github.com/apache/incubator-hudi/pull/1404#discussion_r392539398
 
 

 ##########
 File path: hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHoodieSnapshotExporter.java
 ##########
 @@ -18,205 +18,144 @@
 
 package org.apache.hudi.utilities;
 
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.List;
+import org.apache.hadoop.fs.LocatedFileStatus;
 import org.apache.hadoop.fs.Path;
-import org.apache.hudi.DataSourceWriteOptions;
-import org.apache.hudi.common.HoodieCommonTestHarness;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hudi.client.HoodieWriteClient;
+import org.apache.hudi.common.HoodieClientTestHarness;
 import org.apache.hudi.common.HoodieTestDataGenerator;
-import org.apache.hudi.common.model.HoodieTestUtils;
-import org.apache.hudi.common.util.FSUtils;
+import org.apache.hudi.common.model.HoodieAvroPayload;
+import org.apache.hudi.common.model.HoodieRecord;
+import org.apache.hudi.common.model.HoodieTableType;
+import org.apache.hudi.common.table.HoodieTableMetaClient;
+import org.apache.hudi.config.HoodieIndexConfig;
 import org.apache.hudi.config.HoodieWriteConfig;
-import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.sql.Dataset;
-import org.apache.spark.sql.Row;
-import org.apache.spark.sql.SaveMode;
+import org.apache.hudi.index.HoodieIndex.IndexType;
+import org.apache.hudi.utilities.HoodieSnapshotExporter.Config;
+import org.apache.log4j.LogManager;
+import org.apache.log4j.Logger;
+import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.sql.SparkSession;
-
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
-import org.junit.rules.TemporaryFolder;
 
-import java.io.File;
-import java.io.IOException;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
+public class TestHoodieSnapshotExporter extends HoodieClientTestHarness {
 
-public class TestHoodieSnapshotExporter extends HoodieCommonTestHarness {
-  private static String TEST_WRITE_TOKEN = "1-0-1";
-
-  private SparkSession spark = null;
-  private HoodieTestDataGenerator dataGen = null;
-  private String outputPath = null;
-  private String rootPath = null;
-  private FileSystem fs = null;
-  private Map commonOpts;
-  private HoodieSnapshotExporter.Config cfg;
-  private JavaSparkContext jsc = null;
+  private static final Logger LOG = LogManager.getLogger(TestHoodieSnapshotExporter.class);
+  private static final int NUM_RECORDS = 100;
+  private static final String COMMIT_TIME = "20200101000000";
+  private static final String PARTITION_PATH = "2020/01/01";
+  private static final String TABLE_NAME = "testing";
+  private String sourcePath;
+  private String targetPath;
 
   @Before
-  public void initialize() throws IOException {
-    spark = SparkSession.builder()
-        .appName("Hoodie Datasource test")
-        .master("local[2]")
-        .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
-        .getOrCreate();
-    jsc = new JavaSparkContext(spark.sparkContext());
-    dataGen = new HoodieTestDataGenerator();
-    folder.create();
-    basePath = folder.getRoot().getAbsolutePath();
-    fs = FSUtils.getFs(basePath, spark.sparkContext().hadoopConfiguration());
-    commonOpts = new HashMap();
-
-    commonOpts.put("hoodie.insert.shuffle.parallelism", "4");
-    commonOpts.put("hoodie.upsert.shuffle.parallelism", "4");
-    commonOpts.put(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY(), "_row_key");
-    commonOpts.put(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY(), "partition");
-    commonOpts.put(DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY(), "timestamp");
-    commonOpts.put(HoodieWriteConfig.TABLE_NAME, "hoodie_test");
-
-
-    cfg = new HoodieSnapshotExporter.Config();
-
-    cfg.sourceBasePath = basePath;
-    cfg.targetOutputPath = outputPath = basePath + "/target";
-    cfg.outputFormat = "json";
-    cfg.outputPartitionField = "partition";
-
+  public void setUp() throws Exception {
+    initSparkContexts();
+    initDFS();
+    dataGen = new HoodieTestDataGenerator(new String[]{PARTITION_PATH});
 
 Review comment:
   Make sure the generated data falls into one partition, for ease of verification.
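
   For illustration, a minimal sketch of the verification this single-partition setup enables: with every record generated into PARTITION_PATH, a test only needs to scan one known directory. The sketch reuses names visible in the diff (dfs from initDFS(), sourcePath, PARTITION_PATH, and the imported Path, RemoteIterator, LocatedFileStatus, assertTrue); the test method itself is hypothetical and not part of the PR.

     // Hypothetical sketch, assuming the harness exposes a FileSystem handle
     // named "dfs": all generated records land under the single PARTITION_PATH,
     // so verification only needs to list one directory.
     @Test
     public void assertDataLandsInSinglePartition() throws IOException {
       RemoteIterator<LocatedFileStatus> files =
           dfs.listFiles(new Path(sourcePath + "/" + PARTITION_PATH), true);
       int dataFiles = 0;
       while (files.hasNext()) {
         if (files.next().getPath().getName().endsWith(".parquet")) {
           dataFiles++;
         }
       }
       // Expect at least one data file in the lone partition.
       assertTrue(dataFiles > 0);
     }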

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services
