hudi-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From GitBox <>
Subject [GitHub] [incubator-hudi] xushiyan commented on a change in pull request #1405: [HUDI-344] Add partitioner param to Exporter
Date Tue, 17 Mar 2020 18:06:37 GMT
xushiyan commented on a change in pull request #1405: [HUDI-344] Add partitioner param to Exporter

 File path: hudi-utilities/src/main/java/org/apache/hudi/utilities/
 @@ -45,41 +50,65 @@
 import org.apache.spark.sql.Row;
 import org.apache.spark.sql.SaveMode;
 import org.apache.spark.sql.SparkSession;
-import org.apache.spark.sql.execution.datasources.DataSource;
-import scala.Tuple2;
-import scala.collection.JavaConversions;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.List;
+import scala.Tuple2;
+import scala.collection.JavaConversions;
  * Export the latest records of Hudi dataset to a set of external files (e.g., plain parquet files).
  * @experimental This export is an experimental tool. If you want to export hudi to hudi, please use HoodieSnapshotCopier.
 public class HoodieSnapshotExporter {
+  @FunctionalInterface
+  public interface Partitioner {
+    DataFrameWriter<Row> partition(Dataset<Row> source);
+  }
   private static final Logger LOG = LogManager.getLogger(HoodieSnapshotExporter.class);
+  public static class OutputFormatValidator implements IValueValidator<String> {
+    static final String HUDI = "hudi";
+    static final List<String> FORMATS = ImmutableList.of("json", "parquet", HUDI);
 Review comment:
   @leesf Sure, I'll add some explanation in the CLI help line.

This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:

With regards,
Apache Git Services

View raw message