hudi-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From vin...@apache.org
Subject [incubator-hudi] branch master updated: [HUDI-502] provide a custom time zone definition for TimestampBasedKeyGenerator (#1188)
Date Sun, 12 Jan 2020 23:45:32 GMT
This is an automated email from the ASF dual-hosted git repository.

vinoth pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new a44c61b  [HUDI-502] provide a custom time zone definition for TimestampBasedKeyGenerator (#1188)
a44c61b is described below

commit a44c61b81356e93340710ccc0022e576b7b6e077
Author: openopen2 <a261049174@outlook.com>
AuthorDate: Mon Jan 13 07:45:23 2020 +0800

    [HUDI-502] provide a custom time zone definition for TimestampBasedKeyGenerator (#1188)
---
 .../apache/hudi/common/util/SchemaTestUtil.java    |  4 ++
 .../src/test/resources/timestamp-test-evolved.avsc | 26 +++++++
 .../keygen/TimestampBasedKeyGenerator.java         | 11 ++-
 .../utilities/TestTimestampBasedKeyGenerator.java  | 83 ++++++++++++++++++++++
 4 files changed, 122 insertions(+), 2 deletions(-)

diff --git a/hudi-common/src/test/java/org/apache/hudi/common/util/SchemaTestUtil.java b/hudi-common/src/test/java/org/apache/hudi/common/util/SchemaTestUtil.java
index 2b3de27..18d6b73 100644
--- a/hudi-common/src/test/java/org/apache/hudi/common/util/SchemaTestUtil.java
+++ b/hudi-common/src/test/java/org/apache/hudi/common/util/SchemaTestUtil.java
@@ -169,6 +169,10 @@ public class SchemaTestUtil {
     return new Schema.Parser().parse(SchemaTestUtil.class.getResourceAsStream("/complex-test-evolved.avsc"));
   }
 
+  public static Schema getTimestampEvolvedSchema() throws IOException {
+    return new Schema.Parser().parse(SchemaTestUtil.class.getResourceAsStream("/timestamp-test-evolved.avsc"));
+  }
+
   public static GenericRecord generateAvroRecordFromJson(Schema schema, int recordNumber, String commitTime,
       String fileId) throws IOException {
     TestRecord record = new TestRecord(commitTime, recordNumber, fileId);
diff --git a/hudi-common/src/test/resources/timestamp-test-evolved.avsc b/hudi-common/src/test/resources/timestamp-test-evolved.avsc
new file mode 100644
index 0000000..421c672
--- /dev/null
+++ b/hudi-common/src/test/resources/timestamp-test-evolved.avsc
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+{
+  "namespace": "example.avro",
+  "type": "record",
+  "name": "User",
+  "fields": [
+    {"name": "field1", "type": ["null", "string"], "default": null},
+    {"name": "createTime", "type": ["null", "string"], "default": null}
+  ]
+}
\ No newline at end of file
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/keygen/TimestampBasedKeyGenerator.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/keygen/TimestampBasedKeyGenerator.java
index 6d3a6e3..7f1380e 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/keygen/TimestampBasedKeyGenerator.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/keygen/TimestampBasedKeyGenerator.java
@@ -51,6 +51,10 @@ public class TimestampBasedKeyGenerator extends SimpleKeyGenerator {
 
   private final String outputDateFormat;
 
+  // TimeZone detailed settings reference
+  // https://docs.oracle.com/javase/8/docs/api/java/util/TimeZone.html
+  private final TimeZone timeZone;
+
   /**
    * Supported configs.
    */
@@ -62,6 +66,8 @@ public class TimestampBasedKeyGenerator extends SimpleKeyGenerator {
         "hoodie.deltastreamer.keygen.timebased.input.dateformat";
     private static final String TIMESTAMP_OUTPUT_DATE_FORMAT_PROP =
         "hoodie.deltastreamer.keygen.timebased.output.dateformat";
+    private static final String TIMESTAMP_TIMEZONE_FORMAT_PROP =
+            "hoodie.deltastreamer.keygen.timebased.timezone";
   }
 
   public TimestampBasedKeyGenerator(TypedProperties config) {
@@ -70,12 +76,13 @@ public class TimestampBasedKeyGenerator extends SimpleKeyGenerator {
         Arrays.asList(Config.TIMESTAMP_TYPE_FIELD_PROP, Config.TIMESTAMP_OUTPUT_DATE_FORMAT_PROP));
     this.timestampType = TimestampType.valueOf(config.getString(Config.TIMESTAMP_TYPE_FIELD_PROP));
     this.outputDateFormat = config.getString(Config.TIMESTAMP_OUTPUT_DATE_FORMAT_PROP);
+    this.timeZone = TimeZone.getTimeZone(config.getString(Config.TIMESTAMP_TIMEZONE_FORMAT_PROP, "GMT"));
 
     if (timestampType == TimestampType.DATE_STRING || timestampType == TimestampType.MIXED) {
       DataSourceUtils.checkRequiredProperties(config,
           Collections.singletonList(Config.TIMESTAMP_INPUT_DATE_FORMAT_PROP));
       this.inputDateFormat = new SimpleDateFormat(config.getString(Config.TIMESTAMP_INPUT_DATE_FORMAT_PROP));
-      this.inputDateFormat.setTimeZone(TimeZone.getTimeZone("GMT"));
+      this.inputDateFormat.setTimeZone(timeZone);
     }
   }
 
@@ -86,7 +93,7 @@ public class TimestampBasedKeyGenerator extends SimpleKeyGenerator {
       partitionVal = 1L;
     }
     SimpleDateFormat partitionPathFormat = new SimpleDateFormat(outputDateFormat);
-    partitionPathFormat.setTimeZone(TimeZone.getTimeZone("GMT"));
+    partitionPathFormat.setTimeZone(timeZone);
 
     try {
       long unixTime;
diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestTimestampBasedKeyGenerator.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestTimestampBasedKeyGenerator.java
new file mode 100644
index 0000000..cb0c822
--- /dev/null
+++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestTimestampBasedKeyGenerator.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.utilities;
+
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.hudi.DataSourceWriteOptions;
+import org.apache.hudi.common.model.HoodieKey;
+import org.apache.hudi.common.util.SchemaTestUtil;
+import org.apache.hudi.common.util.TypedProperties;
+import org.apache.hudi.utilities.keygen.TimestampBasedKeyGenerator;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.io.IOException;
+
+import static org.junit.Assert.assertEquals;
+
+public class TestTimestampBasedKeyGenerator {
+  private Schema schema;
+  private GenericRecord baseRecord;
+  private TypedProperties properties = new TypedProperties();
+
+  @Before
+  public void initialize() throws IOException {
+    schema = SchemaTestUtil.getTimestampEvolvedSchema();
+    baseRecord = SchemaTestUtil
+        .generateAvroRecordFromJson(schema, 1, "001", "f1");
+
+    properties.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY(), "field1");
+    properties.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY(), "createTime");
+    properties.setProperty(DataSourceWriteOptions.HIVE_STYLE_PARTITIONING_OPT_KEY(), "false");
+  }
+  
+  private TypedProperties getBaseKeyConfig(String timestampType, String dateFormat, String timezone) {
+    properties.setProperty("hoodie.deltastreamer.keygen.timebased.timestamp.type", timestampType);
+    properties.setProperty("hoodie.deltastreamer.keygen.timebased.output.dateformat", dateFormat);
+    properties.setProperty("hoodie.deltastreamer.keygen.timebased.timezone", timezone);
+    return properties;
+  }
+
+  @Test
+  public void testTimestampBasedKeyGenerator() {
+    // timezone is GMT+8:00
+    baseRecord.put("createTime", 1578283932000L);
+    properties = getBaseKeyConfig("EPOCHMILLISECONDS", "yyyy-MM-dd hh", "GMT+8:00");
+    HoodieKey hk1 = new TimestampBasedKeyGenerator(properties).getKey(baseRecord);
+    assertEquals(hk1.getPartitionPath(), "2020-01-06 12");
+
+    // timezone is GMT
+    properties = getBaseKeyConfig("EPOCHMILLISECONDS", "yyyy-MM-dd hh", "GMT");
+    HoodieKey hk2 = new TimestampBasedKeyGenerator(properties).getKey(baseRecord);
+    assertEquals(hk2.getPartitionPath(), "2020-01-06 04");
+
+    // timestamp is DATE_STRING, timezone is GMT+8:00
+    baseRecord.put("createTime", "2020-01-06 12:12:12");
+    properties = getBaseKeyConfig("DATE_STRING", "yyyy-MM-dd hh", "GMT+8:00");
+    properties.setProperty("hoodie.deltastreamer.keygen.timebased.input.dateformat", "yyyy-MM-dd hh:mm:ss");
+    HoodieKey hk3 = new TimestampBasedKeyGenerator(properties).getKey(baseRecord);
+    assertEquals(hk3.getPartitionPath(), "2020-01-06 12");
+
+    // timezone is GMT
+    properties = getBaseKeyConfig("DATE_STRING", "yyyy-MM-dd hh", "GMT");
+    HoodieKey hk4 = new TimestampBasedKeyGenerator(properties).getKey(baseRecord);
+    assertEquals(hk4.getPartitionPath(), "2020-01-06 12");
+  }
+}


Mime
View raw message