From commits-return-9579-archive-asf-public=cust-asf.ponee.io@hudi.apache.org Sun Jan 12 23:45:33 2020 Return-Path: X-Original-To: archive-asf-public@cust-asf.ponee.io Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [207.244.88.153]) by mx-eu-01.ponee.io (Postfix) with SMTP id D87B4180663 for ; Mon, 13 Jan 2020 00:45:32 +0100 (CET) Received: (qmail 8115 invoked by uid 500); 12 Jan 2020 23:45:32 -0000 Mailing-List: contact commits-help@hudi.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@hudi.apache.org Delivered-To: mailing list commits@hudi.apache.org Received: (qmail 8101 invoked by uid 99); 12 Jan 2020 23:45:32 -0000 Received: from ec2-52-202-80-70.compute-1.amazonaws.com (HELO gitbox.apache.org) (52.202.80.70) by apache.org (qpsmtpd/0.29) with ESMTP; Sun, 12 Jan 2020 23:45:32 +0000 Received: by gitbox.apache.org (ASF Mail Server at gitbox.apache.org, from userid 33) id 19A2381F11; Sun, 12 Jan 2020 23:45:32 +0000 (UTC) Date: Sun, 12 Jan 2020 23:45:32 +0000 To: "commits@hudi.apache.org" Subject: [incubator-hudi] branch master updated: [HUDI-502] provide a custom time zone definition for TimestampBasedKeyGenerator (#1188) MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8bit Message-ID: <157887273197.13948.7135373592758202977@gitbox.apache.org> From: vinoth@apache.org X-Git-Host: gitbox.apache.org X-Git-Repo: incubator-hudi X-Git-Refname: refs/heads/master X-Git-Reftype: branch X-Git-Oldrev: 971c7d41bd912b0e00ae8606adbe36548e2c49b3 X-Git-Newrev: a44c61b81356e93340710ccc0022e576b7b6e077 X-Git-Rev: a44c61b81356e93340710ccc0022e576b7b6e077 X-Git-NotificationType: ref_changed_plus_diff X-Git-Multimail-Version: 1.5.dev Auto-Submitted: auto-generated This is an automated email from the ASF dual-hosted git repository. vinoth pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-hudi.git The following commit(s) were added to refs/heads/master by this push: new a44c61b [HUDI-502] provide a custom time zone definition for TimestampBasedKeyGenerator (#1188) a44c61b is described below commit a44c61b81356e93340710ccc0022e576b7b6e077 Author: openopen2 AuthorDate: Mon Jan 13 07:45:23 2020 +0800 [HUDI-502] provide a custom time zone definition for TimestampBasedKeyGenerator (#1188) --- .../apache/hudi/common/util/SchemaTestUtil.java | 4 ++ .../src/test/resources/timestamp-test-evolved.avsc | 26 +++++++ .../keygen/TimestampBasedKeyGenerator.java | 11 ++- .../utilities/TestTimestampBasedKeyGenerator.java | 83 ++++++++++++++++++++++ 4 files changed, 122 insertions(+), 2 deletions(-) diff --git a/hudi-common/src/test/java/org/apache/hudi/common/util/SchemaTestUtil.java b/hudi-common/src/test/java/org/apache/hudi/common/util/SchemaTestUtil.java index 2b3de27..18d6b73 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/util/SchemaTestUtil.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/util/SchemaTestUtil.java @@ -169,6 +169,10 @@ public class SchemaTestUtil { return new Schema.Parser().parse(SchemaTestUtil.class.getResourceAsStream("/complex-test-evolved.avsc")); } + public static Schema getTimestampEvolvedSchema() throws IOException { + return new Schema.Parser().parse(SchemaTestUtil.class.getResourceAsStream("/timestamp-test-evolved.avsc")); + } + public static GenericRecord generateAvroRecordFromJson(Schema schema, int recordNumber, String commitTime, String fileId) throws IOException { TestRecord record = new TestRecord(commitTime, recordNumber, fileId); diff --git a/hudi-common/src/test/resources/timestamp-test-evolved.avsc b/hudi-common/src/test/resources/timestamp-test-evolved.avsc new file mode 100644 index 0000000..421c672 --- /dev/null +++ b/hudi-common/src/test/resources/timestamp-test-evolved.avsc @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +{ + "namespace": "example.avro", + "type": "record", + "name": "User", + "fields": [ + {"name": "field1", "type": ["null", "string"], "default": null}, + {"name": "createTime", "type": ["null", "string"], "default": null} + ] +} \ No newline at end of file diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/keygen/TimestampBasedKeyGenerator.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/keygen/TimestampBasedKeyGenerator.java index 6d3a6e3..7f1380e 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/keygen/TimestampBasedKeyGenerator.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/keygen/TimestampBasedKeyGenerator.java @@ -51,6 +51,10 @@ public class TimestampBasedKeyGenerator extends SimpleKeyGenerator { private final String outputDateFormat; + // TimeZone detailed settings reference + // https://docs.oracle.com/javase/8/docs/api/java/util/TimeZone.html + private final TimeZone timeZone; + /** * Supported configs. */ @@ -62,6 +66,8 @@ public class TimestampBasedKeyGenerator extends SimpleKeyGenerator { "hoodie.deltastreamer.keygen.timebased.input.dateformat"; private static final String TIMESTAMP_OUTPUT_DATE_FORMAT_PROP = "hoodie.deltastreamer.keygen.timebased.output.dateformat"; + private static final String TIMESTAMP_TIMEZONE_FORMAT_PROP = + "hoodie.deltastreamer.keygen.timebased.timezone"; } public TimestampBasedKeyGenerator(TypedProperties config) { @@ -70,12 +76,13 @@ public class TimestampBasedKeyGenerator extends SimpleKeyGenerator { Arrays.asList(Config.TIMESTAMP_TYPE_FIELD_PROP, Config.TIMESTAMP_OUTPUT_DATE_FORMAT_PROP)); this.timestampType = TimestampType.valueOf(config.getString(Config.TIMESTAMP_TYPE_FIELD_PROP)); this.outputDateFormat = config.getString(Config.TIMESTAMP_OUTPUT_DATE_FORMAT_PROP); + this.timeZone = TimeZone.getTimeZone(config.getString(Config.TIMESTAMP_TIMEZONE_FORMAT_PROP, "GMT")); if (timestampType == TimestampType.DATE_STRING || timestampType == TimestampType.MIXED) { DataSourceUtils.checkRequiredProperties(config, Collections.singletonList(Config.TIMESTAMP_INPUT_DATE_FORMAT_PROP)); this.inputDateFormat = new SimpleDateFormat(config.getString(Config.TIMESTAMP_INPUT_DATE_FORMAT_PROP)); - this.inputDateFormat.setTimeZone(TimeZone.getTimeZone("GMT")); + this.inputDateFormat.setTimeZone(timeZone); } } @@ -86,7 +93,7 @@ public class TimestampBasedKeyGenerator extends SimpleKeyGenerator { partitionVal = 1L; } SimpleDateFormat partitionPathFormat = new SimpleDateFormat(outputDateFormat); - partitionPathFormat.setTimeZone(TimeZone.getTimeZone("GMT")); + partitionPathFormat.setTimeZone(timeZone); try { long unixTime; diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestTimestampBasedKeyGenerator.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestTimestampBasedKeyGenerator.java new file mode 100644 index 0000000..cb0c822 --- /dev/null +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestTimestampBasedKeyGenerator.java @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.utilities; + +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericRecord; +import org.apache.hudi.DataSourceWriteOptions; +import org.apache.hudi.common.model.HoodieKey; +import org.apache.hudi.common.util.SchemaTestUtil; +import org.apache.hudi.common.util.TypedProperties; +import org.apache.hudi.utilities.keygen.TimestampBasedKeyGenerator; +import org.junit.Before; +import org.junit.Test; + +import java.io.IOException; + +import static org.junit.Assert.assertEquals; + +public class TestTimestampBasedKeyGenerator { + private Schema schema; + private GenericRecord baseRecord; + private TypedProperties properties = new TypedProperties(); + + @Before + public void initialize() throws IOException { + schema = SchemaTestUtil.getTimestampEvolvedSchema(); + baseRecord = SchemaTestUtil + .generateAvroRecordFromJson(schema, 1, "001", "f1"); + + properties.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY(), "field1"); + properties.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY(), "createTime"); + properties.setProperty(DataSourceWriteOptions.HIVE_STYLE_PARTITIONING_OPT_KEY(), "false"); + } + + private TypedProperties getBaseKeyConfig(String timestampType, String dateFormat, String timezone) { + properties.setProperty("hoodie.deltastreamer.keygen.timebased.timestamp.type", timestampType); + properties.setProperty("hoodie.deltastreamer.keygen.timebased.output.dateformat", dateFormat); + properties.setProperty("hoodie.deltastreamer.keygen.timebased.timezone", timezone); + return properties; + } + + @Test + public void testTimestampBasedKeyGenerator() { + // timezone is GMT+8:00 + baseRecord.put("createTime", 1578283932000L); + properties = getBaseKeyConfig("EPOCHMILLISECONDS", "yyyy-MM-dd hh", "GMT+8:00"); + HoodieKey hk1 = new TimestampBasedKeyGenerator(properties).getKey(baseRecord); + assertEquals(hk1.getPartitionPath(), "2020-01-06 12"); + + // timezone is GMT + properties = getBaseKeyConfig("EPOCHMILLISECONDS", "yyyy-MM-dd hh", "GMT"); + HoodieKey hk2 = new TimestampBasedKeyGenerator(properties).getKey(baseRecord); + assertEquals(hk2.getPartitionPath(), "2020-01-06 04"); + + // timestamp is DATE_STRING, timezone is GMT+8:00 + baseRecord.put("createTime", "2020-01-06 12:12:12"); + properties = getBaseKeyConfig("DATE_STRING", "yyyy-MM-dd hh", "GMT+8:00"); + properties.setProperty("hoodie.deltastreamer.keygen.timebased.input.dateformat", "yyyy-MM-dd hh:mm:ss"); + HoodieKey hk3 = new TimestampBasedKeyGenerator(properties).getKey(baseRecord); + assertEquals(hk3.getPartitionPath(), "2020-01-06 12"); + + // timezone is GMT + properties = getBaseKeyConfig("DATE_STRING", "yyyy-MM-dd hh", "GMT"); + HoodieKey hk4 = new TimestampBasedKeyGenerator(properties).getKey(baseRecord); + assertEquals(hk4.getPartitionPath(), "2020-01-06 12"); + } +}