From commits-return-1952-archive-asf-public=cust-asf.ponee.io@orc.apache.org Wed Oct 2 20:20:18 2019 Return-Path: X-Original-To: archive-asf-public@cust-asf.ponee.io Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [207.244.88.153]) by mx-eu-01.ponee.io (Postfix) with SMTP id 8849018064F for ; Wed, 2 Oct 2019 22:20:14 +0200 (CEST) Received: (qmail 40249 invoked by uid 500); 2 Oct 2019 20:20:14 -0000 Mailing-List: contact commits-help@orc.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@orc.apache.org Delivered-To: mailing list commits@orc.apache.org Received: (qmail 40204 invoked by uid 99); 2 Oct 2019 20:20:13 -0000 Received: from ec2-52-202-80-70.compute-1.amazonaws.com (HELO gitbox.apache.org) (52.202.80.70) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 02 Oct 2019 20:20:13 +0000 Received: by gitbox.apache.org (ASF Mail Server at gitbox.apache.org, from userid 33) id B27A1890A3; Wed, 2 Oct 2019 20:20:13 +0000 (UTC) Date: Wed, 02 Oct 2019 20:20:13 +0000 To: "commits@orc.apache.org" Subject: [orc] branch master updated: ORC-554: Float to timestamp schema evolution should handle overflow. MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8bit Message-ID: <157004761362.3462.4555997916194748822@gitbox.apache.org> From: omalley@apache.org X-Git-Host: gitbox.apache.org X-Git-Repo: orc X-Git-Refname: refs/heads/master X-Git-Reftype: branch X-Git-Oldrev: 1127ba385d9cfe9dc8f85aefa290302372b4b957 X-Git-Newrev: 7de945b080c5ca83b84397db105f70082a2107f4 X-Git-Rev: 7de945b080c5ca83b84397db105f70082a2107f4 X-Git-NotificationType: ref_changed_plus_diff X-Git-Multimail-Version: 1.5.dev Auto-Submitted: auto-generated This is an automated email from the ASF dual-hosted git repository. 
omalley pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/orc.git The following commit(s) were added to refs/heads/master by this push: new 7de945b ORC-554: Float to timestamp schema evolution should handle overflow. 7de945b is described below commit 7de945b080c5ca83b84397db105f70082a2107f4 Author: Laszlo Bodor AuthorDate: Thu Sep 12 09:21:46 2019 +0200 ORC-554: Float to timestamp schema evolution should handle overflow. Fixes #431 Signed-off-by: Owen O'Malley --- java/core/src/findbugs/exclude.xml | 6 -- .../apache/orc/impl/ConvertTreeReaderFactory.java | 18 +++- .../org/apache/orc/impl/TestSchemaEvolution.java | 101 +++++++++++++++++++++ 3 files changed, 116 insertions(+), 9 deletions(-) diff --git a/java/core/src/findbugs/exclude.xml b/java/core/src/findbugs/exclude.xml index 76d395a..6112afd 100644 --- a/java/core/src/findbugs/exclude.xml +++ b/java/core/src/findbugs/exclude.xml @@ -60,11 +60,5 @@ - - - - - - diff --git a/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java b/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java index a6c158b..1ea870a 100644 --- a/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java +++ b/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java @@ -1409,9 +1409,21 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory { seconds = SerializationUtils.convertFromUtc(local, seconds); } long wholeSec = (long) Math.floor(seconds); - timestampColVector.time[elementNum] = wholeSec * 1000; - timestampColVector.nanos[elementNum] = - 1_000_000 * (int) Math.round((seconds - wholeSec) * 1000); + + // overflow + double doubleMillis = seconds * 1000; + long millis = wholeSec * 1000; + if (doubleMillis > Long.MAX_VALUE || doubleMillis < Long.MIN_VALUE || + ((millis >= 0) != (doubleMillis >= 0))) { + timestampColVector.time[elementNum] = 0L; + timestampColVector.nanos[elementNum] = 0; + timestampColVector.isNull[elementNum] = true; + 
timestampColVector.noNulls = false; + } else { + timestampColVector.time[elementNum] = wholeSec * 1000; + timestampColVector.nanos[elementNum] = + 1_000_000 * (int) Math.round((seconds - wholeSec) * 1000); + } } @Override diff --git a/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java b/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java index 1dda07e..304ee4b 100644 --- a/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java +++ b/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java @@ -2263,4 +2263,105 @@ public class TestSchemaEvolution { TimeZone.setDefault(oldDefault); } } + + @Test + public void doubleToTimeStampOverflow() throws Exception { + floatAndDoubleToTimeStampOverflow("double", + 340282347000000000000000000000000000000000.0, + 1e16, + 9223372036854775.0, + 9000000000000000.1, + 10000000000.0, + 10000000.123, + -1000000.123, + -10000000000.0, + -9000000000000000.1, + -9223372036854775.0, + -1e16, + -340282347000000000000000000000000000000000.0); + } + + @Test + public void floatToTimeStampPositiveOverflow() throws Exception { + floatAndDoubleToTimeStampOverflow("float", + 340282347000000000000000000000000000000000.0, + 1e16, + 9223372036854775.0, + 9000000000000000.1, + 10000000000.0, + 10000000.123, + -1000000.123, + -10000000000.0, + -9000000000000000.1, + -9223372036854775.0, + -1e16, + -340282347000000000000000000000000000000000.0); + } + + private void floatAndDoubleToTimeStampOverflow(String typeInFileSchema, + double... 
values) throws Exception { + boolean isFloat = typeInFileSchema.equals("float"); + TypeDescription fileSchema = + TypeDescription.fromString(String.format("struct<c1:%s>", typeInFileSchema)); + Writer writer = OrcFile.createWriter(testFilePath, + OrcFile.writerOptions(conf) + .setSchema(fileSchema) + .stripeSize(10000) + .useUTCTimestamp(true)); + + VectorizedRowBatch batch = fileSchema.createRowBatchV2(); + DoubleColumnVector fl1 = (DoubleColumnVector) batch.cols[0]; + + for (double v : values) { + int row = batch.size++; + fl1.vector[row] = v; + + if (batch.size == batch.getMaxSize()) { + writer.addRowBatch(batch); + batch.reset(); + } + } + if (batch.size != 0) { + writer.addRowBatch(batch); + } + writer.close(); + + TypeDescription readerSchema = TypeDescription.fromString("struct<c1:timestamp>"); + VectorizedRowBatch batchTimeStamp = readerSchema.createRowBatchV2(); + TimestampColumnVector t1 = (TimestampColumnVector) batchTimeStamp.cols[0]; + + OrcFile.ReaderOptions options = OrcFile + .readerOptions(conf) + .useUTCTimestamp(true); + + try (Reader reader = OrcFile.createReader(testFilePath, options); + RecordReader rows = reader.rows(reader.options().schema(readerSchema))) { + int value = 0; + while (value < values.length) { + assertTrue("value " + value, rows.nextBatch(batchTimeStamp)); + for(int row=0; row < batchTimeStamp.size; ++row) { + double expected = values[value + row]; + String rowName = String.format("value %d", value + row); + boolean isPositive = ((long)Math.floor(expected) * 1000) >= 0; + if (expected * 1000 < Long.MIN_VALUE || + expected * 1000 > Long.MAX_VALUE || + ((expected >= 0) != isPositive)) { + assertFalse(rowName, t1.noNulls); + assertTrue(rowName, t1.isNull[row]); + } else { + double actual = t1.time[row] / 1000.0 + t1.nanos[row] / 1_000_000_000.0; + assertEquals(rowName, expected, actual, + Math.abs(expected * (isFloat ? 
0.000001 : 0.0000000000000001))); + assertFalse(rowName, t1.isNull[row]); + assertTrue(String.format( + "%s nanos should be 0 to 1,000,000,000 instead it's: %d", + rowName, t1.nanos[row]), + t1.nanos[row] >= 0 && t1.nanos[row] < 1_000_000_000); + } + } + value += batchTimeStamp.size; + } + assertFalse(rows.nextBatch(batchTimeStamp)); + } + } }