From commits-return-1955-archive-asf-public=cust-asf.ponee.io@orc.apache.org Wed Oct 2 20:33:35 2019 Return-Path: X-Original-To: archive-asf-public@cust-asf.ponee.io Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [207.244.88.153]) by mx-eu-01.ponee.io (Postfix) with SMTP id 3B02C180675 for ; Wed, 2 Oct 2019 22:33:35 +0200 (CEST) Received: (qmail 80960 invoked by uid 500); 2 Oct 2019 20:33:34 -0000 Mailing-List: contact commits-help@orc.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@orc.apache.org Delivered-To: mailing list commits@orc.apache.org Received: (qmail 80933 invoked by uid 99); 2 Oct 2019 20:33:34 -0000 Received: from ec2-52-202-80-70.compute-1.amazonaws.com (HELO gitbox.apache.org) (52.202.80.70) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 02 Oct 2019 20:33:34 +0000 Received: by gitbox.apache.org (ASF Mail Server at gitbox.apache.org, from userid 33) id 69086890A5; Wed, 2 Oct 2019 20:33:34 +0000 (UTC) Date: Wed, 02 Oct 2019 20:33:36 +0000 To: "commits@orc.apache.org" Subject: [orc] 02/02: ORC-554: Float to timestamp schema evolution should handle overflow. MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8bit From: omalley@apache.org In-Reply-To: <157004841404.14747.8286368159957017327@gitbox.apache.org> References: <157004841404.14747.8286368159957017327@gitbox.apache.org> X-Git-Host: gitbox.apache.org X-Git-Repo: orc X-Git-Refname: refs/heads/branch-1.6 X-Git-Reftype: branch X-Git-Rev: 2f1cc766ee19e655bd7db16f36f6790f6ea93ccf X-Git-NotificationType: diff X-Git-Multimail-Version: 1.5.dev Auto-Submitted: auto-generated Message-Id: <20191002203334.69086890A5@gitbox.apache.org> This is an automated email from the ASF dual-hosted git repository. 
omalley pushed a commit to branch branch-1.6 in repository https://gitbox.apache.org/repos/asf/orc.git commit 2f1cc766ee19e655bd7db16f36f6790f6ea93ccf Author: Laszlo Bodor AuthorDate: Thu Sep 12 09:21:46 2019 +0200 ORC-554: Float to timestamp schema evolution should handle overflow. Fixes #431 Signed-off-by: Owen O'Malley --- java/core/src/findbugs/exclude.xml | 6 -- .../apache/orc/impl/ConvertTreeReaderFactory.java | 18 +++- .../org/apache/orc/impl/TestSchemaEvolution.java | 101 +++++++++++++++++++++ 3 files changed, 116 insertions(+), 9 deletions(-) diff --git a/java/core/src/findbugs/exclude.xml b/java/core/src/findbugs/exclude.xml index 76d395a..6112afd 100644 --- a/java/core/src/findbugs/exclude.xml +++ b/java/core/src/findbugs/exclude.xml @@ -60,11 +60,5 @@ - - - - - - diff --git a/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java b/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java index a6c158b..1ea870a 100644 --- a/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java +++ b/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java @@ -1409,9 +1409,21 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory { seconds = SerializationUtils.convertFromUtc(local, seconds); } long wholeSec = (long) Math.floor(seconds); - timestampColVector.time[elementNum] = wholeSec * 1000; - timestampColVector.nanos[elementNum] = - 1_000_000 * (int) Math.round((seconds - wholeSec) * 1000); + + // overflow + double doubleMillis = seconds * 1000; + long millis = wholeSec * 1000; + if (doubleMillis > Long.MAX_VALUE || doubleMillis < Long.MIN_VALUE || + ((millis >= 0) != (doubleMillis >= 0))) { + timestampColVector.time[elementNum] = 0L; + timestampColVector.nanos[elementNum] = 0; + timestampColVector.isNull[elementNum] = true; + timestampColVector.noNulls = false; + } else { + timestampColVector.time[elementNum] = wholeSec * 1000; + timestampColVector.nanos[elementNum] = + 1_000_000 * (int) 
Math.round((seconds - wholeSec) * 1000); + } } @Override diff --git a/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java b/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java index 1dda07e..304ee4b 100644 --- a/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java +++ b/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java @@ -2263,4 +2263,105 @@ public class TestSchemaEvolution { TimeZone.setDefault(oldDefault); } } + + @Test + public void doubleToTimeStampOverflow() throws Exception { + floatAndDoubleToTimeStampOverflow("double", + 340282347000000000000000000000000000000000.0, + 1e16, + 9223372036854775.0, + 9000000000000000.1, + 10000000000.0, + 10000000.123, + -1000000.123, + -10000000000.0, + -9000000000000000.1, + -9223372036854775.0, + -1e16, + -340282347000000000000000000000000000000000.0); + } + + @Test + public void floatToTimeStampPositiveOverflow() throws Exception { + floatAndDoubleToTimeStampOverflow("float", + 340282347000000000000000000000000000000000.0, + 1e16, + 9223372036854775.0, + 9000000000000000.1, + 10000000000.0, + 10000000.123, + -1000000.123, + -10000000000.0, + -9000000000000000.1, + -9223372036854775.0, + -1e16, + -340282347000000000000000000000000000000000.0); + } + + private void floatAndDoubleToTimeStampOverflow(String typeInFileSchema, + double... 
values) throws Exception { + boolean isFloat = typeInFileSchema.equals("float"); + TypeDescription fileSchema = + TypeDescription.fromString(String.format("struct<c1:%s>", typeInFileSchema)); + Writer writer = OrcFile.createWriter(testFilePath, + OrcFile.writerOptions(conf) + .setSchema(fileSchema) + .stripeSize(10000) + .useUTCTimestamp(true)); + + VectorizedRowBatch batch = fileSchema.createRowBatchV2(); + DoubleColumnVector fl1 = (DoubleColumnVector) batch.cols[0]; + + for (double v : values) { + int row = batch.size++; + fl1.vector[row] = v; + + if (batch.size == batch.getMaxSize()) { + writer.addRowBatch(batch); + batch.reset(); + } + } + if (batch.size != 0) { + writer.addRowBatch(batch); + } + writer.close(); + + TypeDescription readerSchema = TypeDescription.fromString("struct<c1:timestamp>"); + VectorizedRowBatch batchTimeStamp = readerSchema.createRowBatchV2(); + TimestampColumnVector t1 = (TimestampColumnVector) batchTimeStamp.cols[0]; + + OrcFile.ReaderOptions options = OrcFile + .readerOptions(conf) + .useUTCTimestamp(true); + + try (Reader reader = OrcFile.createReader(testFilePath, options); + RecordReader rows = reader.rows(reader.options().schema(readerSchema))) { + int value = 0; + while (value < values.length) { + assertTrue("value " + value, rows.nextBatch(batchTimeStamp)); + for(int row=0; row < batchTimeStamp.size; ++row) { + double expected = values[value + row]; + String rowName = String.format("value %d", value + row); + boolean isPositive = ((long)Math.floor(expected) * 1000) >= 0; + if (expected * 1000 < Long.MIN_VALUE || + expected * 1000 > Long.MAX_VALUE || + ((expected >= 0) != isPositive)) { + assertFalse(rowName, t1.noNulls); + assertTrue(rowName, t1.isNull[row]); + } else { + double actual = t1.time[row] / 1000.0 + t1.nanos[row] / 1_000_000_000.0; + assertEquals(rowName, expected, actual, + Math.abs(expected * (isFloat ? 
0.000001 : 0.0000000000000001))); + assertFalse(rowName, t1.isNull[row]); + assertTrue(String.format( + "%s nanos should be 0 to 1,000,000,000 instead it's: %d", + rowName, t1.nanos[row]), + t1.nanos[row] >= 0 && t1.nanos[row] < 1_000_000_000); + } + } + value += batchTimeStamp.size; + } + assertFalse(rows.nextBatch(batchTimeStamp)); + } + } }