From commits-return-1955-archive-asf-public=cust-asf.ponee.io@orc.apache.org  Wed Oct  2 20:33:35 2019
Return-Path: <commits-return-1955-archive-asf-public=cust-asf.ponee.io@orc.apache.org>
X-Original-To: archive-asf-public@cust-asf.ponee.io
Delivered-To: archive-asf-public@cust-asf.ponee.io
Received: from mail.apache.org (hermes.apache.org [207.244.88.153])
	by mx-eu-01.ponee.io (Postfix) with SMTP id 3B02C180675
	for <archive-asf-public@cust-asf.ponee.io>; Wed,  2 Oct 2019 22:33:35 +0200 (CEST)
Received: (qmail 80960 invoked by uid 500); 2 Oct 2019 20:33:34 -0000
Mailing-List: contact commits-help@orc.apache.org; run by ezmlm
Precedence: bulk
List-Help: <mailto:commits-help@orc.apache.org>
List-Unsubscribe: <mailto:commits-unsubscribe@orc.apache.org>
List-Post: <mailto:commits@orc.apache.org>
List-Id: <commits.orc.apache.org>
Reply-To: dev@orc.apache.org
Delivered-To: mailing list commits@orc.apache.org
Received: (qmail 80933 invoked by uid 99); 2 Oct 2019 20:33:34 -0000
Received: from ec2-52-202-80-70.compute-1.amazonaws.com (HELO gitbox.apache.org) (52.202.80.70)
    by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 02 Oct 2019 20:33:34 +0000
Received: by gitbox.apache.org (ASF Mail Server at gitbox.apache.org, from userid 33)
	id 69086890A5; Wed,  2 Oct 2019 20:33:34 +0000 (UTC)
Date: Wed, 02 Oct 2019 20:33:36 +0000
To: "commits@orc.apache.org" <commits@orc.apache.org>
Subject: [orc] 02/02: ORC-554: Float to timestamp schema evolution should
 handle overflow.
MIME-Version: 1.0
Content-Type: text/plain; charset=utf-8
Content-Transfer-Encoding: 8bit
From: omalley@apache.org
In-Reply-To: <157004841404.14747.8286368159957017327@gitbox.apache.org>
References: <157004841404.14747.8286368159957017327@gitbox.apache.org>
X-Git-Host: gitbox.apache.org
X-Git-Repo: orc
X-Git-Refname: refs/heads/branch-1.6
X-Git-Reftype: branch
X-Git-Rev: 2f1cc766ee19e655bd7db16f36f6790f6ea93ccf
X-Git-NotificationType: diff
X-Git-Multimail-Version: 1.5.dev
Auto-Submitted: auto-generated
Message-Id: <20191002203334.69086890A5@gitbox.apache.org>

This is an automated email from the ASF dual-hosted git repository.

omalley pushed a commit to branch branch-1.6
in repository https://gitbox.apache.org/repos/asf/orc.git

commit 2f1cc766ee19e655bd7db16f36f6790f6ea93ccf
Author: Laszlo Bodor <bodorlaszlo0202@gmail.com>
AuthorDate: Thu Sep 12 09:21:46 2019 +0200

    ORC-554: Float to timestamp schema evolution should handle overflow.
    
    Fixes #431
    
    Signed-off-by: Owen O'Malley <omalley@apache.org>
---
 java/core/src/findbugs/exclude.xml                 |   6 --
 .../apache/orc/impl/ConvertTreeReaderFactory.java  |  18 +++-
 .../org/apache/orc/impl/TestSchemaEvolution.java   | 101 +++++++++++++++++++++
 3 files changed, 116 insertions(+), 9 deletions(-)
diff --git a/java/core/src/findbugs/exclude.xml b/java/core/src/findbugs/exclude.xml
index 76d395a..6112afd 100644
--- a/java/core/src/findbugs/exclude.xml
+++ b/java/core/src/findbugs/exclude.xml
@@ -60,11 +60,5 @@
   <Match>
     <Bug pattern="RCN_REDUNDANT_NULLCHECK_WOULD_HAVE_BEEN_A_NPE"/>
     <Class name="org.apache.orc.impl.TestSchemaEvolution"/>
-    <Method name="testEvolutionFromTimestamp"/>
-  </Match>
-  <Match>
-    <Bug pattern="RCN_REDUNDANT_NULLCHECK_WOULD_HAVE_BEEN_A_NPE"/>
-    <Class name="org.apache.orc.impl.TestSchemaEvolution"/>
-    <Method name="testEvolutionToTimestamp"/>
   </Match>
 </FindBugsFilter>
diff --git a/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java b/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
index a6c158b..1ea870a 100644
--- a/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
+++ b/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
@@ -1409,9 +1409,21 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
         seconds = SerializationUtils.convertFromUtc(local, seconds);
       }
       long wholeSec = (long) Math.floor(seconds);
-      timestampColVector.time[elementNum] = wholeSec * 1000;
-      timestampColVector.nanos[elementNum] =
-          1_000_000 * (int) Math.round((seconds - wholeSec) * 1000);
+
+      // On overflow (millis outside the long range, or sign flipped), set the value to null.
+      double doubleMillis = seconds * 1000;
+      long millis = wholeSec * 1000;
+      if (doubleMillis > Long.MAX_VALUE || doubleMillis < Long.MIN_VALUE ||
+              ((millis >= 0) != (doubleMillis >= 0))) {
+        timestampColVector.time[elementNum] = 0L;
+        timestampColVector.nanos[elementNum] = 0;
+        timestampColVector.isNull[elementNum] = true;
+        timestampColVector.noNulls = false;
+      } else {
+        timestampColVector.time[elementNum] = wholeSec * 1000;
+        timestampColVector.nanos[elementNum] =
+            1_000_000 * (int) Math.round((seconds - wholeSec) * 1000);
+      }
     }
 
     @Override
diff --git a/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java b/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
index 1dda07e..304ee4b 100644
--- a/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
+++ b/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
@@ -2263,4 +2263,105 @@ public class TestSchemaEvolution {
       TimeZone.setDefault(oldDefault);
     }
   }
+
+  @Test
+  public void doubleToTimeStampOverflow() throws Exception {
+    floatAndDoubleToTimeStampOverflow("double",
+        340282347000000000000000000000000000000000.0,
+        1e16,
+        9223372036854775.0,
+        9000000000000000.1,
+        10000000000.0,
+        10000000.123,
+        -1000000.123,
+        -10000000000.0,
+        -9000000000000000.1,
+        -9223372036854775.0,
+        -1e16,
+        -340282347000000000000000000000000000000000.0);
+  }
+
+  @Test
+  public void floatToTimeStampPositiveOverflow() throws Exception {
+    floatAndDoubleToTimeStampOverflow("float",
+        340282347000000000000000000000000000000000.0,
+        1e16,
+        9223372036854775.0,
+        9000000000000000.1,
+        10000000000.0,
+        10000000.123,
+        -1000000.123,
+        -10000000000.0,
+        -9000000000000000.1,
+        -9223372036854775.0,
+        -1e16,
+        -340282347000000000000000000000000000000000.0);
+  }
+
+  private void floatAndDoubleToTimeStampOverflow(String typeInFileSchema,
+                                                 double... values) throws Exception {
+    boolean isFloat = typeInFileSchema.equals("float");
+    TypeDescription fileSchema =
+        TypeDescription.fromString(String.format("struct<c1:%s>", typeInFileSchema));
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf)
+            .setSchema(fileSchema)
+            .stripeSize(10000)
+            .useUTCTimestamp(true));
+
+    VectorizedRowBatch batch = fileSchema.createRowBatchV2();
+    DoubleColumnVector fl1 = (DoubleColumnVector) batch.cols[0];
+
+    for (double v : values) {
+      int row = batch.size++;
+      fl1.vector[row] = v;
+
+      if (batch.size == batch.getMaxSize()) {
+        writer.addRowBatch(batch);
+        batch.reset();
+      }
+    }
+    if (batch.size != 0) {
+      writer.addRowBatch(batch);
+    }
+    writer.close();
+
+    TypeDescription readerSchema = TypeDescription.fromString("struct<c1:timestamp>");
+    VectorizedRowBatch batchTimeStamp = readerSchema.createRowBatchV2();
+    TimestampColumnVector t1 = (TimestampColumnVector) batchTimeStamp.cols[0];
+
+    OrcFile.ReaderOptions options = OrcFile
+                                        .readerOptions(conf)
+                                        .useUTCTimestamp(true);
+
+    try (Reader reader = OrcFile.createReader(testFilePath, options);
+         RecordReader rows = reader.rows(reader.options().schema(readerSchema))) {
+      int value = 0;
+      while (value < values.length) {
+        assertTrue("value " + value, rows.nextBatch(batchTimeStamp));
+        for(int row=0; row < batchTimeStamp.size; ++row) {
+          double expected = values[value + row];
+          String rowName = String.format("value %d", value + row);
+          boolean isPositive = ((long)Math.floor(expected) * 1000) >= 0;
+          if (expected * 1000 < Long.MIN_VALUE ||
+                  expected * 1000 > Long.MAX_VALUE ||
+                  ((expected >= 0) != isPositive)) {
+            assertFalse(rowName, t1.noNulls);
+            assertTrue(rowName, t1.isNull[row]);
+          } else {
+            double actual = t1.time[row] / 1000.0 + t1.nanos[row] / 1_000_000_000.0;
+            assertEquals(rowName, expected, actual,
+                Math.abs(expected * (isFloat ? 0.000001 : 0.0000000000000001)));
+            assertFalse(rowName, t1.isNull[row]);
+            assertTrue(String.format(
+                "%s nanos should be 0 to 1,000,000,000 instead it's: %d",
+                rowName, t1.nanos[row]),
+                t1.nanos[row] >= 0 && t1.nanos[row] < 1_000_000_000);
+          }
+        }
+        value += batchTimeStamp.size;
+      }
+      assertFalse(rows.nextBatch(batchTimeStamp));
+    }
+  }
 }