Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id B29B1200CAF for ; Thu, 8 Jun 2017 00:22:37 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id B1311160BE2; Wed, 7 Jun 2017 22:22:37 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id A9E66160BD0 for ; Thu, 8 Jun 2017 00:22:36 +0200 (CEST) Received: (qmail 72536 invoked by uid 500); 7 Jun 2017 22:22:35 -0000 Mailing-List: contact commits-help@parquet.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@parquet.apache.org Delivered-To: mailing list commits@parquet.apache.org Received: (qmail 72527 invoked by uid 99); 7 Jun 2017 22:22:35 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 07 Jun 2017 22:22:35 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 7F5A9DFAF5; Wed, 7 Jun 2017 22:22:33 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: julien@apache.org To: commits@parquet.apache.org Message-Id: X-Mailer: ASF-Git Admin Mailer Subject: parquet-mr git commit: Parquet-884: Add support for Decimal datatype to Parquet-Pig record reader Date: Wed, 7 Jun 2017 22:22:33 +0000 (UTC) archived-at: Wed, 07 Jun 2017 22:22:37 -0000 Repository: parquet-mr Updated Branches: refs/heads/master 9491d7a61 -> 9d58b6a83 Parquet-884: Add support for Decimal datatype to Parquet-Pig record reader Adds conversion support to Pig for Decimal datatype. Based on the scala code in the spark project that provides a similar function for their sql library. Author: EllenKletscher Closes #404 from EllenKletscher/master and squashes the following commits: 7714738 [EllenKletscher] add comment for precision check 50c75c8 [EllenKletscher] remove check for primitiveType null 08d4dbb [EllenKletscher] PARQUET-884: Add missing AL header 57c4d72 [EllenKletscher] PARQUET-884: Add missing AL header ea61267 [EllenKletscher] PARQUET-884: add support for decimal type to pig reader Project: http://git-wip-us.apache.org/repos/asf/parquet-mr/repo Commit: http://git-wip-us.apache.org/repos/asf/parquet-mr/commit/9d58b6a8 Tree: http://git-wip-us.apache.org/repos/asf/parquet-mr/tree/9d58b6a8 Diff: http://git-wip-us.apache.org/repos/asf/parquet-mr/diff/9d58b6a8 Branch: refs/heads/master Commit: 9d58b6a83aa79dcad01c3bcc2ec0a7db74ba83b1 Parents: 9491d7a Author: EllenKletscher Authored: Wed Jun 7 15:22:28 2017 -0700 Committer: Julien Le Dem Committed: Wed Jun 7 15:22:28 2017 -0700 ---------------------------------------------------------------------- .../apache/parquet/pig/PigSchemaConverter.java | 8 +- .../parquet/pig/convert/DecimalUtils.java | 65 ++++++++++++++++ .../parquet/pig/convert/TupleConverter.java | 27 +++++++ .../apache/parquet/pig/TestDecimalUtils.java | 79 ++++++++++++++++++++ 4 files changed, 177 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/9d58b6a8/parquet-pig/src/main/java/org/apache/parquet/pig/PigSchemaConverter.java ---------------------------------------------------------------------- diff --git a/parquet-pig/src/main/java/org/apache/parquet/pig/PigSchemaConverter.java b/parquet-pig/src/main/java/org/apache/parquet/pig/PigSchemaConverter.java index c9eb0ba..e560e42 100644 --- a/parquet-pig/src/main/java/org/apache/parquet/pig/PigSchemaConverter.java +++ b/parquet-pig/src/main/java/org/apache/parquet/pig/PigSchemaConverter.java @@ -244,8 +244,12 @@ public class PigSchemaConverter { @Override public FieldSchema convertFIXED_LEN_BYTE_ARRAY( - PrimitiveTypeName primitiveTypeName) throws FrontendException { - return new FieldSchema(fieldName, null, DataType.BYTEARRAY); + PrimitiveTypeName primitiveTypeName) throws FrontendException { + if (originalType == OriginalType.DECIMAL) { + return new FieldSchema(fieldName, null, DataType.BIGDECIMAL); + } else { + return new FieldSchema(fieldName, null, DataType.BYTEARRAY); + } } @Override http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/9d58b6a8/parquet-pig/src/main/java/org/apache/parquet/pig/convert/DecimalUtils.java ---------------------------------------------------------------------- diff --git a/parquet-pig/src/main/java/org/apache/parquet/pig/convert/DecimalUtils.java b/parquet-pig/src/main/java/org/apache/parquet/pig/convert/DecimalUtils.java new file mode 100644 index 0000000..f850332 --- /dev/null +++ b/parquet-pig/src/main/java/org/apache/parquet/pig/convert/DecimalUtils.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.parquet.pig.convert; + +import java.nio.ByteBuffer; +import java.math.BigInteger; +import java.math.BigDecimal; +import static java.lang.Math.pow; + +import org.apache.parquet.io.api.Binary; + +/* + * Conversion between Parquet Decimal Type to Java BigDecimal in Pig + * Code Based on the Apache Spark ParquetRowConverter.scala + * + * + */ + +public class DecimalUtils { + + public static BigDecimal binaryToDecimal(Binary value, int precision, int scale) { + /* + * Precision <= 18 checks for the max number of digits for an unscaled long, + * else treat with big integer conversion + */ + if (precision <= 18) { + ByteBuffer buffer = value.toByteBuffer(); + byte[] bytes = buffer.array(); + int start = buffer.arrayOffset() + buffer.position(); + int end = buffer.arrayOffset() + buffer.limit(); + long unscaled = 0L; + int i = start; + while ( i < end ) { + unscaled = ( unscaled << 8 | bytes[i] & 0xff ); + i++; + } + int bits = 8*(end - start); + long unscaledNew = (unscaled << (64 - bits)) >> (64 - bits); + if (unscaledNew <= -pow(10,18) || unscaledNew >= pow(10,18)) { + return new BigDecimal(unscaledNew); + } else { + return BigDecimal.valueOf(unscaledNew / pow(10,scale)); + } + } else { + return new BigDecimal(new BigInteger(value.getBytes()), scale); + } + } +} http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/9d58b6a8/parquet-pig/src/main/java/org/apache/parquet/pig/convert/TupleConverter.java ---------------------------------------------------------------------- diff --git a/parquet-pig/src/main/java/org/apache/parquet/pig/convert/TupleConverter.java b/parquet-pig/src/main/java/org/apache/parquet/pig/convert/TupleConverter.java index 3887332..1c7ab6c 100644 --- a/parquet-pig/src/main/java/org/apache/parquet/pig/convert/TupleConverter.java +++ b/parquet-pig/src/main/java/org/apache/parquet/pig/convert/TupleConverter.java @@ -21,6 +21,7 @@ package org.apache.parquet.pig.convert; import static java.lang.Math.max; import java.util.ArrayList; import java.util.List; +import java.math.BigDecimal; import org.apache.pig.backend.executionengine.ExecException; import org.apache.pig.data.DataByteArray; @@ -39,9 +40,11 @@ import org.apache.parquet.io.api.Converter; import org.apache.parquet.io.api.GroupConverter; import org.apache.parquet.io.api.PrimitiveConverter; import org.apache.parquet.pig.TupleConversionException; +import org.apache.parquet.pig.convert.DecimalUtils; import org.apache.parquet.schema.GroupType; import org.apache.parquet.schema.OriginalType; import org.apache.parquet.schema.PrimitiveType; +import org.apache.parquet.schema.DecimalMetadata; import org.apache.parquet.schema.Type; import org.apache.parquet.schema.Type.Repetition; @@ -140,6 +143,8 @@ public class TupleConverter extends GroupConverter { return new FieldDoubleConverter(parent); case DataType.LONG: return new FieldLongConverter(parent); + case DataType.BIGDECIMAL: + return new FieldBigDecimalConverter(type, parent); default: throw new TupleConversionException("unsupported pig type: " + pigField); } @@ -530,6 +535,28 @@ public class TupleConverter extends GroupConverter { } /** + * handle decimal type + * + */ + static final class FieldBigDecimalConverter extends PrimitiveConverter { + private final ParentValueContainer parent; + private final Type primitiveType; + public FieldBigDecimalConverter(Type primitiveType, ParentValueContainer parent) { + this.parent = parent; + this.primitiveType = primitiveType; + } + + @Override + final public void addBinary(Binary value) { + int precision = primitiveType.asPrimitiveType().getDecimalMetadata().getPrecision(); + int scale = primitiveType.asPrimitiveType().getDecimalMetadata().getScale(); + BigDecimal finaldecimal = DecimalUtils.binaryToDecimal(value, precision, scale); + parent.add(finaldecimal); + } + } + + + /** * Converts groups into bags * * @author Julien Le Dem http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/9d58b6a8/parquet-pig/src/test/java/org/apache/parquet/pig/TestDecimalUtils.java ---------------------------------------------------------------------- diff --git a/parquet-pig/src/test/java/org/apache/parquet/pig/TestDecimalUtils.java b/parquet-pig/src/test/java/org/apache/parquet/pig/TestDecimalUtils.java new file mode 100644 index 0000000..3b4afe8 --- /dev/null +++ b/parquet-pig/src/test/java/org/apache/parquet/pig/TestDecimalUtils.java @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.parquet.pig; + +import static org.junit.Assert.assertEquals; +import org.junit.Test; + +import java.math.BigDecimal; +import static java.lang.Math.abs; +import java.nio.ByteBuffer; + +import org.apache.parquet.io.api.Binary; +import org.apache.parquet.pig.convert.DecimalUtils; + +public class TestDecimalUtils { + + private void testDecimalConversion(double value, int precision, int scale, String stringValue) { + String originalString = Double.toString(value); + BigDecimal originalValue = new BigDecimal(originalString); + BigDecimal convertedValue = DecimalUtils.binaryToDecimal(Binary.fromByteArray(originalValue.unscaledValue().toByteArray()), + precision,scale); + assertEquals(stringValue, convertedValue.toString()); + } + + private void testDecimalConversion(int value, int precision, int scale, String stringValue) { + String originalString = Integer.toString(value); + BigDecimal originalValue = new BigDecimal(originalString); + BigDecimal convertedValue = DecimalUtils.binaryToDecimal(Binary.fromByteArray(originalValue.unscaledValue().toByteArray()), + precision,scale); + assertEquals(stringValue, convertedValue.toString()); + } + + private void testDecimalConversion(long value, int precision, int scale, String stringValue) { + String originalString = Long.toString(value); + BigDecimal originalValue = new BigDecimal(originalString); + BigDecimal convertedValue = DecimalUtils.binaryToDecimal(Binary.fromByteArray(originalValue.unscaledValue().toByteArray()), + precision, scale); + assertEquals(stringValue, convertedValue.toString()); + } + + @Test + public void testBinaryToDecimal() throws Exception { + // Known issue: testing Nx10^M doubles from BigDecimal.unscaledValue() always converts to Nx10 regardless of M + // Known issue: any double with precision > 17 breaks in test but not in functional testing + + // Test LONG + testDecimalConversion(Long.MAX_VALUE,19,0,"9223372036854775807"); + testDecimalConversion(Long.MIN_VALUE,19,0,"-9223372036854775808"); + testDecimalConversion(0L,0,0,"0.0"); + + // Test INTEGER + testDecimalConversion(Integer.MAX_VALUE,10,0,"2147483647"); + testDecimalConversion(Integer.MIN_VALUE,10,0,"-2147483648"); + testDecimalConversion(0,0,0,"0.0"); + + // Test DOUBLE + testDecimalConversion(12345678912345678d,17,0,"12345678912345678"); + testDecimalConversion(123456789123456.78,17,2,"123456789123456.78"); + testDecimalConversion(0.12345678912345678,17,17,"0.12345678912345678"); + testDecimalConversion(-0.000102,6,6,"-0.000102"); + } +}