From commits-return-1219-archive-asf-public=cust-asf.ponee.io@parquet.apache.org Mon Jan 22 17:22:29 2018 Return-Path: X-Original-To: archive-asf-public@eu.ponee.io Delivered-To: archive-asf-public@eu.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by mx-eu-01.ponee.io (Postfix) with ESMTP id 66DE2180609 for ; Mon, 22 Jan 2018 17:22:29 +0100 (CET) Received: by cust-asf.ponee.io (Postfix) id 56168160C4D; Mon, 22 Jan 2018 16:22:29 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id A1660160C3A for ; Mon, 22 Jan 2018 17:22:27 +0100 (CET) Received: (qmail 137 invoked by uid 500); 22 Jan 2018 16:22:26 -0000 Mailing-List: contact commits-help@parquet.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@parquet.apache.org Delivered-To: mailing list commits@parquet.apache.org Received: (qmail 128 invoked by uid 99); 22 Jan 2018 16:22:26 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 22 Jan 2018 16:22:26 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id B3C48E0885; Mon, 22 Jan 2018 16:22:26 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit From: zivanfi@apache.org To: commits@parquet.apache.org Message-Id: <04170dc2d99048a7b2c97d209a32aa62@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: parquet-mr git commit: PARQUET-1170: Logical-type-based toString for proper representeation in tools/logs Date: Mon, 22 Jan 2018 16:22:26 +0000 (UTC) Repository: parquet-mr Updated Branches: refs/heads/master 878ebcd0b -> 89aeec028 PARQUET-1170: Logical-type-based toString for proper representeation in tools/logs Author: Gabor Szadovszky Closes #448 from gszadovszky/PARQUET-1170 and 
squashes the following commits: 8f1f8cc [Gabor Szadovszky] PARQUET-1170: Make interval test more readable 90f73b5 [Gabor Szadovszky] PARQUET-1170: Fix endianess of interval 612d70b [Gabor Szadovszky] PARQUET-1170: Add unit test for different locale d8c5204 [Gabor Szadovszky] PARQUET-1170: Implement toString based on logical type so values will be represented properly in tools/logs etc. Project: http://git-wip-us.apache.org/repos/asf/parquet-mr/repo Commit: http://git-wip-us.apache.org/repos/asf/parquet-mr/commit/89aeec02 Tree: http://git-wip-us.apache.org/repos/asf/parquet-mr/tree/89aeec02 Diff: http://git-wip-us.apache.org/repos/asf/parquet-mr/diff/89aeec02 Branch: refs/heads/master Commit: 89aeec028b6f56be96b9c56c2fdbb931f80853ad Parents: 878ebcd Author: Gabor Szadovszky Authored: Mon Jan 22 17:21:27 2018 +0100 Committer: Zoltan Ivanfi Committed: Mon Jan 22 17:21:27 2018 +0100 ---------------------------------------------------------------------- .../main/java/org/apache/parquet/cli/Util.java | 40 +-- .../column/statistics/BinaryStatistics.java | 5 +- .../column/statistics/BooleanStatistics.java | 5 + .../column/statistics/DoubleStatistics.java | 4 +- .../column/statistics/FloatStatistics.java | 4 +- .../column/statistics/IntStatistics.java | 5 +- .../column/statistics/LongStatistics.java | 5 +- .../parquet/column/statistics/Statistics.java | 13 +- .../org/apache/parquet/schema/OriginalType.java | 60 +++- .../parquet/schema/PrimitiveStringifier.java | 360 +++++++++++++++++++ .../apache/parquet/schema/PrimitiveType.java | 10 +- .../column/statistics/TestStatistics.java | 46 ++- .../schema/TestPrimitiveStringifier.java | 298 +++++++++++++++ .../parquet/tools/command/DumpCommand.java | 34 +- 14 files changed, 791 insertions(+), 98 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/89aeec02/parquet-cli/src/main/java/org/apache/parquet/cli/Util.java 
---------------------------------------------------------------------- diff --git a/parquet-cli/src/main/java/org/apache/parquet/cli/Util.java b/parquet-cli/src/main/java/org/apache/parquet/cli/Util.java index 04b3901..98bc1e5 100644 --- a/parquet-cli/src/main/java/org/apache/parquet/cli/Util.java +++ b/parquet-cli/src/main/java/org/apache/parquet/cli/Util.java @@ -27,15 +27,12 @@ import org.apache.commons.codec.binary.Hex; import org.apache.parquet.column.ColumnDescriptor; import org.apache.parquet.column.Encoding; import org.apache.parquet.column.EncodingStats; -import org.apache.parquet.column.statistics.BinaryStatistics; -import org.apache.parquet.column.statistics.BooleanStatistics; import org.apache.parquet.column.statistics.Statistics; import org.apache.parquet.hadoop.metadata.CompressionCodecName; import org.apache.parquet.schema.MessageType; import org.apache.parquet.schema.OriginalType; import org.apache.parquet.schema.PrimitiveType; import org.apache.parquet.schema.Type; -import java.nio.charset.StandardCharsets; import java.util.Set; import static org.apache.parquet.column.Encoding.BIT_PACKED; @@ -90,46 +87,15 @@ public class Util { if (!stats.hasNonNullValue()) { return ""; } - // TODO: use original types when showing decimal, timestamp, etc. - if (stats instanceof BinaryStatistics) { - byte[] minBytes = stats.getMinBytes(); - byte[] maxBytes = stats.getMaxBytes(); - return String.format("%s / %s", - printable(minBytes, annotation == OriginalType.UTF8, 30), - printable(maxBytes, annotation == OriginalType.UTF8, 30)); - } else { - return String.format("%s / %s", stats.minAsString(), stats.maxAsString()); - } + return String.format("%s / %s", humanReadable(stats.minAsString(), 30), humanReadable(stats.maxAsString(), 30)); } public static String toString(Statistics stats, long count, OriginalType annotation) { if (stats == null) { return "no stats"; } - // TODO: use original types when showing decimal, timestamp, etc. 
- if (stats instanceof BooleanStatistics) { - return String.format("nulls: %d/%d", stats.getNumNulls(), count); - } else if (stats instanceof BinaryStatistics) { - byte[] minBytes = stats.getMinBytes(); - byte[] maxBytes = stats.getMaxBytes(); - return String.format("min: %s max: %s nulls: %d/%d", - printable(minBytes, annotation == OriginalType.UTF8, 30), - printable(maxBytes, annotation == OriginalType.UTF8, 30), - stats.getNumNulls(), count); - } else { - return String.format("min: %s max: %s nulls: %d/%d", - stats.minAsString(), stats.maxAsString(), stats.getNumNulls(), count); - } - } - - private static String printable(byte[] bytes, boolean isUtf8, int len) { - if (bytes == null) { - return "null"; - } else if (isUtf8) { - return humanReadable(new String(bytes, StandardCharsets.UTF_8), len); - } else { - return humanReadable(bytes, len); - } + return String.format("min: %s max: %s nulls: %d/%d", + humanReadable(stats.minAsString(), 30), humanReadable(stats.maxAsString(), 30), stats.getNumNulls(), count); } public static String humanReadable(String str, int len) { http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/89aeec02/parquet-column/src/main/java/org/apache/parquet/column/statistics/BinaryStatistics.java ---------------------------------------------------------------------- diff --git a/parquet-column/src/main/java/org/apache/parquet/column/statistics/BinaryStatistics.java b/parquet-column/src/main/java/org/apache/parquet/column/statistics/BinaryStatistics.java index a68285b..8ffb585 100644 --- a/parquet-column/src/main/java/org/apache/parquet/column/statistics/BinaryStatistics.java +++ b/parquet-column/src/main/java/org/apache/parquet/column/statistics/BinaryStatistics.java @@ -94,9 +94,8 @@ public class BinaryStatistics extends Statistics { } @Override - String toString(Binary value) { - // TODO: have separate toString for different logical types? - return value == null ? 
"null" : value.toStringUsingUTF8(); + String stringify(Binary value) { + return stringifier.stringify(value); } @Override http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/89aeec02/parquet-column/src/main/java/org/apache/parquet/column/statistics/BooleanStatistics.java ---------------------------------------------------------------------- diff --git a/parquet-column/src/main/java/org/apache/parquet/column/statistics/BooleanStatistics.java b/parquet-column/src/main/java/org/apache/parquet/column/statistics/BooleanStatistics.java index 0e77b61..917fb5a 100644 --- a/parquet-column/src/main/java/org/apache/parquet/column/statistics/BooleanStatistics.java +++ b/parquet-column/src/main/java/org/apache/parquet/column/statistics/BooleanStatistics.java @@ -88,6 +88,11 @@ public class BooleanStatistics extends Statistics { } @Override + String stringify(Boolean value) { + return stringifier.stringify(value); + } + + @Override public boolean isSmallerThan(long size) { return !hasNonNullValue() || (2 < size); } http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/89aeec02/parquet-column/src/main/java/org/apache/parquet/column/statistics/DoubleStatistics.java ---------------------------------------------------------------------- diff --git a/parquet-column/src/main/java/org/apache/parquet/column/statistics/DoubleStatistics.java b/parquet-column/src/main/java/org/apache/parquet/column/statistics/DoubleStatistics.java index 0dd067b..fb58263 100644 --- a/parquet-column/src/main/java/org/apache/parquet/column/statistics/DoubleStatistics.java +++ b/parquet-column/src/main/java/org/apache/parquet/column/statistics/DoubleStatistics.java @@ -88,8 +88,8 @@ public class DoubleStatistics extends Statistics { } @Override - String toString(Double value) { - return String.format("%.5f", value); + String stringify(Double value) { + return stringifier.stringify(value); } @Override 
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/89aeec02/parquet-column/src/main/java/org/apache/parquet/column/statistics/FloatStatistics.java ---------------------------------------------------------------------- diff --git a/parquet-column/src/main/java/org/apache/parquet/column/statistics/FloatStatistics.java b/parquet-column/src/main/java/org/apache/parquet/column/statistics/FloatStatistics.java index 36836c6..c731dcf 100644 --- a/parquet-column/src/main/java/org/apache/parquet/column/statistics/FloatStatistics.java +++ b/parquet-column/src/main/java/org/apache/parquet/column/statistics/FloatStatistics.java @@ -89,8 +89,8 @@ public class FloatStatistics extends Statistics { } @Override - String toString(Float value) { - return String.format("%.5f", value); + String stringify(Float value) { + return stringifier.stringify(value); } @Override http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/89aeec02/parquet-column/src/main/java/org/apache/parquet/column/statistics/IntStatistics.java ---------------------------------------------------------------------- diff --git a/parquet-column/src/main/java/org/apache/parquet/column/statistics/IntStatistics.java b/parquet-column/src/main/java/org/apache/parquet/column/statistics/IntStatistics.java index 5df7f0a..ef68f69 100644 --- a/parquet-column/src/main/java/org/apache/parquet/column/statistics/IntStatistics.java +++ b/parquet-column/src/main/java/org/apache/parquet/column/statistics/IntStatistics.java @@ -88,9 +88,8 @@ public class IntStatistics extends Statistics { } @Override - String toString(Integer value) { - // TODO: implement unsigned int as required - return value.toString(); + String stringify(Integer value) { + return stringifier.stringify(value); } @Override http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/89aeec02/parquet-column/src/main/java/org/apache/parquet/column/statistics/LongStatistics.java ---------------------------------------------------------------------- diff --git 
a/parquet-column/src/main/java/org/apache/parquet/column/statistics/LongStatistics.java b/parquet-column/src/main/java/org/apache/parquet/column/statistics/LongStatistics.java index fd6d19c..d112afb 100644 --- a/parquet-column/src/main/java/org/apache/parquet/column/statistics/LongStatistics.java +++ b/parquet-column/src/main/java/org/apache/parquet/column/statistics/LongStatistics.java @@ -88,9 +88,8 @@ public class LongStatistics extends Statistics { } @Override - String toString(Long value) { - // TODO: implement unsigned int as required - return value.toString(); + String stringify(Long value) { + return stringifier.stringify(value); } @Override http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/89aeec02/parquet-column/src/main/java/org/apache/parquet/column/statistics/Statistics.java ---------------------------------------------------------------------- diff --git a/parquet-column/src/main/java/org/apache/parquet/column/statistics/Statistics.java b/parquet-column/src/main/java/org/apache/parquet/column/statistics/Statistics.java index 6eb2381..00d0bbf 100644 --- a/parquet-column/src/main/java/org/apache/parquet/column/statistics/Statistics.java +++ b/parquet-column/src/main/java/org/apache/parquet/column/statistics/Statistics.java @@ -19,11 +19,10 @@ package org.apache.parquet.column.statistics; import java.util.Arrays; -import java.util.Objects; - import org.apache.parquet.column.UnknownColumnTypeException; import org.apache.parquet.io.api.Binary; import org.apache.parquet.schema.PrimitiveComparator; +import org.apache.parquet.schema.PrimitiveStringifier; import org.apache.parquet.schema.PrimitiveType; import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName; import org.apache.parquet.schema.Type; @@ -40,10 +39,12 @@ public abstract class Statistics> { private final PrimitiveComparator comparator; private boolean hasNonNullValue; private long num_nulls; + final PrimitiveStringifier stringifier; Statistics(PrimitiveType type) { this.type = type; 
this.comparator = type.comparator(); + this.stringifier = type.stringifier(); hasNonNullValue = false; num_nulls = 0; } @@ -287,19 +288,17 @@ public abstract class Statistics> { * Returns the string representation of min for debugging/logging purposes. */ public String minAsString() { - return toString(genericGetMin()); + return stringify(genericGetMin()); } /** * Returns the string representation of max for debugging/logging purposes. */ public String maxAsString() { - return toString(genericGetMax()); + return stringify(genericGetMax()); } - String toString(T value) { - return Objects.toString(value); - } + abstract String stringify(T value); /** * Abstract method to return whether the min and max values fit in the given http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/89aeec02/parquet-column/src/main/java/org/apache/parquet/schema/OriginalType.java ---------------------------------------------------------------------- diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/OriginalType.java b/parquet-column/src/main/java/org/apache/parquet/schema/OriginalType.java index 77acc54..b00ae7e 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/OriginalType.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/OriginalType.java @@ -21,24 +21,46 @@ package org.apache.parquet.schema; public enum OriginalType { MAP, LIST, - UTF8, + UTF8(PrimitiveStringifier.UTF8_STRINGIFIER), MAP_KEY_VALUE, - ENUM, - DECIMAL, - DATE, - TIME_MILLIS, - TIME_MICROS, - TIMESTAMP_MILLIS, - TIMESTAMP_MICROS, - UINT_8, - UINT_16, - UINT_32, - UINT_64, - INT_8, - INT_16, - INT_32, - INT_64, - JSON, - BSON, - INTERVAL; + ENUM(PrimitiveStringifier.UTF8_STRINGIFIER), + DECIMAL { + @Override + PrimitiveStringifier stringifier(PrimitiveType type) { + return PrimitiveStringifier.createDecimalStringifier(type.getDecimalMetadata().getScale()); + } + }, + DATE(PrimitiveStringifier.DATE_STRINGIFIER), + TIME_MILLIS(PrimitiveStringifier.TIME_STRINGIFIER), + 
TIME_MICROS(PrimitiveStringifier.TIME_STRINGIFIER), + TIMESTAMP_MILLIS(PrimitiveStringifier.TIMESTAMP_MILLIS_STRINGIFIER), + TIMESTAMP_MICROS(PrimitiveStringifier.TIMESTAMP_MICROS_STRINGIFIER), + UINT_8(PrimitiveStringifier.UNSIGNED_STRINGIFIER), + UINT_16(PrimitiveStringifier.UNSIGNED_STRINGIFIER), + UINT_32(PrimitiveStringifier.UNSIGNED_STRINGIFIER), + UINT_64(PrimitiveStringifier.UNSIGNED_STRINGIFIER), + INT_8(PrimitiveStringifier.DEFAULT_STRINGIFIER), + INT_16(PrimitiveStringifier.DEFAULT_STRINGIFIER), + INT_32(PrimitiveStringifier.DEFAULT_STRINGIFIER), + INT_64(PrimitiveStringifier.DEFAULT_STRINGIFIER), + JSON(PrimitiveStringifier.UTF8_STRINGIFIER), + BSON(PrimitiveStringifier.DEFAULT_STRINGIFIER), + INTERVAL(PrimitiveStringifier.INTERVAL_STRINGIFIER); + + private final PrimitiveStringifier stringifier; + + PrimitiveStringifier stringifier(PrimitiveType type) { + if (stringifier == null) { + throw new UnsupportedOperationException("Stringifier is not supported for the original type: " + this); + } + return stringifier; + } + + OriginalType() { + this(null); + } + + OriginalType(PrimitiveStringifier stringifier) { + this.stringifier = stringifier; + } } http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/89aeec02/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveStringifier.java ---------------------------------------------------------------------- diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveStringifier.java b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveStringifier.java new file mode 100644 index 0000000..c1a9b58 --- /dev/null +++ b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveStringifier.java @@ -0,0 +1,360 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.parquet.schema; + +import static java.util.concurrent.TimeUnit.HOURS; +import static java.util.concurrent.TimeUnit.MICROSECONDS; +import static java.util.concurrent.TimeUnit.MILLISECONDS; +import static java.util.concurrent.TimeUnit.MINUTES; +import static java.util.concurrent.TimeUnit.SECONDS; + +import java.math.BigDecimal; +import java.math.BigInteger; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.text.SimpleDateFormat; +import java.util.TimeZone; +import java.util.concurrent.TimeUnit; + +import javax.naming.OperationNotSupportedException; + +import org.apache.parquet.io.api.Binary; + +/** + * Class that provides string representations for the primitive values. These string values are to be used for + * logging/debugging purposes. The method {@code stringify} is overloaded for each primitive type. The overloaded + * methods not implemented for the related types throw {@link UnsupportedOperationException}. 
+ */ +public abstract class PrimitiveStringifier { + private final String name; + + private PrimitiveStringifier(String name) { + this.name = name; + } + + @Override + public final String toString() { + return name; + } + + /** + * @param value + * the value to be stringified + * @return the string representation for {@code value} + * @throws UnsupportedOperationException + * if value type is not supported by this stringifier + */ + public String stringify(boolean value) { + throw new UnsupportedOperationException( + "stringify(boolean) was called on a non-boolean stringifier: " + toString()); + } + + /** + * @param value + * the value to be stringified + * @return the string representation for {@code value} + * @throws UnsupportedOperationException + * if value type is not supported by this stringifier + */ + public String stringify(int value) { + throw new UnsupportedOperationException("stringify(int) was called on a non-int stringifier: " + toString()); + } + + /** + * @param value + * the value to be stringified + * @return the string representation for {@code value} + * @throws UnsupportedOperationException + * if value type is not supported by this stringifier + */ + public String stringify(long value) { + throw new UnsupportedOperationException("stringify(long) was called on a non-long stringifier: " + toString()); + } + + /** + * @param value + * the value to be stringified + * @return the string representation for {@code value} + * @throws UnsupportedOperationException + * if value type is not supported by this stringifier + */ + public String stringify(float value) { + throw new UnsupportedOperationException( + "stringify(float) was called on a non-float stringifier: " + toString()); + } + + /** + * @param value + * the value to be stringified + * @return the string representation for {@code value} + * @throws UnsupportedOperationException + * if value type is not supported by this stringifier + */ + public String stringify(double value) { + throw new 
UnsupportedOperationException( + "stringify(double) was called on a non-double stringifier: " + toString()); + } + + /** + * @param value + * the value to be stringified + * @return the string representation for {@code value} + * @throws UnsupportedOperationException + * if value type is not supported by this stringifier + */ + public String stringify(Binary value) { + throw new UnsupportedOperationException( + "stringify(Binary) was called on a non-Binary stringifier: " + toString()); + } + + private static final String BINARY_NULL = "null"; + private static final String BINARY_HEXA_PREFIX = "0x"; + private static final String BINARY_INVALID = ""; + + static abstract class BinaryStringifierBase extends PrimitiveStringifier { + private BinaryStringifierBase(String name) { + super(name); + } + + @Override + public final String stringify(Binary value) { + return value == null ? BINARY_NULL : stringifyNotNull(value); + } + + abstract String stringifyNotNull(Binary value); + } + + static final PrimitiveStringifier DEFAULT_STRINGIFIER = new BinaryStringifierBase("DEFAULT_STRINGIFIER") { + private final char[] digits = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; + + @Override + public String stringify(boolean value) { + return Boolean.toString(value); + } + + @Override + public String stringify(int value) { + return Integer.toString(value); + } + + @Override + public String stringify(long value) { + return Long.toString(value); + } + + @Override + public String stringify(float value) { + return Float.toString(value); + } + + @Override + public String stringify(double value) { + return Double.toString(value); + } + + @Override + String stringifyNotNull(Binary value) { + ByteBuffer buffer = value.toByteBuffer(); + StringBuilder builder = new StringBuilder(2 + buffer.remaining() * 2); + builder.append(BINARY_HEXA_PREFIX); + for (int i = buffer.position(), n = buffer.limit(); i < n; ++i) { + byte b = buffer.get(i); + 
builder.append(digits[(b >>> 4) & 0x0F]); + builder.append(digits[b & 0x0F]); + } + return builder.toString(); + } + }; + + static final PrimitiveStringifier UNSIGNED_STRINGIFIER = new PrimitiveStringifier("UNSIGNED_STRINGIFIER") { + private static final long INT_MASK = 0x00000000FFFFFFFFl; + + // Implemented based on com.google.common.primitives.UnsignedInts.toString(int, int) + @Override + public String stringify(int value) { + return Long.toString(value & INT_MASK); + } + + // Implemented based on com.google.common.primitives.UnsignedLongs.toString(long, int) + @Override + public String stringify(long value) { + if (value == 0) { + // Simply return "0" + return "0"; + } else if (value > 0) { + return Long.toString(value); + } else { + char[] buf = new char[64]; + int i = buf.length; + // Split x into high-order and low-order halves. + // Individual digits are generated from the bottom half into which + // bits are moved continuously from the top half. + long top = value >>> 32; + long bot = (value & INT_MASK) + ((top % 10) << 32); + top /= 10; + while ((bot > 0) || (top > 0)) { + buf[--i] = Character.forDigit((int) (bot % 10), 10); + bot = (bot / 10) + ((top % 10) << 32); + top /= 10; + } + // Generate string + return new String(buf, i, buf.length - i); + } + } + }; + + static final PrimitiveStringifier UTF8_STRINGIFIER = new BinaryStringifierBase("UTF8_STRINGIFIER") { + @Override + String stringifyNotNull(Binary value) { + return value.toStringUsingUTF8(); + } + }; + + static final PrimitiveStringifier INTERVAL_STRINGIFIER = new BinaryStringifierBase("INTERVAL_STRINGIFIER") { + @Override + String stringifyNotNull(Binary value) { + if (value.length() != 12) { + return BINARY_INVALID; + } + ByteBuffer buffer = value.toByteBuffer().order(ByteOrder.LITTLE_ENDIAN); + int pos = buffer.position(); + String months = UNSIGNED_STRINGIFIER.stringify(buffer.getInt(pos)); + String days = UNSIGNED_STRINGIFIER.stringify(buffer.getInt(pos + 4)); + String millis = 
UNSIGNED_STRINGIFIER.stringify(buffer.getInt(pos + 8)); + return "interval(" + months + " months, " + days + " days, " + millis + " millis)"; + } + }; + + private static class DateStringifier extends PrimitiveStringifier { + private final SimpleDateFormat formatter; + private static final TimeZone UTC = TimeZone.getTimeZone("utc"); + + private DateStringifier(String name, String format) { + super(name); + formatter = new SimpleDateFormat(format); + formatter.setTimeZone(UTC); + } + + @Override + public String stringify(int value) { + return toFormattedString(toMillis(value)); + } + + @Override + public String stringify(long value) { + return toFormattedString(toMillis(value)); + } + + private String toFormattedString(long millis) { + return formatter.format(millis); + } + + long toMillis(int value) { + // throw the related unsupported exception + super.stringify(value); + return 0; + } + + long toMillis(long value) { + // throw the related unsupported exception + super.stringify(value); + return 0; + } + } + + static final PrimitiveStringifier DATE_STRINGIFIER = new DateStringifier("DATE_STRINGIFIER", "yyyy-MM-dd") { + @Override + long toMillis(int value) { + return TimeUnit.DAYS.toMillis(value); + }; + }; + + static final PrimitiveStringifier TIMESTAMP_MILLIS_STRINGIFIER = new DateStringifier( + "TIMESTAMP_MILLIS_STRINGIFIER", "yyyy-MM-dd'T'HH:mm:ss.SSS") { + @Override + long toMillis(long value) { + return value; + } + }; + + static final PrimitiveStringifier TIMESTAMP_MICROS_STRINGIFIER = new DateStringifier( + "TIMESTAMP_MICROS_STRINGIFIER", "yyyy-MM-dd'T'HH:mm:ss.SSS") { + @Override + public String stringify(long value) { + return super.stringify(value) + String.format("%03d", Math.abs(value % 1000)); + } + + @Override + long toMillis(long value) { + return value / 1000; + } + }; + + static final PrimitiveStringifier TIME_STRINGIFIER = new PrimitiveStringifier("TIME_STRINGIFIER") { + @Override + public String stringify(int millis) { + return 
toTimeString(millis, MILLISECONDS); + } + + @Override + public String stringify(long micros) { + return toTimeString(micros, MICROSECONDS); + } + + private String toTimeString(long duration, TimeUnit unit) { + String format = "%02d:%02d:%02d.%0" + (unit == MILLISECONDS ? "3d" : "6d"); + return String.format(format, + unit.toHours(duration), + convert(duration, unit, MINUTES, HOURS), + convert(duration, unit, SECONDS, MINUTES), + convert(duration, unit, unit, SECONDS)); + } + + private long convert(long duration, TimeUnit from, TimeUnit to, TimeUnit higher) { + return Math.abs(to.convert(duration, from) % to.convert(1, higher)); + } + }; + + static PrimitiveStringifier createDecimalStringifier(final int scale) { + return new BinaryStringifierBase("DECIMAL_STRINGIFIER(scale: " + scale + ")") { + @Override + public String stringify(int value) { + return stringifyWithScale(BigInteger.valueOf(value)); + } + + @Override + public String stringify(long value) { + return stringifyWithScale(BigInteger.valueOf(value)); + } + + @Override + String stringifyNotNull(Binary value) { + try { + return stringifyWithScale(new BigInteger(value.getBytesUnsafe())); + } catch (NumberFormatException e) { + return BINARY_INVALID; + } + } + + private String stringifyWithScale(BigInteger i) { + return new BigDecimal(i, scale).toString(); + } + }; + } +} http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/89aeec02/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java ---------------------------------------------------------------------- diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java index 2d7491f..a421173 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java @@ -377,7 +377,6 @@ public final class PrimitiveType extends Type { abstract public T 
convert(PrimitiveTypeNameConverter converter) throws E; abstract PrimitiveComparator comparator(OriginalType logicalType); - } private final PrimitiveTypeName primitive; @@ -732,4 +731,13 @@ public final class PrimitiveType extends Type { public ColumnOrder columnOrder() { return columnOrder; } + + /** + * @return the {@link Type} specific stringifier for generating the proper string representation of the values. + */ + @SuppressWarnings("unchecked") + public PrimitiveStringifier stringifier() { + OriginalType originalType = getOriginalType(); + return originalType == null ? PrimitiveStringifier.DEFAULT_STRINGIFIER : originalType.stringifier(this); + } } http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/89aeec02/parquet-column/src/test/java/org/apache/parquet/column/statistics/TestStatistics.java ---------------------------------------------------------------------- diff --git a/parquet-column/src/test/java/org/apache/parquet/column/statistics/TestStatistics.java b/parquet-column/src/test/java/org/apache/parquet/column/statistics/TestStatistics.java index 476fbb3..8ca1ca6 100644 --- a/parquet-column/src/test/java/org/apache/parquet/column/statistics/TestStatistics.java +++ b/parquet-column/src/test/java/org/apache/parquet/column/statistics/TestStatistics.java @@ -21,10 +21,15 @@ package org.apache.parquet.column.statistics; import static org.junit.Assert.*; import java.nio.ByteBuffer; +import java.util.Locale; import org.junit.Test; import org.apache.parquet.io.api.Binary; +import org.apache.parquet.schema.OriginalType; +import org.apache.parquet.schema.PrimitiveType; +import org.apache.parquet.schema.Types; +import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName; public class TestStatistics { private int[] integerArray; @@ -253,7 +258,7 @@ public class TestStatistics { assertEquals(statsFromBytesMinMax.getMin(), Float.MIN_VALUE, 1e-10); // Test print formatting - assertEquals(stats.toString(), String.format("min: %.5f, max: %.5f, num_nulls: %d", 
0.00010, 553.59998, 0)); + assertEquals("min: 1.0E-4, max: 553.6, num_nulls: 0", stats.toString()); } @Test @@ -321,7 +326,25 @@ public class TestStatistics { assertEquals(statsFromBytesMinMax.getMin(), Double.MIN_VALUE, 1e-10); // Test print formatting - assertEquals(stats.toString(), String.format("min: %.5f, max: %.5f, num_nulls: %d", 0.00001, 944.50000, 0)); + assertEquals("min: 1.0E-5, max: 944.5, num_nulls: 0", stats.toString()); + } + + @Test + public void testFloatingPointStringIndependentFromLocale() { + Statistics floatStats = Statistics.createStats(Types.optional(PrimitiveTypeName.FLOAT).named("test-float")); + floatStats.updateStats(123.456f); + Statistics doubleStats = Statistics.createStats(Types.optional(PrimitiveTypeName.DOUBLE).named("test-double")); + doubleStats.updateStats(12345.6789); + + Locale defaultLocale = Locale.getDefault(); + try { + // Set the locale to French where the decimal separator would be ',' instead of '.' + Locale.setDefault(Locale.FRENCH); + assertEquals("min: 123.456, max: 123.456, num_nulls: 0", floatStats.toString()); + assertEquals("min: 12345.6789, max: 12345.6789, num_nulls: 0", doubleStats.toString()); + } finally { + Locale.setDefault(defaultLocale); + } } @Test @@ -376,23 +399,24 @@ public class TestStatistics { public void testBinaryMinMax() { //Test basic max/min stringArray = new String[] {"hello", "world", "this", "is", "a", "test", "of", "the", "stats", "class"}; - BinaryStatistics stats = new BinaryStatistics(); + PrimitiveType type = Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named("test_binary_utf8"); + BinaryStatistics stats = (BinaryStatistics) Statistics.createStats(type); for (String s: stringArray) { stats.updateStats(Binary.fromString(s)); } - assertEquals(stats.getMax(), Binary.fromString("world")); - assertEquals(stats.getMin(), Binary.fromString("a")); + assertEquals(stats.genericGetMax(), Binary.fromString("world")); + assertEquals(stats.genericGetMin(), Binary.fromString("a")); 
// Test empty string stringArray = new String[] {"", "", "", "", ""}; - BinaryStatistics statsEmpty = new BinaryStatistics(); + BinaryStatistics statsEmpty = (BinaryStatistics) Statistics.createStats(type); for (String s: stringArray) { statsEmpty.updateStats(Binary.fromString(s)); } - assertEquals(statsEmpty.getMax(), Binary.fromString("")); - assertEquals(statsEmpty.getMin(), Binary.fromString("")); + assertEquals(statsEmpty.genericGetMax(), Binary.fromString("")); + assertEquals(statsEmpty.genericGetMin(), Binary.fromString("")); // Test converting to and from byte[] byte[] stringMaxBytes = stats.getMaxBytes(); @@ -401,11 +425,11 @@ public class TestStatistics { assertEquals(new String(stringMaxBytes), "world"); assertEquals(new String(stringMinBytes), "a"); - BinaryStatistics statsFromBytes = new BinaryStatistics(); + BinaryStatistics statsFromBytes = (BinaryStatistics) Statistics.createStats(type); statsFromBytes.setMinMaxFromBytes(stringMinBytes, stringMaxBytes); - assertEquals(statsFromBytes.getMax(), Binary.fromString("world")); - assertEquals(statsFromBytes.getMin(), Binary.fromString("a")); + assertEquals(statsFromBytes.genericGetMax(), Binary.fromString("world")); + assertEquals(statsFromBytes.genericGetMin(), Binary.fromString("a")); // Test print formatting assertEquals(stats.toString(), "min: a, max: world, num_nulls: 0"); http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/89aeec02/parquet-column/src/test/java/org/apache/parquet/schema/TestPrimitiveStringifier.java ---------------------------------------------------------------------- diff --git a/parquet-column/src/test/java/org/apache/parquet/schema/TestPrimitiveStringifier.java b/parquet-column/src/test/java/org/apache/parquet/schema/TestPrimitiveStringifier.java new file mode 100644 index 0000000..53045cf --- /dev/null +++ b/parquet-column/src/test/java/org/apache/parquet/schema/TestPrimitiveStringifier.java @@ -0,0 +1,298 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + 
* or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.parquet.schema; + +import static java.nio.charset.StandardCharsets.UTF_8; +import static java.util.concurrent.TimeUnit.HOURS; +import static java.util.concurrent.TimeUnit.MICROSECONDS; +import static java.util.concurrent.TimeUnit.MILLISECONDS; +import static java.util.concurrent.TimeUnit.MINUTES; +import static java.util.concurrent.TimeUnit.SECONDS; +import static org.apache.parquet.schema.PrimitiveStringifier.DATE_STRINGIFIER; +import static org.apache.parquet.schema.PrimitiveStringifier.DEFAULT_STRINGIFIER; +import static org.apache.parquet.schema.PrimitiveStringifier.INTERVAL_STRINGIFIER; +import static org.apache.parquet.schema.PrimitiveStringifier.TIME_STRINGIFIER; +import static org.apache.parquet.schema.PrimitiveStringifier.UNSIGNED_STRINGIFIER; +import static org.apache.parquet.schema.PrimitiveStringifier.UTF8_STRINGIFIER; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.fail; + +import java.math.BigInteger; +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.Calendar; +import java.util.HashSet; +import java.util.Set; +import java.util.TimeZone; +import java.util.concurrent.TimeUnit; + +import org.apache.parquet.io.api.Binary; +import org.junit.Test; + 
+public class TestPrimitiveStringifier { + + private static final TimeZone UTC = TimeZone.getTimeZone("UTC"); + + @Test + public void testDefaultStringifier() { + PrimitiveStringifier stringifier = DEFAULT_STRINGIFIER; + + assertEquals("true", stringifier.stringify(true)); + assertEquals("false", stringifier.stringify(false)); + + assertEquals("0.0", stringifier.stringify(0.0)); + assertEquals("123456.7891234567", stringifier.stringify(123456.7891234567)); + assertEquals("-98765.43219876543", stringifier.stringify(-98765.43219876543)); + + assertEquals("0.0", stringifier.stringify(0.0f)); + assertEquals("987.6543", stringifier.stringify(987.6543f)); + assertEquals("-123.4567", stringifier.stringify(-123.4567f)); + + assertEquals("0", stringifier.stringify(0)); + assertEquals("1234567890", stringifier.stringify(1234567890)); + assertEquals("-987654321", stringifier.stringify(-987654321)); + + assertEquals("0", stringifier.stringify(0l)); + assertEquals("1234567890123456789", stringifier.stringify(1234567890123456789l)); + assertEquals("-987654321987654321", stringifier.stringify(-987654321987654321l)); + + assertEquals("null", stringifier.stringify(null)); + assertEquals("0x", stringifier.stringify(Binary.EMPTY)); + assertEquals("0x0123456789ABCDEF", stringifier.stringify(Binary.fromConstantByteArray( + new byte[] { 0x01, 0x23, 0x45, 0x67, (byte) 0x89, (byte) 0xAB, (byte) 0xCD, (byte) 0xEF }))); + } + + @Test + public void testUnsignedStringifier() { + PrimitiveStringifier stringifier = UNSIGNED_STRINGIFIER; + + assertEquals("0", stringifier.stringify(0)); + assertEquals("2147483647", stringifier.stringify(2147483647)); + assertEquals("4294967295", stringifier.stringify(0xFFFFFFFF)); + + assertEquals("0", stringifier.stringify(0l)); + assertEquals("9223372036854775807", stringifier.stringify(9223372036854775807l)); + assertEquals("18446744073709551615", stringifier.stringify(0xFFFFFFFFFFFFFFFFl)); + + checkThrowingUnsupportedException(stringifier, Integer.TYPE, 
Long.TYPE); + } + + @Test + public void testUTF8Stringifier() { + PrimitiveStringifier stringifier = UTF8_STRINGIFIER; + + assertEquals("null", stringifier.stringify(null)); + assertEquals("", stringifier.stringify(Binary.EMPTY)); + assertEquals("This is a UTF-8 test", stringifier.stringify(Binary.fromString("This is a UTF-8 test"))); + assertEquals("これはUTF-8のテストです", + stringifier.stringify(Binary.fromConstantByteArray("これはUTF-8のテストです".getBytes(UTF_8)))); + + checkThrowingUnsupportedException(stringifier, Binary.class); + } + + @Test + public void testIntervalStringifier() { + PrimitiveStringifier stringifier = INTERVAL_STRINGIFIER; + + assertEquals("null", stringifier.stringify(null)); + + assertEquals("", stringifier.stringify(Binary.EMPTY)); + assertEquals("", + stringifier.stringify(Binary.fromConstantByteArray(new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 }))); + assertEquals("", + stringifier.stringify(Binary.fromReusedByteArray(new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13 }))); + + ByteBuffer buffer = ByteBuffer.allocate(12); + assertEquals("interval(0 months, 0 days, 0 millis)", + stringifier.stringify(Binary.fromConstantByteBuffer(buffer))); + + buffer.putInt(0x03000000); + buffer.putInt(0x06000000); + buffer.putInt(0x09000000); + buffer.flip(); + assertEquals("interval(3 months, 6 days, 9 millis)", + stringifier.stringify(Binary.fromConstantByteBuffer(buffer))); + + buffer.clear(); + buffer.putInt(0xFFFFFFFF); + buffer.putInt(0xFEFFFFFF); + buffer.putInt(0xFDFFFFFF); + buffer.flip(); + assertEquals("interval(4294967295 months, 4294967294 days, 4294967293 millis)", + stringifier.stringify(Binary.fromReusedByteBuffer(buffer))); + + checkThrowingUnsupportedException(stringifier, Binary.class); + } + + @Test + public void testDateStringifier() { + PrimitiveStringifier stringifier = DATE_STRINGIFIER; + + assertEquals("1970-01-01", stringifier.stringify(0)); + + Calendar cal = Calendar.getInstance(UTC); + cal.clear(); + cal.set(2017, 
Calendar.DECEMBER, 14); + assertEquals("2017-12-14", stringifier.stringify((int) MILLISECONDS.toDays(cal.getTimeInMillis()))); + + cal.clear(); + cal.set(1492, Calendar.AUGUST, 3); + assertEquals("1492-08-03", stringifier.stringify((int) MILLISECONDS.toDays(cal.getTimeInMillis()))); + + checkThrowingUnsupportedException(stringifier, Integer.TYPE); + } + + @Test + public void testTimestampMillisStringifier() { + PrimitiveStringifier stringifier = PrimitiveStringifier.TIMESTAMP_MILLIS_STRINGIFIER; + + assertEquals("1970-01-01T00:00:00.000", stringifier.stringify(0l)); + + Calendar cal = Calendar.getInstance(UTC); + cal.clear(); + cal.set(2017, Calendar.DECEMBER, 15, 10, 9, 54); + cal.set(Calendar.MILLISECOND, 120); + assertEquals("2017-12-15T10:09:54.120", stringifier.stringify(cal.getTimeInMillis())); + + cal.clear(); + cal.set(1948, Calendar.NOVEMBER, 23, 20, 19, 1); + cal.set(Calendar.MILLISECOND, 9); + assertEquals("1948-11-23T20:19:01.009", stringifier.stringify(cal.getTimeInMillis())); + + checkThrowingUnsupportedException(stringifier, Long.TYPE); + } + + @Test + public void testTimestampMicrosStringifier() { + PrimitiveStringifier stringifier = PrimitiveStringifier.TIMESTAMP_MICROS_STRINGIFIER; + + assertEquals("1970-01-01T00:00:00.000000", stringifier.stringify(0l)); + + Calendar cal = Calendar.getInstance(UTC); + cal.clear(); + cal.set(2053, Calendar.JULY, 10, 22, 13, 24); + cal.set(Calendar.MILLISECOND, 84); + long micros = cal.getTimeInMillis() * 1000 + 900; + assertEquals("2053-07-10T22:13:24.084900", stringifier.stringify(micros)); + + cal.clear(); + cal.set(1848, Calendar.MARCH, 15, 9, 23, 59); + cal.set(Calendar.MILLISECOND, 765); + micros = cal.getTimeInMillis() * 1000 - 1; + assertEquals("1848-03-15T09:23:59.765001", stringifier.stringify(micros)); + + checkThrowingUnsupportedException(stringifier, Long.TYPE); + } + + @Test + public void testTimeStringifier() { + PrimitiveStringifier stringifier = TIME_STRINGIFIER; + + assertEquals("00:00:00.000", 
stringifier.stringify(0)); + assertEquals("00:00:00.000000", stringifier.stringify(0l)); + + assertEquals("12:34:56.789", stringifier.stringify((int) convert(MILLISECONDS, 12, 34, 56, 789))); + assertEquals("12:34:56.789012", stringifier.stringify(convert(MICROSECONDS, 12, 34, 56, 789012))); + + assertEquals("-12:34:56.789", stringifier.stringify((int) convert(MILLISECONDS, -12, -34, -56, -789))); + assertEquals("-12:34:56.789012", stringifier.stringify(convert(MICROSECONDS, -12, -34, -56, -789012))); + + assertEquals("123:12:34.567", stringifier.stringify((int) convert(MILLISECONDS, 123, 12, 34, 567))); + assertEquals("12345:12:34.056789", stringifier.stringify(convert(MICROSECONDS, 12345, 12, 34, 56789))); + + assertEquals("-123:12:34.567", stringifier.stringify((int) convert(MILLISECONDS, -123, -12, -34, -567))); + assertEquals("-12345:12:34.056789", stringifier.stringify(convert(MICROSECONDS, -12345, -12, -34, -56789))); + + checkThrowingUnsupportedException(stringifier, Integer.TYPE, Long.TYPE); + } + + private long convert(TimeUnit unit, long hours, long minutes, long seconds, long rest) { + return unit.convert(hours, HOURS) + unit.convert(minutes, MINUTES) + unit.convert(seconds, SECONDS) + rest; + } + + @Test + public void testDecimalStringifier() { + PrimitiveStringifier stringifier = PrimitiveStringifier.createDecimalStringifier(4); + + assertEquals("0.0000", stringifier.stringify(0)); + assertEquals("123456.7890", stringifier.stringify(1234567890)); + assertEquals("-98765.4321", stringifier.stringify(-987654321)); + + assertEquals("0.0000", stringifier.stringify(0l)); + assertEquals("123456789012345.6789", stringifier.stringify(1234567890123456789l)); + assertEquals("-98765432109876.5432", stringifier.stringify(-987654321098765432l)); + + assertEquals("null", stringifier.stringify(null)); + assertEquals("", stringifier.stringify(Binary.EMPTY)); + assertEquals("0.0000", stringifier.stringify(Binary.fromReusedByteArray(new byte[] { 0 }))); + 
assertEquals("9876543210987654321098765432109876543210987654.3210", stringifier.stringify(Binary + .fromConstantByteArray(new BigInteger("98765432109876543210987654321098765432109876543210").toByteArray()))); + assertEquals("-1234567890123456789012345678901234567890123456.7890", stringifier.stringify(Binary + .fromConstantByteArray(new BigInteger("-12345678901234567890123456789012345678901234567890").toByteArray()))); + + checkThrowingUnsupportedException(stringifier, Integer.TYPE, Long.TYPE, Binary.class); + } + + private void checkThrowingUnsupportedException(PrimitiveStringifier stringifier, Class<?>... excludes) { + Set<Class<?>> set = new HashSet<>(Arrays.asList(excludes)); + if (!set.contains(Integer.TYPE)) { + try { + stringifier.stringify(0); + fail("An UnsupportedOperationException should have been thrown"); + } catch (UnsupportedOperationException e) { + } + } + if (!set.contains(Long.TYPE)) { + try { + stringifier.stringify(0l); + fail("An UnsupportedOperationException should have been thrown"); + } catch (UnsupportedOperationException e) { + } + } + if (!set.contains(Float.TYPE)) { + try { + stringifier.stringify(0.0f); + fail("An UnsupportedOperationException should have been thrown"); + } catch (UnsupportedOperationException e) { + } + } + if (!set.contains(Double.TYPE)) { + try { + stringifier.stringify(0.0); + fail("An UnsupportedOperationException should have been thrown"); + } catch (UnsupportedOperationException e) { + } + } + if (!set.contains(Boolean.TYPE)) { + try { + stringifier.stringify(false); + fail("An UnsupportedOperationException should have been thrown"); + } catch (UnsupportedOperationException e) { + } + } + if (!set.contains(Binary.class)) { + try { + stringifier.stringify(Binary.EMPTY); + fail("An UnsupportedOperationException should have been thrown"); + } catch (UnsupportedOperationException e) { + } + } + } + +} 
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/89aeec02/parquet-tools/src/main/java/org/apache/parquet/tools/command/DumpCommand.java ---------------------------------------------------------------------- diff --git a/parquet-tools/src/main/java/org/apache/parquet/tools/command/DumpCommand.java b/parquet-tools/src/main/java/org/apache/parquet/tools/command/DumpCommand.java index 6cb12fa..26b5562 100644 --- a/parquet-tools/src/main/java/org/apache/parquet/tools/command/DumpCommand.java +++ b/parquet-tools/src/main/java/org/apache/parquet/tools/command/DumpCommand.java @@ -57,6 +57,7 @@ import org.apache.parquet.io.api.Converter; import org.apache.parquet.io.api.GroupConverter; import org.apache.parquet.io.api.PrimitiveConverter; import org.apache.parquet.schema.MessageType; +import org.apache.parquet.schema.PrimitiveStringifier; import org.apache.parquet.tools.util.MetadataUtils; import org.apache.parquet.tools.util.PrettyPrintWriter; import org.apache.parquet.tools.util.PrettyPrintWriter.WhiteSpaceHandler; @@ -309,16 +310,29 @@ public class DumpCommand extends ArgsOnlyCommand { out.format("value %d: R:%d D:%d V:", offset+i, rlvl, dlvl); if (dlvl == dmax) { - switch (column.getType()) { - case BINARY: out.format("%s", binaryToString(creader.getBinary())); break; - case BOOLEAN: out.format("%s", creader.getBoolean()); break; - case DOUBLE: out.format("%s", creader.getDouble()); break; - case FLOAT: out.format("%s", creader.getFloat()); break; - case INT32: out.format("%s", creader.getInteger()); break; - case INT64: out.format("%s", creader.getLong()); break; - case INT96: out.format("%s", binaryToBigInteger(creader.getBinary())); break; - case FIXED_LEN_BYTE_ARRAY: out.format("%s", binaryToString(creader.getBinary())); break; - } + PrimitiveStringifier stringifier = column.getPrimitiveType().stringifier(); + switch (column.getType()) { + case FIXED_LEN_BYTE_ARRAY: + case INT96: + case BINARY: + out.print(stringifier.stringify(creader.getBinary())); + break; 
+ case BOOLEAN: + out.print(stringifier.stringify(creader.getBoolean())); + break; + case DOUBLE: + out.print(stringifier.stringify(creader.getDouble())); + break; + case FLOAT: + out.print(stringifier.stringify(creader.getFloat())); + break; + case INT32: + out.print(stringifier.stringify(creader.getInteger())); + break; + case INT64: + out.print(stringifier.stringify(creader.getLong())); + break; + } } else { out.format(""); }