Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 885EC200C0F for ; Thu, 19 Jan 2017 02:27:15 +0100 (CET) Received: by cust-asf.ponee.io (Postfix) id 871CF160B44; Thu, 19 Jan 2017 01:27:15 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 5DDF5160B5D for ; Thu, 19 Jan 2017 02:27:14 +0100 (CET) Received: (qmail 52858 invoked by uid 500); 19 Jan 2017 01:27:13 -0000 Mailing-List: contact commits-help@parquet.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@parquet.apache.org Delivered-To: mailing list commits@parquet.apache.org Received: (qmail 52542 invoked by uid 99); 19 Jan 2017 01:27:13 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 19 Jan 2017 01:27:13 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id DB575F4039; Thu, 19 Jan 2017 01:27:12 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit From: blue@apache.org To: commits@parquet.apache.org Date: Thu, 19 Jan 2017 01:27:22 -0000 Message-Id: <581a54624b6f4cf8930fdc8782d9e979@git.apache.org> In-Reply-To: <25406da3dfe343a9a44d6bc62fd223d9@git.apache.org> References: <25406da3dfe343a9a44d6bc62fd223d9@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [11/50] [abbrv] parquet-mr git commit: PARQUET-430: Change to use Locale parameterized version of String.toUpperCase()/toLowerCase archived-at: Thu, 19 Jan 2017 01:27:15 -0000 PARQUET-430: Change to use Locale parameterized version of String.toUpperCase()/toLowerCase A String is being 
converted to upper or lowercase, using the platform's default locale. This may result in improper conversions when used with international characters. For instance, "TITLE".toLowerCase() in a Turkish locale returns "tıtle", where 'ı' -- without a dot -- is the LATIN SMALL LETTER DOTLESS I character. To obtain correct results for locale-insensitive strings, we should use toLowerCase(Locale.ENGLISH). For more information on this, please see: - http://stackoverflow.com/questions/11063102/using-locales-with-javas-tolowercase-and-touppercase - http://lotusnotus.com/lotusnotus_en.nsf/dx/dotless-i-tolowercase-and-touppercase-functions-use-responsibly.htm - http://java.sys-con.com/node/46241 This PR changes our use of String.toUpperCase()/toLowerCase() to String.toUpperCase(Locale.ENGLISH)/toLowerCase(Locale.ENGLISH) Author: proflin Closes #312 from proflin/PARQUET-430 and squashes the following commits: ed55822 [proflin] PARQUET-430 Project: http://git-wip-us.apache.org/repos/asf/parquet-mr/repo Commit: http://git-wip-us.apache.org/repos/asf/parquet-mr/commit/60bbf8e7 Tree: http://git-wip-us.apache.org/repos/asf/parquet-mr/tree/60bbf8e7 Diff: http://git-wip-us.apache.org/repos/asf/parquet-mr/diff/60bbf8e7 Branch: refs/heads/parquet-1.8.x Commit: 60bbf8e7f488f85c5df033fa167960dd3817d1ed Parents: 7ec373d Author: proflin Authored: Mon Feb 15 16:35:33 2016 -0800 Committer: Ryan Blue Committed: Mon Jan 9 16:54:53 2017 -0800 ---------------------------------------------------------------------- .../org/apache/parquet/filter2/predicate/Operators.java | 9 +++++---- .../src/main/java/org/apache/parquet/schema/GroupType.java | 3 ++- .../java/org/apache/parquet/schema/MessageTypeParser.java | 5 +++-- .../main/java/org/apache/parquet/schema/PrimitiveType.java | 3 ++- .../parquet/hadoop/metadata/CompressionCodecName.java | 4 +++- 5 files changed, 15 insertions(+), 9 deletions(-) ---------------------------------------------------------------------- 
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/60bbf8e7/parquet-column/src/main/java/org/apache/parquet/filter2/predicate/Operators.java ---------------------------------------------------------------------- diff --git a/parquet-column/src/main/java/org/apache/parquet/filter2/predicate/Operators.java b/parquet-column/src/main/java/org/apache/parquet/filter2/predicate/Operators.java index 32b4430..eca0f67 100644 --- a/parquet-column/src/main/java/org/apache/parquet/filter2/predicate/Operators.java +++ b/parquet-column/src/main/java/org/apache/parquet/filter2/predicate/Operators.java @@ -19,6 +19,7 @@ package org.apache.parquet.filter2.predicate; import java.io.Serializable; +import java.util.Locale; import org.apache.parquet.hadoop.metadata.ColumnPath; import org.apache.parquet.io.api.Binary; @@ -129,7 +130,7 @@ public final class Operators { // null in their own constructors. this.value = value; - String name = getClass().getSimpleName().toLowerCase(); + String name = getClass().getSimpleName().toLowerCase(Locale.ENGLISH); this.toString = name + "(" + column.getColumnPath().toDotString() + ", " + value + ")"; } @@ -258,7 +259,7 @@ public final class Operators { protected BinaryLogicalFilterPredicate(FilterPredicate left, FilterPredicate right) { this.left = checkNotNull(left, "left"); this.right = checkNotNull(right, "right"); - String name = getClass().getSimpleName().toLowerCase(); + String name = getClass().getSimpleName().toLowerCase(Locale.ENGLISH); this.toString = name + "(" + left + ", " + right + ")"; } @@ -386,7 +387,7 @@ public final class Operators { UserDefinedByClass(Column column, Class udpClass) { super(column); this.udpClass = checkNotNull(udpClass, "udpClass"); - String name = getClass().getSimpleName().toLowerCase(); + String name = getClass().getSimpleName().toLowerCase(Locale.ENGLISH); this.toString = name + "(" + column.getColumnPath().toDotString() + ", " + udpClass.getName() + ")"; // defensively try to instantiate the class early to 
make sure that it's possible @@ -442,7 +443,7 @@ public final class Operators { UserDefinedByInstance(Column column, U udpInstance) { super(column); this.udpInstance = checkNotNull(udpInstance, "udpInstance"); - String name = getClass().getSimpleName().toLowerCase(); + String name = getClass().getSimpleName().toLowerCase(Locale.ENGLISH); this.toString = name + "(" + column.getColumnPath().toDotString() + ", " + udpInstance + ")"; } http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/60bbf8e7/parquet-column/src/main/java/org/apache/parquet/schema/GroupType.java ---------------------------------------------------------------------- diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/GroupType.java b/parquet-column/src/main/java/org/apache/parquet/schema/GroupType.java index 027fbc0..f8404a1 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/GroupType.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/GroupType.java @@ -24,6 +24,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.List; +import java.util.Locale; import java.util.Map; import org.apache.parquet.io.InvalidRecordException; @@ -208,7 +209,7 @@ public class GroupType extends Type { @Override public void writeToStringBuilder(StringBuilder sb, String indent) { sb.append(indent) - .append(getRepetition().name().toLowerCase()) + .append(getRepetition().name().toLowerCase(Locale.ENGLISH)) .append(" group ") .append(getName()) .append(getOriginalType() == null ? 
"" : " (" + getOriginalType() +")") http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/60bbf8e7/parquet-column/src/main/java/org/apache/parquet/schema/MessageTypeParser.java ---------------------------------------------------------------------- diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/MessageTypeParser.java b/parquet-column/src/main/java/org/apache/parquet/schema/MessageTypeParser.java index 4981398..b7274c2 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/MessageTypeParser.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/MessageTypeParser.java @@ -19,6 +19,7 @@ package org.apache.parquet.schema; import java.util.Arrays; +import java.util.Locale; import java.util.StringTokenizer; import org.apache.parquet.Log; @@ -195,7 +196,7 @@ public class MessageTypeParser { private static PrimitiveTypeName asPrimitive(String t, Tokenizer st) { try { - return PrimitiveTypeName.valueOf(t.toUpperCase()); + return PrimitiveTypeName.valueOf(t.toUpperCase(Locale.ENGLISH)); } catch (IllegalArgumentException e) { throw new IllegalArgumentException("expected one of " + Arrays.toString(PrimitiveTypeName.values()) +" got " + t + " at " + st.getLocationString(), e); } @@ -203,7 +204,7 @@ public class MessageTypeParser { private static Repetition asRepetition(String t, Tokenizer st) { try { - return Repetition.valueOf(t.toUpperCase()); + return Repetition.valueOf(t.toUpperCase(Locale.ENGLISH)); } catch (IllegalArgumentException e) { throw new IllegalArgumentException("expected one of " + Arrays.toString(Repetition.values()) +" got " + t + " at " + st.getLocationString(), e); } http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/60bbf8e7/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java ---------------------------------------------------------------------- diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java 
b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java index 7988f4a..3dfffc6 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java @@ -20,6 +20,7 @@ package org.apache.parquet.schema; import java.util.Arrays; import java.util.List; +import java.util.Locale; import org.apache.parquet.column.ColumnReader; import org.apache.parquet.io.InvalidRecordException; @@ -395,7 +396,7 @@ public final class PrimitiveType extends Type { @Override public void writeToStringBuilder(StringBuilder sb, String indent) { sb.append(indent) - .append(getRepetition().name().toLowerCase()) + .append(getRepetition().name().toLowerCase(Locale.ENGLISH)) .append(" ") .append(primitive.name().toLowerCase()); if (primitive == PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY) { http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/60bbf8e7/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/CompressionCodecName.java ---------------------------------------------------------------------- diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/CompressionCodecName.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/CompressionCodecName.java index 558bea7..d03d280 100644 --- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/CompressionCodecName.java +++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/CompressionCodecName.java @@ -21,6 +21,8 @@ package org.apache.parquet.hadoop.metadata; import org.apache.parquet.format.CompressionCodec; import org.apache.parquet.hadoop.codec.CompressionCodecNotSupportedException; +import java.util.Locale; + public enum CompressionCodecName { UNCOMPRESSED(null, CompressionCodec.UNCOMPRESSED, ""), SNAPPY("org.apache.parquet.hadoop.codec.SnappyCodec", CompressionCodec.SNAPPY, ".snappy"), @@ -31,7 +33,7 @@ public enum CompressionCodecName { if (name == null) { 
return UNCOMPRESSED; } - return valueOf(name.toUpperCase()); + return valueOf(name.toUpperCase(Locale.ENGLISH)); } public static CompressionCodecName fromCompressionCodec(Class clazz) {