parquet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jul...@apache.org
Subject parquet-mr git commit: PARQUET-430: Change to use Locale parameterized version of String.toUpperCase()/toLowerCase
Date Tue, 16 Feb 2016 00:35:39 GMT
Repository: parquet-mr
Updated Branches:
  refs/heads/master c26fa7881 -> 6c9ca4d4c


PARQUET-430: Change to use Locale parameterized version of String.toUpperCase()/toLowerCase

A String is being converted to upper or lowercase using the platform's default locale.
This may result in improper conversions when used with international characters.

For instance, "TITLE".toLowerCase() in a Turkish locale returns "tıtle", where 'ı' -- without
a dot -- is the LATIN SMALL LETTER DOTLESS I character. To obtain correct results for
locale-insensitive strings, we should use toLowerCase(Locale.ENGLISH).

For more information on this, please see:
- http://stackoverflow.com/questions/11063102/using-locales-with-javas-tolowercase-and-touppercase
- http://lotusnotus.com/lotusnotus_en.nsf/dx/dotless-i-tolowercase-and-touppercase-functions-use-responsibly.htm
- http://java.sys-con.com/node/46241

This PR changes our use of String.toUpperCase()/toLowerCase() to String.toUpperCase(Locale.ENGLISH)/toLowerCase(Locale.ENGLISH).

Author: proflin <proflin.me@gmail.com>

Closes #312 from proflin/PARQUET-430 and squashes the following commits:

ed55822 [proflin] PARQUET-430


Project: http://git-wip-us.apache.org/repos/asf/parquet-mr/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-mr/commit/6c9ca4d4
Tree: http://git-wip-us.apache.org/repos/asf/parquet-mr/tree/6c9ca4d4
Diff: http://git-wip-us.apache.org/repos/asf/parquet-mr/diff/6c9ca4d4

Branch: refs/heads/master
Commit: 6c9ca4d4c0de4dff29b79f28ac5c51b4f6fed0da
Parents: c26fa78
Author: proflin <proflin.me@gmail.com>
Authored: Mon Feb 15 16:35:33 2016 -0800
Committer: Julien Le Dem <julien@dremio.com>
Committed: Mon Feb 15 16:35:33 2016 -0800

----------------------------------------------------------------------
 .../org/apache/parquet/filter2/predicate/Operators.java     | 9 +++++----
 .../src/main/java/org/apache/parquet/schema/GroupType.java  | 3 ++-
 .../java/org/apache/parquet/schema/MessageTypeParser.java   | 5 +++--
 .../main/java/org/apache/parquet/schema/PrimitiveType.java  | 3 ++-
 .../parquet/hadoop/metadata/CompressionCodecName.java       | 4 +++-
 5 files changed, 15 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/6c9ca4d4/parquet-column/src/main/java/org/apache/parquet/filter2/predicate/Operators.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/filter2/predicate/Operators.java
b/parquet-column/src/main/java/org/apache/parquet/filter2/predicate/Operators.java
index 32b4430..eca0f67 100644
--- a/parquet-column/src/main/java/org/apache/parquet/filter2/predicate/Operators.java
+++ b/parquet-column/src/main/java/org/apache/parquet/filter2/predicate/Operators.java
@@ -19,6 +19,7 @@
 package org.apache.parquet.filter2.predicate;
 
 import java.io.Serializable;
+import java.util.Locale;
 
 import org.apache.parquet.hadoop.metadata.ColumnPath;
 import org.apache.parquet.io.api.Binary;
@@ -129,7 +130,7 @@ public final class Operators {
       // null in their own constructors.
       this.value = value;
 
-      String name = getClass().getSimpleName().toLowerCase();
+      String name = getClass().getSimpleName().toLowerCase(Locale.ENGLISH);
       this.toString = name + "(" + column.getColumnPath().toDotString() + ", " + value +
")";
     }
 
@@ -258,7 +259,7 @@ public final class Operators {
     protected BinaryLogicalFilterPredicate(FilterPredicate left, FilterPredicate right) {
       this.left = checkNotNull(left, "left");
       this.right = checkNotNull(right, "right");
-      String name = getClass().getSimpleName().toLowerCase();
+      String name = getClass().getSimpleName().toLowerCase(Locale.ENGLISH);
       this.toString = name + "(" + left + ", " + right + ")";
     }
 
@@ -386,7 +387,7 @@ public final class Operators {
     UserDefinedByClass(Column<T> column, Class<U> udpClass) {
       super(column);
       this.udpClass = checkNotNull(udpClass, "udpClass");
-      String name = getClass().getSimpleName().toLowerCase();
+      String name = getClass().getSimpleName().toLowerCase(Locale.ENGLISH);
       this.toString = name + "(" + column.getColumnPath().toDotString() + ", " + udpClass.getName()
+ ")";
 
       // defensively try to instantiate the class early to make sure that it's possible
@@ -442,7 +443,7 @@ public final class Operators {
     UserDefinedByInstance(Column<T> column, U udpInstance) {
       super(column);
       this.udpInstance = checkNotNull(udpInstance, "udpInstance");
-      String name = getClass().getSimpleName().toLowerCase();
+      String name = getClass().getSimpleName().toLowerCase(Locale.ENGLISH);
       this.toString = name + "(" + column.getColumnPath().toDotString() + ", " + udpInstance
+ ")";
     }
 

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/6c9ca4d4/parquet-column/src/main/java/org/apache/parquet/schema/GroupType.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/GroupType.java b/parquet-column/src/main/java/org/apache/parquet/schema/GroupType.java
index 027fbc0..f8404a1 100644
--- a/parquet-column/src/main/java/org/apache/parquet/schema/GroupType.java
+++ b/parquet-column/src/main/java/org/apache/parquet/schema/GroupType.java
@@ -24,6 +24,7 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.List;
+import java.util.Locale;
 import java.util.Map;
 
 import org.apache.parquet.io.InvalidRecordException;
@@ -208,7 +209,7 @@ public class GroupType extends Type {
   @Override
   public void writeToStringBuilder(StringBuilder sb, String indent) {
     sb.append(indent)
-        .append(getRepetition().name().toLowerCase())
+        .append(getRepetition().name().toLowerCase(Locale.ENGLISH))
         .append(" group ")
         .append(getName())
         .append(getOriginalType() == null ? "" : " (" + getOriginalType() +")")

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/6c9ca4d4/parquet-column/src/main/java/org/apache/parquet/schema/MessageTypeParser.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/MessageTypeParser.java
b/parquet-column/src/main/java/org/apache/parquet/schema/MessageTypeParser.java
index 4981398..b7274c2 100644
--- a/parquet-column/src/main/java/org/apache/parquet/schema/MessageTypeParser.java
+++ b/parquet-column/src/main/java/org/apache/parquet/schema/MessageTypeParser.java
@@ -19,6 +19,7 @@
 package org.apache.parquet.schema;
 
 import java.util.Arrays;
+import java.util.Locale;
 import java.util.StringTokenizer;
 
 import org.apache.parquet.Log;
@@ -195,7 +196,7 @@ public class MessageTypeParser {
 
   private static PrimitiveTypeName asPrimitive(String t, Tokenizer st) {
     try {
-      return PrimitiveTypeName.valueOf(t.toUpperCase());
+      return PrimitiveTypeName.valueOf(t.toUpperCase(Locale.ENGLISH));
     } catch (IllegalArgumentException e) {
       throw new IllegalArgumentException("expected one of " + Arrays.toString(PrimitiveTypeName.values())
 +" got " + t + " at " + st.getLocationString(), e);
     }
@@ -203,7 +204,7 @@ public class MessageTypeParser {
 
   private static Repetition asRepetition(String t, Tokenizer st) {
     try {
-      return Repetition.valueOf(t.toUpperCase());
+      return Repetition.valueOf(t.toUpperCase(Locale.ENGLISH));
     } catch (IllegalArgumentException e) {
       throw new IllegalArgumentException("expected one of " + Arrays.toString(Repetition.values())
 +" got " + t + " at " + st.getLocationString(), e);
     }

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/6c9ca4d4/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java
index 1cdc6c3..8056188 100644
--- a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java
+++ b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java
@@ -20,6 +20,7 @@ package org.apache.parquet.schema;
 
 import java.util.Arrays;
 import java.util.List;
+import java.util.Locale;
 
 import org.apache.parquet.column.ColumnReader;
 import org.apache.parquet.io.InvalidRecordException;
@@ -394,7 +395,7 @@ public final class PrimitiveType extends Type {
   @Override
   public void writeToStringBuilder(StringBuilder sb, String indent) {
     sb.append(indent)
-        .append(getRepetition().name().toLowerCase())
+        .append(getRepetition().name().toLowerCase(Locale.ENGLISH))
         .append(" ")
         .append(primitive.name().toLowerCase());
     if (primitive == PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY) {

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/6c9ca4d4/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/CompressionCodecName.java
----------------------------------------------------------------------
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/CompressionCodecName.java
b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/CompressionCodecName.java
index 558bea7..d03d280 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/CompressionCodecName.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/CompressionCodecName.java
@@ -21,6 +21,8 @@ package org.apache.parquet.hadoop.metadata;
 import org.apache.parquet.format.CompressionCodec;
 import org.apache.parquet.hadoop.codec.CompressionCodecNotSupportedException;
 
+import java.util.Locale;
+
 public enum CompressionCodecName {
   UNCOMPRESSED(null, CompressionCodec.UNCOMPRESSED, ""),
   SNAPPY("org.apache.parquet.hadoop.codec.SnappyCodec", CompressionCodec.SNAPPY, ".snappy"),
@@ -31,7 +33,7 @@ public enum CompressionCodecName {
      if (name == null) {
        return UNCOMPRESSED;
      }
-     return valueOf(name.toUpperCase());
+     return valueOf(name.toUpperCase(Locale.ENGLISH));
   }
 
   public static CompressionCodecName fromCompressionCodec(Class<?> clazz) {


Mime
View raw message