Repository: parquet-mr
Updated Branches:
refs/heads/master 412ab9669 -> 46448e934
PARQUET-201: Fix ValidTypeMap being overly strict with respect to OriginalTypes
Author: Alex Levenson <alexlevenson@twitter.com>
Closes #219 from isnotinvain/alexlevenson/PARQUET-201 and squashes the following commits:
1cd8b58 [Alex Levenson] Merge branch 'master' into alexlevenson/PARQUET-201
1d83e13 [Alex Levenson] Fix ValidTypeMap being overly strict with respect to OriginalTypes
Project: http://git-wip-us.apache.org/repos/asf/parquet-mr/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-mr/commit/46448e93
Tree: http://git-wip-us.apache.org/repos/asf/parquet-mr/tree/46448e93
Diff: http://git-wip-us.apache.org/repos/asf/parquet-mr/diff/46448e93
Branch: refs/heads/master
Commit: 46448e934250705b6ebd6f21caa09698d611dbfd
Parents: 412ab96
Author: Alex Levenson <alexlevenson@twitter.com>
Authored: Wed Jun 24 13:58:04 2015 -0700
Committer: Alex Levenson <alexlevenson@twitter.com>
Committed: Wed Jun 24 13:58:04 2015 -0700
----------------------------------------------------------------------
.../predicate/PrimitiveToBoxedClass.java | 50 ++++++++++
.../predicate/SchemaCompatibilityValidator.java | 13 +--
.../parquet/filter2/predicate/ValidTypeMap.java | 96 +++++---------------
.../TestSchemaCompatibilityValidator.java | 2 +-
.../filter2/predicate/TestValidTypeMap.java | 34 +++----
5 files changed, 87 insertions(+), 108 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/46448e93/parquet-column/src/main/java/org/apache/parquet/filter2/predicate/PrimitiveToBoxedClass.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/filter2/predicate/PrimitiveToBoxedClass.java
b/parquet-column/src/main/java/org/apache/parquet/filter2/predicate/PrimitiveToBoxedClass.java
new file mode 100644
index 0000000..f034f37
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/filter2/predicate/PrimitiveToBoxedClass.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.filter2.predicate;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import static org.apache.parquet.Preconditions.checkArgument;
+
+/**
+ * Converts a {@code Class<primitive>} to it's corresponding {@code Class<Boxed>},
eg
+ * {@code Class<int>} to {@code Class<Integer>}
+ */
+public class PrimitiveToBoxedClass {
+ private static final Map<Class<?>, Class<?>> primitiveToBoxed = new HashMap<Class<?>,
Class<?>>();
+
+ static {
+ primitiveToBoxed.put(boolean.class, Boolean.class);
+ primitiveToBoxed.put(byte.class, Byte.class);
+ primitiveToBoxed.put(short.class, Short.class);
+ primitiveToBoxed.put(char.class, Character.class);
+ primitiveToBoxed.put(int.class, Integer.class);
+ primitiveToBoxed.put(long.class, Long.class);
+ primitiveToBoxed.put(float.class, Float.class);
+ primitiveToBoxed.put(double.class, Double.class);
+ }
+
+ public static Class<?> get(Class<?> c) {
+ checkArgument(c.isPrimitive(), "Class " + c + " is not primitive!");
+ return primitiveToBoxed.get(c);
+ }
+
+ private PrimitiveToBoxedClass() { }
+}
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/46448e93/parquet-column/src/main/java/org/apache/parquet/filter2/predicate/SchemaCompatibilityValidator.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/filter2/predicate/SchemaCompatibilityValidator.java
b/parquet-column/src/main/java/org/apache/parquet/filter2/predicate/SchemaCompatibilityValidator.java
index d473841..d4e1211 100644
--- a/parquet-column/src/main/java/org/apache/parquet/filter2/predicate/SchemaCompatibilityValidator.java
+++ b/parquet-column/src/main/java/org/apache/parquet/filter2/predicate/SchemaCompatibilityValidator.java
@@ -22,7 +22,6 @@ import java.util.HashMap;
import java.util.Map;
import org.apache.parquet.column.ColumnDescriptor;
-import org.apache.parquet.hadoop.metadata.ColumnPath;
import org.apache.parquet.filter2.predicate.Operators.And;
import org.apache.parquet.filter2.predicate.Operators.Column;
import org.apache.parquet.filter2.predicate.Operators.ColumnFilterPredicate;
@@ -36,8 +35,8 @@ import org.apache.parquet.filter2.predicate.Operators.Not;
import org.apache.parquet.filter2.predicate.Operators.NotEq;
import org.apache.parquet.filter2.predicate.Operators.Or;
import org.apache.parquet.filter2.predicate.Operators.UserDefined;
+import org.apache.parquet.hadoop.metadata.ColumnPath;
import org.apache.parquet.schema.MessageType;
-import org.apache.parquet.schema.OriginalType;
import static org.apache.parquet.Preconditions.checkArgument;
import static org.apache.parquet.Preconditions.checkNotNull;
@@ -73,19 +72,11 @@ public class SchemaCompatibilityValidator implements FilterPredicate.Visitor<Voi
// we are validating that what the user provided agrees with these.
private final Map<ColumnPath, ColumnDescriptor> columnsAccordingToSchema = new HashMap<ColumnPath,
ColumnDescriptor>();
- // the original type of a column, keyed by path
- private final Map<ColumnPath, OriginalType> originalTypes = new HashMap<ColumnPath,
OriginalType>();
-
private SchemaCompatibilityValidator(MessageType schema) {
for (ColumnDescriptor cd : schema.getColumns()) {
ColumnPath columnPath = ColumnPath.get(cd.getPath());
columnsAccordingToSchema.put(columnPath, cd);
-
- OriginalType ot = schema.getType(cd.getPath()).getOriginalType();
- if (ot != null) {
- originalTypes.put(columnPath, ot);
- }
}
}
@@ -182,7 +173,7 @@ public class SchemaCompatibilityValidator implements FilterPredicate.Visitor<Voi
+ "Column " + path.toDotString() + " is repeated.");
}
- ValidTypeMap.assertTypeValid(column, descriptor.getType(), originalTypes.get(path));
+ ValidTypeMap.assertTypeValid(column, descriptor.getType());
}
private ColumnDescriptor getColumnDescriptor(ColumnPath columnPath) {
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/46448e93/parquet-column/src/main/java/org/apache/parquet/filter2/predicate/ValidTypeMap.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/main/java/org/apache/parquet/filter2/predicate/ValidTypeMap.java
b/parquet-column/src/main/java/org/apache/parquet/filter2/predicate/ValidTypeMap.java
index 4f8b10d..574604a 100644
--- a/parquet-column/src/main/java/org/apache/parquet/filter2/predicate/ValidTypeMap.java
+++ b/parquet-column/src/main/java/org/apache/parquet/filter2/predicate/ValidTypeMap.java
@@ -23,9 +23,8 @@ import java.util.HashSet;
import java.util.Map;
import java.util.Set;
-import org.apache.parquet.hadoop.metadata.ColumnPath;
import org.apache.parquet.filter2.predicate.Operators.Column;
-import org.apache.parquet.io.api.Binary;
+import org.apache.parquet.hadoop.metadata.ColumnPath;
import org.apache.parquet.schema.OriginalType;
import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
@@ -43,40 +42,36 @@ public class ValidTypeMap {
private ValidTypeMap() { }
// classToParquetType and parquetTypeToClass are used as a bi-directional map
- private static final Map<Class<?>, Set<FullTypeDescriptor>> classToParquetType
= new HashMap<Class<?>, Set<FullTypeDescriptor>>();
- private static final Map<FullTypeDescriptor, Set<Class<?>>> parquetTypeToClass
= new HashMap<FullTypeDescriptor, Set<Class<?>>>();
+ private static final Map<Class<?>, Set<PrimitiveTypeName>> classToParquetType
= new HashMap<Class<?>, Set<PrimitiveTypeName>>();
+ private static final Map<PrimitiveTypeName, Set<Class<?>>> parquetTypeToClass
= new HashMap<PrimitiveTypeName, Set<Class<?>>>();
// set up the mapping in both directions
- private static void add(Class<?> c, FullTypeDescriptor f) {
- Set<FullTypeDescriptor> descriptors = classToParquetType.get(c);
+ private static void add(Class<?> c, PrimitiveTypeName p) {
+ Set<PrimitiveTypeName> descriptors = classToParquetType.get(c);
if (descriptors == null) {
- descriptors = new HashSet<FullTypeDescriptor>();
+ descriptors = new HashSet<PrimitiveTypeName>();
classToParquetType.put(c, descriptors);
}
- descriptors.add(f);
+ descriptors.add(p);
- Set<Class<?>> classes = parquetTypeToClass.get(f);
+ Set<Class<?>> classes = parquetTypeToClass.get(p);
if (classes == null) {
classes = new HashSet<Class<?>>();
- parquetTypeToClass.put(f, classes);
+ parquetTypeToClass.put(p, classes);
}
classes.add(c);
}
static {
- // basic primitive columns
- add(Integer.class, new FullTypeDescriptor(PrimitiveTypeName.INT32, null));
- add(Long.class, new FullTypeDescriptor(PrimitiveTypeName.INT64, null));
- add(Float.class, new FullTypeDescriptor(PrimitiveTypeName.FLOAT, null));
- add(Double.class, new FullTypeDescriptor(PrimitiveTypeName.DOUBLE, null));
- add(Boolean.class, new FullTypeDescriptor(PrimitiveTypeName.BOOLEAN, null));
+ for (PrimitiveTypeName t: PrimitiveTypeName.values()) {
+ Class<?> c = t.javaType;
- // Both of these binary types are valid
- add(Binary.class, new FullTypeDescriptor(PrimitiveTypeName.BINARY, null));
- add(Binary.class, new FullTypeDescriptor(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY, null));
+ if (c.isPrimitive()) {
+ c = PrimitiveToBoxedClass.get(c);
+ }
- add(Binary.class, new FullTypeDescriptor(PrimitiveTypeName.BINARY, OriginalType.UTF8));
- add(Binary.class, new FullTypeDescriptor(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY, OriginalType.UTF8));
+ add(c, t);
+ }
}
/**
@@ -87,14 +82,12 @@ public class ValidTypeMap {
*
* @param foundColumn the column as declared by the user
* @param primitiveType the primitive type according to the schema
- * @param originalType the original type according to the schema
*/
- public static <T extends Comparable<T>> void assertTypeValid(Column<T>
foundColumn, PrimitiveTypeName primitiveType, OriginalType originalType) {
+ public static <T extends Comparable<T>> void assertTypeValid(Column<T>
foundColumn, PrimitiveTypeName primitiveType) {
Class<T> foundColumnType = foundColumn.getColumnType();
ColumnPath columnPath = foundColumn.getColumnPath();
- Set<FullTypeDescriptor> validTypeDescriptors = classToParquetType.get(foundColumnType);
- FullTypeDescriptor typeInFileMetaData = new FullTypeDescriptor(primitiveType, originalType);
+ Set<PrimitiveTypeName> validTypeDescriptors = classToParquetType.get(foundColumnType);
if (validTypeDescriptors == null) {
StringBuilder message = new StringBuilder();
@@ -105,18 +98,18 @@ public class ValidTypeMap {
.append(foundColumnType.getName())
.append(" which is not supported in FilterPredicates.");
- Set<Class<?>> supportedTypes = parquetTypeToClass.get(typeInFileMetaData);
+ Set<Class<?>> supportedTypes = parquetTypeToClass.get(primitiveType);
if (supportedTypes != null) {
message
.append(" Supported types for this column are: ")
.append(supportedTypes);
} else {
- message.append(" There are no supported types for columns of " + typeInFileMetaData);
+ message.append(" There are no supported types for columns of " + primitiveType);
}
throw new IllegalArgumentException(message.toString());
}
- if (!validTypeDescriptors.contains(typeInFileMetaData)) {
+ if (!validTypeDescriptors.contains(primitiveType)) {
StringBuilder message = new StringBuilder();
message
.append("FilterPredicate column: ")
@@ -126,53 +119,10 @@ public class ValidTypeMap {
.append(") does not match the schema found in file metadata. Column ")
.append(columnPath.toDotString())
.append(" is of type: ")
- .append(typeInFileMetaData)
+ .append(primitiveType)
.append("\nValid types for this column are: ")
- .append(parquetTypeToClass.get(typeInFileMetaData));
+ .append(parquetTypeToClass.get(primitiveType));
throw new IllegalArgumentException(message.toString());
}
}
-
- private static final class FullTypeDescriptor {
- private final PrimitiveTypeName primitiveType;
- private final OriginalType originalType;
-
- private FullTypeDescriptor(PrimitiveTypeName primitiveType, OriginalType originalType)
{
- this.primitiveType = primitiveType;
- this.originalType = originalType;
- }
-
- public PrimitiveTypeName getPrimitiveType() {
- return primitiveType;
- }
-
- public OriginalType getOriginalType() {
- return originalType;
- }
-
- @Override
- public String toString() {
- return "FullTypeDescriptor(" + "PrimitiveType: " + primitiveType + ", OriginalType:
" + originalType + ')';
- }
-
- @Override
- public boolean equals(Object o) {
- if (this == o) return true;
- if (o == null || getClass() != o.getClass()) return false;
-
- FullTypeDescriptor that = (FullTypeDescriptor) o;
-
- if (originalType != that.originalType) return false;
- if (primitiveType != that.primitiveType) return false;
-
- return true;
- }
-
- @Override
- public int hashCode() {
- int result = primitiveType != null ? primitiveType.hashCode() : 0;
- result = 31 * result + (originalType != null ? originalType.hashCode() : 0);
- return result;
- }
- }
}
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/46448e93/parquet-column/src/test/java/org/apache/parquet/filter2/predicate/TestSchemaCompatibilityValidator.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/test/java/org/apache/parquet/filter2/predicate/TestSchemaCompatibilityValidator.java
b/parquet-column/src/test/java/org/apache/parquet/filter2/predicate/TestSchemaCompatibilityValidator.java
index 455eae4..e40294d 100644
--- a/parquet-column/src/test/java/org/apache/parquet/filter2/predicate/TestSchemaCompatibilityValidator.java
+++ b/parquet-column/src/test/java/org/apache/parquet/filter2/predicate/TestSchemaCompatibilityValidator.java
@@ -112,7 +112,7 @@ public class TestSchemaCompatibilityValidator {
fail("this should throw");
} catch (IllegalArgumentException e) {
assertEquals("FilterPredicate column: x.bar's declared type (java.lang.Long) does not
match the schema found in file metadata. "
- + "Column x.bar is of type: FullTypeDescriptor(PrimitiveType: INT32, OriginalType:
null)\n"
+ + "Column x.bar is of type: INT32\n"
+ "Valid types for this column are: [class java.lang.Integer]", e.getMessage());
}
}
http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/46448e93/parquet-column/src/test/java/org/apache/parquet/filter2/predicate/TestValidTypeMap.java
----------------------------------------------------------------------
diff --git a/parquet-column/src/test/java/org/apache/parquet/filter2/predicate/TestValidTypeMap.java
b/parquet-column/src/test/java/org/apache/parquet/filter2/predicate/TestValidTypeMap.java
index 8da008f..d441369 100644
--- a/parquet-column/src/test/java/org/apache/parquet/filter2/predicate/TestValidTypeMap.java
+++ b/parquet-column/src/test/java/org/apache/parquet/filter2/predicate/TestValidTypeMap.java
@@ -61,26 +61,25 @@ public class TestValidTypeMap {
@Test
public void testValidTypes() {
- assertTypeValid(intColumn, PrimitiveTypeName.INT32, null);
- assertTypeValid(longColumn, PrimitiveTypeName.INT64, null);
- assertTypeValid(floatColumn, PrimitiveTypeName.FLOAT, null);
- assertTypeValid(doubleColumn, PrimitiveTypeName.DOUBLE, null);
- assertTypeValid(booleanColumn, PrimitiveTypeName.BOOLEAN, null);
- assertTypeValid(binaryColumn, PrimitiveTypeName.BINARY, null);
- assertTypeValid(binaryColumn, PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY, null);
- assertTypeValid(binaryColumn, PrimitiveTypeName.BINARY, OriginalType.UTF8);
- assertTypeValid(binaryColumn, PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY, OriginalType.UTF8);
+ assertTypeValid(intColumn, PrimitiveTypeName.INT32);
+ assertTypeValid(longColumn, PrimitiveTypeName.INT64);
+ assertTypeValid(floatColumn, PrimitiveTypeName.FLOAT);
+ assertTypeValid(doubleColumn, PrimitiveTypeName.DOUBLE);
+ assertTypeValid(booleanColumn, PrimitiveTypeName.BOOLEAN);
+ assertTypeValid(binaryColumn, PrimitiveTypeName.BINARY);
+ assertTypeValid(binaryColumn, PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY);
+ assertTypeValid(binaryColumn, PrimitiveTypeName.INT96);
}
@Test
public void testMismatchedTypes() {
try {
- assertTypeValid(intColumn, PrimitiveTypeName.DOUBLE, null);
+ assertTypeValid(intColumn, PrimitiveTypeName.DOUBLE);
fail("This should throw!");
} catch (IllegalArgumentException e) {
assertEquals("FilterPredicate column: int.column's declared type (java.lang.Integer)
does not match the "
+ "schema found in file metadata. Column int.column is of type: "
- + "FullTypeDescriptor(PrimitiveType: DOUBLE, OriginalType: null)\n"
+ + "DOUBLE\n"
+ "Valid types for this column are: [class java.lang.Double]", e.getMessage());
}
}
@@ -88,24 +87,13 @@ public class TestValidTypeMap {
@Test
public void testUnsupportedType() {
try {
- assertTypeValid(invalidColumn, PrimitiveTypeName.INT32, null);
+ assertTypeValid(invalidColumn, PrimitiveTypeName.INT32);
fail("This should throw!");
} catch (IllegalArgumentException e) {
assertEquals("Column invalid.column was declared as type: "
+ "org.apache.parquet.filter2.predicate.TestValidTypeMap$InvalidColumnType which
is not supported "
+ "in FilterPredicates. Supported types for this column are: [class java.lang.Integer]",
e.getMessage());
}
-
- try {
- assertTypeValid(invalidColumn, PrimitiveTypeName.INT32, OriginalType.UTF8);
- fail("This should throw!");
- } catch (IllegalArgumentException e) {
- assertEquals("Column invalid.column was declared as type: "
- + "org.apache.parquet.filter2.predicate.TestValidTypeMap$InvalidColumnType which
is not supported "
- + "in FilterPredicates. There are no supported types for columns of FullTypeDescriptor(PrimitiveType:
INT32, OriginalType: UTF8)",
- e.getMessage());
- }
-
}
}
|