hawq-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From odiache...@apache.org
Subject incubator-hawq git commit: HAWQ-992. PXF Hive data type check in Fragmenter too restrictive.
Date Wed, 24 Aug 2016 23:55:07 GMT
Repository: incubator-hawq
Updated Branches:
  refs/heads/HAWQ-992 [created] 24f5e363d


HAWQ-992. PXF Hive data type check in Fragmenter too restrictive.


Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/24f5e363
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/24f5e363
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/24f5e363

Branch: refs/heads/HAWQ-992
Commit: 24f5e363d48f13108cb86750a5a5f2e76844a2f3
Parents: c2280de
Author: Oleksandr Diachenko <odiachenko@pivotal.io>
Authored: Wed Aug 24 16:54:55 2016 -0700
Committer: Oleksandr Diachenko <odiachenko@pivotal.io>
Committed: Wed Aug 24 16:54:55 2016 -0700

----------------------------------------------------------------------
 .../pxf/api/utilities/ColumnDescriptor.java     | 15 ++++-
 .../hawq/pxf/api/utilities/EnumHawqType.java    | 49 ++++++++------
 .../plugins/hive/HiveColumnarSerdeResolver.java |  3 +-
 .../plugins/hive/HiveInputFormatFragmenter.java | 70 ++------------------
 .../hive/utilities/EnumHiveToHawqType.java      | 12 ++++
 .../plugins/hive/utilities/HiveUtilities.java   | 43 ++++++++++++
 .../pxf/service/utilities/ProtocolData.java     | 16 ++++-
 7 files changed, 116 insertions(+), 92 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/24f5e363/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/utilities/ColumnDescriptor.java
----------------------------------------------------------------------
diff --git a/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/utilities/ColumnDescriptor.java b/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/utilities/ColumnDescriptor.java
index baaca1d..ff85672 100644
--- a/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/utilities/ColumnDescriptor.java
+++ b/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/utilities/ColumnDescriptor.java
@@ -30,6 +30,7 @@ public class ColumnDescriptor {
     String gpdbColumnName;
     String gpdbColumnTypeName;
     int gpdbColumnIndex;
+    String[] gpdbColumnTypeModifiers;
 
     /**
      * Reserved word for a table record key.
@@ -44,12 +45,14 @@ public class ColumnDescriptor {
      * @param typecode OID
      * @param index column index
      * @param typename type name
+     * @param typemods type modifiers
      */
-    public ColumnDescriptor(String name, int typecode, int index, String typename) {
+    public ColumnDescriptor(String name, int typecode, int index, String typename, String[] typemods) {
         gpdbColumnTypeCode = typecode;
         gpdbColumnTypeName = typename;
         gpdbColumnName = name;
         gpdbColumnIndex = index;
+        gpdbColumnTypeModifiers = typemods;
     }
 
     /**
@@ -62,6 +65,9 @@ public class ColumnDescriptor {
         this.gpdbColumnName = copy.gpdbColumnName;
         this.gpdbColumnIndex = copy.gpdbColumnIndex;
         this.gpdbColumnTypeName = copy.gpdbColumnTypeName;
+        System.arraycopy(this.gpdbColumnTypeModifiers, 0,
+                copy.gpdbColumnTypeModifiers, 0,
+                this.gpdbColumnTypeModifiers.length);
     }
 
     public String columnName() {
@@ -80,6 +86,10 @@ public class ColumnDescriptor {
         return gpdbColumnTypeName;
     }
 
+    public String[] columnTypeModifiers() {
+        return gpdbColumnTypeModifiers;
+    }
+
     /**
      * Returns <tt>true</tt> if {@link #gpdbColumnName} is a {@link #RECORD_KEY_NAME}.
      *
@@ -94,6 +104,7 @@ public class ColumnDescriptor {
 		return "ColumnDescriptor [gpdbColumnTypeCode=" + gpdbColumnTypeCode
 				+ ", gpdbColumnName=" + gpdbColumnName
 				+ ", gpdbColumnTypeName=" + gpdbColumnTypeName
-				+ ", gpdbColumnIndex=" + gpdbColumnIndex + "]";
+				+ ", gpdbColumnIndex=" + gpdbColumnIndex
+				+ ", gpdbColumnTypeModifiers=" + gpdbColumnTypeModifiers + "]";
 	}
 }

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/24f5e363/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/utilities/EnumHawqType.java
----------------------------------------------------------------------
diff --git a/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/utilities/EnumHawqType.java b/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/utilities/EnumHawqType.java
index b5a94c6..01d40f0 100644
--- a/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/utilities/EnumHawqType.java
+++ b/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/utilities/EnumHawqType.java
@@ -20,6 +20,8 @@
 package org.apache.hawq.pxf.api.utilities;
 
 import java.io.IOException;
+
+import org.apache.hawq.pxf.api.io.DataType;
 import org.codehaus.jackson.JsonGenerator;
 import org.codehaus.jackson.map.JsonSerializer;
 import org.codehaus.jackson.map.annotate.JsonSerialize;
@@ -43,35 +45,32 @@ class EnumHawqTypeSerializer extends JsonSerializer<EnumHawqType> {
  */
 @JsonSerialize(using = EnumHawqTypeSerializer.class)
 public enum EnumHawqType {
-    Int2Type("int2"),
-    Int4Type("int4"),
-    Int8Type("int8"),
-    Float4Type("float4"),
-    Float8Type("float8"),
-    TextType("text"),
-    VarcharType("varchar", (byte) 1, true),
-    ByteaType("bytea"),
-    DateType("date"),
-    TimestampType("timestamp"),
-    BoolType("bool"),
-    NumericType("numeric", (byte) 2, true),
-    BpcharType("bpchar", (byte) 1, true);
+    Int2Type("int2", DataType.SMALLINT),
+    Int4Type("int4", DataType.INTEGER),
+    Int8Type("int8", DataType.BIGINT),
+    Float4Type("float4", DataType.REAL),
+    Float8Type("float8", DataType.FLOAT8),
+    TextType("text", DataType.TEXT),
+    VarcharType("varchar", DataType.VARCHAR, (byte) 1, true),
+    ByteaType("bytea", DataType.BYTEA),
+    DateType("date", DataType.DATE),
+    TimestampType("timestamp", DataType.TIMESTAMP),
+    BoolType("bool", DataType.BOOLEAN),
+    NumericType("numeric", DataType.NUMERIC, (byte) 2, true),
+    BpcharType("bpchar", DataType.BPCHAR, (byte) 1, true);
 
+    private DataType dataType;
     private String typeName;
     private byte modifiersNum;
     private boolean validateIntegerModifiers;
 
-    EnumHawqType(String typeName) {
+    EnumHawqType(String typeName, DataType dataType) {
         this.typeName = typeName;
+        this.dataType = dataType;
     }
 
-    EnumHawqType(String typeName, byte modifiersNum) {
-        this(typeName);
-        this.modifiersNum = modifiersNum;
-    }
-
-    EnumHawqType(String typeName, byte modifiersNum, boolean validateIntegerModifiers) {
-        this(typeName);
+    EnumHawqType(String typeName, DataType dataType, byte modifiersNum, boolean validateIntegerModifiers) {
+        this(typeName, dataType);
         this.modifiersNum = modifiersNum;
         this.validateIntegerModifiers = validateIntegerModifiers;
     }
@@ -99,6 +98,14 @@ public enum EnumHawqType {
     public boolean getValidateIntegerModifiers() {
         return this.validateIntegerModifiers;
     }
+
+    /**
+     * 
+     * @return data type
+     */
+    public DataType getDataType() {
+        return this.dataType;
+    }
 }
 
 

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/24f5e363/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveColumnarSerdeResolver.java
----------------------------------------------------------------------
diff --git a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveColumnarSerdeResolver.java b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveColumnarSerdeResolver.java
index d298bac..43e3b65 100644
--- a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveColumnarSerdeResolver.java
+++ b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveColumnarSerdeResolver.java
@@ -28,6 +28,7 @@ import org.apache.hawq.pxf.api.io.DataType;
 import org.apache.hawq.pxf.api.utilities.ColumnDescriptor;
 import org.apache.hawq.pxf.api.utilities.InputData;
 import org.apache.hawq.pxf.api.utilities.Utilities;
+import org.apache.hawq.pxf.plugins.hive.utilities.HiveUtilities;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -126,7 +127,7 @@ public class HiveColumnarSerdeResolver extends HiveResolver {
         for (int i = 0; i < numberOfDataColumns; i++) {
             ColumnDescriptor column = input.getColumn(i);
             String columnName = column.columnName();
-            String columnType = HiveInputFormatFragmenter.toHiveType(DataType.get(column.columnTypeCode()), columnName);
+            String columnType = HiveUtilities.toHiveType(DataType.get(column.columnTypeCode()));
             columnNames.append(delim).append(columnName);
             columnTypes.append(delim).append(columnType);
             delim = ",";

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/24f5e363/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveInputFormatFragmenter.java
----------------------------------------------------------------------
diff --git a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveInputFormatFragmenter.java b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveInputFormatFragmenter.java
index a666b8b..b944206 100644
--- a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveInputFormatFragmenter.java
+++ b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveInputFormatFragmenter.java
@@ -26,6 +26,8 @@ import org.apache.hawq.pxf.api.UserDataException;
 import org.apache.hawq.pxf.api.io.DataType;
 import org.apache.hawq.pxf.api.utilities.ColumnDescriptor;
 import org.apache.hawq.pxf.api.utilities.InputData;
+import org.apache.hawq.pxf.plugins.hive.utilities.EnumHiveToHawqType;
+import org.apache.hawq.pxf.plugins.hive.utilities.HiveUtilities;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
@@ -146,82 +148,18 @@ public class HiveInputFormatFragmenter extends HiveDataFragmenter {
         for (FieldSchema hiveCol : hiveColumns) {
             ColumnDescriptor colDesc = inputData.getColumn(index++);
             DataType colType = DataType.get(colDesc.columnTypeCode());
-            compareTypes(colType, hiveCol.getType(), colDesc.columnName());
+            HiveUtilities.compareTypes(colType, hiveCol.getType(), colDesc.columnName());
         }
         // check partition fields
         List<FieldSchema> hivePartitions = tbl.getPartitionKeys();
         for (FieldSchema hivePart : hivePartitions) {
             ColumnDescriptor colDesc = inputData.getColumn(index++);
             DataType colType = DataType.get(colDesc.columnTypeCode());
-            compareTypes(colType, hivePart.getType(), colDesc.columnName());
+            HiveUtilities.compareTypes(colType, hivePart.getType(), colDesc.columnName());
         }
 
     }
 
-    private void compareTypes(DataType type, String hiveType, String fieldName) {
-        String convertedHive = toHiveType(type, fieldName);
-        if (!convertedHive.equals(hiveType)
-                && !(convertedHive.equals("smallint") && hiveType.equals("tinyint"))) {
-            throw new UnsupportedTypeException(
-                    "Schema mismatch definition: Field " + fieldName
-                            + " (Hive type " + hiveType + ", HAWQ type "
-                            + type.toString() + ")");
-        }
-        if (LOG.isDebugEnabled()) {
-            LOG.debug("Field " + fieldName + ": Hive type " + hiveType
-                    + ", HAWQ type " + type.toString());
-        }
-    }
-
-    /**
-     * Converts HAWQ type to hive type. The supported mappings are:<ul>
-     * <li>{@code BOOLEAN -> boolean}</li>
-     * <li>{@code SMALLINT -> smallint (tinyint is converted to smallint)}</li>
-     * <li>{@code BIGINT -> bigint}</li>
-     * <li>{@code TIMESTAMP, TIME -> timestamp}</li>
-     * <li>{@code NUMERIC -> decimal}</li>
-     * <li>{@code BYTEA -> binary}</li>
-     * <li>{@code INTERGER -> int}</li>
-     * <li>{@code TEXT -> string}</li>
-     * <li>{@code REAL -> float}</li>
-     * <li>{@code FLOAT8 -> double}</li>
-     * </ul>
-     * All other types (both in HAWQ and in HIVE) are not supported.
-     *
-     * @param type HAWQ data type
-     * @param name field name
-     * @return Hive type
-     * @throws UnsupportedTypeException if type is not supported
-     */
-    public static String toHiveType(DataType type, String name) {
-        switch (type) {
-            case BOOLEAN:
-            case SMALLINT:
-            case BIGINT:
-            case TIMESTAMP:
-                return type.toString().toLowerCase();
-            case NUMERIC:
-                return "decimal";
-            case BYTEA:
-                return "binary";
-            case INTEGER:
-                return "int";
-            case TEXT:
-                return "string";
-            case REAL:
-                return "float";
-            case FLOAT8:
-                return "double";
-            case TIME:
-                return "timestamp";
-            default:
-                throw new UnsupportedTypeException(
-                        type.toString()
-                                + " conversion is not supported by HiveInputFormatFragmenter (Field "
-                                + name + ")");
-        }
-    }
-
     /*
      * Validates that partition format corresponds to PXF supported formats and
      * transforms the class name to an enumeration for writing it to the

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/24f5e363/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/utilities/EnumHiveToHawqType.java
----------------------------------------------------------------------
diff --git a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/utilities/EnumHiveToHawqType.java b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/utilities/EnumHiveToHawqType.java
index a747bd5..1cedaa8 100644
--- a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/utilities/EnumHiveToHawqType.java
+++ b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/utilities/EnumHiveToHawqType.java
@@ -19,6 +19,7 @@
 
 package org.apache.hawq.pxf.plugins.hive.utilities;
 
+import org.apache.hawq.pxf.api.io.DataType;
 import org.apache.hawq.pxf.api.utilities.EnumHawqType;
 import org.apache.hawq.pxf.api.UnsupportedTypeException;
 
@@ -110,4 +111,15 @@ public enum EnumHiveToHawqType {
                 + hiveType + " to HAWQ's type");
     }
 
+    public static EnumHiveToHawqType getHawqToHiveType(DataType dataType) {
+
+        for (EnumHiveToHawqType t : values()) {
+
+            if (t.getHawqType().getDataType().equals(dataType)) {
+                return t;
+            }
+        }
+        throw new UnsupportedTypeException("Unable to map HAWQ's type: "
+                + dataType + " to Hive's type");
+    }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/24f5e363/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/utilities/HiveUtilities.java
----------------------------------------------------------------------
diff --git a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/utilities/HiveUtilities.java b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/utilities/HiveUtilities.java
index 096c0ff..579ab0b 100644
--- a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/utilities/HiveUtilities.java
+++ b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/utilities/HiveUtilities.java
@@ -36,6 +36,7 @@ import org.apache.hadoop.hive.metastore.api.Table;
 import org.apache.hawq.pxf.api.Metadata;
 import org.apache.hawq.pxf.api.UnsupportedTypeException;
 import org.apache.hawq.pxf.api.utilities.EnumHawqType;
+import org.apache.hawq.pxf.api.io.DataType;
 import org.apache.hawq.pxf.plugins.hive.utilities.EnumHiveToHawqType;
 
 /**
@@ -256,4 +257,46 @@ public class HiveUtilities {
             throw new RuntimeException("Failed connecting to Hive MetaStore service: " + cause.getMessage(), cause);
         }
     }
+
+
+    /**
+     * Converts HAWQ type to hive type. The supported mappings are:<ul>
+     * <li>{@code BOOLEAN -> boolean}</li>
+     * <li>{@code SMALLINT -> smallint (tinyint is converted to smallint)}</li>
+     * <li>{@code BIGINT -> bigint}</li>
+     * <li>{@code TIMESTAMP, TIME -> timestamp}</li>
+     * <li>{@code NUMERIC -> decimal}</li>
+     * <li>{@code BYTEA -> binary}</li>
+     * <li>{@code INTERGER -> int}</li>
+     * <li>{@code TEXT -> string}</li>
+     * <li>{@code REAL -> float}</li>
+     * <li>{@code FLOAT8 -> double}</li>
+     * </ul>
+     * All other types (both in HAWQ and in HIVE) are not supported.
+     *
+     * @param type HAWQ data type
+     * @param name field name
+     * @return Hive type
+     * @throws UnsupportedTypeException if type is not supported
+     */
+    public static String toHiveType(DataType type) {
+
+        EnumHiveToHawqType hiveToHawqType = EnumHiveToHawqType.getHawqToHiveType(type);
+        return hiveToHawqType.getTypeName();
+    }
+
+    public static void compareTypes(DataType type, String hiveType, String columnName) {
+        String convertedHive = toHiveType(type);
+        if (!convertedHive.equals(hiveType)
+                && !(convertedHive.equals("smallint") && hiveType.equals("tinyint"))) {
+            throw new UnsupportedTypeException(
+                    "Schema mismatch definition:" 
+                            + " (Hive type " + hiveType + ", HAWQ type "
+                            + type.toString() + ")");
+        }
+        if (LOG.isDebugEnabled()) {
+            LOG.debug(" Hive type " + hiveType
+                    + ", HAWQ type " + type.toString());
+        }
+    }
 }

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/24f5e363/pxf/pxf-service/src/main/java/org/apache/hawq/pxf/service/utilities/ProtocolData.java
----------------------------------------------------------------------
diff --git a/pxf/pxf-service/src/main/java/org/apache/hawq/pxf/service/utilities/ProtocolData.java b/pxf/pxf-service/src/main/java/org/apache/hawq/pxf/service/utilities/ProtocolData.java
index 0337937..5e6f6c4 100644
--- a/pxf/pxf-service/src/main/java/org/apache/hawq/pxf/service/utilities/ProtocolData.java
+++ b/pxf/pxf-service/src/main/java/org/apache/hawq/pxf/service/utilities/ProtocolData.java
@@ -29,7 +29,6 @@ import org.apache.commons.lang.StringUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.security.UserGroupInformation;
-
 import org.apache.hawq.pxf.api.OutputFormat;
 import org.apache.hawq.pxf.api.utilities.ColumnDescriptor;
 import org.apache.hawq.pxf.api.utilities.InputData;
@@ -390,9 +389,10 @@ public class ProtocolData extends InputData {
             String columnName = getProperty("ATTR-NAME" + i);
             int columnTypeCode = getIntProperty("ATTR-TYPECODE" + i);
             String columnTypeName = getProperty("ATTR-TYPENAME" + i);
+            String[] columnTypeMods = parseTypeMods(i);
 
             ColumnDescriptor column = new ColumnDescriptor(columnName,
-                    columnTypeCode, i, columnTypeName);
+                    columnTypeCode, i, columnTypeName, columnTypeMods);
             tupleDescription.add(column);
 
             if (columnName.equalsIgnoreCase(ColumnDescriptor.RECORD_KEY_NAME)) {
@@ -401,6 +401,18 @@ public class ProtocolData extends InputData {
         }
     }
 
+    private String[] parseTypeMods(int columnIndex) {
+        Integer typeModeCount = Integer.parseInt(getOptionalProperty("ATTR-TYPEMOD" + columnIndex + "COUNT"));
+        String[] result = null;
+        if (typeModeCount > 0) {
+            result = new String[typeModeCount];
+            for (int i = 0; i < typeModeCount; i++) {
+                result[i] = getProperty("ATTR-TYPEMOD" + columnIndex + "-" + i);
+            }
+        }
+        return result;
+    }
+
     /**
      * Sets the index of the allocated data fragment
      *


Mime
View raw message