Subject: svn commit: r1523463 [1/5] - in /hive/trunk: common/src/java/org/apache/hadoop/hive/common/type/ common/src/test/org/apache/hadoop/hive/common/type/ data/files/ ql/src/java/org/apache/hadoop/hive/ql/exec/ ql/src/java/org/apache/hadoop/hive/ql/parse/ ql...
Date: Sun, 15 Sep 2013 17:23:55 -0000
To: commits@hive.apache.org
From: hashutosh@apache.org

Author: hashutosh
Date: Sun Sep 15 17:23:53 2013
New Revision: 1523463

URL: http://svn.apache.org/r1523463
Log:
HIVE-4844 : Add varchar data type (Jason Dere via Ashutosh Chauhan)

Added:
    hive/trunk/common/src/java/org/apache/hadoop/hive/common/type/HiveBaseChar.java
    hive/trunk/common/src/java/org/apache/hadoop/hive/common/type/HiveVarchar.java
    hive/trunk/common/src/test/org/apache/hadoop/hive/common/type/
    hive/trunk/common/src/test/org/apache/hadoop/hive/common/type/TestHiveVarchar.java
    hive/trunk/data/files/vc1.txt
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToVarchar.java
    hive/trunk/ql/src/test/queries/clientnegative/invalid_varchar_length_1.q
    hive/trunk/ql/src/test/queries/clientnegative/invalid_varchar_length_2.q
    hive/trunk/ql/src/test/queries/clientnegative/invalid_varchar_length_3.q
    hive/trunk/ql/src/test/queries/clientpositive/alter_varchar1.q
    hive/trunk/ql/src/test/queries/clientpositive/ctas_varchar.q
    hive/trunk/ql/src/test/queries/clientpositive/partition_varchar1.q
    hive/trunk/ql/src/test/queries/clientpositive/varchar_1.q
    hive/trunk/ql/src/test/queries/clientpositive/varchar_2.q
    hive/trunk/ql/src/test/queries/clientpositive/varchar_cast.q
    hive/trunk/ql/src/test/queries/clientpositive/varchar_comparison.q
    hive/trunk/ql/src/test/queries/clientpositive/varchar_join1.q
    hive/trunk/ql/src/test/queries/clientpositive/varchar_nested_types.q
    hive/trunk/ql/src/test/queries/clientpositive/varchar_udf1.q
    hive/trunk/ql/src/test/queries/clientpositive/varchar_union1.q
    hive/trunk/ql/src/test/results/clientnegative/invalid_varchar_length_1.q.out
    hive/trunk/ql/src/test/results/clientnegative/invalid_varchar_length_2.q.out
    hive/trunk/ql/src/test/results/clientnegative/invalid_varchar_length_3.q.out
    hive/trunk/ql/src/test/results/clientpositive/alter_varchar1.q.out
    hive/trunk/ql/src/test/results/clientpositive/ctas_varchar.q.out
    hive/trunk/ql/src/test/results/clientpositive/partition_varchar1.q.out
    hive/trunk/ql/src/test/results/clientpositive/varchar_1.q.out
    hive/trunk/ql/src/test/results/clientpositive/varchar_2.q.out
    hive/trunk/ql/src/test/results/clientpositive/varchar_cast.q.out
    hive/trunk/ql/src/test/results/clientpositive/varchar_comparison.q.out
    hive/trunk/ql/src/test/results/clientpositive/varchar_join1.q.out
    hive/trunk/ql/src/test/results/clientpositive/varchar_nested_types.q.out
    hive/trunk/ql/src/test/results/clientpositive/varchar_udf1.q.out
    hive/trunk/ql/src/test/results/clientpositive/varchar_union1.q.out
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/io/HiveVarcharWritable.java
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyHiveVarchar.java
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyHiveVarcharObjectInspector.java
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryHiveVarchar.java
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/HiveVarcharObjectInspector.java
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/JavaHiveVarcharObjectInspector.java
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/SettableHiveVarcharObjectInspector.java
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableConstantHiveVarcharObjectInspector.java
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableHiveVarcharObjectInspector.java
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/VarcharTypeParams.java
    hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/typeinfo/
    hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/typeinfo/TestTypeInfoUtils.java

Modified:
    hive/trunk/data/files/datatypes.txt
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/CreateTableDesc.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeConstantDesc.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/GenericUDFEncode.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToString.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBaseCompare.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcatWS.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFReflect2.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFStringToMap.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToDate.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUtils.java
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java
    hive/trunk/serde/if/serde.thrift
    hive/trunk/serde/src/gen/thrift/gen-cpp/serde_constants.cpp
    hive/trunk/serde/src/gen/thrift/gen-cpp/serde_constants.h
    hive/trunk/serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java
    hive/trunk/serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/test/ThriftTestObj.java
    hive/trunk/serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde2/thrift/test/Complex.java
    hive/trunk/serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde2/thrift/test/MegaStruct.java
    hive/trunk/serde/src/gen/thrift/gen-php/org/apache/hadoop/hive/serde/Types.php
    hive/trunk/serde/src/gen/thrift/gen-py/org_apache_hadoop_hive_serde/constants.py
    hive/trunk/serde/src/gen/thrift/gen-rb/serde_constants.rb
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroObjectInspectorGenerator.java
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDe.java
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/io/DoubleWritable.java
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyPrimitiveObjectInspectorFactory.java
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFactory.java
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorConverters.java
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/PrimitiveObjectInspector.java
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorConverter.java
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorFactory.java
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/ParameterizedPrimitiveTypeUtils.java
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfo.java
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoFactory.java
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoUtils.java

Added: hive/trunk/common/src/java/org/apache/hadoop/hive/common/type/HiveBaseChar.java
URL: http://svn.apache.org/viewvc/hive/trunk/common/src/java/org/apache/hadoop/hive/common/type/HiveBaseChar.java?rev=1523463&view=auto
==============================================================================
--- hive/trunk/common/src/java/org/apache/hadoop/hive/common/type/HiveBaseChar.java (added)
+++ hive/trunk/common/src/java/org/apache/hadoop/hive/common/type/HiveBaseChar.java Sun Sep 15 17:23:53 2013
@@ -0,0 +1,74 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.common.type;
+
+import org.apache.commons.lang.StringUtils;
+
+public abstract class HiveBaseChar {
+  protected String value;
+  protected int characterLength = -1;
+
+  protected HiveBaseChar() {
+  }
+
+  /**
+   * Sets the string value to a new value, obeying the max length defined for this object.
+   * @param val new value
+   */
+  public void setValue(String val, int maxLength) {
+    characterLength = -1;
+    value = HiveBaseChar.enforceMaxLength(val, maxLength);
+  }
+
+  public void setValue(HiveBaseChar val, int maxLength) {
+    if ((maxLength < 0)
+        || (val.characterLength > 0 && val.characterLength <= maxLength)) {
+      // No length enforcement required, or source length is less than max length.
+      // We can copy the source value as-is.
+      value = val.value;
+      this.characterLength = val.characterLength;
+    } else {
+      setValue(val.value, maxLength);
+    }
+  }
+
+  public static String enforceMaxLength(String val, int maxLength) {
+    String value = val;
+
+    if (maxLength > 0) {
+      int valLength = val.codePointCount(0, val.length());
+      if (valLength > maxLength) {
+        // Truncate the excess trailing characters to fit the character length.
+        // Also make sure we take supplementary chars into account.
+        value = val.substring(0, val.offsetByCodePoints(0, maxLength));
+      }
+    }
+    return value;
+  }
+
+  public String getValue() {
+    return value;
+  }
+
+  public int getCharacterLength() {
+    if (characterLength < 0) {
+      characterLength = value.codePointCount(0, value.length());
+    }
+    return characterLength;
+  }
+}
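[The truncation above counts Unicode code points rather than Java chars, so a supplementary character (one code point encoded as a two-char surrogate pair) is never split in half. A minimal standalone illustration of why the distinction matters -- this snippet is an editorial sketch, not part of the commit:]

    public class CodePointTruncation {
      public static void main(String[] args) {
        // U+1D11E (musical G clef) is a supplementary character:
        // one code point, but two Java chars (a surrogate pair).
        String s = "ab\uD834\uDD1Ecd";                        // 5 code points, 6 chars
        System.out.println(s.length());                       // 6
        System.out.println(s.codePointCount(0, s.length()));  // 5

        // Naive char-based truncation to 3 "characters", s.substring(0, 3),
        // would end in an unpaired high surrogate. Code-point-based
        // truncation, as in HiveBaseChar.enforceMaxLength, does not:
        String truncated = s.substring(0, s.offsetByCodePoints(0, 3));
        System.out.println(truncated);  // "ab" + the clef: 3 code points, 4 chars
      }
    }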
Added: hive/trunk/common/src/java/org/apache/hadoop/hive/common/type/HiveVarchar.java
URL: http://svn.apache.org/viewvc/hive/trunk/common/src/java/org/apache/hadoop/hive/common/type/HiveVarchar.java?rev=1523463&view=auto
==============================================================================
--- hive/trunk/common/src/java/org/apache/hadoop/hive/common/type/HiveVarchar.java (added)
+++ hive/trunk/common/src/java/org/apache/hadoop/hive/common/type/HiveVarchar.java Sun Sep 15 17:23:53 2013
@@ -0,0 +1,77 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.common.type;
+
+/**
+ *
+ * HiveVarchar.
+ * String wrapper to support SQL VARCHAR features.
+ * Max string length is enforced.
+ *
+ */
+public class HiveVarchar extends HiveBaseChar
+  implements Comparable<HiveVarchar> {
+
+  public static final int MAX_VARCHAR_LENGTH = 65535;
+
+  public HiveVarchar() {
+  }
+
+  public HiveVarchar(String val, int len) {
+    setValue(val, len);
+  }
+
+  public HiveVarchar(HiveVarchar hc, int len) {
+    setValue(hc, len);
+  }
+
+  /**
+   * Set the new value
+   */
+  public void setValue(String val) {
+    super.setValue(val, -1);
+  }
+
+  public void setValue(HiveVarchar hc) {
+    super.setValue(hc.getValue(), -1);
+  }
+
+  @Override
+  public String toString() {
+    return getValue();
+  }
+
+  public int compareTo(HiveVarchar rhs) {
+    if (rhs == this) {
+      return 0;
+    }
+    return this.getValue().compareTo(rhs.getValue());
+  }
+
+  public boolean equals(HiveVarchar rhs) {
+    if (rhs == this) {
+      return true;
+    }
+    return this.getValue().equals(rhs.getValue());
+  }
+
+  @Override
+  public int hashCode() {
+    return getValue().hashCode();
+  }
+}
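[Together, the two classes give VARCHAR its expected semantics: values longer than the declared length are truncated on assignment, and, unlike SQL CHAR, leading and trailing spaces are significant in comparisons. A small usage sketch, illustrative only and not part of the commit:]

    import org.apache.hadoop.hive.common.type.HiveVarchar;

    public class HiveVarcharDemo {
      public static void main(String[] args) {
        // A value longer than the declared max length is truncated on construction.
        HiveVarchar v1 = new HiveVarchar("abcdefghij", 4);
        System.out.println(v1.getValue());            // "abcd"
        System.out.println(v1.getCharacterLength());  // 4

        // Trailing spaces are significant for varchar comparison.
        HiveVarchar v2 = new HiveVarchar("abcd ", 20);
        System.out.println(v1.equals(v2));            // false
        System.out.println(v1.compareTo(v2) == 0);    // false
      }
    }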
Added: hive/trunk/common/src/test/org/apache/hadoop/hive/common/type/TestHiveVarchar.java
URL: http://svn.apache.org/viewvc/hive/trunk/common/src/test/org/apache/hadoop/hive/common/type/TestHiveVarchar.java?rev=1523463&view=auto
==============================================================================
--- hive/trunk/common/src/test/org/apache/hadoop/hive/common/type/TestHiveVarchar.java (added)
+++ hive/trunk/common/src/test/org/apache/hadoop/hive/common/type/TestHiveVarchar.java Sun Sep 15 17:23:53 2013
@@ -0,0 +1,143 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.common.type;
+
+import junit.framework.TestCase;
+
+import org.apache.hadoop.hive.common.type.HiveVarchar;
+import org.apache.hadoop.hive.common.LogUtils;
+import org.apache.hadoop.hive.common.LogUtils.LogInitializationException;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.Random;
+
+
+public class TestHiveVarchar extends TestCase {
+  public TestHiveVarchar() {
+    super();
+  }
+
+  static Random rnd = new Random();
+
+  public static int getRandomSupplementaryChar() {
+    int lowSurrogate = 0xDC00 + rnd.nextInt(1024);
+    //return 0xD8000000 + lowSurrogate;
+    int highSurrogate = 0xD800;
+    return Character.toCodePoint((char)highSurrogate, (char)lowSurrogate);
+  }
+
+  public static int getRandomCodePoint() {
+    int codePoint;
+    if (rnd.nextDouble() < 0.50) {
+      codePoint = 32 + rnd.nextInt(90);
+    } else {
+      codePoint = getRandomSupplementaryChar();
+    }
+    if (!Character.isValidCodePoint(codePoint)) {
+      System.out.println(Integer.toHexString(codePoint) + " is not a valid code point");
+    }
+    return codePoint;
+  }
+
+  public static int getRandomCodePoint(int excludeChar) {
+    while (true) {
+      int codePoint = getRandomCodePoint();
+      if (codePoint != excludeChar) {
+        return codePoint;
+      }
+    }
+  }
+
+  public void testStringLength() throws Exception {
+    int strLen = 20;
+    int[] lengths = { 15, 20, 25 };
+    // Try with supplementary characters
+    for (int idx1 = 0; idx1 < lengths.length; ++idx1) {
+      // Create random test string
+      StringBuffer sb = new StringBuffer();
+      int curLen = lengths[idx1];
+      for (int idx2 = 0; idx2 < curLen; ++idx2) {
+        sb.appendCodePoint(getRandomCodePoint(' '));
+      }
+      String testString = sb.toString();
+      assertEquals(curLen, testString.codePointCount(0, testString.length()));
+      String enforcedString = HiveBaseChar.enforceMaxLength(testString, strLen);
+      if (curLen <= strLen) {
+        // No truncation needed
+        assertEquals(testString, enforcedString);
+      } else {
+        // String should have been truncated.
+        assertEquals(strLen, enforcedString.codePointCount(0, enforcedString.length()));
+      }
+    }
+
+    // Try with ascii chars
+    String[] testStrings = {
+      "abcdefg",
+      "abcdefghijklmnopqrst",
+      "abcdefghijklmnopqrstuvwxyz"
+    };
+    for (String testString : testStrings) {
+      int curLen = testString.length();
+      assertEquals(curLen, testString.codePointCount(0, testString.length()));
+      String enforcedString = HiveBaseChar.enforceMaxLength(testString, strLen);
+      if (curLen <= strLen) {
+        // No truncation needed
+        assertEquals(testString, enforcedString);
+      } else {
+        // String should have been truncated.
+        assertEquals(strLen, enforcedString.codePointCount(0, enforcedString.length()));
+      }
+    }
+  }
+
+  public void testComparison() throws Exception {
+    HiveVarchar hc1 = new HiveVarchar("abcd", 20);
+    HiveVarchar hc2 = new HiveVarchar("abcd", 20);
+
+    // Identical strings should be equal
+    assertTrue(hc1.equals(hc2));
+    assertTrue(hc2.equals(hc1));
+    assertEquals(0, hc1.compareTo(hc2));
+    assertEquals(0, hc2.compareTo(hc1));
+
+    // Unequal strings
+    hc2 = new HiveVarchar("abcde", 20);
+    assertFalse(hc1.equals(hc2));
+    assertFalse(hc2.equals(hc1));
+    assertFalse(0 == hc1.compareTo(hc2));
+    assertFalse(0 == hc2.compareTo(hc1));
+
+    // Trailing spaces are significant
+    hc2 = new HiveVarchar("abcd ", 30);
+
+    assertFalse(hc1.equals(hc2));
+    assertFalse(hc2.equals(hc1));
+    assertFalse(0 == hc1.compareTo(hc2));
+    assertFalse(0 == hc2.compareTo(hc1));
+
+    // Leading spaces are significant
+    hc2 = new HiveVarchar(" abcd", 20);
+    assertFalse(hc1.equals(hc2));
+    assertFalse(hc2.equals(hc1));
+    assertFalse(0 == hc1.compareTo(hc2));
+    assertFalse(0 == hc2.compareTo(hc1));
+  }
+}

Modified: hive/trunk/data/files/datatypes.txt
URL: http://svn.apache.org/viewvc/hive/trunk/data/files/datatypes.txt?rev=1523463&r1=1523462&r2=1523463&view=diff
==============================================================================
--- hive/trunk/data/files/datatypes.txt (original)
+++ hive/trunk/data/files/datatypes.txt Sun Sep 15 17:23:53 2013
@@ -1,3 +1,3 @@
-\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N
--1false-1.1\N\N\N-1-1-1.0-1\N\N\N\N\N\N
-1true1.11121x2ykva92.2111.01abcd1111213142212212x1abcd22012-04-22 09:00:00.123456789123456789.0123456YWJjZA==2013-01-01
+\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N
+-1false-1.1\N\N\N-1-1-1.0-1\N\N\N\N\N\N\N
+1true1.11121x2ykva92.2111.01abcd1111213142212212x1abcd22012-04-22 09:00:00.123456789123456789.0123456YWJjZA==2013-01-01abc123

Added: hive/trunk/data/files/vc1.txt
URL: http://svn.apache.org/viewvc/hive/trunk/data/files/vc1.txt?rev=1523463&view=auto
==============================================================================
--- hive/trunk/data/files/vc1.txt (added)
+++ hive/trunk/data/files/vc1.txt Sun Sep 15 17:23:53 2013
@@ -0,0 +1,3 @@
+1abc
+2abc
+3 abc
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java?rev=1523463&r1=1523462&r2=1523463&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java Sun Sep 15 17:23:53 2013
@@ -39,6 +39,7 @@ import javax.xml.parsers.DocumentBuilder
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.common.type.HiveVarchar;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
@@ -159,6 +160,7 @@ import org.apache.hadoop.hive.serde2.typ
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeParams;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.util.ReflectionUtils;
 import org.w3c.dom.Document;
@@ -361,6 +363,8 @@ public final class FunctionRegistry {
         GenericUDFToBinary.class);
     registerGenericUDF(serdeConstants.DECIMAL_TYPE_NAME,
         GenericUDFToDecimal.class);
+    registerGenericUDF(serdeConstants.VARCHAR_TYPE_NAME,
+        GenericUDFToVarchar.class);
 
     // Aggregate functions
     registerGenericUDAF("max", new GenericUDAFMax());
@@ -626,6 +630,17 @@ public final class FunctionRegistry {
     registerNumericType(PrimitiveCategory.STRING, 8);
   }
 
+  static int getCommonLength(int aLen, int bLen) {
+    int maxLength;
+    if (aLen < 0 || bLen < 0) {
+      // A negative (unbounded) length takes precedence over any positive value.
+      maxLength = -1;
+    } else {
+      maxLength = Math.max(aLen, bLen);
+    }
+    return maxLength;
+  }
+
   /**
   * Given 2 TypeInfo types and the PrimitiveCategory selected as the common class between the two,
   * return a TypeInfo corresponding to the common PrimitiveCategory, and with type qualifiers
@@ -643,6 +658,16 @@ public final class FunctionRegistry {
     // For types with parameters (like varchar), we need to determine the type parameters
     // that should be added to this type, based on the original 2 TypeInfos.
     switch (typeCategory) {
+      case VARCHAR:
+        int maxLength = getCommonLength(
+            TypeInfoUtils.getCharacterLengthForType(a),
+            TypeInfoUtils.getCharacterLengthForType(b));
+        VarcharTypeParams varcharParams = new VarcharTypeParams();
+        varcharParams.setLength(maxLength);
+        // Generate type name so that we can retrieve the TypeInfo for that type.
+        String typeName = PrimitiveObjectInspectorUtils
+            .getTypeEntryFromTypeSpecs(typeCategory, varcharParams).toString();
+        return TypeInfoFactory.getPrimitiveTypeInfo(typeName);
 
       default:
         // Type doesn't require any qualifiers.
@@ -840,7 +865,6 @@ public final class FunctionRegistry {
     return false;
   }
 
-
   /**
   * Get the GenericUDAF evaluator for the name and argumentClasses.
   *
@@ -1019,7 +1043,8 @@ public final class FunctionRegistry {
   */
  public static int matchCost(TypeInfo argumentPassed,
      TypeInfo argumentAccepted, boolean exact) {
-    if (argumentAccepted.equals(argumentPassed)) {
+    if (argumentAccepted.equals(argumentPassed)
+        || TypeInfoUtils.doPrimitiveCategoriesMatch(argumentPassed, argumentAccepted)) {
      // matches
      return 0;
    }
@@ -1468,6 +1493,7 @@ public final class FunctionRegistry {
        udfClass == UDFToDouble.class || udfClass == UDFToFloat.class ||
        udfClass == UDFToInteger.class || udfClass == UDFToLong.class ||
        udfClass == UDFToShort.class || udfClass == UDFToString.class ||
+       udfClass == GenericUDFToVarchar.class ||
        udfClass == GenericUDFTimestamp.class || udfClass == GenericUDFToBinary.class ||
        udfClass == GenericUDFToDate.class;
  }
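[The getCommonLength logic means that when two varchar operands of different declared lengths meet, say in a comparison or UNION, the common type is wide enough for both, and an unbounded length (-1, as carried by string) wins outright. A hypothetical JUnit-style sketch of the expected behavior; it would have to live in the org.apache.hadoop.hive.ql.exec package because getCommonLength is package-private, and it is not part of the commit:]

    package org.apache.hadoop.hive.ql.exec;

    import junit.framework.TestCase;

    public class TestCommonVarcharLength extends TestCase {
      public void testCommonLength() {
        // Two bounded lengths: the wider one wins, so varchar(10) and
        // varchar(20) should resolve to a common type of varchar(20).
        assertEquals(20, FunctionRegistry.getCommonLength(10, 20));

        // A negative (unbounded) length, e.g. from a string operand,
        // takes precedence: the common type falls back to unbounded.
        assertEquals(-1, FunctionRegistry.getCommonLength(-1, 20));
      }
    }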
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java?rev=1523463&r1=1523462&r2=1523463&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java Sun Sep 15 17:23:53 2013
@@ -628,7 +628,7 @@ public abstract class BaseSemanticAnalyz
     case HiveParser.TOK_UNIONTYPE:
       return getUnionTypeStringFromAST(typeNode);
     default:
-      return DDLSemanticAnalyzer.getTypeName(typeNode.getType());
+      return DDLSemanticAnalyzer.getTypeName(typeNode);
     }
   }

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java?rev=1523463&r1=1523462&r2=1523463&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java Sun Sep 15 17:23:53 2013
@@ -38,6 +38,7 @@ import java.util.Set;
 import org.antlr.runtime.tree.CommonTree;
 import org.antlr.runtime.tree.Tree;
+import org.apache.commons.lang.StringUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.fs.Path;
@@ -127,6 +128,10 @@ import org.apache.hadoop.hive.ql.securit
 import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeParams;
 import org.apache.hadoop.mapred.InputFormat;
 import org.apache.hadoop.mapred.TextInputFormat;
@@ -148,6 +153,7 @@ public class DDLSemanticAnalyzer extends
     TokenToTypeName.put(HiveParser.TOK_FLOAT, serdeConstants.FLOAT_TYPE_NAME);
     TokenToTypeName.put(HiveParser.TOK_DOUBLE, serdeConstants.DOUBLE_TYPE_NAME);
     TokenToTypeName.put(HiveParser.TOK_STRING, serdeConstants.STRING_TYPE_NAME);
+    TokenToTypeName.put(HiveParser.TOK_VARCHAR, serdeConstants.VARCHAR_TYPE_NAME);
     TokenToTypeName.put(HiveParser.TOK_BINARY, serdeConstants.BINARY_TYPE_NAME);
     TokenToTypeName.put(HiveParser.TOK_DATE, serdeConstants.DATE_TYPE_NAME);
     TokenToTypeName.put(HiveParser.TOK_DATETIME, serdeConstants.DATETIME_TYPE_NAME);
@@ -155,12 +161,27 @@ public class DDLSemanticAnalyzer extends
     TokenToTypeName.put(HiveParser.TOK_DECIMAL, serdeConstants.DECIMAL_TYPE_NAME);
   }

-  public static String getTypeName(int token) throws SemanticException {
+  public static String getTypeName(ASTNode node) throws SemanticException {
+    int token = node.getType();
+    String typeName;
+
     // datetime type isn't currently supported
     if (token == HiveParser.TOK_DATETIME) {
       throw new SemanticException(ErrorMsg.UNSUPPORTED_TYPE.getMsg());
     }
-    return TokenToTypeName.get(token);
+
+    switch (token) {
+    case HiveParser.TOK_VARCHAR:
+      PrimitiveCategory primitiveCategory = PrimitiveCategory.VARCHAR;
+      typeName = TokenToTypeName.get(token);
+      VarcharTypeParams varcharParams = ParseUtils.getVarcharParams(typeName, node);
+      typeName = PrimitiveObjectInspectorUtils.getTypeEntryFromTypeSpecs(
+          primitiveCategory, varcharParams).toString();
+      break;
+    default:
+      typeName = TokenToTypeName.get(token);
+    }
+    return typeName;
+  }

   static class TablePartition {

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g?rev=1523463&r1=1523462&r2=1523463&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g Sun Sep 15 17:23:53 2013
@@ -105,6 +105,7 @@ KW_DATETIME: 'DATETIME';
 KW_TIMESTAMP: 'TIMESTAMP';
 KW_DECIMAL: 'DECIMAL';
 KW_STRING: 'STRING';
+KW_VARCHAR: 'VARCHAR';
 KW_ARRAY: 'ARRAY';
 KW_STRUCT: 'STRUCT';
 KW_MAP: 'MAP';

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g?rev=1523463&r1=1523462&r2=1523463&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g Sun Sep 15 17:23:53 2013
@@ -110,6 +110,7 @@ TOK_DATELITERAL;
 TOK_DATETIME;
 TOK_TIMESTAMP;
 TOK_STRING;
+TOK_VARCHAR;
 TOK_BINARY;
 TOK_DECIMAL;
 TOK_LIST;
@@ -1771,6 +1772,7 @@ primitiveType
     | KW_STRING       ->    TOK_STRING
     | KW_BINARY       ->    TOK_BINARY
     | KW_DECIMAL      ->    TOK_DECIMAL
+    | KW_VARCHAR LPAREN length=Number RPAREN -> ^(TOK_VARCHAR $length)
     ;

 listType

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java?rev=1523463&r1=1523462&r2=1523463&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java Sun Sep 15 17:23:53 2013
@@ -20,14 +20,17 @@ package org.apache.hadoop.hive.ql.parse;
 
 import java.util.ArrayList;
 import java.util.Iterator;
+import java.util.LinkedList;
 import java.util.List;
 
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.ql.ErrorMsg;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.typeinfo.BaseTypeParams;
 import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeParams;
 
 /**
@@ -116,7 +119,9 @@ public final class ParseUtils {
       typeParams = tableFieldTypeInfo.getTypeParams();
       if (typeParams != null) {
         switch (tableFieldTypeInfo.getPrimitiveCategory()) {
-        // No parameterized types yet
+        case VARCHAR:
+          // Nothing to do here - the parameter will be passed to the UDF factory method below
+          break;
         default:
           throw new SemanticException("Type cast for " +
              tableFieldTypeInfo.getPrimitiveCategory() + " does not take type parameters");
@@ -132,4 +137,22 @@ public final class ParseUtils {
 
     return ret;
   }
+
+  public static VarcharTypeParams getVarcharParams(String typeName, ASTNode node)
+      throws SemanticException {
+    if (node.getChildCount() != 1) {
+      throw new SemanticException("Bad params for type " + typeName);
+    }
+
+    try {
+      VarcharTypeParams typeParams = new VarcharTypeParams();
+      String lengthStr = node.getChild(0).getText();
+      Integer length = Integer.valueOf(lengthStr);
+      typeParams.setLength(length.intValue());
+      typeParams.validateParams();
+      return typeParams;
+    } catch (SerDeException err) {
+      throw new SemanticException(err);
+    }
+  }
 }
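[End to end: the grammar rule turns VARCHAR(50) into a TOK_VARCHAR node carrying the length as its only child, getVarcharParams validates that into a VarcharTypeParams, and the qualified type name (e.g. "varchar(50)") is regenerated from the type entry. A hypothetical sketch of the validation step in isolation, assuming - as the invalid_varchar_length_*.q negative tests suggest - that validateParams rejects lengths outside 1..HiveVarchar.MAX_VARCHAR_LENGTH; this snippet is not part of the commit:]

    import org.apache.hadoop.hive.serde2.SerDeException;
    import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeParams;

    public class VarcharParamsDemo {
      public static void main(String[] args) {
        VarcharTypeParams params = new VarcharTypeParams();
        try {
          params.setLength(50);
          params.validateParams();   // fine: assumed within 1..65535
          params.setLength(100000);  // beyond HiveVarchar.MAX_VARCHAR_LENGTH
          params.validateParams();   // expected to throw SerDeException
        } catch (SerDeException err) {
          System.out.println("rejected: " + err.getMessage());
        }
      }
    }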
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java?rev=1523463&r1=1523462&r2=1523463&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java Sun Sep 15 17:23:53 2013
@@ -62,6 +62,7 @@ import org.apache.hadoop.hive.ql.udf.gen
 import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeParams;
 import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
@@ -562,6 +563,8 @@ public final class TypeCheckProcFactory
         serdeConstants.DOUBLE_TYPE_NAME);
     conversionFunctionTextHashMap.put(HiveParser.TOK_STRING,
         serdeConstants.STRING_TYPE_NAME);
+    conversionFunctionTextHashMap.put(HiveParser.TOK_VARCHAR,
+        serdeConstants.VARCHAR_TYPE_NAME);
     conversionFunctionTextHashMap.put(HiveParser.TOK_BINARY,
         serdeConstants.BINARY_TYPE_NAME);
     conversionFunctionTextHashMap.put(HiveParser.TOK_DATE,
@@ -783,8 +786,14 @@ public final class TypeCheckProcFactory
       if (isFunction) {
         ASTNode funcNameNode = (ASTNode)expr.getChild(0);
         switch (funcNameNode.getType()) {
-        // Get type param from AST and add to cast function.
-        // But, no parameterized types to handle at the moment
+        case HiveParser.TOK_VARCHAR:
+          // Add type params
+          VarcharTypeParams varcharTypeParams = new VarcharTypeParams();
+          varcharTypeParams.length = Integer.valueOf((funcNameNode.getChild(0).getText()));
+          if (genericUDF != null) {
+            ((SettableUDF)genericUDF).setParams(varcharTypeParams);
+          }
+          break;
        default:
          // Do nothing
          break;

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/CreateTableDesc.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/CreateTableDesc.java?rev=1523463&r1=1523462&r2=1523463&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/CreateTableDesc.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/CreateTableDesc.java Sun Sep 15 17:23:53 2013
@@ -25,10 +25,13 @@ import java.util.List;
 import java.util.Map;
 
 import org.apache.commons.lang.StringUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.common.JavaUtils;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.Order;
 import org.apache.hadoop.hive.ql.ErrorMsg;
+import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
 import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
@@ -36,7 +39,8 @@ import org.apache.hadoop.hive.ql.parse.B
 import org.apache.hadoop.hive.ql.parse.ParseUtils;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.serde2.SerDeUtils;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 
 /**
  * CreateTableDesc.
@@ -45,6 +49,7 @@ import org.apache.hadoop.hive.serde2.obj
 @Explain(displayName = "Create Table")
 public class CreateTableDesc extends DDLDesc implements Serializable {
   private static final long serialVersionUID = 1L;
+  private static Log LOG = LogFactory.getLog(CreateTableDesc.class);
   String databaseName;
   String tableName;
   boolean isExternal;
@@ -461,12 +466,15 @@ public class CreateTableDesc extends DDL
       while (partColsIter.hasNext()) {
         FieldSchema fs = partColsIter.next();
         String partCol = fs.getName();
-        PrimitiveObjectInspectorUtils.PrimitiveTypeEntry pte = PrimitiveObjectInspectorUtils
-            .getTypeEntryFromTypeName(
-            fs.getType());
-        if(null == pte){
+        TypeInfo pti = null;
+        try {
+          pti = TypeInfoFactory.getPrimitiveTypeInfo(fs.getType());
+        } catch (Exception err) {
+          LOG.error(err);
+        }
+        if(null == pti){
           throw new SemanticException(ErrorMsg.PARTITION_COLUMN_NON_PRIMITIVE.getMsg() + " Found "
-            + partCol + " of type: " + fs.getType());
+              + partCol + " of type: " + fs.getType());
         }
         Iterator colNamesIter = colNames.iterator();
         while (colNamesIter.hasNext()) {

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeConstantDesc.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeConstantDesc.java?rev=1523463&r1=1523462&r2=1523463&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeConstantDesc.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeConstantDesc.java Sun Sep 15 17:23:53 2013
@@ -66,7 +66,8 @@ public class ExprNodeConstantDesc extend
         .getPrimitiveJavaObjectInspector(pc).getPrimitiveWritableObject(
           getValue());
     return PrimitiveObjectInspectorFactory
-        .getPrimitiveWritableConstantObjectInspector(pc, writableValue);
+        .getPrimitiveWritableConstantObjectInspector(
+            (PrimitiveTypeInfo) getTypeInfo(), writableValue);
   }
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/GenericUDFEncode.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/GenericUDFEncode.java?rev=1523463&r1=1523462&r2=1523463&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/GenericUDFEncode.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/GenericUDFEncode.java Sun Sep 15 17:23:53 2013
@@ -19,6 +19,8 @@ import org.apache.hadoop.hive.serde2.obj
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
 import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.Text;
@@ -30,8 +32,8 @@ extended = "Possible options for the cha
     "is null, the result will also be null")
 public class GenericUDFEncode extends GenericUDF {
   private transient CharsetEncoder encoder = null;
-  private transient StringObjectInspector stringOI = null;
-  private transient StringObjectInspector charsetOI = null;
+  private transient PrimitiveObjectInspector stringOI = null;
+  private transient PrimitiveObjectInspector charsetOI = null;
   private transient BytesWritable result = new BytesWritable();
 
   @Override
@@ -41,23 +43,27 @@ public class GenericUDFEncode extends Ge
     }
 
     if (arguments[0].getCategory() != Category.PRIMITIVE ||
-        ((PrimitiveObjectInspector)arguments[0]).getPrimitiveCategory() != PrimitiveCategory.STRING){
-      throw new UDFArgumentTypeException(0, "The first argument to Encode() must be a string");
+        PrimitiveGrouping.STRING_GROUP != PrimitiveObjectInspectorUtils.getPrimitiveGrouping(
+            ((PrimitiveObjectInspector)arguments[0]).getPrimitiveCategory())){
+      throw new UDFArgumentTypeException(
+          0, "The first argument to Encode() must be a string/varchar");
     }
 
-    stringOI = (StringObjectInspector) arguments[0];
+    stringOI = (PrimitiveObjectInspector) arguments[0];
 
     if (arguments[1].getCategory() != Category.PRIMITIVE ||
-        ((PrimitiveObjectInspector)arguments[1]).getPrimitiveCategory() != PrimitiveCategory.STRING){
-      throw new UDFArgumentTypeException(1, "The second argument to Encode() must be a string");
+        PrimitiveGrouping.STRING_GROUP != PrimitiveObjectInspectorUtils.getPrimitiveGrouping(
+            ((PrimitiveObjectInspector)arguments[1]).getPrimitiveCategory())){
+      throw new UDFArgumentTypeException(
+          1, "The second argument to Encode() must be a string/varchar");
     }
 
-    charsetOI = (StringObjectInspector) arguments[1];
+    charsetOI = (PrimitiveObjectInspector) arguments[1];
 
     // If the character set for encoding is constant, we can optimize that
-    StringObjectInspector charSetOI = (StringObjectInspector) arguments[1];
-    if (charSetOI instanceof ConstantObjectInspector){
-      String charSetName = ((Text) ((ConstantObjectInspector) charSetOI).getWritableConstantValue()).toString();
+    if (charsetOI instanceof ConstantObjectInspector){
+      String charSetName =
+          ((ConstantObjectInspector) arguments[1]).getWritableConstantValue().toString();
       encoder = Charset.forName(charSetName).newEncoder().onMalformedInput(CodingErrorAction.REPORT).onUnmappableCharacter(CodingErrorAction.REPORT);
     }
 
@@ -68,7 +74,7 @@ public class GenericUDFEncode extends Ge
   @Override
   public Object evaluate(DeferredObject[] arguments) throws HiveException {
-    String value = stringOI.getPrimitiveJavaObject(arguments[0].get());
+    String value = PrimitiveObjectInspectorUtils.getString(arguments[0].get(), stringOI);
     if (value == null) {
       return null;
     }
@@ -81,7 +87,8 @@ public class GenericUDFEncode extends Ge
         throw new HiveException(e);
       }
     } else {
-      encoded = Charset.forName(charsetOI.getPrimitiveJavaObject(arguments[1].get())).encode(value);
+      encoded = Charset.forName(
+          PrimitiveObjectInspectorUtils.getString(arguments[1].get(), charsetOI)).encode(value);
     }
     result.setSize(encoded.limit());
     encoded.get(result.getBytes(), 0, encoded.limit());
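[The recurring pattern in these UDF changes: instead of requiring a StringObjectInspector, accept any primitive whose category falls in the STRING_GROUP (string, varchar) and read values through PrimitiveObjectInspectorUtils.getString. A condensed sketch of that check as a hypothetical standalone helper, not part of the commit:]

    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
    import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
    import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
    import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping;

    public class StringGroupCheck {
      // Returns true for any primitive in the string group (string, varchar),
      // mirroring the argument checks added to GenericUDFEncode above.
      public static boolean isStringGroup(ObjectInspector oi) {
        return oi.getCategory() == Category.PRIMITIVE
            && PrimitiveObjectInspectorUtils.getPrimitiveGrouping(
                   ((PrimitiveObjectInspector) oi).getPrimitiveCategory())
               == PrimitiveGrouping.STRING_GROUP;
      }
    }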
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToString.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToString.java?rev=1523463&r1=1523462&r2=1523463&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToString.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToString.java Sun Sep 15 17:23:53 2013
@@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.udf;
 
 import org.apache.hadoop.hive.ql.exec.UDF;
 import org.apache.hadoop.hive.serde2.ByteStream;
+import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
 import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
 import org.apache.hadoop.hive.serde2.io.ByteWritable;
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java?rev=1523463&r1=1523462&r2=1523463&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java Sun Sep 15 17:23:53 2013
@@ -83,6 +83,7 @@ public class GenericUDAFComputeStats ext
     case DOUBLE:
       return new GenericUDAFDoubleStatsEvaluator();
     case STRING:
+    case VARCHAR:
       return new GenericUDAFStringStatsEvaluator();
     case BINARY:
       return new GenericUDAFBinaryStatsEvaluator();
@@ -102,12 +103,12 @@ public class GenericUDAFComputeStats ext
     /* Object Inspector corresponding to the input parameter.
      */
-    private PrimitiveObjectInspector inputOI;
+    private transient PrimitiveObjectInspector inputOI;
 
     /* Partial aggregation result returned by TerminatePartial. Partial result is a struct
      * containing a long field named "count".
      */
-    private Object[] partialResult;
+    private transient Object[] partialResult;
 
     /* Object Inspectors corresponding to the struct returned by TerminatePartial and the long
      * field within the struct - "count"
@@ -115,17 +116,17 @@ public class GenericUDAFComputeStats ext
     private transient StructObjectInspector soi;
 
     private transient StructField countTruesField;
-    private WritableLongObjectInspector countTruesFieldOI;
+    private transient WritableLongObjectInspector countTruesFieldOI;
 
     private transient StructField countFalsesField;
-    private WritableLongObjectInspector countFalsesFieldOI;
+    private transient WritableLongObjectInspector countFalsesFieldOI;
 
     private transient StructField countNullsField;
-    private WritableLongObjectInspector countNullsFieldOI;
+    private transient WritableLongObjectInspector countNullsFieldOI;
 
     /* Output of final result of the aggregation
      */
-    private Object[] result;
+    private transient Object[] result;
 
     @Override
     public ObjectInspector init(Mode m, ObjectInspector[] parameters)
@@ -305,13 +306,13 @@ public class GenericUDAFComputeStats ext
     /* Object Inspector corresponding to the input parameter.
      */
-    private PrimitiveObjectInspector inputOI;
+    private transient PrimitiveObjectInspector inputOI;
     private transient PrimitiveObjectInspector numVectorsOI;
 
     /* Partial aggregation result returned by TerminatePartial. Partial result is a struct
      * containing a long field named "count".
      */
-    private Object[] partialResult;
+    private transient Object[] partialResult;
 
     /* Object Inspectors corresponding to the struct returned by TerminatePartial and the long
      * field within the struct - "count"
@@ -319,23 +320,23 @@ public class GenericUDAFComputeStats ext
     private transient StructObjectInspector soi;
 
     private transient StructField minField;
-    private WritableLongObjectInspector minFieldOI;
+    private transient WritableLongObjectInspector minFieldOI;
 
     private transient StructField maxField;
-    private WritableLongObjectInspector maxFieldOI;
+    private transient WritableLongObjectInspector maxFieldOI;
 
     private transient StructField countNullsField;
-    private WritableLongObjectInspector countNullsFieldOI;
+    private transient WritableLongObjectInspector countNullsFieldOI;
 
     private transient StructField ndvField;
-    private WritableStringObjectInspector ndvFieldOI;
+    private transient WritableStringObjectInspector ndvFieldOI;
 
     private transient StructField numBitVectorsField;
-    private WritableIntObjectInspector numBitVectorsFieldOI;
+    private transient WritableIntObjectInspector numBitVectorsFieldOI;
 
     /* Output of final result of the aggregation
      */
-    private Object[] result;
+    private transient Object[] result;
 
     @Override
     public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
@@ -601,13 +602,13 @@ public class GenericUDAFComputeStats ext
     /* Object Inspector corresponding to the input parameter.
      */
-    private PrimitiveObjectInspector inputOI;
+    private transient PrimitiveObjectInspector inputOI;
     private transient PrimitiveObjectInspector numVectorsOI;
 
     /* Partial aggregation result returned by TerminatePartial. Partial result is a struct
      * containing a long field named "count".
      */
-    private Object[] partialResult;
+    private transient Object[] partialResult;
 
     /* Object Inspectors corresponding to the struct returned by TerminatePartial and the long
      * field within the struct - "count"
@@ -615,23 +616,23 @@ public class GenericUDAFComputeStats ext
     private transient StructObjectInspector soi;
 
     private transient StructField minField;
-    private WritableDoubleObjectInspector minFieldOI;
+    private transient WritableDoubleObjectInspector minFieldOI;
 
     private transient StructField maxField;
-    private WritableDoubleObjectInspector maxFieldOI;
+    private transient WritableDoubleObjectInspector maxFieldOI;
 
     private transient StructField countNullsField;
-    private WritableLongObjectInspector countNullsFieldOI;
+    private transient WritableLongObjectInspector countNullsFieldOI;
 
     private transient StructField ndvField;
-    private WritableStringObjectInspector ndvFieldOI;
+    private transient WritableStringObjectInspector ndvFieldOI;
 
     private transient StructField numBitVectorsField;
-    private WritableIntObjectInspector numBitVectorsFieldOI;
+    private transient WritableIntObjectInspector numBitVectorsFieldOI;
 
     /* Output of final result of the aggregation
      */
-    private Object[] result;
+    private transient Object[] result;
 
     @Override
     public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
@@ -898,13 +899,13 @@ public class GenericUDAFComputeStats ext
     /* Object Inspector corresponding to the input parameter.
      */
-    private PrimitiveObjectInspector inputOI;
+    private transient PrimitiveObjectInspector inputOI;
     private transient PrimitiveObjectInspector numVectorsOI;
 
     /* Partial aggregation result returned by TerminatePartial. Partial result is a struct
      * containing a long field named "count".
      */
-    private Object[] partialResult;
+    private transient Object[] partialResult;
 
     /* Object Inspectors corresponding to the struct returned by TerminatePartial and the
      * fields within the struct - "maxLength", "sumLength", "count", "countNulls", "ndv"
@@ -912,26 +913,26 @@ public class GenericUDAFComputeStats ext
     private transient StructObjectInspector soi;
 
     private transient StructField maxLengthField;
-    private WritableLongObjectInspector maxLengthFieldOI;
+    private transient WritableLongObjectInspector maxLengthFieldOI;
 
     private transient StructField sumLengthField;
-    private WritableLongObjectInspector sumLengthFieldOI;
+    private transient WritableLongObjectInspector sumLengthFieldOI;
 
     private transient StructField countField;
-    private WritableLongObjectInspector countFieldOI;
+    private transient WritableLongObjectInspector countFieldOI;
 
     private transient StructField countNullsField;
-    private WritableLongObjectInspector countNullsFieldOI;
+    private transient WritableLongObjectInspector countNullsFieldOI;
 
     private transient StructField ndvField;
-    private WritableStringObjectInspector ndvFieldOI;
+    private transient WritableStringObjectInspector ndvFieldOI;
 
     private transient StructField numBitVectorsField;
-    private WritableIntObjectInspector numBitVectorsFieldOI;
+    private transient WritableIntObjectInspector numBitVectorsFieldOI;
 
     /* Output of final result of the aggregation
      */
-    private Object[] result;
+    private transient Object[] result;
 
     @Override
     public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
@@ -1217,12 +1218,12 @@ public class GenericUDAFComputeStats ext
     /* Object Inspector corresponding to the input parameter.
      */
-    private PrimitiveObjectInspector inputOI;
+    private transient PrimitiveObjectInspector inputOI;
 
    /* Partial aggregation result returned by TerminatePartial. Partial result is a struct
      * containing a long field named "count".
      */
-    private Object[] partialResult;
+    private transient Object[] partialResult;
 
     /* Object Inspectors corresponding to the struct returned by TerminatePartial and the
      * fields within the struct - "maxLength", "sumLength", "count", "countNulls"
@@ -1230,20 +1231,20 @@ public class GenericUDAFComputeStats ext
     private transient StructObjectInspector soi;
 
     private transient StructField maxLengthField;
-    private WritableLongObjectInspector maxLengthFieldOI;
+    private transient WritableLongObjectInspector maxLengthFieldOI;
 
     private transient StructField sumLengthField;
-    private WritableLongObjectInspector sumLengthFieldOI;
+    private transient WritableLongObjectInspector sumLengthFieldOI;
 
     private transient StructField countField;
-    private WritableLongObjectInspector countFieldOI;
+    private transient WritableLongObjectInspector countFieldOI;
 
     private transient StructField countNullsField;
-    private WritableLongObjectInspector countNullsFieldOI;
+    private transient WritableLongObjectInspector countNullsFieldOI;
 
     /* Output of final result of the aggregation
      */
-    private Object[] result;
+    private transient Object[] result;
 
     @Override
     public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
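[The bulk of the GenericUDAFComputeStats change is mechanical: every ObjectInspector and scratch-object field becomes transient. UDAF evaluators are serialized into the query plan, ObjectInspectors are not meaningfully serializable, and all of these fields are rebuilt in init(), so marking them transient keeps them out of the serialized plan. A minimal sketch of the pattern with a hypothetical evaluator, not from the commit:]

    import org.apache.hadoop.hive.ql.metadata.HiveException;
    import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;

    public abstract class SketchEvaluator extends GenericUDAFEvaluator {
      // transient: re-derived from the parameters passed to init(), so it
      // never needs to survive plan serialization.
      private transient PrimitiveObjectInspector inputOI;

      @Override
      public ObjectInspector init(Mode m, ObjectInspector[] parameters)
          throws HiveException {
        super.init(m, parameters);
        inputOI = (PrimitiveObjectInspector) parameters[0];
        // ... build and return the output ObjectInspector here ...
        return inputOI;
      }
    }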
@@ -143,8 +148,6 @@ public abstract class GenericUDFBaseComp converter0 = ObjectInspectorConverters.getConverter(arguments[0], compareOI); converter1 = ObjectInspectorConverters.getConverter(arguments[1], compareOI); - } else { - compareType = CompareType.SAME_TYPE; } } return PrimitiveObjectInspectorFactory.writableBooleanObjectInspector; Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcatWS.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcatWS.java?rev=1523463&r1=1523462&r2=1523463&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcatWS.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcatWS.java Sun Sep 15 17:23:53 2013 @@ -27,7 +27,11 @@ import org.apache.hadoop.hive.serde.serd import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping; import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector; import org.apache.hadoop.io.Text; @@ -61,15 +65,12 @@ public class GenericUDFConcatWS extends for (int i = 0; i < arguments.length; i++) { switch(arguments[i].getCategory()) { case LIST: - if (((ListObjectInspector)arguments[i]).getListElementObjectInspector() - .getTypeName().equals(serdeConstants.STRING_TYPE_NAME) - || ((ListObjectInspector)arguments[i]).getListElementObjectInspector() - .getTypeName().equals(serdeConstants.VOID_TYPE_NAME)) { - break; + if (isStringOrVoidType( + ((ListObjectInspector) arguments[i]).getListElementObjectInspector())) { + break; } case PRIMITIVE: - if (arguments[i].getTypeName().equals(serdeConstants.STRING_TYPE_NAME) - || arguments[i].getTypeName().equals(serdeConstants.VOID_TYPE_NAME)) { + if (isStringOrVoidType(arguments[i])) { break; } default: @@ -84,6 +85,18 @@ public class GenericUDFConcatWS extends return PrimitiveObjectInspectorFactory.writableStringObjectInspector; } + protected boolean isStringOrVoidType(ObjectInspector oi) { + if (oi.getCategory() == Category.PRIMITIVE) { + if (PrimitiveGrouping.STRING_GROUP + == PrimitiveObjectInspectorUtils.getPrimitiveGrouping( + ((PrimitiveObjectInspector) oi).getPrimitiveCategory()) + || ((PrimitiveObjectInspector) oi).getPrimitiveCategory() == PrimitiveCategory.VOID) { + return true; + } + } + return false; + } + private final Text resultText = new Text(); @Override @@ -91,8 +104,8 @@ public class GenericUDFConcatWS extends if (arguments[0].get() == null) { return null; } - String separator = ((StringObjectInspector) argumentOIs[0]) - .getPrimitiveJavaObject(arguments[0].get()); + String separator = PrimitiveObjectInspectorUtils.getString( + arguments[0].get(), (PrimitiveObjectInspector)argumentOIs[0]); StringBuilder sb = new StringBuilder(); boolean first = true; @@ -116,8 +129,8 @@ public class 
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFReflect2.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFReflect2.java?rev=1523463&r1=1523462&r2=1523463&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFReflect2.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFReflect2.java Sun Sep 15 17:23:53 2013
@@ -39,6 +39,7 @@ import org.apache.hadoop.hive.serde2.obj
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveTypeEntry;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
 import org.apache.hadoop.io.BooleanWritable;
 import org.apache.hadoop.io.BytesWritable;
@@ -93,8 +94,12 @@ public class GenericUDFReflect2 extends
     try {
       method = findMethod(targetClass, methodName.toString(), null, true);
+      // While getTypeFor() returns a TypeEntry, we won't actually be able to get any
+      // type parameter information from this since the TypeEntry is derived from a return type.
+      PrimitiveTypeEntry typeEntry = getTypeFor(method.getReturnType());
       returnOI = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
-          getTypeFor(method.getReturnType()).primitiveCategory);
+          PrimitiveObjectInspectorUtils.getTypeEntryFromTypeSpecs(
+              typeEntry.primitiveCategory, typeEntry.typeParams));
       returnObj = (Writable) returnOI.getPrimitiveWritableClass().newInstance();
     } catch (Exception e) {
       throw new UDFArgumentException(e);
     }
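The point of routing through a full PrimitiveTypeEntry rather than a bare primitiveCategory is that qualified types carry parameters the category alone cannot express. A hedged sketch of that distinction, with TypeSpec as a hypothetical stand-in for Hive's PrimitiveTypeEntry:

    // Sketch: why the patch passes a (category, params) pair instead of a bare category.
    // TypeSpec is a hypothetical stand-in, not a Hive class.
    public class TypeSpecDemo {
      enum Cat { STRING, VARCHAR }

      static final class TypeSpec {
        final Cat category;
        final Integer length; // type parameter, e.g. varchar(10); null if none
        TypeSpec(Cat category, Integer length) {
          this.category = category;
          this.length = length;
        }
      }

      // Keying an inspector only on the category would lose the length;
      // keying on (category, params) keeps varchar(10) distinct from varchar(20).
      static String describeInspector(TypeSpec t) {
        return t.length == null ? t.category.toString() : t.category + "(" + t.length + ")";
      }

      public static void main(String[] args) {
        System.out.println(describeInspector(new TypeSpec(Cat.VARCHAR, 10))); // VARCHAR(10)
        System.out.println(describeInspector(new TypeSpec(Cat.STRING, null))); // STRING
      }
    }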
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFStringToMap.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFStringToMap.java?rev=1523463&r1=1523462&r2=1523463&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFStringToMap.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFStringToMap.java Sun Sep 15 17:23:53 2013
@@ -24,9 +24,15 @@ import org.apache.hadoop.hive.ql.exec.De
 import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 
@@ -41,30 +47,30 @@ import org.apache.hadoop.hive.serde2.typ
     + " delimiters are used: ',' as delimiter1 and '=' as delimiter2.")
 public class GenericUDFStringToMap extends GenericUDF {
   private final HashMap<String, String> ret = new HashMap<String, String>();
-  private transient StringObjectInspector soi_text, soi_de1 = null, soi_de2 = null;
+  private transient Converter soi_text, soi_de1 = null, soi_de2 = null;
   final static String default_de1 = ",";
   final static String default_de2 = ":";
 
   @Override
   public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
-    if (!TypeInfoUtils.getTypeInfoFromObjectInspector(arguments[0]).equals(
-        TypeInfoFactory.stringTypeInfo)
-        || (arguments.length > 1 &&
-        !TypeInfoUtils.getTypeInfoFromObjectInspector(arguments[1]).equals(
-        TypeInfoFactory.stringTypeInfo))
-        || (arguments.length > 2 &&
-        !TypeInfoUtils.getTypeInfoFromObjectInspector(arguments[2]).equals(
-        TypeInfoFactory.stringTypeInfo))) {
-      throw new UDFArgumentException("All arguments should be string");
+    for (int idx = 0; idx < Math.min(arguments.length, 3); ++idx) {
+      if (arguments[idx].getCategory() != Category.PRIMITIVE
+          || PrimitiveObjectInspectorUtils.getPrimitiveGrouping(
+              ((PrimitiveObjectInspector) arguments[idx]).getPrimitiveCategory())
+          != PrimitiveGrouping.STRING_GROUP) {
+        throw new UDFArgumentException("All arguments should be string/character type");
+      }
     }
-
-    soi_text = (StringObjectInspector) arguments[0];
+    soi_text = ObjectInspectorConverters.getConverter(arguments[0],
+        PrimitiveObjectInspectorFactory.javaStringObjectInspector);
     if (arguments.length > 1) {
-      soi_de1 = (StringObjectInspector) arguments[1];
+      soi_de1 = ObjectInspectorConverters.getConverter(arguments[1],
+          PrimitiveObjectInspectorFactory.javaStringObjectInspector);
     }
     if (arguments.length > 2) {
-      soi_de2 = (StringObjectInspector) arguments[2];
+      soi_de2 = ObjectInspectorConverters.getConverter(arguments[2],
+          PrimitiveObjectInspectorFactory.javaStringObjectInspector);
     }
 
     return ObjectInspectorFactory.getStandardMapObjectInspector(
@@ -75,11 +81,11 @@ public class GenericUDFStringToMap exten
   @Override
   public Object evaluate(DeferredObject[] arguments) throws HiveException {
     ret.clear();
-    String text = soi_text.getPrimitiveJavaObject(arguments[0].get());
+    String text = (String) soi_text.convert(arguments[0].get());
     String delimiter1 = (soi_de1 == null) ?
-        default_de1 : soi_de1.getPrimitiveJavaObject(arguments[1].get());
+        default_de1 : (String) soi_de1.convert(arguments[1].get());
     String delimiter2 = (soi_de2 == null) ?
-        default_de2 : soi_de2.getPrimitiveJavaObject(arguments[2].get());
+        default_de2 : (String) soi_de2.convert(arguments[2].get());
 
     String[] keyValuePairs = text.split(delimiter1);
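Where the old initialize() cast each argument to StringObjectInspector, the new code asks for a converter to a plain java String, so any STRING_GROUP argument (string or varchar) is normalized before evaluate() runs. A small sketch of that normalization step, using only Hive classes already named in the hunks above; the writableHiveVarcharObjectInspector input is just one possible source type:

    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
    import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

    // Sketch: build a converter that normalizes a string-group argument to java String,
    // as the patched initialize() does for the text and delimiter arguments.
    public class ToStringConverterDemo {
      static Converter stringConverter(ObjectInspector argumentOI) {
        return ObjectInspectorConverters.getConverter(argumentOI,
            PrimitiveObjectInspectorFactory.javaStringObjectInspector);
      }

      public static void main(String[] args) {
        Converter c = stringConverter(
            PrimitiveObjectInspectorFactory.writableHiveVarcharObjectInspector);
        // At evaluate() time: String s = (String) c.convert(argumentValue);
        System.out.println(c.getClass().getName());
      }
    }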
string/character type"); + } } - - soi_text = (StringObjectInspector) arguments[0]; + soi_text = ObjectInspectorConverters.getConverter(arguments[0], + PrimitiveObjectInspectorFactory.javaStringObjectInspector); if (arguments.length > 1) { - soi_de1 = (StringObjectInspector) arguments[1]; + soi_de1 = ObjectInspectorConverters.getConverter(arguments[1], + PrimitiveObjectInspectorFactory.javaStringObjectInspector); } if (arguments.length > 2) { - soi_de2 = (StringObjectInspector) arguments[2]; + soi_de2 = ObjectInspectorConverters.getConverter(arguments[2], + PrimitiveObjectInspectorFactory.javaStringObjectInspector); } return ObjectInspectorFactory.getStandardMapObjectInspector( @@ -75,11 +81,11 @@ public class GenericUDFStringToMap exten @Override public Object evaluate(DeferredObject[] arguments) throws HiveException { ret.clear(); - String text = soi_text.getPrimitiveJavaObject(arguments[0].get()); + String text = (String) soi_text.convert(arguments[0].get()); String delimiter1 = (soi_de1 == null) ? - default_de1 : soi_de1.getPrimitiveJavaObject(arguments[1].get()); + default_de1 : (String) soi_de1.convert(arguments[1].get()); String delimiter2 = (soi_de2 == null) ? - default_de2 : soi_de2.getPrimitiveJavaObject(arguments[2].get()); + default_de2 : (String) soi_de2.convert(arguments[2].get()); String[] keyValuePairs = text.split(delimiter1); Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToDate.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToDate.java?rev=1523463&r1=1523462&r2=1523463&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToDate.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToDate.java Sun Sep 15 17:23:53 2013 @@ -25,6 +25,8 @@ import org.apache.hadoop.hive.serde2.obj import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter.DateConverter; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping; /** * GenericUDFToDate @@ -49,10 +51,11 @@ public class GenericUDFToDate extends Ge } try { argumentOI = (PrimitiveObjectInspector) arguments[0]; - switch (argumentOI.getPrimitiveCategory()) { - case DATE: - case STRING: - case TIMESTAMP: + PrimitiveGrouping pg = + PrimitiveObjectInspectorUtils.getPrimitiveGrouping(argumentOI.getPrimitiveCategory()); + switch (pg) { + case DATE_GROUP: + case STRING_GROUP: break; default: throw new UDFArgumentException( Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToVarchar.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToVarchar.java?rev=1523463&view=auto ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToVarchar.java (added) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToVarchar.java Sun Sep 15 17:23:53 2013 @@ -0,0 +1,132 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more 
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToVarchar.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToVarchar.java?rev=1523463&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToVarchar.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToVarchar.java Sun Sep 15 17:23:53 2013
@@ -0,0 +1,132 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import java.io.Serializable;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.SettableUDF;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter.HiveVarcharConverter;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableHiveVarcharObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeParams;
+
+@Description(name = "varchar",
+    value = "CAST(<value> as VARCHAR(length)) - Converts the argument to a varchar value.",
+    extended = "Values will be truncated if the input value is too long to fit"
+    + " within the varchar length.\n"
+    + "Example:\n "
+    + "  > SELECT CAST(1234 AS varchar(10)) FROM src LIMIT 1;\n"
+    + "  '1234'")
+public class GenericUDFToVarchar extends GenericUDF
+    implements SettableUDF, Serializable {
+  private static final Log LOG = LogFactory.getLog(GenericUDFToVarchar.class.getName());
+  private transient PrimitiveObjectInspector argumentOI;
+  private transient HiveVarcharConverter converter;
+
+  // The varchar type parameters need to be set prior to initialization,
+  // and must be preserved when the plan is serialized to other processes.
+  private VarcharTypeParams typeParams;
+
+  public GenericUDFToVarchar() {
+  }
+
+  @Override
+  public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
+    if (arguments.length != 1) {
+      throw new UDFArgumentException("VARCHAR cast requires a value argument");
+    }
+    try {
+      argumentOI = (PrimitiveObjectInspector) arguments[0];
+    } catch (ClassCastException e) {
+      throw new UDFArgumentException(
+          "The function VARCHAR takes only primitive types");
+    }
+
+    // Check if this UDF has been provided with type params for the output varchar type
+    SettableHiveVarcharObjectInspector outputOI;
+    if (typeParams != null) {
+      outputOI = (SettableHiveVarcharObjectInspector)
+          PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
+              PrimitiveObjectInspectorUtils.getTypeEntryFromTypeSpecs(
+                  PrimitiveCategory.VARCHAR, typeParams));
+    } else {
+      outputOI = PrimitiveObjectInspectorFactory.writableHiveVarcharObjectInspector;
+    }
+
+    converter = new HiveVarcharConverter(argumentOI, outputOI);
+    return outputOI;
+  }
+
+  @Override
+  public Object evaluate(DeferredObject[] arguments) throws HiveException {
+    Object o0 = arguments[0].get();
+    if (o0 == null) {
+      return null;
+    }
+
+    return converter.convert(o0);
+  }
+
+  @Override
+  public String getDisplayString(String[] children) {
+    assert (children.length == 1);
+    StringBuilder sb = new StringBuilder();
+    sb.append("CAST( ");
+    sb.append(children[0]);
+    sb.append(" AS VARCHAR(");
+    String paramsStr = "";
+    if (typeParams != null) {
+      paramsStr = typeParams.toString();
+    }
+    sb.append(paramsStr);
+    sb.append("))");
+    return sb.toString();
+  }
+
+  /**
+   * Provide varchar type parameters for the output object inspector.
+   * This should be done before the UDF is initialized.
+   */
+  @Override
+  public void setParams(Object typeParams) throws UDFArgumentException {
+    if (converter != null) {
+      LOG.warn("Type converter already initialized, setting type params now will not be useful");
+    }
+    if (typeParams instanceof VarcharTypeParams) {
+      this.typeParams = (VarcharTypeParams) typeParams;
+    } else {
+      throw new UDFArgumentException(
+          "Was expecting VarcharTypeParams, instead got " + typeParams.getClass().getName());
+    }
+  }
+
+  @Override
+  public Object getParams() {
+    return typeParams;
+  }
+
+}
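A hedged sketch of how a caller might drive the new UDF end to end: per the SettableUDF contract noted in the comments above, type parameters go in through setParams() before initialize(). The direct assignment to VarcharTypeParams.length and the use of GenericUDF.DeferredJavaObject are assumptions for illustration; this diff only shows the length field being read, not how callers populate it.

    import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject;
    import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject;
    import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToVarchar;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
    import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeParams;

    // Sketch only: exercises the setParams -> initialize -> evaluate sequence.
    public class ToVarcharDemo {
      public static void main(String[] args) throws Exception {
        GenericUDFToVarchar udf = new GenericUDFToVarchar();

        VarcharTypeParams params = new VarcharTypeParams();
        params.length = 10;          // target type varchar(10); assumed settable field
        udf.setParams(params);       // must happen before initialize()

        ObjectInspector outputOI = udf.initialize(new ObjectInspector[] {
            PrimitiveObjectInspectorFactory.javaStringObjectInspector });
        System.out.println(outputOI.getTypeName());

        DeferredObject[] input = { new DeferredJavaObject("truncate me please") };
        System.out.println(udf.evaluate(input)); // expect the first 10 characters
      }
    }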
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUtils.java?rev=1523463&r1=1523462&r2=1523463&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUtils.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUtils.java Sun Sep 15 17:23:53 2013
@@ -29,17 +29,21 @@ import org.apache.hadoop.hive.ql.exec.Fu
 import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.IdentityConverter;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.VoidObjectInspector;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeParams;
 import org.apache.hadoop.io.Text;
 
 /**
@@ -350,6 +354,69 @@ public final class GenericUDFUtils {
   };
 
   /**
+   * Helper class for UDFs returning string/varchar/char
+   */
+  public static class StringHelper {
+
+    protected Object returnValue;
+    protected PrimitiveCategory type;
+
+    public StringHelper(PrimitiveCategory type) throws UDFArgumentException {
+      this.type = type;
+      switch (type) {
+        case STRING:
+          returnValue = new Text();
+          break;
+        case VARCHAR:
+          returnValue = new HiveVarcharWritable();
+          break;
+        default:
+          throw new UDFArgumentException("Unexpected non-string type " + type);
+      }
+    }
+
+    public Object setReturnValue(String val) throws UDFArgumentException {
+      if (val == null) {
+        return null;
+      }
+      switch (type) {
+        case STRING:
+          ((Text) returnValue).set(val);
+          return returnValue;
+        case VARCHAR:
+          ((HiveVarcharWritable) returnValue).set(val);
+          return returnValue;
+        default:
+          throw new UDFArgumentException("Bad return type " + type);
+      }
+    }
+
+    /**
+     * Helper function to help GenericUDFs determine the return type
+     * character length for char/varchar.
+     * @param poi PrimitiveObjectInspector representing the type
+     * @return character length of the type
+     * @throws UDFArgumentException
+     */
+    public static int getFixedStringSizeForType(PrimitiveObjectInspector poi)
+        throws UDFArgumentException {
+      // TODO: we could also support date, int, etc. (any type with a fixed-length value)
+      switch (poi.getPrimitiveCategory()) {
+        case VARCHAR:
+          VarcharTypeParams varcharParams = (VarcharTypeParams) poi.getTypeParams();
+          if (varcharParams == null || varcharParams.length < 0) {
+            throw new UDFArgumentException("varchar type used without type params");
+          }
+          return varcharParams.length;
+        default:
+          throw new UDFArgumentException("No fixed size for type " + poi.getTypeName());
+      }
+    }
+
+  }
+
+  /**
    * Return an ordinal from an integer.
    */
   public static String getOrdinal(int i) {
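The new StringHelper gives UDFs one code path for returning either string or varchar values: pick the category once at initialize() time, then reuse the same writable for every row. A minimal hedged sketch of that usage, outside any real GenericUDF scaffolding:

    import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
    import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils;
    import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;

    // Sketch: reuse one writable for either string or varchar results,
    // as the StringHelper added above is designed to do.
    public class StringHelperDemo {
      public static void main(String[] args) throws UDFArgumentException {
        // Chosen once, at initialize() time, from the resolved return type.
        GenericUDFUtils.StringHelper ret =
            new GenericUDFUtils.StringHelper(PrimitiveCategory.VARCHAR);

        // At evaluate() time the helper wraps each computed String.
        Object writable = ret.setReturnValue("hello varchar");
        System.out.println(writable); // a HiveVarcharWritable holding "hello varchar"
      }
    }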
Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java?rev=1523463&r1=1523462&r2=1523463&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java Sun Sep 15 17:23:53 2013
@@ -18,7 +18,6 @@
 package org.apache.hadoop.hive.ql.exec;
 
-import java.lang.reflect.Type;
 import java.lang.reflect.Method;
 import java.util.ArrayList;
 import java.util.LinkedList;
@@ -27,6 +26,7 @@ import java.util.List;
 import junit.framework.Assert;
 import junit.framework.TestCase;
 
+import org.apache.hadoop.hive.common.type.HiveVarchar;
 import org.apache.hadoop.hive.serde2.io.DateWritable;
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
 import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
@@ -35,6 +35,7 @@ import org.apache.hadoop.hive.serde2.obj
 import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.Text;
@@ -44,6 +45,7 @@ public class TestFunctionRegistry extend
   public class TestUDF {
     public void same(DoubleWritable x, DoubleWritable y) {}
     public void same(HiveDecimalWritable x, HiveDecimalWritable y) {}
+    public void same(Text x, Text y) {}
     public void one(IntWritable x, HiveDecimalWritable y) {}
     public void one(IntWritable x, DoubleWritable y) {}
     public void one(IntWritable x, IntWritable y) {}
@@ -57,8 +59,16 @@ public class TestFunctionRegistry extend
     public void typeaffinity2(DoubleWritable x) {}
   }
 
+  TypeInfo varchar5;
+  TypeInfo varchar10;
+  TypeInfo maxVarchar;
+
   @Override
   protected void setUp() {
+    String maxVarcharTypeName = "varchar(" + HiveVarchar.MAX_VARCHAR_LENGTH + ")";
+    maxVarchar = TypeInfoFactory.getPrimitiveTypeInfo(maxVarcharTypeName);
+    varchar10 = TypeInfoFactory.getPrimitiveTypeInfo("varchar(10)");
+    varchar5 = TypeInfoFactory.getPrimitiveTypeInfo("varchar(5)");
   }
 
   private void implicit(TypeInfo a, TypeInfo b, boolean convertible) {
@@ -72,6 +82,21 @@ public class TestFunctionRegistry extend
     implicit(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.decimalTypeInfo, true);
     implicit(TypeInfoFactory.dateTypeInfo, TypeInfoFactory.decimalTypeInfo, false);
     implicit(TypeInfoFactory.timestampTypeInfo, TypeInfoFactory.decimalTypeInfo, false);
+    implicit(varchar10, TypeInfoFactory.stringTypeInfo, true);
+    implicit(TypeInfoFactory.stringTypeInfo, varchar10, true);
+
+    // Try with parameterized varchar types
+    TypeInfo varchar10 = TypeInfoFactory.getPrimitiveTypeInfo("varchar(10)");
+    TypeInfo varchar20 = TypeInfoFactory.getPrimitiveTypeInfo("varchar(20)");
+
+    implicit(varchar10, TypeInfoFactory.stringTypeInfo, true);
+    implicit(varchar20, TypeInfoFactory.stringTypeInfo, true);
+    implicit(TypeInfoFactory.stringTypeInfo, varchar10, true);
+    implicit(TypeInfoFactory.stringTypeInfo, varchar20, true);
+    implicit(varchar20, varchar10, true);
+
+    implicit(TypeInfoFactory.intTypeInfo, varchar10, true);
+    implicit(TypeInfoFactory.intTypeInfo, TypeInfoFactory.stringTypeInfo, true);
   }
 
   private static List<Method> getMethods(Class<?> udfClass, String methodName) {
@@ -136,8 +161,8 @@ public class TestFunctionRegistry extend
     }
     assert(!throwException);
     assertEquals(2, result.getParameterTypes().length);
-    assertEquals(result.getParameterTypes()[0], a);
-    assertEquals(result.getParameterTypes()[1], b);
+    assertEquals(a, result.getParameterTypes()[0]);
+    assertEquals(b, result.getParameterTypes()[1]);
   }
 
   public void testGetMethodInternal() {
@@ -166,12 +191,15 @@ public class TestFunctionRegistry extend
     verify(TestUDF.class, "one", TypeInfoFactory.intTypeInfo, TypeInfoFactory.intTypeInfo,
         IntWritable.class, IntWritable.class, false);
 
+    // Passing varchar arguments should prefer the version of evaluate() with Text args.
+    verify(TestUDF.class, "same", varchar5, varchar10, Text.class, Text.class, false);
+
     verify(TestUDF.class, "mismatch", TypeInfoFactory.voidTypeInfo, TypeInfoFactory.intTypeInfo,
         null, null, true);
   }
 
   private void common(TypeInfo a, TypeInfo b, TypeInfo result) {
-    assertEquals(FunctionRegistry.getCommonClass(a,b), result);
+    assertEquals(result, FunctionRegistry.getCommonClass(a,b));
   }
 
   public void testCommonClass() {
@@ -183,10 +211,13 @@ public class TestFunctionRegistry extend
         TypeInfoFactory.decimalTypeInfo);
     common(TypeInfoFactory.doubleTypeInfo, TypeInfoFactory.stringTypeInfo,
         TypeInfoFactory.stringTypeInfo);
+
+    common(TypeInfoFactory.stringTypeInfo, varchar10, TypeInfoFactory.stringTypeInfo);
+    common(varchar10, TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo);
   }
 
   private void comparison(TypeInfo a, TypeInfo b, TypeInfo result) {
-    assertEquals(FunctionRegistry.getCommonClassForComparison(a,b), result);
+    assertEquals(result, FunctionRegistry.getCommonClassForComparison(a,b));
   }
 
   public void testCommonClassComparison() {
@@ -198,6 +229,61 @@ public class TestFunctionRegistry extend
         TypeInfoFactory.decimalTypeInfo);
     comparison(TypeInfoFactory.doubleTypeInfo, TypeInfoFactory.stringTypeInfo,
         TypeInfoFactory.doubleTypeInfo);
+
+    comparison(TypeInfoFactory.dateTypeInfo, TypeInfoFactory.stringTypeInfo,
+        TypeInfoFactory.stringTypeInfo);
+    comparison(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.dateTypeInfo,
+        TypeInfoFactory.stringTypeInfo);
+
+    comparison(TypeInfoFactory.stringTypeInfo, varchar10, TypeInfoFactory.stringTypeInfo);
+    comparison(varchar10, TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo);
+    comparison(varchar5, varchar10, varchar10);
   }
 
+  /**
+   * Method to print out the comparison/conversion behavior for data types.
+   */
+  public void testPrintTypeCompatibility() {
+    if (true) {
+      return;
+    }
+
+    String[] typeStrings = {
+        "void", "boolean", "tinyint", "smallint", "int", "bigint", "float", "double",
+        "string", "timestamp", "date", "binary", "decimal", "varchar(10)", "varchar(5)",
+    };
+    for (String cat1 : typeStrings) {
+      TypeInfo ti1 = null;
+      try {
+        ti1 = TypeInfoUtils.getTypeInfoFromTypeString(cat1);
+      } catch (Exception err) {
+        System.out.println(err);
+        System.out.println("Unable to get TypeInfo for " + cat1 + ", skipping ...");
+        continue;
+      }
+
+      for (String cat2 : typeStrings) {
+        TypeInfo commonClass = null;
+        boolean implicitConvertable = false;
+        try {
+          TypeInfo ti2 = TypeInfoUtils.getTypeInfoFromTypeString(cat2);
+          try {
+            commonClass = FunctionRegistry.getCommonClassForComparison(ti1, ti2);
+            //implicitConvertable = FunctionRegistry.implicitConvertable(ti1, ti2);
+          } catch (Exception err) {
+            System.out.println("Failed to get common class for " + ti1 + ", " + ti2 + ": " + err);
+            err.printStackTrace();
+            //System.out.println("Unable to get TypeInfo for " + cat2 + ", skipping ...");
+          }
+          System.out.println(cat1 + " - " + cat2 + ": " + commonClass);
+          //System.out.println(cat1 + " - " + cat2 + ": " + implicitConvertable);
+        } catch (Exception err) {
+          System.out.println(err);
+          System.out.println("Unable to get TypeInfo for " + cat2 + ", skipping ...");
+          continue;
+        }
+      }
+    }
+  }
 
   private void unionAll(TypeInfo a, TypeInfo b, TypeInfo result) {
@@ -213,11 +299,26 @@ public class TestFunctionRegistry extend
       TypeInfoFactory.decimalTypeInfo);
     unionAll(TypeInfoFactory.doubleTypeInfo, TypeInfoFactory.stringTypeInfo,
         TypeInfoFactory.stringTypeInfo);
+
+    unionAll(varchar5, varchar10, varchar10);
+    unionAll(varchar10, varchar5, varchar10);
+    unionAll(varchar10, TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo);
+    unionAll(TypeInfoFactory.stringTypeInfo, varchar10, TypeInfoFactory.stringTypeInfo);
   }
 
   public void testGetTypeInfoForPrimitiveCategory() {
+    // varchar should take string length into account.
+    // varchar(5), varchar(10) => varchar(10)
+    assertEquals(varchar10, FunctionRegistry.getTypeInfoForPrimitiveCategory(
+        (PrimitiveTypeInfo) varchar5, (PrimitiveTypeInfo) varchar10, PrimitiveCategory.VARCHAR));
+    assertEquals(varchar10, FunctionRegistry.getTypeInfoForPrimitiveCategory(
+        (PrimitiveTypeInfo) varchar10, (PrimitiveTypeInfo) varchar5, PrimitiveCategory.VARCHAR));
+
+    // non-qualified types should simply return the TypeInfo associated with that type
     assertEquals(TypeInfoFactory.stringTypeInfo, FunctionRegistry.getTypeInfoForPrimitiveCategory(
+        (PrimitiveTypeInfo) varchar10, (PrimitiveTypeInfo) TypeInfoFactory.stringTypeInfo,
+        PrimitiveCategory.STRING));
+    assertEquals(TypeInfoFactory.stringTypeInfo, FunctionRegistry.getTypeInfoForPrimitiveCategory(
         (PrimitiveTypeInfo) TypeInfoFactory.stringTypeInfo,
         (PrimitiveTypeInfo) TypeInfoFactory.stringTypeInfo, PrimitiveCategory.STRING));

Added: hive/trunk/ql/src/test/queries/clientnegative/invalid_varchar_length_1.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientnegative/invalid_varchar_length_1.q?rev=1523463&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientnegative/invalid_varchar_length_1.q (added)
+++ hive/trunk/ql/src/test/queries/clientnegative/invalid_varchar_length_1.q Sun Sep 15 17:23:53 2013
@@ -0,0 +1,2 @@
+drop table if exists invalid_varchar_length_1;
+create table invalid_varchar_length_1 (c1 varchar(1000000));

Added: hive/trunk/ql/src/test/queries/clientnegative/invalid_varchar_length_2.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientnegative/invalid_varchar_length_2.q?rev=1523463&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientnegative/invalid_varchar_length_2.q (added)
+++ hive/trunk/ql/src/test/queries/clientnegative/invalid_varchar_length_2.q Sun Sep 15 17:23:53 2013
@@ -0,0 +1 @@
+select cast(value as varchar(100000)) from src limit 1;
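The two negative tests above pin down the upper bound on a declared varchar length. A hedged sketch of the bounds check they imply; the 65535 limit is an assumed value for HiveVarchar.MAX_VARCHAR_LENGTH, not something shown in these hunks:

    // Sketch: validate a declared varchar length the way these negative tests expect.
    public class VarcharLengthCheckDemo {
      // Assumed to mirror HiveVarchar.MAX_VARCHAR_LENGTH; not shown in this diff.
      static final int MAX_VARCHAR_LENGTH = 65535;

      static void validateLength(int length) {
        if (length < 1 || length > MAX_VARCHAR_LENGTH) {
          throw new IllegalArgumentException("Invalid varchar length: " + length);
        }
      }

      public static void main(String[] args) {
        validateLength(10);       // fine: varchar(10)
        validateLength(1000000);  // throws, like invalid_varchar_length_1.q
      }
    }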