hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From hashut...@apache.org
Subject svn commit: r1520413 - in /hive/trunk: ql/src/java/org/apache/hadoop/hive/ql/exec/ ql/src/test/org/apache/hadoop/hive/ql/exec/ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/ serde/src/test/org/apache/hadoop/hive/serde2/objectin...
Date Thu, 05 Sep 2013 20:32:58 GMT
Author: hashutosh
Date: Thu Sep  5 20:32:58 2013
New Revision: 1520413

URL: http://svn.apache.org/r1520413
Log:
HIVE-5203 : FunctionRegistry.getMethodInternal() should prefer method arguments with closer
affinity to the original argument types (Jason Dere via Ashutosh Chauhan)

Added:
    hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/primitive/
    hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/primitive/TestPrimitiveObjectInspectorUtils.java
Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java?rev=1520413&r1=1520412&r2=1520413&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java Thu Sep  5 20:32:58 2013
@@ -149,8 +149,11 @@ import org.apache.hadoop.hive.ql.udf.xml
 import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping;
 import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
@@ -956,6 +959,59 @@ public final class FunctionRegistry {
   }
 
   /**
+   * Given a set of candidate methods and list of argument types, try to
+   * select the best candidate based on how close the passed argument types are
+   * to the candidate argument types.
+   * For a varchar argument, we would prefer evaluate(string) over evaluate(double).
+   * @param udfMethods  list of candidate methods
+   * @param argumentsPassed list of argument types to match to the candidate methods
+   */
+  static void filterMethodsByTypeAffinity(List<Method> udfMethods, List<TypeInfo> argumentsPassed) {
+    if (udfMethods.size() > 1) {
+      // Prefer methods with a closer signature based on the primitive grouping of each argument.
+      // Score each method based on its similarity to the passed argument types.
+      int currentScore = 0;
+      int bestMatchScore = 0;
+      Method bestMatch = null;
+      for (Method m: udfMethods) {
+        currentScore = 0;
+        List<TypeInfo> argumentsAccepted =
+            TypeInfoUtils.getParameterTypeInfos(m, argumentsPassed.size());
+        Iterator<TypeInfo> argsPassedIter = argumentsPassed.iterator();
+        for (TypeInfo acceptedType : argumentsAccepted) {
+          // Check the affinity of the argument passed in with the accepted argument,
+          // based on the PrimitiveGrouping
+          TypeInfo passedType = argsPassedIter.next();
+          if (acceptedType.getCategory() == Category.PRIMITIVE
+              && passedType.getCategory() == Category.PRIMITIVE) {
+            PrimitiveGrouping acceptedPg = PrimitiveObjectInspectorUtils.getPrimitiveGrouping(
+                ((PrimitiveTypeInfo) acceptedType).getPrimitiveCategory());
+            PrimitiveGrouping passedPg = PrimitiveObjectInspectorUtils.getPrimitiveGrouping(
+                ((PrimitiveTypeInfo) passedType).getPrimitiveCategory());
+            if (acceptedPg == passedPg) {
+              // The passed argument matches somewhat closely with an accepted argument
+              ++currentScore;
+            }
+          }
+        }
+        // Check if the score for this method is any better relative to others
+        if (currentScore > bestMatchScore) {
+          bestMatchScore = currentScore;
+          bestMatch = m;
+        } else if (currentScore == bestMatchScore) {
+          bestMatch = null; // no longer a best match if more than one.
+        }
+      }
+
+      if (bestMatch != null) {
+        // Found a best match during this processing, use it.
+        udfMethods.clear();
+        udfMethods.add(bestMatch);
+      }
+    }
+  }
+
+  /**
    * Gets the closest matching method corresponding to the argument list from a
    * list of methods.
    *
@@ -1025,6 +1081,13 @@ public final class FunctionRegistry {
       // No matching methods found
       throw new NoMatchingMethodException(udfClass, argumentsPassed, mlist);
     }
+
+    if (udfMethods.size() > 1) {
+      // First try selecting methods based on the type affinity of the arguments passed
+      // to the candidate method arguments.
+      filterMethodsByTypeAffinity(udfMethods, argumentsPassed);
+    }
+
     if (udfMethods.size() > 1) {
 
       // if the only difference is numeric types, pick the method

Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java?rev=1520413&r1=1520412&r2=1520413&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java Thu Sep  5 20:32:58 2013
@@ -18,7 +18,9 @@
 
 package org.apache.hadoop.hive.ql.exec;
 
+import java.lang.reflect.Type;
 import java.lang.reflect.Method;
+import java.util.ArrayList;
 import java.util.LinkedList;
 import java.util.List;
 
@@ -33,6 +35,7 @@ import org.apache.hadoop.hive.serde2.typ
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.Text;
 
 public class TestFunctionRegistry extends TestCase {
 
@@ -45,6 +48,11 @@ public class TestFunctionRegistry extend
     public void mismatch(DateWritable x, HiveDecimalWritable y) {}
     public void mismatch(TimestampWritable x, HiveDecimalWritable y) {}
     public void mismatch(BytesWritable x, DoubleWritable y) {}
+    public void typeaffinity1(DateWritable x) {}
+    public void typeaffinity1(DoubleWritable x) {};
+    public void typeaffinity1(Text x) {}
+    public void typeaffinity2(IntWritable x) {}
+    public void typeaffinity2(DoubleWritable x) {}
   }
 
   @Override
@@ -64,6 +72,52 @@ public class TestFunctionRegistry extend
     implicit(TypeInfoFactory.timestampTypeInfo, TypeInfoFactory.decimalTypeInfo, false);
   }
 
+  private static List<Method> getMethods(Class<?> udfClass, String methodName) {
+    List<Method> mlist = new ArrayList<Method>();
+
+    for (Method m : udfClass.getMethods()) {
+      if (m.getName().equals(methodName)) {
+        mlist.add(m);
+      }
+    }
+    return mlist;
+  }
+
+  private void typeAffinity(String methodName, TypeInfo inputType,
+      int expectedNumFoundMethods, Class expectedFoundType) {
+    List<Method> mlist = getMethods(TestUDF.class, methodName);
+    assertEquals(true, 1 < mlist.size());
+    List<TypeInfo> inputTypes = new ArrayList<TypeInfo>();
+    inputTypes.add(inputType);
+
+    // narrow down the possible choices based on type affinity
+    FunctionRegistry.filterMethodsByTypeAffinity(mlist, inputTypes);
+    assertEquals(expectedNumFoundMethods, mlist.size());
+    if (expectedNumFoundMethods == 1) {
+      assertEquals(expectedFoundType, mlist.get(0).getParameterTypes()[0]);
+    }
+  }
+
+  public void testTypeAffinity() {
+    // Prefer numeric type arguments over other method signatures
+    typeAffinity("typeaffinity1", TypeInfoFactory.shortTypeInfo, 1, DoubleWritable.class);
+    typeAffinity("typeaffinity1", TypeInfoFactory.intTypeInfo, 1, DoubleWritable.class);
+    typeAffinity("typeaffinity1", TypeInfoFactory.floatTypeInfo, 1, DoubleWritable.class);
+
+    // Prefer date type arguments over other method signatures
+    typeAffinity("typeaffinity1", TypeInfoFactory.dateTypeInfo, 1, DateWritable.class);
+    typeAffinity("typeaffinity1", TypeInfoFactory.timestampTypeInfo, 1, DateWritable.class);
+
+    // String type affinity
+    typeAffinity("typeaffinity1", TypeInfoFactory.stringTypeInfo, 1, Text.class);
+
+    // Type affinity does not help when multiple methods have the same type affinity.
+    typeAffinity("typeaffinity2", TypeInfoFactory.shortTypeInfo, 2, null);
+
+    // Type affinity does not help when type affinity does not match input args
+    typeAffinity("typeaffinity2", TypeInfoFactory.dateTypeInfo, 2, null);
+  }
+
   private void verify(Class udf, String name, TypeInfo ta, TypeInfo tb,
                       Class a, Class b, boolean throwException) {
     List<TypeInfo> args = new LinkedList<TypeInfo>();

Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java?rev=1520413&r1=1520412&r2=1520413&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java Thu Sep  5 20:32:58 2013
@@ -952,6 +952,44 @@ public final class PrimitiveObjectInspec
     return t == null ? null : t.primitiveJavaClass;
   }
 
+  /**
+   * Provide a general grouping for each primitive data type.
+   */
+  public static enum PrimitiveGrouping {
+    NUMERIC_GROUP, STRING_GROUP, BOOLEAN_GROUP, DATE_GROUP, BINARY_GROUP, UNKNOWN_GROUP
+  };
+
+  /**
+   * Based on the PrimitiveCategory of a type, return the PrimitiveGrouping
+   * that the PrimitiveCategory belongs to (numeric, string, date, etc).
+   * @param primitiveCategory Primitive category of the type
+   * @return PrimitiveGrouping corresponding to the PrimitiveCategory,
+   *         or UNKNOWN_GROUP if the type does not match to a grouping.
+   */
+  public static PrimitiveGrouping getPrimitiveGrouping(PrimitiveCategory primitiveCategory) {
+    switch (primitiveCategory) {
+      case BYTE:
+      case SHORT:
+      case INT:
+      case LONG:
+      case FLOAT:
+      case DOUBLE:
+      case DECIMAL:
+        return PrimitiveGrouping.NUMERIC_GROUP;
+      case STRING:
+        return PrimitiveGrouping.STRING_GROUP;
+      case BOOLEAN:
+        return PrimitiveGrouping.BOOLEAN_GROUP;
+      case TIMESTAMP:
+      case DATE:
+        return PrimitiveGrouping.DATE_GROUP;
+      case BINARY:
+        return PrimitiveGrouping.BINARY_GROUP;
+      default:
+        return PrimitiveGrouping.UNKNOWN_GROUP;
+    }
+  }
+
   private PrimitiveObjectInspectorUtils() {
     // prevent instantiation
   }

Added: hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/primitive/TestPrimitiveObjectInspectorUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/primitive/TestPrimitiveObjectInspectorUtils.java?rev=1520413&view=auto
==============================================================================
--- hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/primitive/TestPrimitiveObjectInspectorUtils.java (added)
+++ hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/primitive/TestPrimitiveObjectInspectorUtils.java Thu Sep  5 20:32:58 2013
@@ -0,0 +1,45 @@
+package org.apache.hadoop.hive.serde2.objectinspector.primitive;
+
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping;
+
+import junit.framework.TestCase;
+
+public class TestPrimitiveObjectInspectorUtils extends TestCase {
+
+  public void testGetPrimitiveGrouping() {
+    assertEquals(PrimitiveGrouping.NUMERIC_GROUP,
+        PrimitiveObjectInspectorUtils.getPrimitiveGrouping(PrimitiveCategory.BYTE));
+    assertEquals(PrimitiveGrouping.NUMERIC_GROUP,
+        PrimitiveObjectInspectorUtils.getPrimitiveGrouping(PrimitiveCategory.SHORT));
+    assertEquals(PrimitiveGrouping.NUMERIC_GROUP,
+        PrimitiveObjectInspectorUtils.getPrimitiveGrouping(PrimitiveCategory.INT));
+    assertEquals(PrimitiveGrouping.NUMERIC_GROUP,
+        PrimitiveObjectInspectorUtils.getPrimitiveGrouping(PrimitiveCategory.LONG));
+    assertEquals(PrimitiveGrouping.NUMERIC_GROUP,
+        PrimitiveObjectInspectorUtils.getPrimitiveGrouping(PrimitiveCategory.FLOAT));
+    assertEquals(PrimitiveGrouping.NUMERIC_GROUP,
+        PrimitiveObjectInspectorUtils.getPrimitiveGrouping(PrimitiveCategory.DOUBLE));
+    assertEquals(PrimitiveGrouping.NUMERIC_GROUP,
+        PrimitiveObjectInspectorUtils.getPrimitiveGrouping(PrimitiveCategory.DECIMAL));
+
+    assertEquals(PrimitiveGrouping.STRING_GROUP,
+        PrimitiveObjectInspectorUtils.getPrimitiveGrouping(PrimitiveCategory.STRING));
+
+    assertEquals(PrimitiveGrouping.DATE_GROUP,
+        PrimitiveObjectInspectorUtils.getPrimitiveGrouping(PrimitiveCategory.DATE));
+    assertEquals(PrimitiveGrouping.DATE_GROUP,
+        PrimitiveObjectInspectorUtils.getPrimitiveGrouping(PrimitiveCategory.TIMESTAMP));
+
+    assertEquals(PrimitiveGrouping.BOOLEAN_GROUP,
+        PrimitiveObjectInspectorUtils.getPrimitiveGrouping(PrimitiveCategory.BOOLEAN));
+
+    assertEquals(PrimitiveGrouping.BINARY_GROUP,
+        PrimitiveObjectInspectorUtils.getPrimitiveGrouping(PrimitiveCategory.BINARY));
+
+    assertEquals(PrimitiveGrouping.UNKNOWN_GROUP,
+        PrimitiveObjectInspectorUtils.getPrimitiveGrouping(PrimitiveCategory.UNKNOWN));
+    assertEquals(PrimitiveGrouping.UNKNOWN_GROUP,
+        PrimitiveObjectInspectorUtils.getPrimitiveGrouping(PrimitiveCategory.VOID));
+  }
+}



Mime
View raw message