hive-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Navis류승우 <navis....@nexr.com>
Subject Re: A GenericUDF Function to Extract a Field From an Array of Structs
Date Wed, 03 Apr 2013 01:07:49 GMT
Try changing the code in the evaluate method to something like this:

// Resolve the deferred argument and the struct field once, outside the loop,
// instead of once per element.
Object list = arguments[0].get();
StructField field = structOI.getStructFieldRef("productCategory");

// Read each value through the object inspectors rather than casting it to a
// concrete class such as LazyString.
for (int i = 0; i < numElements; i++) {
      Object element = listOI.getListElement(list, i);
      Object product = structOI.getStructFieldData(element, field);
      ret.add(((PrimitiveObjectInspector) prodCatOI)
              .getPrimitiveWritableObject(product));
}

2013/3/29 Peter Chu <pete.chu@outlook.com>:
> Sorry, the test should be following (changed extract_shas to
> extract_product_category):
>
> import org.apache.hadoop.hive.ql.metadata.HiveException;
> import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
> import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject;
> import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
> import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
> import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
> import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
> import
> org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
> import org.testng.annotations.Test;
>
> import java.util.ArrayList;
> import java.util.List;
>
> public class TestGenericUDFExtractProductCategory
> {
>     ArrayList<String> fieldNames = new ArrayList<String>();
>     ArrayList<ObjectInspector> fieldObjectInspectors = new
> ArrayList<ObjectInspector>();
>
>     @Test
>     public void simpleTest()
>         throws Exception
>     {
>         ListObjectInspector firstInspector = new MyListObjectInspector();
>
>         ArrayList test = new ArrayList();
>         test.add("test");
>
>         ArrayList test2 = new ArrayList();
>         test2.add(test);
>
>         StructObjectInspector soi =
> ObjectInspectorFactory.getStandardStructObjectInspector(test, test2);
>
>         fieldNames.add("productCategory");
>
> fieldObjectInspectors.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector);
>
>         GenericUDF.DeferredObject firstDeferredObject = new
> MyDeferredObject(test2);
>
>         GenericUDF extract_product_category = new
> GenericUDFExtractProductCategory();
>
>         extract_product_category.initialize(new
> ObjectInspector[]{firstInspector});
>
>         extract_product_category.evaluate(new
> DeferredObject[]{firstDeferredObject});
>     }
>
>     public class MyDeferredObject implements DeferredObject
>     {
>         private Object value;
>
>         public MyDeferredObject(Object value) {
>             this.value = value;
>         }
>
>         @Override
>         public Object get() throws HiveException
>         {
>             return value;
>         }
>     }
>
>     private class MyListObjectInspector implements ListObjectInspector
>     {
>         @Override
>         public ObjectInspector getListElementObjectInspector()
>         {
>             return
> ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames,
> fieldObjectInspectors);
>         }
>
>         @Override
>         public Object getListElement(Object data, int index)
>         {
>             List myList = (List) data;
>             if (myList == null || index > myList.size()) {
>                 return null;
>             }
>             return myList.get(index);
>         }
>
>         @Override
>         public int getListLength(Object data)
>         {
>             if (data == null) {
>                 return -1;
>             }
>             return ((List) data).size();
>         }
>
>         @Override
>         public List<?> getList(Object data)
>         {
>             return (List) data;
>         }
>
>         @Override
>         public String getTypeName()
>         {
>             return null;  //To change body of implemented methods use File |
> Settings | File Templates.
>         }
>
>         @Override
>         public Category getCategory()
>         {
>             return Category.LIST;
>         }
>     }
> }
>
> ________________________________
> From: pete.chu@outlook.com
> To: user@hive.apache.org
> Subject: A GenericUDF Function to Extract a Field From an Array of Structs
> Date: Thu, 28 Mar 2013 14:16:33 -0700
>
> I am trying to write a GenericUDF that, for each record, collects the values
> of a specific struct field from every element of an array and returns them
> as an array as well.
>
> I wrote the UDF (as below), and it seems to work but:
>
> 1) It does not work when I run it against an external table, although it
> works fine on a managed table. Any idea why?
>
> 2) I am having a tough time writing a test for this.  I have attached the
> test I have so far, and it does not work: I always get either
> 'java.util.ArrayList cannot be cast to
> org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector' or
> 'cannot cast String to LazyString'.
> My question is: how do I supply a list of structs to the evaluate method?
>
> Any help will be greatly appreciated.
>
> Thanks,
> Peter
>
> The table:
>
> CREATE EXTERNAL TABLE FOO (
>   TS string,
>   customerId string,
>   products array< struct<productCategory:string> >
> )
> PARTITIONED BY (ds string)
> ROW FORMAT SERDE 'some.serde'
> WITH SERDEPROPERTIES ('error.ignore'='true')
> LOCATION 'some_locations'
> ;
>
> A sample record holds:
> 1340321132000, 'some_company',
> [{"productCategory":"footwear"},{"productCategory":"eyewear"}]
>
> This is my code:
>
> import org.apache.hadoop.hive.ql.exec.Description;
> import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
> import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
> import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
> import org.apache.hadoop.hive.ql.metadata.HiveException;
> import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
> import org.apache.hadoop.hive.serde2.lazy.LazyString;
> import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
> import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
> import
> org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
> import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
> import
> org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
> import org.apache.hadoop.hive.serde2.objectinspector.StructField;
> import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
> import
> org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
> import
> org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
> import org.apache.hadoop.io.Text;
>
> import java.util.ArrayList;
>
> @Description(name = "extract_product_category",
>         value = "_FUNC_( array< struct<productCategory:string> > ) - Collect
> all product category field values inside an array of struct(s), and return
> the results in an array<string>",
>         extended = "Example:\n SELECT
> _FUNC_(array_of_structs_with_product_category_field)")
> public class GenericUDFExtractProductCategory
>         extends GenericUDF
> {
>     private ArrayList ret;
>
>     private ListObjectInspector listOI;
>     private StructObjectInspector structOI;
>     private ObjectInspector prodCatOI;
>
>     @Override
>     public ObjectInspector initialize(ObjectInspector[] args)
>             throws UDFArgumentException
>     {
>         if (args.length != 1) {
>             throw new UDFArgumentLengthException("The function
> extract_product_category() requires exactly one argument.");
>         }
>
>         if (args[0].getCategory() != Category.LIST) {
>             throw new UDFArgumentTypeException(0, "Type array<struct> is
> expected to be the argument for extract_product_category but " +
> args[0].getTypeName() + " is found instead");
>         }
>
>         listOI = ((ListObjectInspector) args[0]);
>         structOI = ((StructObjectInspector)
> listOI.getListElementObjectInspector());
>
>         if (structOI.getAllStructFieldRefs().size() != 1) {
>             throw new UDFArgumentTypeException(0, "Incorrect number of
> fields in the struct, should be one");
>         }
>
>         StructField productCategoryField =
> structOI.getStructFieldRef("productCategory");
>         //If not, throw exception
>         if (productCategoryField == null) {
>             throw new UDFArgumentTypeException(0, "NO \"productCategory\"
> field in input structure");
>         }
>
>         //Are they of the correct types?
>         //We store these object inspectors for use in the evaluate() method
>         prodCatOI = productCategoryField.getFieldObjectInspector();
>
>         //First are they primitives
>         if (prodCatOI.getCategory() != Category.PRIMITIVE) {
>             throw new UDFArgumentTypeException(0, "productCategory field
> must be of string type");
>         }
>
>         //Are they of the correct primitives?
>         if (((PrimitiveObjectInspector)prodCatOI).getPrimitiveCategory() !=
> PrimitiveObjectInspector.PrimitiveCategory.STRING) {
>             throw new UDFArgumentTypeException(0, "productCategory field
> must be of string type");
>         }
>
>         ret = new ArrayList();
>
>         return
> ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableStringObjectInspector);
>     }
>
>     @Override
>     public ArrayList evaluate(DeferredObject[] arguments)
>             throws HiveException
>     {
>         ret.clear();
>
>         if (arguments.length != 1) {
>             return null;
>         }
>
>         if (arguments[0].get() == null) {
>         return null;
>         }
>
>         int numElements = listOI.getListLength(arguments[0].get());
>
>         for (int i = 0; i < numElements; i++) {
>             LazyString prodCatDataObject = (LazyString)
> (structOI.getStructFieldData(listOI.getListElement(arguments[0].get(), i),
> structOI.getStructFieldRef("productCategory")));
>             Text productCategoryValue = ((StringObjectInspector)
> prodCatOI).getPrimitiveWritableObject(prodCatDataObject);
>             ret.add(productCategoryValue);
>         }
>         return ret;
>     }
>
>     @Override
>     public String getDisplayString(String[] strings)
>     {
>         assert (strings.length > 0);
>         StringBuilder sb = new StringBuilder();
>         sb.append("extract_product_category(");
>         sb.append(strings[0]);
>         sb.append(")");
>         return sb.toString();
>     }
> }
>
>
> My Test:
>
> import org.apache.hadoop.hive.ql.metadata.HiveException;
> import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
> import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject;
> import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
> import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
> import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
> import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
> import
> org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
> import org.testng.annotations.Test;
>
> import java.util.ArrayList;
> import java.util.List;
>
> public class TestGenericUDFExtractShas
> {
>     ArrayList<String> fieldNames = new ArrayList<String>();
>     ArrayList<ObjectInspector> fieldObjectInspectors = new
> ArrayList<ObjectInspector>();
>
>     @Test
>     public void simpleTest()
>         throws Exception
>     {
>         ListObjectInspector firstInspector = new MyListObjectInspector();
>
>         ArrayList test = new ArrayList();
>         test.add("test");
>
>         ArrayList test2 = new ArrayList();
>         test2.add(test);
>
>         StructObjectInspector soi =
> ObjectInspectorFactory.getStandardStructObjectInspector(test, test2);
>
>         fieldNames.add("productCategory");
>
> fieldObjectInspectors.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector);
>
>         GenericUDF.DeferredObject firstDeferredObject = new
> MyDeferredObject(test2);
>
>         GenericUDF extract_shas = new GenericUDFExtractShas();
>
>         extract_shas.initialize(new ObjectInspector[]{firstInspector});
>
>         extract_shas.evaluate(new DeferredObject[]{firstDeferredObject});
>     }
>
>     public class MyDeferredObject implements DeferredObject
>     {
>         private Object value;
>
>         public MyDeferredObject(Object value) {
>             this.value = value;
>         }
>
>         @Override
>         public Object get() throws HiveException
>         {
>             return value;
>         }
>     }
>
>     private class MyListObjectInspector implements ListObjectInspector
>     {
>         @Override
>         public ObjectInspector getListElementObjectInspector()
>         {
>             return
> ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames,
> fieldObjectInspectors);
>         }
>
>         @Override
>         public Object getListElement(Object data, int index)
>         {
>             List myList = (List) data;
>             if (myList == null || index > myList.size()) {
>                 return null;
>             }
>             return myList.get(index);
>         }
>
>         @Override
>         public int getListLength(Object data)
>         {
>             if (data == null) {
>                 return -1;
>             }
>             return ((List) data).size();
>         }
>
>         @Override
>         public List<?> getList(Object data)
>         {
>             return (List) data;
>         }
>
>         @Override
>         public String getTypeName()
>         {
>             return null;  //To change body of implemented methods use File |
> Settings | File Templates.
>         }
>
>         @Override
>         public Category getCategory()
>         {
>             return Category.LIST;
>         }
>     }
> }

Mime
View raw message