incubator-hcatalog-commits mailing list archives

From ga...@apache.org
Subject svn commit: r1239731 - in /incubator/hcatalog/trunk: ./ src/java/org/apache/hcatalog/data/ src/test/org/apache/hcatalog/data/
Date Thu, 02 Feb 2012 17:20:42 GMT
Author: gates
Date: Thu Feb  2 17:20:41 2012
New Revision: 1239731

URL: http://svn.apache.org/viewvc?rev=1239731&view=rev
Log:
HCATALOG-204 HCatRecord SerDe

Added:
    incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/HCatRecordObjectInspector.java
    incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/HCatRecordObjectInspectorFactory.java
    incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/HCatRecordSerDe.java
    incubator/hcatalog/trunk/src/test/org/apache/hcatalog/data/TestHCatRecordSerDe.java
Modified:
    incubator/hcatalog/trunk/CHANGES.txt
    incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/DefaultHCatRecord.java
    incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/HCatRecord.java

Modified: incubator/hcatalog/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/CHANGES.txt?rev=1239731&r1=1239730&r2=1239731&view=diff
==============================================================================
--- incubator/hcatalog/trunk/CHANGES.txt (original)
+++ incubator/hcatalog/trunk/CHANGES.txt Thu Feb  2 17:20:41 2012
@@ -23,6 +23,20 @@ Trunk (unreleased changes)
   INCOMPATIBLE CHANGES
 
   NEW FEATURES
+
+  IMPROVEMENTS
+
+  OPTIMIZATIONS
+
+  BUG FIXES
+
+Release 0.3.0 - Unreleased
+
+  INCOMPATIBLE CHANGES
+
+  NEW FEATURES
+  HCAT-204. HCatRecord SerDe (khorgath via gates)
+
    HCAT-192. HBase output storage driver integration with zookeeper based revision manager (toffer via hashutosh)
 
    HCAT-191. HBase input storage driver integration with zookeeper based revision manager. (avandana via toffer)
@@ -201,7 +215,7 @@ Trunk (unreleased changes)
 
   HCAT-115. Superfluous warning on fresh install (ctdean via khorgath)
 
-Release 0.2.0 - Unreleased
+Release 0.2.0 - October 2, 2011
 
   INCOMPATIBLE CHANGES
 
@@ -210,7 +224,7 @@ Release 0.2.0 - Unreleased
  
     HCAT-46. Send a message on a message bus when a partition is marked done (hashutosh via macyang)
 
-    HCAT-3. Send a message on a message bus when events occur in Metastore (hashutosh)
+    HCAT-2. Send a message on a message bus when events occur in Metastore (hashutosh)
   
     HCAT-16. Add InputFormat/OutputFormat for handling exported tables/partitions.
     (Krishna Kumar via macyang)

Modified: incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/DefaultHCatRecord.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/DefaultHCatRecord.java?rev=1239731&r1=1239730&r2=1239731&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/DefaultHCatRecord.java (original)
+++ incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/DefaultHCatRecord.java Thu Feb  2 17:20:41 2012
@@ -29,7 +29,7 @@ import org.apache.hcatalog.data.schema.H
 
 public class DefaultHCatRecord extends HCatRecord {
 
-    private final List<Object> contents;
+    private List<Object> contents;
 
     public DefaultHCatRecord(){
         contents = new ArrayList<Object>();
@@ -150,4 +150,9 @@ public class DefaultHCatRecord extends H
         set(recordSchema.getPosition(fieldName),value);
     }
 
+    @Override
+    public void copy(HCatRecord r) throws HCatException {
+      this.contents = r.getAll();
+    }
+
 }
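
For illustration, here is a minimal sketch (not part of this patch) of how the new copy() method can be used. Per the override above, copy() re-points this record at the other record's field list rather than cloning the individual values, i.e. it is a shallow copy. The class name and record contents below are made up.

import java.util.Arrays;

import org.apache.hcatalog.data.DefaultHCatRecord;
import org.apache.hcatalog.data.HCatRecord;

public class HCatRecordCopyExample {
  public static void main(String[] args) throws Exception {
    HCatRecord source = new DefaultHCatRecord(Arrays.<Object>asList(1, "alice"));
    HCatRecord target = new DefaultHCatRecord();

    // Shallow copy: target now shares source's contents list.
    target.copy(source);

    System.out.println(target.get(1)); // prints "alice"
  }
}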

Modified: incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/HCatRecord.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/HCatRecord.java?rev=1239731&r1=1239730&r2=1239731&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/HCatRecord.java (original)
+++ incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/HCatRecord.java Thu Feb  2 17:20:41 2012
@@ -35,6 +35,7 @@ public abstract class HCatRecord impleme
     public abstract Object get(String fieldName, HCatSchema recordSchema) throws HCatException;
    public abstract void set(String fieldName, HCatSchema recordSchema, Object value ) throws HCatException;
     public abstract void remove(int idx) throws HCatException;
+    public abstract void copy(HCatRecord r) throws HCatException;
 
     protected Object get(String fieldName, HCatSchema recordSchema, Class clazz) throws HCatException{
        // TODO : if needed, verify that recordschema entry for fieldname matches appropriate type.

Added: incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/HCatRecordObjectInspector.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/HCatRecordObjectInspector.java?rev=1239731&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/HCatRecordObjectInspector.java (added)
+++ incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/HCatRecordObjectInspector.java Thu Feb  2 17:20:41 2012
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hcatalog.data;
+
+import java.util.List;
+
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+
+public class HCatRecordObjectInspector extends StandardStructObjectInspector {
+
+  protected HCatRecordObjectInspector(List<String> structFieldNames,
+      List<ObjectInspector> structFieldObjectInspectors) {
+    super(structFieldNames, structFieldObjectInspectors);
+  }
+  
+  @Override
+  public Object getStructFieldData(Object data, StructField fieldRef) {
+    if (data == null){
+      return null;
+    }
+    
+    int fieldID = ((MyField) fieldRef).getFieldID();
+    assert (fieldID >= 0 && fieldID < fields.size());
+    
+    return ((HCatRecord) data).get(fieldID);
+  }
+  
+  @Override
+  public List<Object> getStructFieldsDataAsList(Object o) {
+    return ((HCatRecord) o).getAll();
+  }
+
+}
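
For illustration, a minimal sketch (not part of this patch) of the contract above: getStructFieldData() resolves a StructField to its position and reads that slot of the HCatRecord. The inspector is obtained through the HCatRecordObjectInspectorFactory added below in this commit; the column names and values are made up.

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hcatalog.data.DefaultHCatRecord;
import org.apache.hcatalog.data.HCatRecord;
import org.apache.hcatalog.data.HCatRecordObjectInspector;
import org.apache.hcatalog.data.HCatRecordObjectInspectorFactory;

public class HCatRecordInspectorExample {
  public static void main(String[] args) throws Exception {
    // Build a two-column struct type the same way HCatRecordSerDe does.
    List<String> names = Arrays.asList("id", "name");
    List<TypeInfo> types = TypeInfoUtils.getTypeInfosFromTypeString("int,string");
    StructTypeInfo rowType = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(names, types);

    HCatRecordObjectInspector oi =
        HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector(rowType);

    HCatRecord r = new DefaultHCatRecord(Arrays.<Object>asList(42, "hcat"));

    // Equivalent to r.get(1): the field reference carries its position.
    StructField nameField = oi.getStructFieldRef("name");
    System.out.println(oi.getStructFieldData(r, nameField)); // hcat
    System.out.println(oi.getStructFieldsDataAsList(r));     // [42, hcat]
  }
}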

Added: incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/HCatRecordObjectInspectorFactory.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/HCatRecordObjectInspectorFactory.java?rev=1239731&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/HCatRecordObjectInspectorFactory.java (added)
+++ incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/HCatRecordObjectInspectorFactory.java Thu Feb  2 17:20:41 2012
@@ -0,0 +1,131 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hcatalog.data;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+
+/**
+ * ObjectInspectorFactory for HCatRecordObjectInspectors (and associated helper inspectors)
+ */
+public class HCatRecordObjectInspectorFactory {
+  
+  public static final Log LOG = LogFactory
+      .getLog(HCatRecordObjectInspectorFactory.class.getName());
+
+  static HashMap<TypeInfo, HCatRecordObjectInspector> cachedHCatRecordObjectInspectors = 
+      new HashMap<TypeInfo, HCatRecordObjectInspector>();
+  static HashMap<TypeInfo, ObjectInspector> cachedObjectInspectors = 
+      new HashMap<TypeInfo, ObjectInspector>();
+
+  /**
+   * Returns HCatRecordObjectInspector given a StructTypeInfo type definition for the record to look into
+   * @param typeInfo Type definition for the record to look into 
+   * @return appropriate HCatRecordObjectInspector
+   * @throws SerDeException
+   */
+  public static HCatRecordObjectInspector getHCatRecordObjectInspector(
+      StructTypeInfo typeInfo) throws SerDeException {
+    HCatRecordObjectInspector oi = cachedHCatRecordObjectInspectors.get(typeInfo);
+    if (oi == null) {
+      LOG.debug("Got asked for OI for "+typeInfo.getCategory()+"["+typeInfo.getTypeName()+"]");
+
+      switch (typeInfo.getCategory()) {
+      case STRUCT :
+        StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
+        List<String> fieldNames = structTypeInfo.getAllStructFieldNames();
+        List<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
+        List<ObjectInspector> fieldObjectInspectors = new ArrayList<ObjectInspector>(fieldTypeInfos.size());
+        for (int i = 0; i < fieldTypeInfos.size(); i++) {
+          fieldObjectInspectors.add(getStandardObjectInspectorFromTypeInfo(fieldTypeInfos.get(i)));
+        }
+        oi = new HCatRecordObjectInspector(fieldNames,fieldObjectInspectors);
+        break;
+      default: 
+        // The only type expected here is STRUCT, which maps to HCatRecord;
+        // anything else is an error, so throw a SerDeException rather than
+        // returning a null inspector.
+        throw new SerDeException("TypeInfo ["+typeInfo.getTypeName()
+            + "] was not of struct type - HCatRecord expected struct type, got ["
+            + typeInfo.getCategory().toString()+"]");
+      }
+      cachedHCatRecordObjectInspectors.put(typeInfo, oi);
+    }
+    return oi;
+  }
+
+  public static ObjectInspector getStandardObjectInspectorFromTypeInfo(TypeInfo typeInfo) {
+    
+
+    ObjectInspector oi = cachedObjectInspectors.get(typeInfo);
+    if (oi == null){
+      LOG.debug("Got asked for OI for "+typeInfo.getCategory()+"["+typeInfo.getTypeName()+"]");
+
+      switch (typeInfo.getCategory()) {
+      case PRIMITIVE:
+        oi = PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(
+            ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory());
+        break;
+      case STRUCT:
+        StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
+        List<String> fieldNames = structTypeInfo.getAllStructFieldNames();
+        List<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
+        List<ObjectInspector> fieldObjectInspectors = 
+            new ArrayList<ObjectInspector>(fieldTypeInfos.size());
+        for (int i = 0; i < fieldTypeInfos.size(); i++) {
+          fieldObjectInspectors.add(getStandardObjectInspectorFromTypeInfo(fieldTypeInfos.get(i)));
+        }
+        oi = ObjectInspectorFactory.getStandardStructObjectInspector(
+            fieldNames, fieldObjectInspectors
+            );
+        break;
+      case LIST:
+        ObjectInspector elementObjectInspector = getStandardObjectInspectorFromTypeInfo(
+            ((ListTypeInfo)typeInfo).getListElementTypeInfo());
+        oi = ObjectInspectorFactory.getStandardListObjectInspector(elementObjectInspector);
+        break;
+      case MAP:
+        ObjectInspector keyObjectInspector = getStandardObjectInspectorFromTypeInfo(
+            ((MapTypeInfo)typeInfo).getMapKeyTypeInfo());
+        ObjectInspector valueObjectInspector = getStandardObjectInspectorFromTypeInfo(
+            ((MapTypeInfo)typeInfo).getMapValueTypeInfo());
+        oi = ObjectInspectorFactory.getStandardMapObjectInspector(keyObjectInspector,valueObjectInspector);
+        break;
+      default:
+        oi = null;
+      }
+      cachedObjectInspectors.put(typeInfo, oi);
+    }
+    return oi;
+  }
+  
+  
+}
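
For illustration, a minimal sketch (not part of this patch) of the factory's helper for non-struct types; the type string is made up. It shows that nested types are mapped recursively onto standard Hive inspectors and that lookups are cached per TypeInfo.

import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hcatalog.data.HCatRecordObjectInspectorFactory;

public class StandardInspectorExample {
  public static void main(String[] args) {
    TypeInfo mapType =
        TypeInfoUtils.getTypeInfosFromTypeString("map<string,array<int>>").get(0);

    ObjectInspector oi =
        HCatRecordObjectInspectorFactory.getStandardObjectInspectorFromTypeInfo(mapType);

    // A MAP type comes back as a standard map inspector whose key and value
    // inspectors are, recursively, standard inspectors themselves.
    MapObjectInspector moi = (MapObjectInspector) oi;
    ListObjectInspector valueOi = (ListObjectInspector) moi.getMapValueObjectInspector();
    System.out.println(valueOi.getListElementObjectInspector().getTypeName()); // int

    // Inspectors are cached per TypeInfo, so asking again returns the same instance.
    System.out.println(oi == HCatRecordObjectInspectorFactory
        .getStandardObjectInspectorFromTypeInfo(mapType)); // true
  }
}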

Added: incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/HCatRecordSerDe.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/HCatRecordSerDe.java?rev=1239731&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/HCatRecordSerDe.java (added)
+++ incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/HCatRecordSerDe.java Thu Feb  2 17:20:41 2012
@@ -0,0 +1,277 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hcatalog.data;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.TreeMap;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.serde.Constants;
+import org.apache.hadoop.hive.serde2.SerDe;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.SerDeStats;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.io.Writable;
+import org.apache.hcatalog.common.HCatUtil;
+
+/**
+ * SerDe class for serializing to and from HCatRecord
+ */
+public class HCatRecordSerDe implements SerDe {
+  
+  public static final Log LOG = LogFactory
+      .getLog(HCatRecordSerDe.class.getName());
+
+  public HCatRecordSerDe() throws SerDeException{
+  }
+  
+  private List<String> columnNames;
+  private List<TypeInfo> columnTypes;
+  private StructTypeInfo rowTypeInfo;
+
+  private HCatRecordObjectInspector cachedObjectInspector;
+  
+  @Override
+  public void initialize(Configuration conf, Properties tbl)
+      throws SerDeException {
+
+    if (LOG.isDebugEnabled()){
+      LOG.debug("Initializing HCatRecordSerDe");
+      HCatUtil.logEntrySet(LOG, "props to serde", tbl.entrySet());
+    }
+    
+    // Get column names and types
+    String columnNameProperty = tbl.getProperty(Constants.LIST_COLUMNS);
+    String columnTypeProperty = tbl.getProperty(Constants.LIST_COLUMN_TYPES);
+    
+    // all table column names
+    if (columnNameProperty.length() == 0) {
+      columnNames = new ArrayList<String>();
+    } else {
+      columnNames = Arrays.asList(columnNameProperty.split(","));
+    }
+    
+    // all column types
+    if (columnTypeProperty.length() == 0) {
+      columnTypes = new ArrayList<TypeInfo>();
+    } else {
+      columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
+    }
+
+    if (LOG.isDebugEnabled()){
+      LOG.debug("columns:" + columnNameProperty);
+      for (String s : columnNames){
+        LOG.debug("cn:"+s);
+      }
+      LOG.debug("types: " + columnTypeProperty);
+      for (TypeInfo t : columnTypes){
+        LOG.debug("ct:"+t.getTypeName()+",type:"+t.getCategory());
+      }
+    }
+  
+    
+    assert (columnNames.size() == columnTypes.size());
+    
+    rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
+    
+    cachedObjectInspector = HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector(rowTypeInfo);
+    
+  }
+
+  /**
+   * The purpose of a deserialize method is to turn a data blob,
+   * which is a Writable representation of the data, into an
+   * object that can then be read through the appropriate
+   * ObjectInspector. In this case the incoming Writable is already
+   * an HCatRecord, so there is no extra work to be done here.
+   * Most of the logic resides in the ObjectInspector, which is
+   * responsible for returning values from within the HCatRecord
+   * to Hive when Hive asks for them.
+   */
+  @Override
+  public Object deserialize(Writable data) throws SerDeException {
+    if (!(data instanceof HCatRecord)) {
+      throw new SerDeException(getClass().getName() + ": expects HCatRecord!");
+    }
+
+    return (HCatRecord) data;
+  }
+
+  /**
+   * The purpose of the serialize method is to turn an object
+   * representation, read through a provided ObjectInspector, into a
+   * Writable format that the underlying layer can then write out.
+   *
+   * In this case, Hive calls this method with an object and the
+   * ObjectInspectors it knows for that object, and we use them to
+   * write out an HCatRecord.
+   */
+  @Override
+  public Writable serialize(Object obj, ObjectInspector objInspector)
+      throws SerDeException {
+    if (objInspector.getCategory() != Category.STRUCT) {
+      throw new SerDeException(getClass().toString()
+          + " can only serialize struct types, but we got: "
+          + objInspector.getTypeName());
+    }
+    return new DefaultHCatRecord((List<Object>)serializeStruct(obj,(StructObjectInspector)objInspector));
+  }
+
+  
+  /**
+   * Return the serialized field values of a struct, read from an
+   * underlying object representation via the provided StructObjectInspector.
+   * @param obj underlying object representation
+   * @param soi StructObjectInspector for obj
+   * @return the serialized field values (the contents of an HCatRecord)
+   */
+  private List<?> serializeStruct(Object obj, StructObjectInspector soi)
+      throws SerDeException {
+
+    List<? extends StructField> fields = soi.getAllStructFieldRefs();
+    List<Object> list = soi.getStructFieldsDataAsList(obj);
+
+    List<Object> l = new ArrayList<Object>(fields.size());
+
+    if (fields != null){
+      for (int i = 0; i < fields.size(); i++) {
+
+        // Get the field objectInspector and the field object.
+        ObjectInspector foi = fields.get(i).getFieldObjectInspector();
+        Object f = (list == null ? null : list.get(i));
+        Object res = serializeField(f, foi);
+        l.add(i, res);
+      }
+    }
+    return l;
+  }
+
+  /**
+   * Return underlying Java Object from an object-representation 
+   * that is readable by a provided ObjectInspector.
+   */
+  private Object serializeField(Object field,
+      ObjectInspector fieldObjectInspector) throws SerDeException {
+    Object res = null;
+    if (fieldObjectInspector.getCategory() == Category.PRIMITIVE){
+      res = ((PrimitiveObjectInspector)fieldObjectInspector).getPrimitiveJavaObject(field);
+    } else if (fieldObjectInspector.getCategory() == Category.STRUCT){
+      res = serializeStruct(field,(StructObjectInspector)fieldObjectInspector);
+    } else if (fieldObjectInspector.getCategory() == Category.LIST){
+      res = serializeList(field,(ListObjectInspector)fieldObjectInspector);
+    } else if (fieldObjectInspector.getCategory() == Category.MAP){
+      res = serializeMap(field,(MapObjectInspector)fieldObjectInspector);
+    } else {
+      throw new SerDeException(getClass().toString() 
+          + " does not know what to do with fields of unknown category: "
+          + fieldObjectInspector.getCategory() + " , type: " + fieldObjectInspector.getTypeName());
+    }
+    return res;
+  }
+
+  /**
+   * Helper method to return underlying Java Map from
+   * an object-representation that is readable by a provided
+   * MapObjectInspector
+   */
+  private Map<?,?> serializeMap(Object f, MapObjectInspector moi) throws SerDeException {
+    ObjectInspector koi = moi.getMapKeyObjectInspector();
+    ObjectInspector voi = moi.getMapValueObjectInspector();
+    Map<Object,Object> m = new TreeMap<Object, Object>();
+
+    Map<?, ?> readMap = moi.getMap(f);
+    if (readMap == null) {
+      return null;
+    } else {
+      for (Map.Entry<?, ?> entry: readMap.entrySet()) {
+        m.put(serializeField(entry.getKey(),koi), serializeField(entry.getValue(),voi));
+      }
+    }
+    return m;
+  }
+
+  private List<?> serializeList(Object f, ListObjectInspector loi) throws SerDeException {
+    List l = loi.getList(f);
+    ObjectInspector eloi = loi.getListElementObjectInspector();
+    if (eloi.getCategory() == Category.PRIMITIVE){
+      return l;
+    } else if (eloi.getCategory() == Category.STRUCT){
+      List<List<?>> list = new ArrayList<List<?>>(l.size());
+      for (int i = 0 ; i < l.size() ; i++ ){
+        list.add(serializeStruct(l.get(i), (StructObjectInspector) eloi));
+      }
+      return list;
+    } else if (eloi.getCategory() == Category.LIST){
+      List<List<?>> list = new ArrayList<List<?>>(l.size());
+      for (int i = 0 ; i < l.size() ; i++ ){
+        list.add(serializeList(l.get(i), (ListObjectInspector) eloi));
+      }
+      return list;
+    } else if (eloi.getCategory() == Category.MAP){
+      List<Map<?,?>> list = new ArrayList<Map<?,?>>(l.size());
+      for (int i = 0 ; i < l.size() ; i++ ){
+        list.add(serializeMap(l.get(i), (MapObjectInspector) eloi));
+      }
+      return list;
+    } else {
+      throw new SerDeException(getClass().toString() 
+          + " does not know what to do with fields of unknown category: "
+          + eloi.getCategory() + " , type: " + eloi.getTypeName());
+    }
+  }
+
+  /**
+   * Return an object inspector that can read through the object
+   * that we return from deserialize(), i.e. an ObjectInspector
+   * that can read an HCatRecord, built from the type info supplied
+   * during initialize(). This also means that this method cannot
+   * and should not be called before initialize().
+   */
+  @Override
+  public ObjectInspector getObjectInspector() throws SerDeException {
+    return (ObjectInspector) cachedObjectInspector;
+  }
+
+  @Override
+  public Class<? extends Writable> getSerializedClass() {
+    return HCatRecord.class;
+  }
+
+  @Override
+  public SerDeStats getSerDeStats() {
+    // no support for statistics yet
+    return null;
+  }
+
+}
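
For illustration, a minimal sketch (not part of this patch) of driving the SerDe directly, in the same spirit as the unit test added below but with a simpler, made-up schema: initialize() reads column names and types from the table properties, serialize() rebuilds the row as a DefaultHCatRecord, and deserialize() is a pass-through.

import java.util.Arrays;
import java.util.Properties;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.Constants;
import org.apache.hadoop.io.Writable;
import org.apache.hcatalog.data.DefaultHCatRecord;
import org.apache.hcatalog.data.HCatRecord;
import org.apache.hcatalog.data.HCatRecordSerDe;

public class HCatRecordSerDeExample {
  public static void main(String[] args) throws Exception {
    // Table properties the SerDe reads in initialize().
    Properties props = new Properties();
    props.put(Constants.LIST_COLUMNS, "id,name");
    props.put(Constants.LIST_COLUMN_TYPES, "int,string");

    HCatRecordSerDe serde = new HCatRecordSerDe();
    serde.initialize(new Configuration(), props);

    HCatRecord r = new DefaultHCatRecord(Arrays.<Object>asList(7, "hcat"));

    // serialize() walks the record with the cached object inspector and builds
    // a new DefaultHCatRecord; deserialize() hands the Writable straight back.
    Writable w = serde.serialize(r, serde.getObjectInspector());
    HCatRecord roundTripped = (HCatRecord) serde.deserialize(w);

    System.out.println(roundTripped.equals(r)); // true
  }
}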

Added: incubator/hcatalog/trunk/src/test/org/apache/hcatalog/data/TestHCatRecordSerDe.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/org/apache/hcatalog/data/TestHCatRecordSerDe.java?rev=1239731&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/test/org/apache/hcatalog/data/TestHCatRecordSerDe.java (added)
+++ incubator/hcatalog/trunk/src/test/org/apache/hcatalog/data/TestHCatRecordSerDe.java Thu Feb  2 17:20:41 2012
@@ -0,0 +1,144 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hcatalog.data;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Properties;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.serde.Constants;
+import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
+import org.apache.hadoop.io.Writable;
+
+import junit.framework.Assert;
+import junit.framework.TestCase;
+
+public class TestHCatRecordSerDe extends TestCase{
+
+  public Map<Properties,HCatRecord> getData(){
+    Map<Properties,HCatRecord> data = new HashMap<Properties,HCatRecord>();
+
+    List<Object> rlist = new ArrayList<Object>(11);
+    rlist.add(new Byte("123"));
+    rlist.add(new Short("456"));
+    rlist.add(new Integer(789));
+    rlist.add(new Long(1000L));
+    rlist.add(new Double(5.3D));
+    rlist.add(new Float(2.39F));
+    rlist.add(new String("hcat and hadoop"));
+    rlist.add(null);
+
+    List<Object> innerStruct = new ArrayList<Object>(2);
+    innerStruct.add(new String("abc"));
+    innerStruct.add(new String("def"));
+    rlist.add(innerStruct);
+    
+    List<Integer> innerList = new ArrayList<Integer>();
+    innerList.add(314);
+    innerList.add(007);
+    rlist.add(innerList);
+    
+    Map<Short, String> map = new HashMap<Short, String>(3);
+    map.put(new Short("2"), "hcat is cool");
+    map.put(new Short("3"), "is it?");
+    map.put(new Short("4"), "or is it not?");
+    rlist.add(map);
+
+    rlist.add(new Boolean(true));
+    
+    List<Object> c1 = new ArrayList<Object>();
+      List<Object> c1_1 = new ArrayList<Object>();
+      c1_1.add(new Integer(12));
+        List<Object> i2 = new ArrayList<Object>();
+          List<Integer> ii1 = new ArrayList<Integer>();
+            ii1.add(new Integer(13));
+            ii1.add(new Integer(14));
+          i2.add(ii1);
+          Map<String,List<?>> ii2 = new HashMap<String,List<?>>();
+            List<Integer> iii1 = new ArrayList<Integer>();
+              iii1.add(new Integer(15));
+            ii2.put("phew", iii1);
+          i2.add(ii2);
+      c1_1.add(i2);
+      c1.add(c1_1);
+    rlist.add(c1);
+
+    String typeString = 
+        "tinyint,smallint,int,bigint,double,float,string,string,"
+        + "struct<a:string,b:string>,array<int>,map<smallint,string>,boolean,"
+        + "array<struct<i1:int,i2:struct<ii1:array<int>,ii2:map<string,struct<iii1:int>>>>>";
+    Properties props = new Properties();
+    
+    props.put(Constants.LIST_COLUMNS, "ti,si,i,bi,d,f,s,n,r,l,m,b,c1");
+    props.put(Constants.LIST_COLUMN_TYPES, typeString);
+
+    data.put(props, new DefaultHCatRecord(rlist));
+    return data;
+  }
+
+  public void testRW() throws Exception {
+
+    Configuration conf = new Configuration();
+
+    for (Entry<Properties,HCatRecord> e : getData().entrySet()){
+      Properties tblProps = e.getKey();
+      HCatRecord r = e.getValue();
+      
+      HCatRecordSerDe hrsd = new HCatRecordSerDe();
+      hrsd.initialize(conf, tblProps);
+
+      System.out.println("ORIG:"+r.toString());
+
+      Writable s = hrsd.serialize(r,hrsd.getObjectInspector());
+      System.out.println("ONE:"+s.toString());
+
+      HCatRecord r2 = (HCatRecord) hrsd.deserialize(s);
+      Assert.assertTrue(r.equals(r2));
+
+      // If it went through correctly, then s is also a HCatRecord, 
+      // and also equal to the above, and a deepcopy, and this holds 
+      // through for multiple levels more of serialization as well.
+
+      Writable s2 = hrsd.serialize(s, hrsd.getObjectInspector());
+      System.out.println("TWO:"+s2.toString());
+      Assert.assertTrue(r.equals((HCatRecord)s));
+      Assert.assertTrue(r.equals((HCatRecord)s2));
+      
+      // serialize using another serde, and read out that object repr.
+      LazySimpleSerDe testSD = new LazySimpleSerDe();
+      testSD.initialize(conf, tblProps);
+      
+      Writable s3 = testSD.serialize(s, hrsd.getObjectInspector());
+      System.out.println("THREE:"+s3.toString());
+      Object o3 = testSD.deserialize(s3);
+      Assert.assertFalse(r.getClass().equals(o3.getClass()));
+
+      // then serialize again using hrsd, and compare results
+      HCatRecord s4 = (HCatRecord) hrsd.serialize(o3, testSD.getObjectInspector());
+      System.out.println("FOUR:"+s4.toString());
+      Assert.assertFalse(r.equals(s4));
+      
+    }
+
+  }
+
+}


