hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dhr...@apache.org
Subject svn commit: r696525 [1/2] - in /hadoop/core/trunk: ./ src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/ src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ src/contrib/hive/serde/src/test/org/apache/hadoop/hive/s...
Date Thu, 18 Sep 2008 00:09:19 GMT
Author: dhruba
Date: Wed Sep 17 17:09:17 2008
New Revision: 696525

URL: http://svn.apache.org/viewvc?rev=696525&view=rev
Log:
HADOOP-4138. Refactor the Hive SerDe library to better structure
the interfaces to the serializer and de-serializer.
(Zheng Shao via dhruba)


Added:
    hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/
    hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/ByteStream.java
    hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/ByteStreamTypedSerDe.java
    hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/ColumnSet.java
    hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/Deserializer.java
    hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/MetadataTypedColumnsetSerDe.java
    hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/SerDe.java
    hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/SerDeException.java
    hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java
    hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/Serializer.java
    hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/TReflectionUtils.java
    hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/ThriftByteStreamTypedSerDe.java
    hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/ThriftDeserializer.java
    hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/TypedSerDe.java
    hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/
    hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/InspectableObject.java
    hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ListObjectInspector.java
    hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/MapObjectInspector.java
    hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/MetadataListStructObjectInspector.java
    hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspector.java
    hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java
    hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
    hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/PrimitiveObjectInspector.java
    hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ReflectionStructObjectInspector.java
    hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardListObjectInspector.java
    hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardMapObjectInspector.java
    hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardPrimitiveObjectInspector.java
    hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardStructObjectInspector.java
    hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StructField.java
    hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StructObjectInspector.java
    hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ThriftStructObjectInspector.java
    hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/UnionStructObjectInspector.java
    hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/
    hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/
    hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/MyStruct.java
    hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestObjectInspectorUtils.java
    hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestReflectionObjectInspectors.java
    hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestStandardObjectInspectors.java
    hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestThriftObjectInspectors.java
    hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestUnionStructObjectInspector.java
    hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/thrift_test/
    hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/thrift_test/Complex.java
    hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/thrift_test/Constants.java
    hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/thrift_test/IntString.java
Modified:
    hadoop/core/trunk/CHANGES.txt

Modified: hadoop/core/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/CHANGES.txt?rev=696525&r1=696524&r2=696525&view=diff
==============================================================================
--- hadoop/core/trunk/CHANGES.txt (original)
+++ hadoop/core/trunk/CHANGES.txt Wed Sep 17 17:09:17 2008
@@ -591,6 +591,10 @@
 
     HADOOP-4197. Update DATA_TRANSFER_VERSION for HADOOP-3981. (szetszwo)
 
+    HADOOP-4138. Refactor the Hive SerDe library to better structure
+    the interfaces to the serializer and de-serializer.
+    (Zheng Shao via dhruba)
+
 Release 0.18.1 - 2008-09-17
 
   IMPROVEMENTS

Added: hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/ByteStream.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/ByteStream.java?rev=696525&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/ByteStream.java (added)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/ByteStream.java Wed Sep 17 17:09:17 2008
@@ -0,0 +1,53 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2;
+
+import java.io.*;
+
+/**
+ * Extensions to ByteArrayInputStream/ByteArrayOutputStream that expose the internal buffer and count
+ *
+ */
+public class ByteStream {
+  public static class Input extends ByteArrayInputStream {
+    public byte[] getData() { return buf; }
+    public int getCount() { return count;}
+    public void reset(byte [] argBuf, int argCount) {
+      buf = argBuf; mark = pos = 0; count = argCount;
+    }
+    public Input() {
+      super(new byte [1]);
+    }
+
+    public Input(byte[] buf) {
+      super(buf);
+    }
+    public Input(byte[] buf, int offset, int length) {
+      super(buf, offset, length);
+    }
+  }
+    
+  public static class Output extends ByteArrayOutputStream {
+    public byte[] getData() { return buf; }
+    public int getCount() { return count;}
+
+    public Output() { super(); }
+    public Output(int size) { super(size); }
+  }
+}

Added: hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/ByteStreamTypedSerDe.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/ByteStreamTypedSerDe.java?rev=696525&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/ByteStreamTypedSerDe.java (added)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/ByteStreamTypedSerDe.java Wed Sep 17 17:09:17 2008
@@ -0,0 +1,46 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2;
+
+import java.lang.reflect.Type;
+
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+
+public abstract class ByteStreamTypedSerDe extends TypedSerDe {
+
+  protected ByteStream.Input bis;
+  protected ByteStream.Output bos;
+
+  public ByteStreamTypedSerDe(Type objectType) throws SerDeException {
+    super(objectType);
+    bos = new ByteStream.Output();
+    bis = new ByteStream.Input();
+  }
+
+  public Object deserialize(Writable field) throws SerDeException {
+    Object retObj = super.deserialize(field);
+    BytesWritable b = (BytesWritable)field;
+    bis.reset(b.get(), b.getSize());
+    return (retObj);
+  }
+
+
+}

Added: hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/ColumnSet.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/ColumnSet.java?rev=696525&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/ColumnSet.java (added)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/ColumnSet.java Wed Sep 17 17:09:17 2008
@@ -0,0 +1,40 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2;
+
+import java.util.ArrayList;
+
+public class ColumnSet {
+  public ArrayList<String> col;
+
+  public ColumnSet() {
+  }
+
+  public ColumnSet(ArrayList<String> col)
+  {
+    this();
+    this.col = col;
+  }
+
+  public String toString() {
+    return col.toString();
+  }
+  
+}
+

Added: hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/Deserializer.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/Deserializer.java?rev=696525&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/Deserializer.java (added)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/Deserializer.java Wed Sep 17 17:09:17 2008
@@ -0,0 +1,63 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2;
+
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.conf.Configuration;
+import java.util.Properties;
+
+/**
+ * Deserializer is used to deserialize data from a Hadoop Writable into a
+ * custom Java object that can be of any type that the developer wants.
+ * 
+ * Deserializer also provides the ObjectInspector, which can be used to inspect
+ * the internal structure of the object returned by the deserialize function.
+ *
+ */
+public interface Deserializer {
+
+  /**
+   * Initialize the Deserializer.
+   * @param conf System properties
+   * @param tbl  table properties
+   * @throws SerDeException
+   */
+  public void initialize(Configuration conf, Properties tbl) throws SerDeException;
+  
+  /**
+   * Deserialize an object out of a Writable blob.
+   * In most cases, this function returns the same object instance on every call, because
+   * the implementation reuses the returned object.
+   * If the client wants to keep a copy of the object, the client needs to clone the
+   * returned value by calling ObjectInspectorUtils.getStandardObject().
+   * @param blob The Writable object containing a serialized object
+   * @return A Java object representing the contents in the blob.
+   */
+  public Object deserialize(Writable blob) throws SerDeException;
+
+  /**
+   * Get the object inspector that can be used to navigate through the internal
+   * structure of the Object returned from deserialize(...).
+   */
+  public ObjectInspector getObjectInspector() throws SerDeException;
+
+  public String getShortName();
+  
+}

Added: hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/MetadataTypedColumnsetSerDe.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/MetadataTypedColumnsetSerDe.java?rev=696525&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/MetadataTypedColumnsetSerDe.java (added)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/MetadataTypedColumnsetSerDe.java Wed Sep 17 17:09:17 2008
@@ -0,0 +1,189 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2;
+
+import java.io.UnsupportedEncodingException;
+import java.nio.charset.CharacterCodingException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Properties;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.serde.Constants;
+import org.apache.hadoop.hive.serde2.objectinspector.MetadataListStructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+
+
+public class MetadataTypedColumnsetSerDe implements SerDe {
+
+  public static final Log LOG = LogFactory.getLog(MetadataTypedColumnsetSerDe.class.getName());
+
+  public String getShortName() {
+    return shortName();
+  }
+
+
+  public static String shortName() {
+    return "simple_meta";
+  }
+
+  static {
+    StackTraceElement[] sTrace = new Exception().getStackTrace();
+    String className = sTrace[0].getClassName();
+    try {
+      SerDeUtils.registerSerDe(shortName(), Class.forName(className));
+      // For backward compatibility: this class replaces the following class.
+      SerDeUtils.registerSerDe("org.apache.hadoop.hive.serde.simple_meta.MetadataTypedColumnsetSerDe", 
+          Class.forName(className));
+    } catch(Exception e) {
+      throw new RuntimeException(e);
+    }
+  }
+  
+  final public static String DefaultSeparator = "\001";
+
+  private String separator;
+  // constant for now, will make it configurable later.
+  private String nullString = "\\N"; 
+  private List<String> columnNames;
+  private ObjectInspector cachedObjectInspector;
+
+  public String toString() {
+    return "MetaDataTypedColumnsetSerDe[" + separator + "," + columnNames + "]";
+  }
+
+  public MetadataTypedColumnsetSerDe() throws SerDeException {
+    separator = DefaultSeparator;
+  }
+
+  public void initialize(Configuration job, Properties tbl) throws SerDeException {
+    separator = DefaultSeparator;
+    String alt_sep = tbl.getProperty(Constants.SERIALIZATION_FORMAT);
+    if(alt_sep != null && alt_sep.length() > 0) {
+      try {
+        byte b [] = new byte[1];
+        b[0] = Byte.valueOf(alt_sep).byteValue();
+        separator = new String(b);
+      } catch(NumberFormatException e) {
+        separator = alt_sep;
+      }
+    }
+    String columnProperty = tbl.getProperty("columns");
+    if (columnProperty == null || columnProperty.length() == 0) {
+      // Hack for tables with no columns
+      // Treat it as a table with a single column called "col" 
+      cachedObjectInspector = ObjectInspectorFactory.getReflectionObjectInspector(
+          ColumnSet.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+    } else {
+      columnNames = Arrays.asList(columnProperty.split(","));
+      cachedObjectInspector = MetadataListStructObjectInspector.getInstance(columnNames);
+    }
+    System.out.println(getClass().getName() + ": initialized with columnNames: " + columnNames );
+  }
+
+  public static Object deserialize(ColumnSet c, String row, String sep, String nullString) throws Exception {
+    if (c.col == null) {
+      c.col = new ArrayList<String>();
+    } else {
+      c.col.clear();
+    }
+    String [] l1 = row.split(sep, -1);
+
+    for(String s: l1) {
+      if (s.equals(nullString)) {
+        c.col.add(null);
+      } else {
+        c.col.add(s);
+      }
+    }
+    return (c);
+  }
+  
+  ColumnSet deserializeCache = new ColumnSet();
+  public Object deserialize(Writable field) throws SerDeException {
+    String row = null;
+    if (field instanceof BytesWritable) {
+      BytesWritable b = (BytesWritable)field;
+      try {
+        row = Text.decode(b.get(), 0, b.getSize());
+      } catch (CharacterCodingException e) {
+        throw new SerDeException(e);
+      }
+    } else if (field instanceof Text) {
+      row = field.toString();
+    }
+    try {
+      deserialize(deserializeCache, row, separator, nullString);
+      if (columnNames != null) {
+        assert(columnNames.size() == deserializeCache.col.size());
+      }
+      return deserializeCache;
+    } catch (ClassCastException e) {
+      throw new SerDeException( this.getClass().getName() + " expects Text or BytesWritable", e);
+    } catch (Exception e) {
+      throw new SerDeException(e);
+    }
+  }
+  
+  
+  public ObjectInspector getObjectInspector() throws SerDeException {
+    return cachedObjectInspector;
+  }
+
+  public Class<? extends Writable> getSerializedClass() {
+    return Text.class;
+  }
+  
+  Text serializeCache = new Text();
+  public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException {
+
+    if (objInspector.getCategory() != Category.STRUCT) {
+      throw new SerDeException(getClass().toString() 
+          + " can only serialize struct types, but we got: " + objInspector.getTypeName());
+    }
+    StructObjectInspector soi = (StructObjectInspector) objInspector;
+    List<? extends StructField> fields = soi.getAllStructFieldRefs();
+    
+    StringBuilder sb = new StringBuilder();
+    for(int i=0; i<fields.size(); i++) {
+      if (i>0) sb.append(separator);
+      Object column = soi.getStructFieldData(obj, fields.get(i));
+      if (fields.get(i).getFieldObjectInspector().getCategory() == Category.PRIMITIVE) {
+        // For primitive object, serialize to plain string
+        sb.append(column == null ? nullString : column.toString());
+      } else {
+        // For complex object, serialize to JSON format
+        sb.append(SerDeUtils.getJSONString(column, fields.get(i).getFieldObjectInspector()));
+      }
+    }
+    serializeCache.set(sb.toString());
+    return serializeCache;
+  }
+
+}

Added: hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/SerDe.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/SerDe.java?rev=696525&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/SerDe.java (added)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/SerDe.java Wed Sep 17 17:09:17 2008
@@ -0,0 +1,33 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2;
+
+/**
+ * A union of the Deserializer and Serializer interfaces.
+ * 
+ * If a developer wants his Hive table to be read-only, then he just needs to
+ * implement Deserializer.
+ * 
+ * If the table should be both readable and writable, then he implements SerDe.
+ *   
+ *
+ */
+public interface SerDe extends Deserializer, Serializer {
+
+}

Added: hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/SerDeException.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/SerDeException.java?rev=696525&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/SerDeException.java (added)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/SerDeException.java Wed Sep 17 17:09:17 2008
@@ -0,0 +1,45 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2;
+
+/**
+ * Generic exception class for SerDes
+ *
+ */
+
+public class SerDeException extends Exception {
+  private static final long serialVersionUID = 1L;
+
+  public SerDeException() {
+    super();
+  }
+
+  public SerDeException(String message) {
+    super(message);
+  }
+
+  public SerDeException(Throwable cause) {
+    super(cause);
+  }
+
+  public SerDeException(String message, Throwable cause) {
+    super(message, cause);
+  }
+}
+

Added: hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java?rev=696525&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java (added)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java Wed Sep 17 17:09:17 2008
@@ -0,0 +1,242 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2;
+
+import java.util.*;
+
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+
+public class SerDeUtils {
+
+
+  public static final char QUOTE = '"';
+  public static final char COLON = ':';
+  public static final char COMMA = ',';
+  public static final String LBRACKET = "[";
+  public static final String RBRACKET = "]";
+  public static final String LBRACE = "{";
+  public static final String RBRACE = "}";
+
+  private static HashMap<String, Class<?>> serdes = new HashMap<String, Class<?>> ();
+
+  public static void registerSerDe(String name, Class<?> serde) {
+    if(serdes.containsKey(name)) {
+      throw new RuntimeException("double registering serde " + name);
+    }
+    serdes.put(name, serde);
+  }
+
+  public static Deserializer lookupDeserializer(String name) throws SerDeException {
+    Class<?> c;
+    if(serdes.containsKey(name)) {
+        c = serdes.get(name);
+    } else {
+      try {
+        c = Class.forName(name);
+      } catch(ClassNotFoundException e) {
+        throw new SerDeException("SerDe " + name + " does not exist");
+      }
+    }
+    try {
+      return (Deserializer)c.newInstance();
+    } catch(Exception e) {
+      throw new SerDeException(e);
+    }
+  }
+
+
+  private static boolean initCoreSerDes = registerCoreSerDes();
+  
+  protected static boolean registerCoreSerDes() {
+    // Eagerly load SerDes so they will register their symbolic names even on Lazy Loading JVMs
+    try {
+      // loading these classes will automatically register the short names
+      Class.forName(org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe.class.getName());
+      Class.forName(org.apache.hadoop.hive.serde2.ThriftDeserializer.class.getName());
+    } catch (ClassNotFoundException e) {
+      throw new RuntimeException("IMPOSSIBLE Exception: Unable to initialize core serdes", e);
+    }
+    return true;
+  }
+
+  public static String escapeString(String str) {
+    int length = str.length();
+    StringBuilder escape = new StringBuilder(length + 16);
+
+    for (int i = 0; i < length; ++i) {
+      char c = str.charAt(i);
+      switch (c) {
+      case '"':
+      case '\\':
+        escape.append('\\');
+        escape.append(c);
+        break;
+      case '\b':
+        escape.append('\\');
+        escape.append('b');
+        break;
+      case '\f':
+        escape.append('\\');
+        escape.append('f');
+        break;
+      case '\n':
+        escape.append('\\');
+        escape.append('n');
+        break;
+      case '\r':
+        escape.append('\\');
+        escape.append('r');
+        break;
+      case '\t':
+        escape.append('\\');
+        escape.append('t');
+        break;
+      default:
+        // Control characters: per the JSON RFC, characters below U+0020 must be escaped as \\uXXXX
+        if (c < ' ') {
+          String hex = Integer.toHexString(c);
+          escape.append('\\');
+          escape.append('u');
+          for (int j = 4; j > hex.length(); --j) {
+            escape.append('0');
+          }
+          escape.append(hex);
+        } else {
+          escape.append(c);
+        }
+        break;
+      }
+    }
+    return (escape.toString());
+  }
+
+
+  /**
+   * Escapes only newline, carriage return and tab, replacing each with its
+   * two-character backslash escape.  Every other character, including quotes
+   * and backslashes, is copied unchanged.
+   *
+   * @param str the string to escape; must not be null
+   * @return the escaped string
+   */
+  public static String lightEscapeString(String str) {
+    final int len = str.length();
+    final StringBuilder out = new StringBuilder(len + 16);
+
+    for (int pos = 0; pos < len; pos++) {
+      final char ch = str.charAt(pos);
+      if (ch == '\n') {
+        out.append("\\n");
+      } else if (ch == '\r') {
+        out.append("\\r");
+      } else if (ch == '\t') {
+        out.append("\\t");
+      } else {
+        out.append(ch);
+      }
+    }
+    return out.toString();
+  }
+
+  /**
+   * Renders an object as a JSON-like string by walking it with its ObjectInspector.
+   *
+   * @param o  the object to render
+   * @param oi the ObjectInspector describing the structure of o
+   * @return the text produced by buildJSONString for o
+   */
+  public static String getJSONString(Object o, ObjectInspector oi) {
+    final StringBuilder json = new StringBuilder();
+    buildJSONString(json, o, oi);
+    return json.toString();
+  }
+
+  
+  /**
+   * Appends a JSON-like rendering of an object to a StringBuilder, recursing
+   * through lists, maps and structs with the matching ObjectInspector.
+   *
+   * Null primitives, null lists and null maps are written as the two characters
+   * backslash-N rather than as a JSON null, and booleans are written as "True" /
+   * "False"; the output is therefore JSON-like, not strict JSON.
+   *
+   * @param sb the builder that receives the output
+   * @param o  the object to render; may be null
+   * @param oi the ObjectInspector describing the structure of o
+   * @throws RuntimeException if the inspector reports an unknown category
+   */
+  static void buildJSONString(StringBuilder sb, Object o, ObjectInspector oi) {
+
+    switch(oi.getCategory()) {
+      case PRIMITIVE: {
+        if (o == null) {
+          sb.append("\\N");
+        } else if (o instanceof String) {
+          sb.append(QUOTE);
+          sb.append(escapeString((String)o));
+          sb.append(QUOTE);
+        } else if (o instanceof Boolean) {
+          sb.append(((Boolean)o).booleanValue() ? "True" : "False");
+        } else {
+          // it's a number - so doesn't need to be escaped.
+          sb.append(o.toString());
+        }
+        break;
+      }
+      case LIST: {
+        ListObjectInspector loi = (ListObjectInspector)oi;
+        ObjectInspector listElementObjectInspector = loi.getListElementObjectInspector();
+        List<?> olist = loi.getList(o);
+        if (olist == null) {
+          // getList() returns null for null data; use the same null marker as primitives
+          sb.append("\\N");
+          break;
+        }
+        sb.append(LBRACKET);
+        for (int i=0; i<olist.size(); i++) {
+          if (i>0) sb.append(COMMA);
+          buildJSONString(sb, olist.get(i), listElementObjectInspector);
+        }
+        sb.append(RBRACKET);
+        break;
+      }
+      case MAP: {
+        MapObjectInspector moi = (MapObjectInspector)oi;
+        ObjectInspector mapKeyObjectInspector = moi.getMapKeyObjectInspector();
+        ObjectInspector mapValueObjectInspector = moi.getMapValueObjectInspector();
+        Map<?,?> omap = moi.getMap(o);
+        if (omap == null) {
+          // getMap() returns null for null data; use the same null marker as primitives
+          sb.append("\\N");
+          break;
+        }
+        sb.append(LBRACE);
+        boolean first = true;
+        for(Map.Entry<?,?> e : omap.entrySet()) {
+          if (first) {
+            first = false;
+          } else {
+            sb.append(COMMA);
+          }
+          buildJSONString(sb, e.getKey(), mapKeyObjectInspector);
+          sb.append(COLON);
+          buildJSONString(sb, e.getValue(), mapValueObjectInspector);
+        }
+        sb.append(RBRACE);
+        break;
+      }
+      case STRUCT: {
+        sb.append(LBRACE);
+        StructObjectInspector soi = (StructObjectInspector)oi;
+        List<? extends StructField> structFields = soi.getAllStructFieldRefs();
+        for(int i=0; i<structFields.size(); i++) {
+          if (i>0) {
+            sb.append(COMMA);
+          }
+          StructField field = structFields.get(i);
+          // each field is written as "name":value
+          sb.append(QUOTE);
+          sb.append(field.getFieldName());
+          sb.append(QUOTE);
+          sb.append(COLON);
+          buildJSONString(sb, soi.getStructFieldData(o, field),
+              field.getFieldObjectInspector());
+        }
+        sb.append(RBRACE);
+        break;
+      }
+      default:
+        throw new RuntimeException("Unknown type in ObjectInspector!");
+    }
+
+  }
+}

Added: hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/Serializer.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/Serializer.java?rev=696525&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/Serializer.java (added)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/Serializer.java Wed Sep 17 17:09:17 2008
@@ -0,0 +1,58 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2;
+
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.conf.Configuration;
+import java.util.Properties;
+
+/**
+ * HiveSerializer is used to serialize data to a Hadoop Writable object.
+ * The object to serialize is navigated through an ObjectInspector rather than
+ * accessed directly.
+ *
+ * In addition to the interface below, all implementations are assumed to have a ctor
+ * that takes a single 'Table' object as argument.
+ * NOTE(review): the only configuration hook declared here is
+ * initialize(conf, tbl); confirm whether the 'Table' ctor requirement still applies.
+ */
+public interface Serializer {
+
+  /**
+   * Initialize the HiveSerializer.
+   * @param conf System properties
+   * @param tbl  table properties
+   * @throws SerDeException
+   */
+  public void initialize(Configuration conf, Properties tbl) throws SerDeException;
+  
+  /**
+   * Returns the Writable class that would be returned by the serialize method.
+   * This is used to initialize SequenceFile header.
+   *
+   * @return the class of the Writable produced by serialize
+   */
+  public Class<? extends Writable> getSerializedClass();
+  /**
+   * Serialize an object by navigating inside the Object with the ObjectInspector.
+   * In most cases, the return value of this function will be constant since the function
+   * will reuse the Writable object.
+   * If the client wants to keep a copy of the Writable, the client needs to clone the
+   * returned value.
+   *
+   * @param obj          the object to serialize
+   * @param objInspector the ObjectInspector used to navigate obj
+   * @return the serialized form of obj; possibly the same Writable instance on every call
+   * @throws SerDeException
+   */
+  public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException;
+
+  /**
+   * Returns the short symbolic name of this serializer.
+   * NOTE(review): presumably the name under which the implementation is
+   * registered for lookup - confirm against SerDeUtils.
+   */
+  public String getShortName();
+}

Added: hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/TReflectionUtils.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/TReflectionUtils.java?rev=696525&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/TReflectionUtils.java (added)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/TReflectionUtils.java Wed Sep 17 17:09:17 2008
@@ -0,0 +1,44 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2;
+
+import com.facebook.thrift.protocol.TProtocolFactory;
+
+
+/**
+ * Reflection helpers for working with Thrift classes: the names and parameter
+ * types of the Thrift read/write methods, and a lookup of protocol factories
+ * by protocol class name.
+ */
+public class TReflectionUtils {
+  public static final String thriftReaderFname = "read";
+  public static final String thriftWriterFname = "write";
+
+  /** Parameter types of the Thrift read/write methods: a single TProtocol. */
+  public static final Class<?> [] thriftRWParams = loadThriftRWParams();
+
+  /**
+   * Looks up the TProtocol class by name and wraps it in a one-element array.
+   *
+   * @throws RuntimeException if the TProtocol class cannot be found
+   */
+  private static Class<?>[] loadThriftRWParams() {
+    final Class<?> protocolClass;
+    try {
+      protocolClass = Class.forName("com.facebook.thrift.protocol.TProtocol");
+    } catch (ClassNotFoundException e) {
+      throw new RuntimeException(e);
+    }
+    return new Class<?>[] { protocolClass };
+  }
+
+  /**
+   * Instantiates the nested Factory class of a Thrift protocol.
+   *
+   * @param protocolName binary name of the protocol class; "$Factory" is appended
+   *                     to it to obtain the factory class name
+   * @return a new instance of that factory class
+   * @throws Exception if the factory class cannot be loaded or instantiated
+   */
+  public static TProtocolFactory getProtocolFactoryByName(String protocolName)
+    throws Exception {
+    final String factoryClassName = protocolName + "$Factory";
+    final Object factory = Class.forName(factoryClassName).newInstance();
+    return (TProtocolFactory)factory;
+  }
+}

Added: hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/ThriftByteStreamTypedSerDe.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/ThriftByteStreamTypedSerDe.java?rev=696525&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/ThriftByteStreamTypedSerDe.java (added)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/ThriftByteStreamTypedSerDe.java Wed Sep 17 17:09:17 2008
@@ -0,0 +1,84 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2;
+
+import java.lang.reflect.Type;
+import java.util.Properties;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.Writable;
+
+import com.facebook.thrift.TBase;
+import com.facebook.thrift.protocol.TProtocol;
+import com.facebook.thrift.protocol.TProtocolFactory;
+import com.facebook.thrift.transport.TIOStreamTransport;
+
+/**
+ * A ByteStreamTypedSerDe whose records are Thrift objects: deserialization
+ * obtains an object from the superclass and then has it read itself from the
+ * input protocol.
+ */
+public class ThriftByteStreamTypedSerDe extends ByteStreamTypedSerDe {
+
+  protected TIOStreamTransport outTransport, inTransport;
+  protected TProtocol outProtocol, inProtocol;
+
+  /**
+   * Creates the SerDe and wires Thrift transports and protocols around the
+   * byte streams (bos / bis) inherited from the superclass.
+   *
+   * @param objectType the type of the records
+   * @param inFactory  factory for the protocol used when reading
+   * @param outFactory factory for the protocol used when writing
+   * @throws SerDeException if the transports or protocols cannot be set up
+   */
+  public ThriftByteStreamTypedSerDe(Type objectType, TProtocolFactory inFactory,
+                                    TProtocolFactory outFactory) throws SerDeException {
+    super(objectType);
+    try {
+      outTransport = new TIOStreamTransport(bos);
+      inTransport = new TIOStreamTransport(bis);
+      outProtocol = outFactory.getProtocol(outTransport);
+      inProtocol = inFactory.getProtocol(inTransport);
+    } catch (Exception e) {
+      throw new SerDeException(e);
+    }
+  }
+
+  /** Always fails: this class is configured through its constructor only. */
+  public void initialize(Configuration job, Properties tbl) throws SerDeException {
+    throw new SerDeException("ThriftByteStreamTypedSerDe is still semi-abstract");
+  }
+
+  public static String shortName() {
+    return "thriftbytestream";
+  }
+
+  public  String getShortName() {
+    return shortName();
+  }
+
+  protected ObjectInspectorFactory.ObjectInspectorOptions getObjectInspectorOptions() {
+    return ObjectInspectorFactory.ObjectInspectorOptions.THRIFT;
+  }
+
+  /**
+   * Obtains the record object from the superclass and fills it by letting it
+   * read itself from the input protocol.
+   *
+   * @throws SerDeException if the object is not a TBase or the Thrift read fails
+   */
+  public Object deserialize(Writable field) throws SerDeException {
+    final Object record = super.deserialize(field);
+    try {
+      final TBase thriftRecord = (TBase)record;
+      thriftRecord.read(inProtocol);
+    } catch (Exception e) {
+      throw new SerDeException(e);
+    }
+    return record;
+  }
+
+}

Added: hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/ThriftDeserializer.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/ThriftDeserializer.java?rev=696525&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/ThriftDeserializer.java (added)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/ThriftDeserializer.java Wed Sep 17 17:09:17 2008
@@ -0,0 +1,85 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2;
+
+import java.util.Properties;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.io.Writable;
+import com.facebook.thrift.protocol.TProtocolFactory;
+
+/**
+ * Deserializer for tables whose records are Thrift objects.
+ *
+ * The record class and the Thrift protocol are both taken from the table
+ * properties in initialize(); the actual work is delegated to a
+ * ThriftByteStreamTypedSerDe.  The class registers itself with SerDeUtils
+ * under its short name and under the name of the class it replaces.
+ */
+public class ThriftDeserializer implements Deserializer {
+
+  /** Protocol used when the table does not specify a serialization format. */
+  private static final String DEFAULT_PROTOCOL = "com.facebook.thrift.protocol.TBinaryProtocol";
+
+  public static String shortName() {
+    return "thrift";
+  }
+
+  public  String getShortName() {
+    return shortName();
+  }
+
+  static {
+    // Use the class literal directly instead of deriving the name from a stack trace.
+    Class<?> self = ThriftDeserializer.class;
+    try {
+      SerDeUtils.registerSerDe(shortName(), self);
+      // For backward compatibility: this class replaces the following class.
+      SerDeUtils.registerSerDe("org.apache.hadoop.hive.serde.thrift.ThriftSerDe", self);
+    } catch(Exception e) {
+      throw new RuntimeException(e);
+    }
+  }
+  
+  private ThriftByteStreamTypedSerDe tsd;
+
+  public ThriftDeserializer() { }
+  
+  /**
+   * Reads the record class name and the protocol name from the table
+   * properties and creates the underlying ThriftByteStreamTypedSerDe.
+   *
+   * @param job the job configuration (not used here)
+   * @param tbl table properties holding the serialization class and format
+   * @throws SerDeException if the serialization class is missing, or if the
+   *         record class or protocol factory cannot be loaded
+   */
+  public void initialize(Configuration job, Properties tbl) throws SerDeException {
+    try {
+      // both the classname and the protocol name are Table properties
+      // the only hardwired assumption is that records are fixed on a
+      // per Table basis
+
+      String className = tbl.getProperty(org.apache.hadoop.hive.serde.Constants.SERIALIZATION_CLASS);
+      if (className == null) {
+        throw new SerDeException("Table property "
+            + org.apache.hadoop.hive.serde.Constants.SERIALIZATION_CLASS + " is not set");
+      }
+      Class<?> recordClass = Class.forName(className);
+
+      String protoName = tbl.getProperty(org.apache.hadoop.hive.serde.Constants.SERIALIZATION_FORMAT);
+      if (protoName == null) {
+        // must be fully qualified: the factory is loaded with Class.forName(protoName + "$Factory")
+        protoName = DEFAULT_PROTOCOL;
+      }
+
+      TProtocolFactory tp = TReflectionUtils.getProtocolFactoryByName(protoName);
+      tsd = new ThriftByteStreamTypedSerDe(recordClass, tp, tp);
+      
+    } catch (SerDeException e) {
+      // already carries a meaningful message; do not wrap it a second time
+      throw e;
+    } catch (Exception e) {
+      throw new SerDeException(e);
+    }
+  }
+
+  /** Delegates to the ThriftByteStreamTypedSerDe created by initialize(). */
+  public Object deserialize(Writable field) throws SerDeException {
+    return tsd.deserialize(field);
+  }
+  
+  /** Delegates to the ThriftByteStreamTypedSerDe created by initialize(). */
+  public ObjectInspector getObjectInspector() throws SerDeException {
+    return tsd.getObjectInspector();
+  }
+
+}

Added: hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/TypedSerDe.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/TypedSerDe.java?rev=696525&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/TypedSerDe.java (added)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/TypedSerDe.java Wed Sep 17 17:09:17 2008
@@ -0,0 +1,79 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2;
+
+import java.lang.reflect.ParameterizedType;
+import java.lang.reflect.Type;
+import java.util.Properties;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.util.ReflectionUtils;
+
+/**
+ * Base class for SerDes that work on objects of one fixed Java type.
+ * Serialization is not supported at this level.
+ */
+public abstract class TypedSerDe implements SerDe {
+
+  protected Type objectType;
+  protected Class<?> objectClass;
+
+  /**
+   * @param objectType the record type; either a Class or a ParameterizedType,
+   *                   whose raw type is then used as the object class
+   * @throws SerDeException for any other kind of Type
+   */
+  public TypedSerDe(Type objectType) throws SerDeException {
+    this.objectType = objectType;
+    final Type rawType;
+    if (objectType instanceof Class) {
+      rawType = objectType;
+    } else if (objectType instanceof ParameterizedType) {
+      rawType = ((ParameterizedType)objectType).getRawType();
+    } else {
+      throw new SerDeException("Cannot create TypedSerDe with type " + objectType);
+    }
+    objectClass = (Class<?>)rawType;
+  }
+
+  protected Object deserializeCache;
+
+  /**
+   * Returns the object that is to receive the deserialized data: the cached
+   * object if deserializeCache has been set, otherwise a new instance of the
+   * object class.  The blob itself is not read here.
+   */
+  public Object deserialize(Writable blob) throws SerDeException {
+    if (deserializeCache != null) {
+      assert(deserializeCache.getClass().equals(objectClass));
+      return deserializeCache;
+    }
+    return ReflectionUtils.newInstance(objectClass, null);
+  }
+
+  /** Returns a reflection-based ObjectInspector for the object type. */
+  public ObjectInspector getObjectInspector() throws SerDeException {
+    final ObjectInspectorFactory.ObjectInspectorOptions options = getObjectInspectorOptions();
+    return ObjectInspectorFactory.getReflectionObjectInspector(objectType, options);
+  }
+
+  /** Subclasses override this to choose a different reflection flavour. */
+  protected ObjectInspectorFactory.ObjectInspectorOptions getObjectInspectorOptions() {
+    return ObjectInspectorFactory.ObjectInspectorOptions.JAVA;
+  }
+
+  public void initialize(Configuration job, Properties tbl)
+      throws SerDeException {
+    // nothing to configure at this level
+  }
+
+  public Class<? extends Writable> getSerializedClass() {
+    return BytesWritable.class;
+  }
+
+  /** Always throws: serialization is not supported by this base class. */
+  public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException {
+    throw new RuntimeException("not supported");
+  }
+
+}

Added: hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/InspectableObject.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/InspectableObject.java?rev=696525&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/InspectableObject.java (added)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/InspectableObject.java Wed Sep 17 17:09:17 2008
@@ -0,0 +1,44 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2.objectinspector;
+
+/**
+ * Pairs an object with the ObjectInspector that describes it.
+ *
+ * It also lets a function hand back both an object and its ObjectInspector
+ * in a single return value.
+ *
+ * TODO: hashCode and equals need to be redefined before instances can be
+ * used as HashMap keys.
+ */
+public class InspectableObject {
+
+  public Object o;
+  public ObjectInspector oi;
+
+  /** Creates an empty pair; both fields start out as null. */
+  public InspectableObject() {
+    // the fields keep their default null values
+  }
+
+  /** Creates a pair holding the given object and inspector. */
+  public InspectableObject(Object o, ObjectInspector oi) {
+    this.oi = oi;
+    this.o = o;
+  }
+
+}

Added: hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ListObjectInspector.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ListObjectInspector.java?rev=696525&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ListObjectInspector.java (added)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ListObjectInspector.java Wed Sep 17 17:09:17 2008
@@ -0,0 +1,41 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.objectinspector;
+
+import java.util.List;
+
+
+/**
+ * ObjectInspector for list data: exposes the inspector of the elements and
+ * access to the elements, the length and the whole list of a data object.
+ */
+public interface ListObjectInspector extends ObjectInspector {
+
+  // ** Methods that do not need a data object **
+  /** Returns the ObjectInspector for the elements of the list. */
+  public ObjectInspector getListElementObjectInspector();
+
+  // ** Methods that need a data object **
+  /** returns null for null list, out-of-the-range index.
+   */
+  public Object getListElement(Object data, int index);
+
+  /** returns -1 for data = null.
+   */
+  public int getListLength(Object data);
+  
+  /** returns null for data = null.
+   */
+  public List<?> getList(Object data);
+
+}

Added: hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/MapObjectInspector.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/MapObjectInspector.java?rev=696525&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/MapObjectInspector.java (added)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/MapObjectInspector.java Wed Sep 17 17:09:17 2008
@@ -0,0 +1,42 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.objectinspector;
+
+import java.util.Map;
+
+
+/**
+ * ObjectInspector for map data: exposes the inspectors of the keys and values
+ * and access to single values and the whole map of a data object.
+ */
+public interface MapObjectInspector extends ObjectInspector {
+
+  // ** Methods that do not need a data object **
+  // Map Type
+  /** Returns the ObjectInspector for the keys of the map. */
+  public ObjectInspector getMapKeyObjectInspector();
+
+  /** Returns the ObjectInspector for the values of the map. */
+  public ObjectInspector getMapValueObjectInspector();
+
+  // ** Methods that need a data object **
+  // In this function, key has to be of the same structure as the Map expects.
+  // In most cases the key will be of primitive type, so it's OK.
+  // In the rare cases where the key is not primitive, the user is responsible for defining
+  // the hashCode() and equals() methods of the key class.
+  public Object getMapValueElement(Object data, Object key);
+
+  /** returns null for data = null.
+   */
+  public Map<?,?> getMap(Object data);
+
+}

Added: hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/MetadataListStructObjectInspector.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/MetadataListStructObjectInspector.java?rev=696525&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/MetadataListStructObjectInspector.java (added)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/MetadataListStructObjectInspector.java Wed Sep 17 17:09:17 2008
@@ -0,0 +1,78 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2.objectinspector;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+
+import org.apache.hadoop.hive.serde2.ColumnSet;
+
+/**
+ * MetadataListStructObjectInspector is a StandardStructObjectInspector in which
+ * every field is inspected with the standard String primitive ObjectInspector;
+ * only the column names differ between instances.
+ *
+ * Instances are obtained through the getInstance() factory methods, which keep
+ * one inspector per distinct list of column names.  When the data object is a
+ * ColumnSet, the struct accessors operate on its col member instead.
+ *
+ * NOTE(review): the cache is a plain static HashMap without synchronization;
+ * confirm that getInstance() is never called concurrently.
+ */
+public class MetadataListStructObjectInspector extends StandardStructObjectInspector {
+
+  static HashMap<List<String>, MetadataListStructObjectInspector> cached
+     = new HashMap<List<String>, MetadataListStructObjectInspector>();
+
+  /**
+   * Returns the inspector for the column names that
+   * ObjectInspectorUtils.getIntegerArray produces for the given field count.
+   */
+  public static MetadataListStructObjectInspector getInstance(int fields) {
+    return getInstance(ObjectInspectorUtils.getIntegerArray(fields));
+  }
+
+  /**
+   * Returns the cached inspector for the given column names, creating and
+   * caching it on first use.
+   *
+   * @param columnNames the names of the struct fields, in order
+   */
+  public static MetadataListStructObjectInspector getInstance(List<String> columnNames) {
+    MetadataListStructObjectInspector result = cached.get(columnNames);
+    if (result == null) {
+      // Keep a private copy: the caller may later modify its list, which must
+      // neither corrupt the HashMap key nor change the cached inspector.
+      List<String> key = new ArrayList<String>(columnNames);
+      result = new MetadataListStructObjectInspector(key);
+      cached.put(key, result);
+    }
+    return result;
+  }
+
+  /** Builds a list of the given size holding the String primitive inspector in each slot. */
+  static ArrayList<ObjectInspector> getFieldObjectInspectors(int fields) {
+    ArrayList<ObjectInspector> r = new ArrayList<ObjectInspector>(fields);
+    for(int i=0; i<fields; i++) {
+      r.add(ObjectInspectorFactory.getStandardPrimitiveObjectInspector(String.class));
+    }
+    return r;
+  }
+  
+  MetadataListStructObjectInspector(List<String> columnNames) {
+    super(columnNames, getFieldObjectInspectors(columnNames.size()));
+  }
+  
+  // Get col object out
+  public Object getStructFieldData(Object data, StructField fieldRef) {
+    if (data instanceof ColumnSet) {
+      data = ((ColumnSet)data).col;
+    }
+    return super.getStructFieldData(data, fieldRef);
+  }
+  // Get col object out
+  public List<Object> getStructFieldsDataAsList(Object data) {
+    if (data instanceof ColumnSet) {
+      data = ((ColumnSet)data).col;
+    }
+    return super.getStructFieldsDataAsList(data);
+  }
+
+}

Added: hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspector.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspector.java?rev=696525&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspector.java (added)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspector.java Wed Sep 17 17:09:17 2008
@@ -0,0 +1,63 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2.objectinspector;
+
+/**
+ * ObjectInspector helps us to look into the internal structure of a complex
+ * object.
+ *
+ * A (probably configured) ObjectInspector instance stands for a specific type
+ * and a specific way to store the data of that type in the memory.
+ * 
+ * For a native Java Object, we can directly access the internal structure through
+ * member fields and methods.  ObjectInspector is a way to delegate that functionality
+ * away from the Object, so that we have more control over the behavior of those actions.
+ * 
+ * An efficient implementation of ObjectInspector should rely on a factory, so that we can
+ * make sure the same ObjectInspector only has one instance.  That also makes sure
+ * the hashCode() and equals() methods of java.lang.Object work directly for ObjectInspector
+ * as well.
+ */
+public interface ObjectInspector {
+
+  /** The kinds of data an ObjectInspector can describe. */
+  public static enum Category {
+    PRIMITIVE, LIST, MAP, STRUCT
+  };
+
+  /**
+   * Returns the name of the data type that is inspected by this ObjectInspector.
+   * This is used to display the type information to the user.
+   * 
+   * For primitive types, the type name is standardized.
+   * For other types, the type name can be something like "list<int>", "map<int,string>",
+   * java class names, or user-defined type names similar to typedef. 
+   *
+   * @return the type name
+   */
+  public String getTypeName();
+  
+  /**
+   * An ObjectInspector must inherit from one of the following interfaces
+   * if getCategory() returns:
+   * PRIMITIVE:  PrimitiveObjectInspector 
+   * LIST:       ListObjectInspector 
+   * MAP:        MapObjectInspector 
+   * STRUCT:     StructObjectInspector 
+   *
+   * @return the category of the inspected type
+   */
+  public Category getCategory();
+
+}

Added: hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java?rev=696525&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java (added)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java Wed Sep 17 17:09:17 2008
@@ -0,0 +1,196 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2.objectinspector;
+
+import java.lang.reflect.Field;
+import java.lang.reflect.GenericArrayType;
+import java.lang.reflect.ParameterizedType;
+import java.lang.reflect.Type;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+
+/**
+ * ObjectInspectorFactory is the primary way to create new ObjectInspector instances.
+ * 
+ * SerDe classes should call the static functions in this library to create an ObjectInspector
+ * to return to the caller of SerDe2.getObjectInspector(). 
+ */
+public class ObjectInspectorFactory {
+
+
+  /**
+   * ObjectInspectorOptions describes which kind of reflection-based ObjectInspector to use.
+   * JAVA uses pure Java reflection.  THRIFT uses Java reflection and filters out __isset fields.
+   * New ObjectInspectorOptions can be added here when available.
+   * 
+   * We choose to use a single HashMap, objectInspectorCache, to cache all situations for
+   * efficiency and code simplicity.  We don't expect a case in which a user needs to create
+   * 2 or more different types of ObjectInspectors for the same Java type.
+   */
+  public enum ObjectInspectorOptions {
+    JAVA,
+    THRIFT
+  };
+  
+  private static HashMap<Type, ObjectInspector> objectInspectorCache = new HashMap<Type, ObjectInspector>();
+  
+  public static ObjectInspector getReflectionObjectInspector(Type t, ObjectInspectorOptions options) {
+    ObjectInspector oi = objectInspectorCache.get(t);
+    if (oi == null) {
+      oi = getReflectionObjectInspectorNoCache(t, options);
+      objectInspectorCache.put(t, oi);
+    }
+    if ((options.equals(ObjectInspectorOptions.JAVA) && oi.getClass().equals(ThriftStructObjectInspector.class))
+        || (options.equals(ObjectInspectorOptions.THRIFT) && oi.getClass().equals(ReflectionStructObjectInspector.class))) {
+      throw new RuntimeException("Cannot call getObjectInspectorByReflection with both JAVA and THRIFT !");
+    }
+    return oi;
+  }
+  
+  /**
+   * Creates the ObjectInspector for a Java type through reflection, without consulting
+   * objectInspectorCache for t itself (nested types still go through the cache).
+   *
+   * Generic arrays and parameterized Lists become list ObjectInspectors, parameterized Maps
+   * become map ObjectInspectors, primitive classes become primitive ObjectInspectors, and any
+   * other class becomes a struct ObjectInspector over its declared fields.
+   *
+   * @param t the Java type to inspect
+   * @param options selects the struct ObjectInspector implementation (JAVA or THRIFT)
+   * @return a new ObjectInspector for t
+   * @throws RuntimeException if t is neither a class, a parameterized type nor a generic array
+   */
+  private static ObjectInspector getReflectionObjectInspectorNoCache(Type t, ObjectInspectorOptions options) {
+    if (t instanceof GenericArrayType) {
+      GenericArrayType at = (GenericArrayType)t;
+      return getStandardListObjectInspector(
+          getReflectionObjectInspector(at.getGenericComponentType(), options));
+    }
+
+    if (t instanceof ParameterizedType) {
+      ParameterizedType pt = (ParameterizedType)t;
+      // List?
+      if (List.class.isAssignableFrom((Class<?>)pt.getRawType())) {
+        return getStandardListObjectInspector(
+            getReflectionObjectInspector(pt.getActualTypeArguments()[0], options));
+      }
+      // Map?
+      if (Map.class.isAssignableFrom((Class<?>)pt.getRawType())) {
+        return getStandardMapObjectInspector(
+            getReflectionObjectInspector(pt.getActualTypeArguments()[0], options),
+            getReflectionObjectInspector(pt.getActualTypeArguments()[1], options));
+      }
+      // Otherwise convert t to RawType so we will fall into the following if block.
+      t = pt.getRawType();
+    }
+    
+    // Must be a class.
+    if (!(t instanceof Class)) {
+      throw new RuntimeException(ObjectInspectorFactory.class.getName() + ": internal error."); 
+    }
+    Class<?> c = (Class<?>)t;
+    
+    // Primitive?
+    if (ObjectInspectorUtils.isPrimitiveClass(c)) {
+      return getStandardPrimitiveObjectInspector(c);
+    }
+    
+    // Must be struct because List and Map need to be ParameterizedType
+    assert(!List.class.isAssignableFrom(c));
+    assert(!Map.class.isAssignableFrom(c));
+    
+    // Create StructObjectInspector
+    ReflectionStructObjectInspector oi;
+    switch(options) {
+    case JAVA: 
+      oi = new ReflectionStructObjectInspector();
+      break;
+    case THRIFT: 
+      oi = new ThriftStructObjectInspector();
+      break;
+    default:
+      throw new RuntimeException(ObjectInspectorFactory.class.getName() + ": internal error."); 
+    }
+    // put it into the cache BEFORE it is initialized to make sure we can catch recursive types. 
+    ObjectInspector previous = objectInspectorCache.put(t, oi);
+    boolean initialized = false;
+    try {
+      Field[] fields = c.getDeclaredFields();
+      ArrayList<ObjectInspector> structFieldObjectInspectors = new ArrayList<ObjectInspector>(fields.length);
+      for(int i=0; i<fields.length; i++) {
+        if (!oi.shouldIgnoreField(fields[i].getName())) {
+          structFieldObjectInspectors.add(getReflectionObjectInspector(fields[i].getGenericType(), options));
+        }
+      }
+      oi.init(c, structFieldObjectInspectors);
+      initialized = true;
+    } finally {
+      if (!initialized) {
+        // A field type could not be resolved: do not leave the uninitialized inspector in
+        // the cache, otherwise later lookups for this type would return it.
+        if (previous == null) {
+          objectInspectorCache.remove(t);
+        } else {
+          objectInspectorCache.put(t, previous);
+        }
+      }
+    }
+    return oi;
+  }
+  
+  
+  private static HashMap<Class<?>, StandardPrimitiveObjectInspector> cachedStandardPrimitiveInspectorCache = new HashMap<Class<?>, StandardPrimitiveObjectInspector>();
+  public static StandardPrimitiveObjectInspector getStandardPrimitiveObjectInspector(Class<?> c) {
+    c = ObjectInspectorUtils.generalizePrimitive(c);
+    StandardPrimitiveObjectInspector result = cachedStandardPrimitiveInspectorCache.get(c);
+    if (result == null) {
+      result = new StandardPrimitiveObjectInspector(c);
+      cachedStandardPrimitiveInspectorCache.put(c, result);
+    }
+    return result;
+  }
+  
+  /** Cache of StandardListObjectInspectors, keyed by the ObjectInspector of the list elements. */
+  static HashMap<ObjectInspector, StandardListObjectInspector> cachedStandardListObjectInspector =
+    new HashMap<ObjectInspector, StandardListObjectInspector>(); 
+  /**
+   * Returns the shared StandardListObjectInspector for lists whose elements are inspected
+   * by the given ObjectInspector.
+   *
+   * @param listElementObjectInspector the ObjectInspector of the list elements
+   * @return the cached ObjectInspector, created on the first request
+   */
+  public static StandardListObjectInspector getStandardListObjectInspector(ObjectInspector listElementObjectInspector) {
+    StandardListObjectInspector cached = cachedStandardListObjectInspector.get(listElementObjectInspector);
+    if (cached != null) {
+      return cached;
+    }
+    StandardListObjectInspector created = new StandardListObjectInspector(listElementObjectInspector);
+    cachedStandardListObjectInspector.put(listElementObjectInspector, created);
+    return created;
+  }
+
+  /** Cache of StandardMapObjectInspectors, keyed by the pair (key inspector, value inspector). */
+  static HashMap<List<ObjectInspector>, StandardMapObjectInspector> cachedStandardMapObjectInspector =
+    new HashMap<List<ObjectInspector>, StandardMapObjectInspector>(); 
+  /**
+   * Returns the shared StandardMapObjectInspector for maps with the given key and value
+   * ObjectInspectors.
+   *
+   * @param mapKeyObjectInspector the ObjectInspector of the map keys
+   * @param mapValueObjectInspector the ObjectInspector of the map values
+   * @return the cached ObjectInspector, created on the first request
+   */
+  public static StandardMapObjectInspector getStandardMapObjectInspector(ObjectInspector mapKeyObjectInspector, ObjectInspector mapValueObjectInspector) {
+    // The two-element list [key inspector, value inspector] is the cache key.
+    ArrayList<ObjectInspector> keyAndValue = new ArrayList<ObjectInspector>(2);
+    keyAndValue.add(mapKeyObjectInspector);
+    keyAndValue.add(mapValueObjectInspector);
+    StandardMapObjectInspector cached = cachedStandardMapObjectInspector.get(keyAndValue);
+    if (cached != null) {
+      return cached;
+    }
+    StandardMapObjectInspector created = new StandardMapObjectInspector(mapKeyObjectInspector, mapValueObjectInspector);
+    cachedStandardMapObjectInspector.put(keyAndValue, created);
+    return created;
+  }
+  
+  /** Cache of StandardStructObjectInspectors, keyed by the pair (field names, field inspectors). */
+  static HashMap<ArrayList<List<?>>, StandardStructObjectInspector> cachedStandardStructObjectInspector =
+    new HashMap<ArrayList<List<?>>, StandardStructObjectInspector>(); 
+  /**
+   * Returns the shared StandardStructObjectInspector for structs with the given field names
+   * and field ObjectInspectors.
+   *
+   * The two lists are copied before they are used as the cache key and handed to the new
+   * ObjectInspector, so a caller that modifies its lists afterwards cannot corrupt the cache.
+   *
+   * @param structFieldNames the names of the struct fields
+   * @param structFieldObjectInspectors the ObjectInspectors of the struct fields
+   * @return the cached ObjectInspector, created on the first request
+   */
+  public static StandardStructObjectInspector getStandardStructObjectInspector(List<String> structFieldNames, List<ObjectInspector> structFieldObjectInspectors) {
+    // Defensive copies: a HashMap key must not change after it has been inserted.
+    List<String> fieldNames = new ArrayList<String>(structFieldNames);
+    List<ObjectInspector> fieldObjectInspectors = new ArrayList<ObjectInspector>(structFieldObjectInspectors);
+    ArrayList<List<?>> signature = new ArrayList<List<?>>(2);
+    signature.add(fieldNames);
+    signature.add(fieldObjectInspectors);
+    StandardStructObjectInspector result = cachedStandardStructObjectInspector.get(signature);
+    if (result == null) {
+      result = new StandardStructObjectInspector(fieldNames, fieldObjectInspectors);
+      cachedStandardStructObjectInspector.put(signature, result);
+    }
+    return result;
+  }
+  
+  /** Cache of UnionStructObjectInspectors, keyed by the list of unioned struct inspectors. */
+  static HashMap<List<StructObjectInspector>, UnionStructObjectInspector> cachedUnionStructObjectInspector =
+    new HashMap<List<StructObjectInspector>, UnionStructObjectInspector>(); 
+  /**
+   * Returns the shared UnionStructObjectInspector for the given list of
+   * StructObjectInspectors.
+   *
+   * The list is copied before it is used as the cache key and handed to the new
+   * ObjectInspector, so a caller that modifies its list afterwards cannot corrupt the cache.
+   *
+   * @param structObjectInspectors the StructObjectInspectors to combine
+   * @return the cached ObjectInspector, created on the first request
+   */
+  public static UnionStructObjectInspector getUnionStructObjectInspector(List<StructObjectInspector> structObjectInspectors) {
+    // Defensive copy: a HashMap key must not change after it has been inserted.
+    List<StructObjectInspector> signature = new ArrayList<StructObjectInspector>(structObjectInspectors);
+    UnionStructObjectInspector result = cachedUnionStructObjectInspector.get(signature);
+    if (result == null) {
+      result = new UnionStructObjectInspector(signature);
+      cachedUnionStructObjectInspector.put(signature, result);
+    }
+    return result;
+  }
+  
+  
+}

Added: hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java?rev=696525&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java (added)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java Wed Sep 17 17:09:17 2008
@@ -0,0 +1,225 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2.objectinspector;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * ObjectInspectorUtils is a collection of static utility functions for working with
+ * ObjectInspectors.
+ * 
+ * It contains helpers to classify and generalize Java primitive classes, to build cached
+ * lists of integer names, and to convert objects and ObjectInspectors to their standard form.
+ */
+public class ObjectInspectorUtils {
+
+  /**
+   * Tells whether a class is one of the PrimitiveClasses that we support:
+   * String, Boolean, Character, java.sql.Date, any subclass of Number, and the Java
+   * primitive types.
+   * A PrimitiveClass should support java serialization/deserialization.
+   *
+   * @param c the class to test
+   * @return true if c is a supported PrimitiveClass
+   */
+  public static boolean isPrimitiveClass(Class<?> c) {
+    if (c == String.class || c == Boolean.class) {
+      return true;
+    }
+    if (c == Character.class || c == java.sql.Date.class) {
+      return true;
+    }
+    if (java.lang.Number.class.isAssignableFrom(c)) {
+      return true;
+    }
+    return c.isPrimitive();
+  }
+  
+  /**
+   * Generalize the Java primitive types to the corresponding 
+   * Java Classes, for example int to Integer.
+   *
+   * @param primitiveClass the class to generalize
+   * @return the Java Class for a Java primitive type, or primitiveClass itself otherwise
+   */
+  public static Class<?> generalizePrimitive(Class<?> primitiveClass) {
+    if (primitiveClass == Boolean.TYPE) {
+      return Boolean.class;
+    }
+    if (primitiveClass == Byte.TYPE) {
+      return Byte.class;
+    }
+    if (primitiveClass == Character.TYPE) {
+      return Character.class;
+    }
+    if (primitiveClass == Short.TYPE) {
+      return Short.class;
+    }
+    if (primitiveClass == Integer.TYPE) {
+      return Integer.class;
+    }
+    if (primitiveClass == Long.TYPE) {
+      return Long.class;
+    }
+    if (primitiveClass == Float.TYPE) {
+      return Float.class;
+    }
+    if (primitiveClass == Double.TYPE) {
+      return Double.class;
+    }
+    if (primitiveClass == Void.TYPE) {
+      return Void.class;
+    }
+    // Not a Java primitive type: nothing to generalize.
+    return primitiveClass;
+  }
+  
+
+  /** Cache for getIntegerArray: the element at index n is the list for size n, or null. */
+  static ArrayList<ArrayList<String>> integerArrayCache = new ArrayList<ArrayList<String>>();
+  /**
+   * Returns an array of Integer strings, starting from "0", for example
+   * ["0", "1", "2"] for size 3.
+   * This function caches the arrays to provide a better performance, so the same
+   * instance is returned for repeated calls with the same size.
+   *
+   * @param size the number of strings in the returned array
+   * @return the cached array of the strings "0" to "size-1"
+   */
+  public static ArrayList<String> getIntegerArray(int size) {
+    // Grow the cache with empty slots until the slot for this size exists.
+    for (int slots = integerArrayCache.size(); slots <= size; slots++) {
+      integerArrayCache.add(null);
+    }
+    ArrayList<String> cached = integerArrayCache.get(size);
+    if (cached != null) {
+      return cached;
+    }
+    ArrayList<String> numbers = new ArrayList<String>();
+    for (int n = 0; n < size; n++) {
+      numbers.add(Integer.toString(n));
+    }
+    integerArrayCache.set(size, numbers);
+    return numbers;
+  }
+
+  /** Cache for getIntegerCSV: the element at index n is the string for size n, or null. */
+  static ArrayList<String> integerCSVCache = new ArrayList<String>(); 
+  /**
+   * Returns the integers from 0 to size-1 as a comma-separated string, for example
+   * "0,1,2" for size 3.  The strings are cached per size.
+   *
+   * @param size the number of integers in the returned string
+   * @return the cached comma-separated string, empty for size 0
+   */
+  public static String getIntegerCSV(int size) {
+    // Grow the cache with empty slots until the slot for this size exists.
+    for (int slots = integerCSVCache.size(); slots <= size; slots++) {
+      integerCSVCache.add(null);
+    }
+    String cached = integerCSVCache.get(size);
+    if (cached != null) {
+      return cached;
+    }
+    StringBuilder csv = new StringBuilder();
+    for (int n = 0; n < size; n++) {
+      if (n != 0) {
+        csv.append(",");
+      }
+      csv.append(n);
+    }
+    String built = csv.toString();
+    integerCSVCache.set(size, built);
+    return built;
+  }
+  
+
+  /**
+   * Get the standard ObjectInspector for an ObjectInspector.
+   * 
+   * The returned ObjectInspector can be used to inspect the standard object.
+   *
+   * The conversion is applied recursively to list elements, map keys and values, and struct
+   * fields, because getStandardObject converts the nested data recursively as well.
+   * Primitive ObjectInspectors are returned unchanged.
+   *
+   * NOTE: because of the recursion, a self-referential type (a struct that contains itself)
+   * cannot be converted; this function does not terminate for such an ObjectInspector.
+   *
+   * @param oi the ObjectInspector to convert
+   * @return the standard ObjectInspector that describes the same type
+   * @throws RuntimeException if the category of oi is unknown
+   */
+  public static ObjectInspector getStandardObjectInspector(ObjectInspector oi) {
+    ObjectInspector result = null;
+    switch (oi.getCategory()) {
+      case PRIMITIVE: {
+        PrimitiveObjectInspector poi =(PrimitiveObjectInspector)oi;
+        result = poi;
+        break;
+      }
+      case LIST: {
+        ListObjectInspector loi = (ListObjectInspector)oi;
+        result = ObjectInspectorFactory.getStandardListObjectInspector(
+            getStandardObjectInspector(loi.getListElementObjectInspector()));
+        break;
+      }
+      case MAP: {
+        MapObjectInspector moi = (MapObjectInspector)oi;
+        result = ObjectInspectorFactory.getStandardMapObjectInspector(
+            getStandardObjectInspector(moi.getMapKeyObjectInspector()),
+            getStandardObjectInspector(moi.getMapValueObjectInspector()));
+        break;
+      }
+      case STRUCT: {
+        StructObjectInspector soi = (StructObjectInspector)oi;
+        List<? extends StructField> fields = soi.getAllStructFieldRefs();
+        List<String> fieldNames = new ArrayList<String>(fields.size());
+        List<ObjectInspector> fieldObjectInspectors = new ArrayList<ObjectInspector>(fields.size());
+        for(StructField f : fields) {
+          fieldNames.add(f.getFieldName());
+          // nested fields must be standard too, see getStandardObject
+          fieldObjectInspectors.add(getStandardObjectInspector(f.getFieldObjectInspector()));
+        }
+        result = ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldObjectInspectors);
+        break;
+      }
+      default: {
+        throw new RuntimeException("Unknown ObjectInspector category!");
+      }
+    }
+    return result;
+  }
+  
+  // TODO: should return o if the ObjectInspector is a standard ObjectInspector hierarchy
+  // (all internal ObjectInspector needs to be standard ObjectInspectors)
+  /**
+   * Converts an object to its standard form, as described by its ObjectInspector:
+   * primitives are returned as they are, lists and structs become ArrayLists, and maps
+   * become HashMaps.  Nested objects are converted recursively.
+   *
+   * @param o the object to convert, may be null
+   * @param oi the ObjectInspector that describes o
+   * @return the standard object, or null if o is null
+   * @throws RuntimeException if the category of oi is unknown
+   */
+  public static Object getStandardObject(Object o, ObjectInspector oi) {
+    if (o == null) {
+      return null;
+    }
+    
+    switch (oi.getCategory()) {
+      case PRIMITIVE: {
+        return o;
+      }
+      case LIST: {
+        ListObjectInspector listInspector = (ListObjectInspector)oi;
+        int size = listInspector.getListLength(o);
+        ArrayList<Object> standardList = new ArrayList<Object>(size);
+        for (int index = 0; index < size; index++) {
+          Object element = listInspector.getListElement(o, index);
+          standardList.add(getStandardObject(element, listInspector.getListElementObjectInspector()));
+        }
+        return standardList;
+      }
+      case MAP: {
+        MapObjectInspector mapInspector = (MapObjectInspector)oi;
+        HashMap<Object, Object> standardMap = new HashMap<Object, Object>();
+        Map<? extends Object, ? extends Object> original = mapInspector.getMap(o);
+        for (Map.Entry<? extends Object, ? extends Object> pair : original.entrySet()) {
+          Object standardKey = getStandardObject(pair.getKey(), mapInspector.getMapKeyObjectInspector());
+          Object standardValue = getStandardObject(pair.getValue(), mapInspector.getMapValueObjectInspector());
+          standardMap.put(standardKey, standardValue);
+        }
+        return standardMap;
+      }
+      case STRUCT: {
+        StructObjectInspector structInspector = (StructObjectInspector)oi;
+        List<? extends StructField> fieldRefs = structInspector.getAllStructFieldRefs();
+        // a standard struct is the list of its field values, in field order
+        ArrayList<Object> standardStruct = new ArrayList<Object>(fieldRefs.size()); 
+        for (StructField fieldRef : fieldRefs) {
+          Object fieldData = structInspector.getStructFieldData(o, fieldRef);
+          standardStruct.add(getStandardObject(fieldData, fieldRef.getFieldObjectInspector()));
+        }
+        return standardStruct;
+      }
+      default: {
+        throw new RuntimeException("Unknown ObjectInspector category!");
+      }
+    }
+  }  
+  
+  /**
+   * Builds the standard type name of a struct from its fields, in the form
+   * "struct{name1:type1,name2:type2}".
+   *
+   * @param soi the StructObjectInspector that provides the fields
+   * @return the type name of the struct
+   */
+  public static String getStandardStructTypeName(StructObjectInspector soi) {
+    StringBuilder typeName = new StringBuilder("struct{");
+    // empty before the first field, "," before every following one
+    String separator = "";
+    for (StructField field : soi.getAllStructFieldRefs()) {
+      typeName.append(separator);
+      typeName.append(field.getFieldName());
+      typeName.append(":");
+      typeName.append(field.getFieldObjectInspector().getTypeName());
+      separator = ",";
+    }
+    return typeName.append("}").toString();
+  }
+  
+  /**
+   * Finds the field with the given name in a list of struct fields.
+   * The requested name is converted to lower case before it is compared with the field names.
+   *
+   * @param fieldName the name of the field to look up
+   * @param fields the fields to search
+   * @return the first field whose name equals the lower-cased fieldName
+   * @throws RuntimeException if no such field exists
+   */
+  public static StructField getStandardStructFieldRef(String fieldName, List<? extends StructField> fields) {
+    fieldName = fieldName.toLowerCase();
+    for (StructField candidate : fields) {
+      if (candidate.getFieldName().equals(fieldName)) {
+        return candidate;
+      }
+    }
+    throw new RuntimeException("cannot find field " + fieldName + " from " + fields); 
+  }
+
+  
+  
+}

Added: hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/PrimitiveObjectInspector.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/PrimitiveObjectInspector.java?rev=696525&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/PrimitiveObjectInspector.java (added)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/PrimitiveObjectInspector.java Wed Sep 17 17:09:17 2008
@@ -0,0 +1,24 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.objectinspector;
+
+/**
+ * PrimitiveObjectInspector is the ObjectInspector interface for primitive data types.
+ */
+public interface PrimitiveObjectInspector extends ObjectInspector{
+
+  /**
+   * Returns the Java class associated with the inspected primitive type.
+   * NOTE(review): presumably the class of the primitive objects themselves, such as
+   * Integer or String - confirm against the implementations.
+   *
+   * @return the primitive class
+   */
+  public Class<?> getPrimitiveClass();
+
+}



Mime
View raw message