incubator-hcatalog-commits mailing list archives

From: ga...@apache.org
Subject: svn commit: r1292568 - in /incubator/hcatalog/trunk: CHANGES.txt build.xml ivy.xml ivy/libraries.properties src/java/org/apache/hcatalog/data/JsonSerDe.java src/test/org/apache/hcatalog/data/TestJsonSerDe.java
Date: Wed, 22 Feb 2012 22:56:49 GMT
Author: gates
Date: Wed Feb 22 22:56:49 2012
New Revision: 1292568

URL: http://svn.apache.org/viewvc?rev=1292568&view=rev
Log:
HCATALOG-249 Rework JSON StorageDriver into a JSON SerDe
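
A minimal sketch of how the new SerDe is driven, mirroring the unit test added below. The
column names, types, and JSON input here are illustrative only, not part of this commit:

    import java.util.Properties;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hive.serde.Constants;
    import org.apache.hadoop.io.Text;
    import org.apache.hcatalog.data.HCatRecord;
    import org.apache.hcatalog.data.JsonSerDe;

    public class JsonSerDeUsageSketch {
      public static void main(String[] args) throws Exception {
        // the SerDe reads its column layout from standard Hive table properties
        Properties tblProps = new Properties();
        tblProps.put(Constants.LIST_COLUMNS, "i,s");              // illustrative columns
        tblProps.put(Constants.LIST_COLUMN_TYPES, "int,string");

        JsonSerDe jsde = new JsonSerDe();
        jsde.initialize(new Configuration(), tblProps);

        // deserialize: Text holding one JSON object per record -> HCatRecord
        HCatRecord r = (HCatRecord) jsde.deserialize(new Text("{\"i\":42,\"s\":\"hello\"}"));

        // serialize: object plus its inspector -> Text holding JSON
        Text out = (Text) jsde.serialize(r, jsde.getObjectInspector());
        System.out.println(out);                                  // {"i":42,"s":"hello"}
      }
    }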

Added:
    incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/JsonSerDe.java
    incubator/hcatalog/trunk/src/test/org/apache/hcatalog/data/TestJsonSerDe.java
Modified:
    incubator/hcatalog/trunk/CHANGES.txt
    incubator/hcatalog/trunk/build.xml
    incubator/hcatalog/trunk/ivy.xml
    incubator/hcatalog/trunk/ivy/libraries.properties

Modified: incubator/hcatalog/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/CHANGES.txt?rev=1292568&r1=1292567&r2=1292568&view=diff
==============================================================================
--- incubator/hcatalog/trunk/CHANGES.txt (original)
+++ incubator/hcatalog/trunk/CHANGES.txt Wed Feb 22 22:56:49 2012
@@ -28,6 +28,8 @@ Trunk (unreleased changes)
   HCAT-240. Changes to HCatOutputFormat to make it use SerDes instead of StorageDriver (toffer)
 
   NEW FEATURES
+  HCAT-249 Rework JSON StorageDriver into a JSON SerDe (khorgath via gates)
+
   HCAT-255 Define hadoop properties on the hcat command line (ctdean via gates)
 
   HCAT-2 Support nested schema conversion between Hive and Pig (julienledem via hashutosh)

Modified: incubator/hcatalog/trunk/build.xml
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/build.xml?rev=1292568&r1=1292567&r2=1292568&view=diff
==============================================================================
--- incubator/hcatalog/trunk/build.xml (original)
+++ incubator/hcatalog/trunk/build.xml Wed Feb 22 22:56:49 2012
@@ -471,6 +471,8 @@
         <include name="commons-logging-*.jar"/>
         <include name="commons-logging-api-*.jar"/>
         <include name="commons-pool-*.jar"/>
+        <include name="jackson-mapper-asl-*.jar"/>
+        <include name="jackson-core-asl-*.jar"/>
         <include name="datanucleus-connectionpool-*.jar"/>
         <include name="datanucleus-core-*.jar"/>
         <include name="datanucleus-enhancer-*.jar"/>

Modified: incubator/hcatalog/trunk/ivy.xml
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/ivy.xml?rev=1292568&r1=1292567&r2=1292568&view=diff
==============================================================================
--- incubator/hcatalog/trunk/ivy.xml (original)
+++ incubator/hcatalog/trunk/ivy.xml Wed Feb 22 22:56:49 2012
@@ -51,5 +51,7 @@
         <dependency org="javax.management.j2ee" name="management-api" rev="${javax-mgmt.version}" conf="common->master" />
         <dependency org="com.google.code.p.arat" name="rat-lib" rev="${rats-lib.version}" conf="releaseaudit->default"/>
         <dependency org="org.vafer" name="jdeb" rev="${jdeb.version}" conf="package->master"/>
+        <dependency org="org.codehaus.jackson" name="jackson-mapper-asl" rev="${jackson.version}" conf="common->master"/>
+        <dependency org="org.codehaus.jackson" name="jackson-core-asl" rev="${jackson.version}" conf="common->master"/>
     </dependencies>
 </ivy-module>

Modified: incubator/hcatalog/trunk/ivy/libraries.properties
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/ivy/libraries.properties?rev=1292568&r1=1292567&r2=1292568&view=diff
==============================================================================
--- incubator/hcatalog/trunk/ivy/libraries.properties (original)
+++ incubator/hcatalog/trunk/ivy/libraries.properties Wed Feb 22 22:56:49 2012
@@ -24,3 +24,4 @@ activemq.version=5.5.0
 javax-mgmt.version=1.1-rev-1
 rats-lib.version=0.5.1
 jdeb.version=0.8
+jackson.version=1.7.3

Added: incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/JsonSerDe.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/JsonSerDe.java?rev=1292568&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/JsonSerDe.java (added)
+++ incubator/hcatalog/trunk/src/java/org/apache/hcatalog/data/JsonSerDe.java Wed Feb 22 22:56:49 2012
@@ -0,0 +1,517 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hcatalog.data;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.serde.Constants;
+import org.apache.hadoop.hive.serde2.SerDe;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.SerDeStats;
+import org.apache.hadoop.hive.serde2.SerDeUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+import org.apache.hcatalog.common.HCatException;
+import org.apache.hcatalog.common.HCatUtil;
+import org.apache.hcatalog.data.schema.HCatFieldSchema;
+import org.apache.hcatalog.data.schema.HCatFieldSchema.Type;
+import org.apache.hcatalog.data.schema.HCatSchema;
+import org.apache.hcatalog.data.schema.HCatSchemaUtils;
+
+import org.codehaus.jackson.JsonFactory;
+import org.codehaus.jackson.JsonParseException;
+import org.codehaus.jackson.JsonParser;
+import org.codehaus.jackson.JsonToken;
+
+public class JsonSerDe implements SerDe {
+
+  public static final Log LOG = LogFactory
+      .getLog(JsonSerDe.class.getName());
+
+  private List<String> columnNames;
+  private List<TypeInfo> columnTypes;
+  
+  private StructTypeInfo rowTypeInfo;
+  private HCatSchema schema;
+
+  private JsonFactory jsonFactory = null;
+      
+  private HCatRecordObjectInspector cachedObjectInspector;
+
+  @Override
+  public void initialize(Configuration conf, Properties tbl)
+      throws SerDeException {
+
+    if (LOG.isDebugEnabled()){
+      LOG.debug("Initializing JsonSerDe");
+      HCatUtil.logEntrySet(LOG, "props to serde", tbl.entrySet());
+    }
+    
+    // Get column names and types
+    String columnNameProperty = tbl.getProperty(Constants.LIST_COLUMNS);
+    String columnTypeProperty = tbl.getProperty(Constants.LIST_COLUMN_TYPES);
+    
+    // all table column names
+    if (columnNameProperty.length() == 0) {
+      columnNames = new ArrayList<String>();
+    } else {
+      columnNames = Arrays.asList(columnNameProperty.split(","));
+    }
+    
+    // all column types
+    if (columnTypeProperty.length() == 0) {
+      columnTypes = new ArrayList<TypeInfo>();
+    } else {
+      columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
+    }
+
+    if (LOG.isDebugEnabled()){
+      LOG.debug("columns:" + columnNameProperty);
+      for (String s : columnNames){
+        LOG.debug("cn:"+s);
+      }
+      LOG.debug("types: " + columnTypeProperty);
+      for (TypeInfo t : columnTypes){
+        LOG.debug("ct:"+t.getTypeName()+",type:"+t.getCategory());
+      }
+    }
+    
+    assert (columnNames.size() == columnTypes.size());
+    
+    rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
+
+    cachedObjectInspector = HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector(rowTypeInfo);
+    try {
+      schema = HCatSchemaUtils.getHCatSchema(rowTypeInfo).get(0).getStructSubSchema();
+      if (LOG.isDebugEnabled()){
+        LOG.debug("schema : "+ schema);
+        LOG.debug("\tfields : "+schema.getFieldNames());
+      }
+    } catch (HCatException e) {
+      throw new SerDeException(e);
+    }
+
+    jsonFactory = new JsonFactory();
+  }
+
+  /**
+   * Takes a JSON string in Text form and returns an object representation of it
+   * that is readable by the corresponding object inspector.
+   * 
+   * For this implementation, since we're using the jackson parser, we can construct
+   * our own object implementation, and we use HCatRecord for it
+   */
+  @Override
+  public Object deserialize(Writable blob) throws SerDeException {
+    
+    Text t = (Text)blob;
+    JsonParser p;
+    List<Object> r = new ArrayList<Object>(Collections.nCopies(columnNames.size(), null));
+    try {
+      // parse only the valid portion of Text's backing array, which may be longer than its contents
+      p = jsonFactory.createJsonParser(new ByteArrayInputStream(t.getBytes(), 0, t.getLength()));
+      if (p.nextToken() != JsonToken.START_OBJECT) {
+        throw new IOException("Start token not found where expected");
+      }
+      JsonToken token;
+      while( ((token = p.nextToken()) != JsonToken.END_OBJECT)&&(token != null)){
+        // iterate through each token, and create appropriate object here.
+        populateRecord(r,token,p,schema);
+      }
+    } catch (JsonParseException e) {
+      LOG.warn("Error ["+ e.getMessage()+"] parsing json text ["+t+"]");
+      throw new SerDeException(e);
+    } catch (IOException e) {
+      LOG.warn("Error ["+ e.getMessage()+"] parsing json text ["+t+"]");
+      throw new SerDeException(e);
+    }
+    
+    return new DefaultHCatRecord(r);
+  }
+
+  private void populateRecord(List<Object> r, JsonToken token, JsonParser p, HCatSchema s) throws IOException {
+    if (token != JsonToken.FIELD_NAME){
+      throw new IOException("Field name expected");
+    }
+    String fieldName = p.getText();
+    int fpos = s.getPosition(fieldName);
+    HCatFieldSchema hcatFieldSchema = s.getFields().get(fpos);
+
+    r.set(fpos,extractCurrentField(p, null, hcatFieldSchema,false));
+  }
+
+  /**
+   * Utility method to extract current expected field from given JsonParser
+   * 
+   * To get the field, we need either a type or an hcatFieldSchema (the latter is
+   * necessary for complex types). One of them may be null, in which case it is
+   * derived from the other.
+   * 
+   * The isTokenCurrent flag indicates whether the JsonParser is already positioned
+   * at the token we expect to read next, or needs to be advanced before we read.
+   */
+  private Object extractCurrentField(JsonParser p, Type t,
+      HCatFieldSchema hcatFieldSchema, boolean isTokenCurrent) throws IOException, JsonParseException,
+      HCatException {
+    Object val = null;
+    JsonToken valueToken;
+    if (isTokenCurrent){
+      valueToken = p.getCurrentToken();
+    } else {
+      valueToken = p.nextToken();
+    }
+
+    if (hcatFieldSchema != null){
+      t = hcatFieldSchema.getType();
+    }
+    switch(t) {
+    case INT:
+      val = (valueToken == JsonToken.VALUE_NULL)?null:p.getIntValue();
+      break;
+    case TINYINT:
+      val = (valueToken == JsonToken.VALUE_NULL)?null:p.getByteValue();
+      break;
+    case SMALLINT:
+      val = (valueToken == JsonToken.VALUE_NULL)?null:p.getShortValue();
+      break;
+    case BIGINT:
+      val = (valueToken == JsonToken.VALUE_NULL)?null:p.getLongValue();
+      break;
+    case BOOLEAN:
+      String bval = (valueToken == JsonToken.VALUE_NULL)?null:p.getText();
+      val = (bval == null) ? null : Boolean.valueOf(bval.equalsIgnoreCase("true")); // avoid NPE on JSON null
+      break;
+    case FLOAT:
+      val = (valueToken == JsonToken.VALUE_NULL)?null:p.getFloatValue();
+      break;
+    case DOUBLE:
+      val = (valueToken == JsonToken.VALUE_NULL)?null:p.getDoubleValue();
+      break;
+    case STRING:
+      val = (valueToken == JsonToken.VALUE_NULL)?null:p.getText();
+      break;
+    case BINARY:
+      throw new IOException("JsonSerDe does not support BINARY type");
+    case ARRAY:
+      if (valueToken != JsonToken.START_ARRAY){
+        throw new IOException("Start of Array expected");
+      }
+      List<Object> arr = new ArrayList<Object>();
+      while ((valueToken = p.nextToken()) != JsonToken.END_ARRAY) {
+        arr.add(extractCurrentField(p, null,hcatFieldSchema.getArrayElementSchema().get(0),true));
+      }
+      val = arr;
+      break;
+    case MAP:
+      if (valueToken != JsonToken.START_OBJECT){
+        throw new IOException("Start of Object expected");
+      }
+      Map<Object,Object> map = new LinkedHashMap<Object,Object>();
+      Type keyType = hcatFieldSchema.getMapKeyType();
+      HCatFieldSchema valueSchema = hcatFieldSchema.getMapValueSchema().get(0);
+      while ((valueToken = p.nextToken()) != JsonToken.END_OBJECT) {
+        Object k = getObjectOfCorrespondingPrimitiveType(p.getCurrentName(),keyType);
+        Object v;
+        if (valueSchema.getType() == HCatFieldSchema.Type.STRUCT){
+          v = extractCurrentField(p,null, valueSchema,false); 
+        } else {
+          v = extractCurrentField(p,null, valueSchema,true);
+        }
+
+        map.put(k, v);
+      }
+      val = map;
+      break;
+    case STRUCT:
+      if (valueToken != JsonToken.START_OBJECT){
+        throw new IOException("Start of Object expected");
+      }
+      HCatSchema subSchema = hcatFieldSchema.getStructSubSchema();
+      int sz = subSchema.getFieldNames().size();
+      
+      List<Object> struct = new ArrayList<Object>(Collections.nCopies(sz, null));
+      while ((valueToken = p.nextToken()) != JsonToken.END_OBJECT) {
+        populateRecord(struct, valueToken, p, subSchema);
+      }
+      val = struct;
+      break;
+    }
+    return val;
+  }
+
+  private Object getObjectOfCorrespondingPrimitiveType(String s, Type t) throws IOException {
+    switch(t) {
+    case INT:
+      return Integer.valueOf(s);
+    case TINYINT:
+      return Byte.valueOf(s);
+    case SMALLINT:
+      return Short.valueOf(s);
+    case BIGINT:
+      return Long.valueOf(s);
+    case BOOLEAN:
+      return (s.equalsIgnoreCase("true"));
+    case FLOAT:
+      return Float.valueOf(s);
+    case DOUBLE:
+      return Double.valueOf(s);
+    case STRING:
+      return s;
+    case BINARY:
+      throw new IOException("JsonSerDe does not support BINARY type");
+    }
+    throw new IOException("Could not convert from string to map type "+t);
+  }
+
+  /**
+   * Given an object and object inspector pair, traverse the object
+   * and generate a Text representation of the object.
+   */
+  @Override
+  public Writable serialize(Object obj, ObjectInspector objInspector)
+      throws SerDeException {
+    StringBuilder sb = new StringBuilder();
+    try {
+      buildJSONString(sb, obj, objInspector);
+    } catch (IOException e) {
+      LOG.warn("Error ["+ e.getMessage()+"] generating json text from object");
+      throw new SerDeException(e);
+    }
+    return new Text(sb.toString());
+  }
+
+  // TODO : this section is copied from SerDeUtils because the json production there is
+  // non-standard - it does not quote all field names. We should fix it there, and then
+  // remove this copy.
+  // See http://jackson.codehaus.org/1.7.3/javadoc/org/codehaus/jackson/JsonParser.Feature.html#ALLOW_UNQUOTED_FIELD_NAMES
+  // for details - trying to get Jackson to tolerate unquoted field names doesn't seem to
+  // work (compilation failure when attempting to use that feature), so we change the
+  // production itself instead.
+  // Also, this copy throws IOException when a BINARY type is detected.
+  private static void buildJSONString(StringBuilder sb, Object o, ObjectInspector oi) throws IOException {
+
+    switch (oi.getCategory()) {
+    case PRIMITIVE: {
+      PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
+      if (o == null) {
+        sb.append("null");
+      } else {
+        switch (poi.getPrimitiveCategory()) {
+        case BOOLEAN: {
+          boolean b = ((BooleanObjectInspector) poi).get(o);
+          sb.append(b ? "true" : "false");
+          break;
+        }
+        case BYTE: {
+          sb.append(((ByteObjectInspector) poi).get(o));
+          break;
+        }
+        case SHORT: {
+          sb.append(((ShortObjectInspector) poi).get(o));
+          break;
+        }
+        case INT: {
+          sb.append(((IntObjectInspector) poi).get(o));
+          break;
+        }
+        case LONG: {
+          sb.append(((LongObjectInspector) poi).get(o));
+          break;
+        }
+        case FLOAT: {
+          sb.append(((FloatObjectInspector) poi).get(o));
+          break;
+        }
+        case DOUBLE: {
+          sb.append(((DoubleObjectInspector) poi).get(o));
+          break;
+        }
+        case STRING: {
+          sb.append('"');
+          sb.append(SerDeUtils.escapeString(((StringObjectInspector) poi)
+              .getPrimitiveJavaObject(o)));
+          sb.append('"');
+          break;
+        }
+        case TIMESTAMP: {
+          sb.append('"');
+          sb.append(((TimestampObjectInspector) poi)
+              .getPrimitiveWritableObject(o));
+          sb.append('"');
+          break;
+        }
+        case BINARY: {
+          throw new IOException("JsonSerDe does not support BINARY type");
+        }
+        default:
+          throw new RuntimeException("Unknown primitive type: "
+              + poi.getPrimitiveCategory());
+        }
+      }
+      break;
+    }
+    case LIST: {
+      ListObjectInspector loi = (ListObjectInspector) oi;
+      ObjectInspector listElementObjectInspector = loi
+          .getListElementObjectInspector();
+      List<?> olist = loi.getList(o);
+      if (olist == null) {
+        sb.append("null");
+      } else {
+        sb.append(SerDeUtils.LBRACKET);
+        for (int i = 0; i < olist.size(); i++) {
+          if (i > 0) {
+            sb.append(SerDeUtils.COMMA);
+          }
+          buildJSONString(sb, olist.get(i), listElementObjectInspector);
+        }
+        sb.append(SerDeUtils.RBRACKET);
+      }
+      break;
+    }
+    case MAP: {
+      MapObjectInspector moi = (MapObjectInspector) oi;
+      ObjectInspector mapKeyObjectInspector = moi.getMapKeyObjectInspector();
+      ObjectInspector mapValueObjectInspector = moi
+          .getMapValueObjectInspector();
+      Map<?, ?> omap = moi.getMap(o);
+      if (omap == null) {
+        sb.append("null");
+      } else {
+        sb.append(SerDeUtils.LBRACE);
+        boolean first = true;
+        for (Object entry : omap.entrySet()) {
+          if (first) {
+            first = false;
+          } else {
+            sb.append(SerDeUtils.COMMA);
+          }
+          Map.Entry<?, ?> e = (Map.Entry<?, ?>) entry;
+          StringBuilder keyBuilder = new StringBuilder();
+          buildJSONString(keyBuilder, e.getKey(), mapKeyObjectInspector);
+          String keyString = keyBuilder.toString().trim();
+          boolean doQuoting =  (!keyString.isEmpty()) && (keyString.charAt(0)!= SerDeUtils.QUOTE);
+          if (doQuoting ){
+            sb.append(SerDeUtils.QUOTE);
+          }
+          sb.append(keyString);
+          if (doQuoting ){
+            sb.append(SerDeUtils.QUOTE);
+          }
+          sb.append(SerDeUtils.COLON);
+          buildJSONString(sb, e.getValue(), mapValueObjectInspector);
+        }
+        sb.append(SerDeUtils.RBRACE);
+      }
+      break;
+    }
+    case STRUCT: {
+      StructObjectInspector soi = (StructObjectInspector) oi;
+      List<? extends StructField> structFields = soi.getAllStructFieldRefs();
+      if (o == null) {
+        sb.append("null");
+      } else {
+        sb.append(SerDeUtils.LBRACE);
+        for (int i = 0; i < structFields.size(); i++) {
+          if (i > 0) {
+            sb.append(SerDeUtils.COMMA);
+          }
+          sb.append(SerDeUtils.QUOTE);
+          sb.append(structFields.get(i).getFieldName());
+          sb.append(SerDeUtils.QUOTE);
+          sb.append(SerDeUtils.COLON);
+          buildJSONString(sb, soi.getStructFieldData(o, structFields.get(i)),
+              structFields.get(i).getFieldObjectInspector());
+        }
+        sb.append(SerDeUtils.RBRACE);
+      }
+      break;
+    }
+    case UNION: {
+      UnionObjectInspector uoi = (UnionObjectInspector) oi;
+      if (o == null) {
+        sb.append("null");
+      } else {
+        sb.append(SerDeUtils.LBRACE);
+        sb.append(uoi.getTag(o));
+        sb.append(SerDeUtils.COLON);
+        buildJSONString(sb, uoi.getField(o),
+              uoi.getObjectInspectors().get(uoi.getTag(o)));
+        sb.append(SerDeUtils.RBRACE);
+      }
+      break;
+    }
+    default:
+      throw new RuntimeException("Unknown type in ObjectInspector!");
+    }
+  }
+
+  
+  /**
+   *  Returns an object inspector for the specified schema that
+   *  is capable of reading in the object representation of the JSON string
+   */
+  @Override
+  public ObjectInspector getObjectInspector() throws SerDeException {
+    return cachedObjectInspector;
+  }
+
+  @Override
+  public Class<? extends Writable> getSerializedClass() {
+    return Text.class;
+  }
+
+  @Override
+  public SerDeStats getSerDeStats() {
+    // no support for statistics yet
+    return null;
+  }
+
+}
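
The deserializer above is a plain walk over Jackson's streaming tokens: one START_OBJECT per
record, one FIELD_NAME per column, recursing into arrays, maps, and structs. A self-contained
sketch of that pattern (Jackson 1.x streaming API, as added to ivy above; the input string is
illustrative):

    import java.io.ByteArrayInputStream;
    import java.io.IOException;
    import org.codehaus.jackson.JsonFactory;
    import org.codehaus.jackson.JsonParser;
    import org.codehaus.jackson.JsonToken;

    public class TokenWalkSketch {
      public static void main(String[] args) throws Exception {
        byte[] json = "{\"i\":789,\"s\":\"hcat and hadoop\"}".getBytes("UTF-8");
        JsonParser p = new JsonFactory().createJsonParser(new ByteArrayInputStream(json));
        if (p.nextToken() != JsonToken.START_OBJECT) {        // as in deserialize()
          throw new IOException("Start token not found where expected");
        }
        JsonToken token;
        while ((token = p.nextToken()) != JsonToken.END_OBJECT && token != null) {
          String fieldName = p.getText();                     // as in populateRecord()
          JsonToken valueToken = p.nextToken();               // as in extractCurrentField()
          System.out.println(fieldName + " = " + p.getText() + " (" + valueToken + ")");
        }
      }
    }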

Added: incubator/hcatalog/trunk/src/test/org/apache/hcatalog/data/TestJsonSerDe.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/org/apache/hcatalog/data/TestJsonSerDe.java?rev=1292568&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/test/org/apache/hcatalog/data/TestJsonSerDe.java (added)
+++ incubator/hcatalog/trunk/src/test/org/apache/hcatalog/data/TestJsonSerDe.java Wed Feb 22 22:56:49 2012
@@ -0,0 +1,131 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hcatalog.data;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Properties;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.serde.Constants;
+import org.apache.hadoop.io.Writable;
+
+import junit.framework.Assert;
+import junit.framework.TestCase;
+
+public class TestJsonSerDe extends TestCase{
+
+  public Map<Properties,HCatRecord> getData(){
+    Map<Properties,HCatRecord> data = new HashMap<Properties,HCatRecord>();
+
+    List<Object> rlist = new ArrayList<Object>(11);
+    rlist.add(new Byte("123"));
+    rlist.add(new Short("456"));
+    rlist.add(new Integer(789));
+    rlist.add(new Long(1000L));
+    rlist.add(new Double(5.3D));
+    rlist.add(new Float(2.39F));
+    rlist.add(new String("hcat and hadoop"));
+    rlist.add(null);
+
+    List<Object> innerStruct = new ArrayList<Object>(2);
+    innerStruct.add(new String("abc"));
+    innerStruct.add(new String("def"));
+    rlist.add(innerStruct);
+    
+    List<Integer> innerList = new ArrayList<Integer>();
+    innerList.add(314);
+    innerList.add(007);
+    rlist.add(innerList);
+    
+    Map<Short, String> map = new HashMap<Short, String>(3);
+    map.put(new Short("2"), "hcat is cool");
+    map.put(new Short("3"), "is it?");
+    map.put(new Short("4"), "or is it not?");
+    rlist.add(map);
+
+    rlist.add(new Boolean(true));
+    
+    List<Object> c1 = new ArrayList<Object>();
+      List<Object> c1_1 = new ArrayList<Object>();
+      c1_1.add(new Integer(12));
+        List<Object> i2 = new ArrayList<Object>();
+          List<Integer> ii1 = new ArrayList<Integer>();
+            ii1.add(new Integer(13));
+            ii1.add(new Integer(14));
+          i2.add(ii1);
+          Map<String,List<?>> ii2 = new HashMap<String,List<?>>();
+            List<Integer> iii1 = new ArrayList<Integer>();
+              iii1.add(new Integer(15));
+            ii2.put("phew", iii1);
+          i2.add(ii2);
+      c1_1.add(i2);
+      c1.add(c1_1);
+    rlist.add(c1);
+
+    String typeString = 
+        "tinyint,smallint,int,bigint,double,float,string,string,"
+        + "struct<a:string,b:string>,array<int>,map<smallint,string>,boolean,"
+        + "array<struct<i1:int,i2:struct<ii1:array<int>,ii2:map<string,struct<iii1:int>>>>>";
+    Properties props = new Properties();
+    
+    props.put(Constants.LIST_COLUMNS, "ti,si,i,bi,d,f,s,n,r,l,m,b,c1");
+    props.put(Constants.LIST_COLUMN_TYPES, typeString);
+//    props.put(Constants.SERIALIZATION_NULL_FORMAT, "\\N");
+//    props.put(Constants.SERIALIZATION_FORMAT, "1");
+
+    data.put(props, new DefaultHCatRecord(rlist));
+    return data;
+  }
+
+  public void testRW() throws Exception {
+
+    Configuration conf = new Configuration();
+
+    for (Entry<Properties,HCatRecord> e : getData().entrySet()){
+      Properties tblProps = e.getKey();
+      HCatRecord r = e.getValue();
+      
+      HCatRecordSerDe hrsd = new HCatRecordSerDe();
+      hrsd.initialize(conf, tblProps);
+
+      JsonSerDe jsde = new JsonSerDe();
+      jsde.initialize(conf, tblProps);
+      
+      System.out.println("ORIG:"+r.toString());
+
+      Writable s = hrsd.serialize(r,hrsd.getObjectInspector());
+      System.out.println("ONE:"+s.toString());
+      
+      Object o1 = hrsd.deserialize(s);
+      assertTrue(HCatDataCheckUtil.recordsEqual(r, (HCatRecord) o1));
+      
+      Writable s2 = jsde.serialize(o1, hrsd.getObjectInspector());
+      System.out.println("TWO:"+s2.toString());
+      Object o2 = jsde.deserialize(s2);
+      System.out.println("deserialized TWO : "+o2);
+      
+      assertTrue(HCatDataCheckUtil.recordsEqual(r, (HCatRecord) o2));
+    }
+
+  }
+
+}
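
For orientation, the JSON that this test's record round-trips through would look roughly like
the following (derived from buildJSONString above: field names and map keys quoted, null for
the n column; map entry order depends on HashMap iteration, and number formatting follows
Java's StringBuilder.append):

    {"ti":123,"si":456,"i":789,"bi":1000,"d":5.3,"f":2.39,"s":"hcat and hadoop","n":null,"r":{"a":"abc","b":"def"},"l":[314,7],"m":{"2":"hcat is cool","3":"is it?","4":"or is it not?"},"b":true,"c1":[{"i1":12,"i2":{"ii1":[13,14],"ii2":{"phew":{"iii1":15}}}}]}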


