hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dhr...@apache.org
Subject svn commit: r706704 [23/23] - in /hadoop/core/trunk: ./ src/contrib/hive/ src/contrib/hive/bin/ src/contrib/hive/cli/src/java/org/apache/hadoop/hive/cli/ src/contrib/hive/common/src/java/org/apache/hadoop/hive/conf/ src/contrib/hive/conf/ src/contrib/h...
Date Tue, 21 Oct 2008 18:11:18 GMT
Added: hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/dynamic_type/thrift_grammar.jjt
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/dynamic_type/thrift_grammar.jjt?rev=706704&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/dynamic_type/thrift_grammar.jjt (added)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/dynamic_type/thrift_grammar.jjt Tue Oct 21 11:11:05 2008
@@ -0,0 +1,866 @@
+// JJTree options for the dynamic SerDe thrift grammar.
+options {
+ MULTI=true;                    // one AST node class per production
+ STATIC = false;                // parser state lives in instances, not statics
+ NODE_PREFIX = "DynamicSerDe";  // node classes are named DynamicSerDe<Production>
+}
+
+
+PARSER_BEGIN(thrift_grammar)
+
+package org.apache.hadoop.hive.serde2.dynamic_type;
+
+import java.util.*;
+import java.io.*;
+import java.net.*;
+import com.facebook.thrift.protocol.*;
+import com.facebook.thrift.transport.*;
+import org.apache.hadoop.hive.serde2.dynamic_type.*;
+
+public class thrift_grammar {
+
+    // Directories searched, in order, when resolving thrift "include" statements.
+    private List<String> include_path = null;
+
+    // for computing the autogenerated field ids in thrift
+    private  int field_val;
+
+    // store types and tables
+    // separately because one cannot use a table (ie service.method) as a Struct like type.
+    protected Map<String,DynamicSerDeSimpleNode> types;
+    protected Map<String,DynamicSerDeSimpleNode> tables;
+
+    // system include path
+    final private static String default_include_path[] = {  "/usr/local/include","/usr/include","/usr/local/include/thrift/if","/usr/local/include/fb303/if" };
+
+    // need three params to differentiate between this and 2 param method auto generated since
+    // some calls in the autogenerated code use null param for 2nd param and thus ambiguous.
+    protected thrift_grammar(InputStream is, List<String> include_path, boolean junk) {
+        this(is,null);
+        this.types = new HashMap<String,DynamicSerDeSimpleNode> () ;
+        this.tables = new HashMap<String,DynamicSerDeSimpleNode> () ;
+        this.include_path = include_path;
+        this.field_val = -1;
+    }
+
+    // find the file on the include path; returns null when it is not found.
+    private static File findFile(String fname, List<String> include_path) {
+        for(String path: include_path) {
+            final String full = path + "/" + fname;
+            File f = new File(full);
+            if(f.exists()) {
+                return f;
+            }
+        }
+        return null;
+    }
+
+    // Entry point: parses --file <name> (resolved against the include path,
+    // which --include entries extend) or standard input when no file is given.
+    public static void main(String args[]) {
+        String filename = null;
+        List<String> include_path = new ArrayList<String>();
+
+        for(String path: default_include_path)  {
+            include_path.add(path);
+        }
+        for(int i = 0; i < args.length; i++) {
+            String arg = args[i];
+            if(arg.equals("--include") && i + 1 < args.length) {
+                include_path.add(args[++i]);
+            }
+            if(arg.equals("--file") && i + 1 < args.length) {
+                filename = args[++i];
+            }
+        }
+
+        InputStream is = System.in;
+        if(filename != null) {
+            // bugfix: findFile returns null when the file is not on the include
+            // path; previously that null reached new FileInputStream() and blew
+            // up with an unhandled NullPointerException, while real IOExceptions
+            // were silently swallowed by an empty catch block.
+            File found = findFile(filename, include_path);
+            if(found == null) {
+                System.err.println("File not found on include path: " + filename);
+                return;
+            }
+            try {
+                is = new FileInputStream(found);
+            } catch(IOException e) {
+                System.err.println("Cannot open " + found + ": " + e.getMessage());
+                return;
+            }
+        }
+        thrift_grammar t = new thrift_grammar(is,include_path,false);
+
+        try {
+            t.Start();
+        } catch (Exception e) {
+            System.out.println("Parse error.");
+            System.out.println(e.getMessage());
+            e.printStackTrace();
+        }
+    }
+}
+
+PARSER_END(thrift_grammar)
+
+
+
+// Lexical input to ignore: whitespace plus the three thrift comment styles
+// (#-to-eol, //-to-eol, and /* ... */ block comments).
+SKIP :
+{
+  " "
+| "\t"
+| "\n"
+| "\r"
+| <"#"(~["\n"])* ("\n"|"\r"|"\r\n")>
+| <"//" (~["\n","\r"])* ("\n"|"\r"|"\r\n")>
+| <"/*" (~["*"])* "*" (~["/"] (~["*"])* "*")* "/">
+}
+
+
+/**
+ * HELPER DEFINITIONS, COMMENTS, CONSTANTS, AND WHATNOT
+ */
+
+TOKEN:
+{
+<tok_const: "const">|
+<tok_namespace : "namespace"> |
+<tok_cpp_namespace: "cpp_namespace">|
+<tok_cpp_include : "cpp_include">|
+<tok_cpp_type: "cpp_type">|
+<tok_java_package : "java_package">|
+<tok_cocoa_prefix: "cocoa_prefix">|
+<tok_csharp_namespace: "csharp_namespace">|
+<tok_php_namespace: "php_namespace">|
+<tok_py_module: "py_module">|
+<tok_perl_package: "perl_package">|
+<tok_ruby_namespace: "ruby_namespace">|
+<tok_smalltalk_category: "smalltalk_category">|
+<tok_smalltalk_prefix: "smalltalk_prefix">|
+<tok_xsd_all: "xsd_all">|
+<tok_xsd_optional: "xsd_optional">|
+<tok_xsd_nillable: "xsd_nillable">|
+<tok_xsd_namespace: "xsd_namespace">|
+<tok_xsd_attrs: "xsd_attrs">|
+<tok_include : "include">|
+<tok_void : "void">|
+<tok_bool : "bool">|
+<tok_byte: "byte">|
+<tok_i16: "i16">|
+<tok_i32: "i32">|
+<tok_i64: "i64">|
+<tok_double: "double">|
+<tok_string: "string">|
+<tok_slist : "slist">|
+<tok_senum: "senum">|
+<tok_map: "map"> |
+<tok_list: "list"> |
+<tok_set: "set"> |
+<tok_async: "async"> |
+<tok_typedef: "typedef"> |
+<tok_struct: "struct"> |
+<tok_exception: "exception"> |
+<tok_extends: "extends"> |
+<tok_throws: "throws"> |
+<tok_service: "service"> |
+<tok_enum: "enum"> |
+<tok_required: "required"> |
+<tok_optional: "optional">
+}
+
+// Literal/constant tokens: numbers, identifiers, quoted strings, and the
+// smalltalk-style identifiers used by smalltalk_category.
+TOKEN: {
+
+<tok_int_constant :  (["+","-"])?(["0"-"9"])+>
+|
+// NOTE(review): tok_double_constant requires an explicit leading '+' or '-',
+// so an unsigned literal like 1.5 will not lex as a double -- confirm intended.
+<tok_double_constant:   ["+","-"](<DIGIT>)*"."(<DIGIT>)+(["e","E"](["+","-"])?(<DIGIT>)+)?>
+|
+<IDENTIFIER: <LETTER>(<LETTER>|<DIGIT>|"."|"_")*>
+|
+<#LETTER: (["a"-"z", "A"-"Z" ]) >
+|
+<#DIGIT: ["0"-"9"] >
+|
+<tok_literal:        "\""(~["\""])*"\""|"'"(~["'"])*"'">
+|
+<tok_st_identifier: ["a"-"z","A"-"Z","-"]([".","a"-"z","A"-"Z","_","0"-"9","-"])*>
+}
+
+
+SimpleNode Start() : {}
+{
+  HeaderList()  (Definition())+
+ {
+    return jjtThis;
+  }
+}
+
+SimpleNode HeaderList() : {}
+{
+  (Header())*
+ {
+    return jjtThis;
+  }
+
+}
+
+SimpleNode Header() : {}
+{
+  Include()
+    {
+    return jjtThis;
+    }
+| Namespace()
+    {
+    return jjtThis;
+    }
+}
+
+SimpleNode Namespace() : {}
+{
+  <tok_namespace> <IDENTIFIER> <IDENTIFIER>
+{
+  return jjtThis;
+}
+|
+<tok_cpp_namespace> <IDENTIFIER>
+{
+  return jjtThis;
+}
+|
+<tok_cpp_include> <tok_literal>
+{
+  return jjtThis;
+}
+|
+<tok_php_namespace> <IDENTIFIER>
+{
+  return jjtThis;
+}
+|
+<tok_py_module> <IDENTIFIER>
+{
+  return jjtThis;
+}
+|
+<tok_perl_package> <IDENTIFIER>
+{
+  return jjtThis;
+}
+|
+<tok_ruby_namespace> <IDENTIFIER>
+{
+  return jjtThis;
+}
+|
+<tok_smalltalk_category> <tok_st_identifier>
+{
+  return jjtThis;
+}
+|
+<tok_smalltalk_prefix> <IDENTIFIER>
+{
+  return jjtThis;
+}
+|
+<tok_java_package> <IDENTIFIER>
+{
+  return jjtThis;
+}
+|
+<tok_cocoa_prefix> <IDENTIFIER>
+{
+  return jjtThis;
+}
+|
+<tok_xsd_namespace> <tok_literal>
+{
+  return jjtThis;
+}
+|
+<tok_csharp_namespace> <IDENTIFIER>
+{
+  return jjtThis;
+}
+}
+
+
+// include "<file>" -- recursively parses the included file and merges its
+// declared types and tables into this parser's maps.
+SimpleNode Include() :  {
+ String fname;
+ boolean found = false;
+}
+{
+   <tok_include>
+   fname=<tok_literal>.image
+{
+    // strip the surrounding quotes from the literal
+    // bugbug somewhat fragile below substring expression
+    fname = fname.substring(1,fname.length() - 1);
+
+    // try to find the file on the include path
+    File f = thrift_grammar.findFile(fname, this.include_path);
+    if(f != null) {
+        found = true;
+        FileInputStream fis = null;
+        try {
+            fis = new FileInputStream(f);
+            thrift_grammar t = new thrift_grammar(fis,this.include_path, false);
+            t.Start();
+            // add in what we found to our type and table tables.
+            this.tables.putAll(t.tables);
+            this.types.putAll(t.types);
+        } catch (Exception e) {
+            System.out.println("File: " + fname + " - Oops.");
+            System.out.println(e.getMessage());
+            e.printStackTrace();
+        } finally {
+            // bugfix: the stream previously leaked when t.Start() threw; a
+            // redundant second "found = true" assignment was also removed.
+            if(fis != null) {
+                try { fis.close(); } catch(IOException ioe) { /* best effort */ }
+            }
+        }
+    }
+    if(!found) {
+        throw new RuntimeException("include file not found: " + fname);
+    }
+    return jjtThis;
+}
+}
+
+
+SimpleNode Definition() : {}
+{
+  Const()
+    {
+    return jjtThis;
+    }
+| Service()
+    {
+    return jjtThis;
+    }
+| TypeDefinition()
+    {
+    return jjtThis;
+    }
+}
+
+SimpleNode TypeDefinition() : {}
+{
+  Typedef()
+    {
+   return jjtThis;
+    }
+| Enum()
+    {
+   return jjtThis;
+    }
+| Senum()
+    {
+   return jjtThis;
+    }
+| Struct()
+    {
+   return jjtThis;
+    }
+| Xception()
+    {
+   return jjtThis;
+    }
+
+}
+
+// typedef <DefinitionType> <name> -- registers the alias in this.types so
+// later fields can refer to the new name.
+DynamicSerDeTypedef Typedef() : {}
+{
+  <tok_typedef>
+  DefinitionType()
+  jjtThis.name = <IDENTIFIER>.image
+    {
+        // store the type for later retrieval
+        this.types.put(jjtThis.name, jjtThis);
+        return jjtThis;
+    }
+}
+
+
+// returning void because we ignore this production.
+void CommaOrSemicolon() : {}
+{
+  ","
+|
+  ";"
+{
+}
+}
+
+SimpleNode Enum() : {}
+{
+  <tok_enum> <IDENTIFIER> "{" EnumDefList() "}"
+    {
+    return jjtThis;
+    }
+}
+
+SimpleNode EnumDefList() : {}
+{
+  (EnumDef())+
+    {
+    return jjtThis;
+    }
+}
+
+SimpleNode EnumDef() : {}
+{
+  <IDENTIFIER> ["=" <tok_int_constant>] [CommaOrSemicolon()]
+    {
+    return jjtThis;
+    }
+}
+
+SimpleNode Senum() : {}
+{
+  <tok_senum> <IDENTIFIER> "{" SenumDefList() "}"
+    {
+    return jjtThis;
+    }
+}
+
+SimpleNode SenumDefList() : {}
+{
+  (SenumDef())+
+    {
+    return jjtThis;
+    }
+}
+
+SimpleNode SenumDef() : {}
+{
+  <tok_literal> [CommaOrSemicolon()]
+    {
+    return jjtThis;
+    }
+}
+
+
+SimpleNode Const() : {}
+{
+  <tok_const> FieldType() <IDENTIFIER> "=" ConstValue() [CommaOrSemicolon()]
+    {
+    return jjtThis;
+    }
+}
+
+// A constant value: integer, double, string literal, identifier, list or map.
+// bugfix: all branches except ConstMap() previously had empty actions; JavaCC
+// appends "throw new Error(\"Missing return statement in function\")" to
+// non-void productions, so parsing any other const value threw at runtime.
+SimpleNode ConstValue() : {}
+{
+  <tok_int_constant>
+    {
+    return jjtThis;
+    }
+| <tok_double_constant>
+    {
+    return jjtThis;
+    }
+| <tok_literal>
+    {
+    return jjtThis;
+    }
+| <IDENTIFIER>
+    {
+    return jjtThis;
+    }
+| ConstList()
+    {
+    return jjtThis;
+    }
+| ConstMap()
+    {
+    return jjtThis;
+    }
+}
+
+SimpleNode ConstList() : {}
+{
+  "[" ConstListContents() "]"
+    {
+    return jjtThis;
+    }
+}
+
+SimpleNode ConstListContents() : {}
+{
+  (ConstValue() [CommaOrSemicolon()])+
+    {
+    return jjtThis;
+    }
+}
+
+SimpleNode ConstMap() : {}
+{
+  "{" ConstMapContents() "}"
+    {
+    return jjtThis;
+    }
+}
+
+// Key:value pairs of a const map; may be empty.
+// bugfix: the non-empty branch had an empty action, so JavaCC's generated
+// trailing "Missing return statement in function" Error was thrown whenever a
+// map actually contained entries.
+SimpleNode ConstMapContents() : {}
+{
+  (ConstValue() ":" ConstValue() [CommaOrSemicolon()])+
+    {
+    return jjtThis;
+    }
+|
+    {
+    return jjtThis;
+    }
+}
+
+DynamicSerDeStruct Struct() :  {
+
+}
+{
+  <tok_struct>
+  jjtThis.name = <IDENTIFIER>.image
+  "{"
+  FieldList()
+  "}"
+    {
+   this.types.put(jjtThis.name,jjtThis);
+    return jjtThis;
+    }
+}
+
+
+SimpleNode Xception() : {}
+{
+  <tok_exception> <IDENTIFIER> "{" FieldList() "}"
+    {
+    return jjtThis;
+    }
+}
+
+
+SimpleNode Service() : {}
+{
+  <tok_service>
+  <IDENTIFIER>
+  Extends()
+  "{"
+  FlagArgs()
+  (Function())+
+  UnflagArgs()
+  "}"
+    {
+        // at some point, these should be inserted as a "db"
+        return jjtThis;
+    }
+}
+
+SimpleNode FlagArgs() : {}
+{
+    {
+    return jjtThis;
+    }
+}
+
+SimpleNode UnflagArgs() : {}
+{
+    {
+    return jjtThis;
+    }
+}
+
+SimpleNode Extends() : {}
+{
+  <tok_extends> <IDENTIFIER>
+    {
+    return jjtThis;
+    }
+|
+    {
+    return jjtThis;
+    }
+}
+
+
+DynamicSerDeFunction Function() : {}
+{
+  // metastore ignores async and type
+  Async()
+  FunctionType()
+
+  // the name of the function/table
+  jjtThis.name = <IDENTIFIER>.image
+  "("
+  FieldList()
+  ")"
+  Throws()
+  [CommaOrSemicolon()]
+
+    {
+        this.tables.put(jjtThis.name, jjtThis);
+        return jjtThis;
+    }
+}
+
+void Async() : {}
+{
+  <tok_async>
+|
+{}
+}
+
+void Throws() : {}
+{
+  <tok_throws> "(" FieldList() ")"
+|
+{}
+}
+
+
+// nothing special - just use the DynamicSerDeFieldList's children methods to access the fields
+DynamicSerDeFieldList FieldList() : {
+   this.field_val = -1;
+}
+{
+  (Field())*    {
+    return jjtThis;
+  }
+}
+
+
+// One struct/exception/argument field: [id:] [requiredness] type name [= value]
+// Removed the unused local "String fid;" and replaced boxing Integer.valueOf
+// with Integer.parseInt.
+DynamicSerDeField Field() : {
+ // explicit field id text (e.g. the "1" of "1:"); empty when autogenerated
+ String fidnum = "";
+}
+{
+
+  // parse the field id which is optional
+  [fidnum=<tok_int_constant>.image ":"]
+
+  // is this field required or optional? default is optional
+  FieldRequiredness()
+
+  // field type - obviously not optional
+  FieldType()
+
+   // the name of the field - not optional
+  jjtThis.name = <IDENTIFIER>.image
+
+  // does it have = some value?
+  FieldValue()
+
+  // take it or leave it
+  [CommaOrSemicolon()]
+
+    {
+    if(fidnum.length() > 0) {
+       // explicit id given in the IDL
+       jjtThis.fieldid = Integer.parseInt(fidnum);
+    } else {
+       // mirror the thrift compiler: autogenerated ids count down from -1
+       jjtThis.fieldid = this.field_val--;
+    }
+    return jjtThis;
+    }
+}
+
+
+
+SimpleNode FieldRequiredness() : {}
+{
+  <tok_required>
+    {
+        return jjtThis;
+    }
+| <tok_optional>
+    {
+        return jjtThis;
+    }
+|
+   {
+    return jjtThis;
+   }
+}
+
+SimpleNode FieldValue() : {}
+{
+  "="
+     ConstValue()
+    {
+        return jjtThis;
+    }
+|
+{
+        return jjtThis;
+}
+}
+
+// Types legal on the right-hand side of a typedef.
+// NOTE(review): BaseType()/TypeByte() is commented out below, so
+// "typedef byte ..." will not parse -- confirm this is intentional.
+SimpleNode DefinitionType() : {}
+{
+//  BaseType() xxx
+  TypeString()
+    {
+    return jjtThis;
+    }
+| TypeBool()
+    {
+    return jjtThis;
+    }
+| Typei16()
+    {
+    return jjtThis;
+    }
+| Typei32()
+    {
+    return jjtThis;
+    }
+| Typei64()
+    {
+    return jjtThis;
+    }
+| TypeDouble()
+    {
+    return jjtThis;
+    }
+|  TypeMap()
+    {
+    return jjtThis;
+    }
+| TypeSet()
+    {
+    return jjtThis;
+    }
+| TypeList()
+    {
+    return jjtThis;
+    }
+}
+
+void FunctionType() : {}
+{
+  FieldType()
+| <tok_void>
+{}
+}
+
+DynamicSerDeFieldType FieldType() : {
+}
+
+{
+  TypeString()
+    {
+    return jjtThis;
+    }
+| TypeBool()
+    {
+    return jjtThis;
+    }
+| Typei16()
+    {
+    return jjtThis;
+    }
+| Typei32()
+    {
+    return jjtThis;
+    }
+| Typei64()
+    {
+    return jjtThis;
+    }
+| TypeDouble()
+    {
+    return jjtThis;
+    }
+|
+   TypeMap()
+    {
+    return jjtThis;
+    }
+|
+   TypeSet()
+    {
+    return jjtThis;
+    }
+|
+   TypeList()
+    {
+    return jjtThis;
+    }
+|
+  jjtThis.name = <IDENTIFIER>.image
+    {
+    return jjtThis;
+    }
+}
+
+DynamicSerDeTypeString TypeString() : {}
+{
+  <tok_string>
+  {
+  return jjtThis;
+  }
+}
+
+DynamicSerDeTypeByte TypeByte() : {
+}
+{
+   <tok_byte>
+   {
+   return jjtThis;
+   }
+}
+
+DynamicSerDeTypei16 Typei16() : {
+}
+{
+  <tok_i16>
+  {
+   return jjtThis;
+  }
+}
+
+DynamicSerDeTypei32 Typei32() : {}
+{
+  <tok_i32>
+  {
+   return jjtThis;
+  }
+}
+
+DynamicSerDeTypei64 Typei64() : {}
+{
+  <tok_i64>
+  {
+   return jjtThis;
+  }
+}
+
+DynamicSerDeTypeDouble TypeDouble() : {}
+{
+  <tok_double>
+  {
+   return jjtThis;
+  }
+}
+
+DynamicSerDeTypeBool TypeBool() : {}
+{
+  <tok_bool>
+  {
+   return jjtThis;
+  }
+}
+
+DynamicSerDeTypeMap TypeMap() : {}
+{
+  <tok_map>
+   "<"
+  FieldType()
+  ","
+  FieldType()
+   ">"
+    {
+    return jjtThis;
+    }
+}
+
+DynamicSerDeTypeSet TypeSet() : {}
+{
+  <tok_set>
+  "<"
+
+  FieldType()
+
+  ">"
+    {
+    return jjtThis;
+    }
+}
+
+DynamicSerDeTypeList TypeList() : {}
+{
+  <tok_list>
+  "<"
+
+  FieldType()
+
+  ">"
+    {
+    return jjtThis;
+    }
+}

Modified: hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ReflectionStructObjectInspector.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ReflectionStructObjectInspector.java?rev=706704&r1=706703&r2=706704&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ReflectionStructObjectInspector.java (original)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ReflectionStructObjectInspector.java Tue Oct 21 11:11:05 2008
@@ -116,11 +116,16 @@
     if (data == null) {
       return null;
     }
+    if (!(fieldRef instanceof MyField)) {
+      throw new RuntimeException("fieldRef has to be of MyField");
+    }
+    MyField f = (MyField) fieldRef;
     try {
-      MyField f = (MyField) fieldRef;
-      return f.field.get(data);
+      Object r = f.field.get(data);
+      return r;
     } catch (Exception e) {
-      throw new RuntimeException(e); 
+      throw new RuntimeException("cannot get field " + f.field + " from " 
+    		  + data.getClass() + " " + data); 
     }
   }
   public List<Object> getStructFieldsDataAsList(Object data) {

Added: hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/thrift/ConfigurableTProtocol.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/thrift/ConfigurableTProtocol.java?rev=706704&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/thrift/ConfigurableTProtocol.java (added)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/thrift/ConfigurableTProtocol.java Tue Oct 21 11:11:05 2008
@@ -0,0 +1,40 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2.thrift;
+
+import org.apache.hadoop.conf.Configuration;
+import java.util.Properties;
+import com.facebook.thrift.TException;
+
+/**
+ * An interface for TProtocols that need to have properties passed in to 
+ *  initialize them. e.g., separators for TCTLSeparatedProtocol.
+ *  If there was a regex like deserializer, the regex could be passed in
+ *  in this manner.
+ */
+public interface ConfigurableTProtocol {
+  /**
+   * Initialize the TProtocol from configuration and table properties,
+   * e.g. the separator characters of TCTLSeparatedProtocol.
+   * @param conf System properties
+   * @param tbl  table properties
+   * @throws TException if the properties cannot be applied
+   */
+  public void initialize(Configuration conf, Properties tbl) throws TException;
+
+}

Added: hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/thrift/TCTLSeparatedProtocol.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/thrift/TCTLSeparatedProtocol.java?rev=706704&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/thrift/TCTLSeparatedProtocol.java (added)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/java/org/apache/hadoop/hive/serde2/thrift/TCTLSeparatedProtocol.java Tue Oct 21 11:11:05 2008
@@ -0,0 +1,576 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.hadoop.hive.serde2.thrift;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.serde.Constants;
+import com.facebook.thrift.TException;
+import com.facebook.thrift.transport.*;
+import com.facebook.thrift.*;
+import com.facebook.thrift.protocol.*;
+import java.util.*;
+import java.util.regex.Pattern;
+import java.io.*;
+import org.apache.hadoop.conf.Configuration;
+import java.util.Properties;
+
+/**
+ *
+ * An implementation of the Thrift Protocol for ctl separated
+ * records.
+ * This is not thrift compliant in that it doesn't write out field ids
+ * so things cannot actually be versioned.
+ */
+public class TCTLSeparatedProtocol extends TProtocol implements ConfigurableTProtocol {
+
+  final static Log LOG = LogFactory.getLog(TCTLSeparatedProtocol.class.getName());
+  
+  /**
+   * Factory for TCTLSeparatedProtocol objects.
+   * (Previous comment wrongly said "JSON protocol objects".)
+   */
+  public static class Factory implements TProtocolFactory {
+
+    // Wraps the given transport in a protocol with the default separators.
+    public TProtocol getProtocol(TTransport trans) {
+      return new TCTLSeparatedProtocol(trans);
+    }
+
+  }
+
+  /**
+   * These are defaults, but for now leaving them like this
+   */
+  final static protected byte defaultPrimarySeparatorByte = 1;
+  final static protected byte defaultSecondarySeparatorByte = 2;
+  final static protected byte defaultRowSeparatorByte = (byte)'\n';
+  final static protected byte defaultMapSeparatorByte = 3;
+
+  /**
+   * The separators for this instance
+   */
+  protected byte primarySeparatorByte;
+  protected byte secondarySeparatorByte;
+  protected byte rowSeparatorByte;
+  protected byte mapSeparatorByte;
+  protected Pattern primaryPattern;
+  protected Pattern secondaryPattern;
+  protected Pattern mapPattern;
+
+  /**
+   * Inspect the separators this instance is configured with.
+   */
+  public byte getPrimarySeparator() { return primarySeparatorByte; }
+  public byte getSecondarySeparator() { return secondarySeparatorByte; }
+  public byte getRowSeparator() { return rowSeparatorByte; }
+  public byte getMapSeparator() { return mapSeparatorByte; }
+
+
+  /**
+   * The transport stream is tokenized on the row separator
+   */
+  protected SimpleTransportTokenizer transportTokenizer;
+
+  /**
+   * For a single row, the split on the primary separator
+   */
+  protected String columns[];
+
+  /**
+   * An index into what column we're on
+   */
+
+  protected int index;
+
+  /**
+   * For a single column, a split on the secondary separator
+   */
+  protected String fields[];
+
+  /**
+   * An index into what field within a column we're on
+   */
+  protected int innerIndex;
+
+
+  /**
+   * Is this the first field we're writing
+   */
+  protected boolean firstField;
+
+  /**
+   * Is this the first list/map/set field we're writing for the current element
+   */
+  protected boolean firstInnerField;
+
+
+  /**
+   * Are we writing a map and need to worry about k/v separator?
+   */
+  protected boolean isMap;
+
+
+  /**
+   * For writes, on what element are we on so we know when to use normal list separator or 
+   * for a map know when to use the k/v separator
+   */
+  protected long elemIndex;
+
+
+  /**
+   * Are we currently on the top-level columns or parsing a column itself
+   */
+  protected boolean inner;
+
+
+  /**
+   * For places where the separators are back to back, should we return a null or an empty string since it is ambiguous.
+   * This also applies to extra columns that are read but aren't in the current record.
+   */
+  protected boolean returnNulls;
+
+  /**
+   * The transport being wrapped.
+   *
+   */
+  final protected TTransport innerTransport;
+
+
+  /**
+   * Strings used to lookup the various configurable paramaters of this protocol.
+   */
+  public final static String ReturnNullsKey = "separators.return_nulls";
+  public final static String BufferSizeKey = "separators.buffer_size";
+
+  /**
+   * The size of the internal buffer to use.
+   */
+  protected int bufferSize;
+
+  /**
+   * A convenience class for tokenizing a TTransport
+   */
+
+  class SimpleTransportTokenizer {
+
+    TTransport trans;
+    StringTokenizer tokenizer;
+    final String separator;
+    byte buf[];
+
+    public SimpleTransportTokenizer(TTransport trans, byte separator, int buffer_length) {
+      this.trans = trans;
+      byte [] separators = new byte[1];
+      separators[0] = separator;
+      this.separator = new String(separators);
+      buf = new byte[buffer_length];
+      fillTokenizer();
+    }
+
+    // Refill the tokenizer with the next buffer-full from the transport.
+    // Returns false when no more data is available.
+    private boolean fillTokenizer() {
+      try {
+          int length = trans.read(buf, 0, buf.length);
+          if(length <=0 ) {
+            tokenizer = new StringTokenizer("", separator, true);
+            return false;
+          }
+          String row = new String(buf, 0, length);
+          // separator is already a String; the old new String(separator) wrap
+          // was redundant
+          tokenizer = new StringTokenizer(row, separator, true);
+        } catch(TTransportException e) {
+          e.printStackTrace();
+          // bugfix: tokenizer was previously set to null here, which made the
+          // next nextToken() call fail with a NullPointerException at
+          // tokenizer.hasMoreTokens(); use an empty tokenizer so callers see a
+          // clean end-of-input instead.
+          tokenizer = new StringTokenizer("", separator, true);
+          return false;
+        }
+        return true;
+    }
+
+    // Return the next row (all text up to the row separator), or null at end
+    // of input.  Tokens are accumulated because a row may span buffer refills.
+    public String nextToken() throws EOFException {
+      StringBuffer ret = null;
+      boolean done = false;
+
+      while(! done) {
+
+        if(! tokenizer.hasMoreTokens()) {
+          if(! fillTokenizer()) {
+            break;
+          }
+        }
+
+        try {
+          final String nextToken = tokenizer.nextToken();
+
+          if(nextToken.equals(separator)) {
+            done = true;
+          } else if(ret == null) {
+            ret = new StringBuffer(nextToken);
+          } else {
+            ret.append(nextToken);
+          }
+        } catch(NoSuchElementException e) {
+          if (ret == null) {
+            throw new EOFException(e.getMessage());
+          }
+          done = true;
+        }
+      } // while ! done
+      return ret == null ? null : ret.toString();
+    }
+  };
+
+
+  /**
+   * The simple constructor which assumes ctl-a, ctl-b and '\n' separators and to return empty strings for empty fields.
+   *
+   * @param trans - the ttransport to use as input or output
+   *
+   */
+
+  public TCTLSeparatedProtocol(TTransport trans) {
+    this(trans, defaultPrimarySeparatorByte, defaultSecondarySeparatorByte, defaultMapSeparatorByte, defaultRowSeparatorByte, false, 4096);
+  }
+
+  public TCTLSeparatedProtocol(TTransport trans, int buffer_size) {
+    this(trans, defaultPrimarySeparatorByte, defaultSecondarySeparatorByte, defaultMapSeparatorByte, defaultRowSeparatorByte, false, buffer_size);
+  }
+
+  /**
+   * @param trans - the ttransport to use as input or output
+   * @param primarySeparatorByte the separator between columns (aka fields)
+   * @param secondarySeparatorByte the separator within a field for things like sets and maps and lists
+   * @param mapSeparatorByte - the key/value separator
+   * @param rowSeparatorByte - the record separator
+   * @param returnNulls - whether to return a null or an empty string for fields that seem empty (ie two primary separators back to back)
+   * @param bufferSize - size of the internal read buffer
+   */
+
+  public TCTLSeparatedProtocol(TTransport trans, byte primarySeparatorByte, byte secondarySeparatorByte, byte mapSeparatorByte, byte rowSeparatorByte, 
+                               boolean returnNulls,
+                               int bufferSize) {
+    super(trans);
+
+    // bugfix: this was "returnNulls = returnNulls;" - a self-assignment that
+    // silently discarded the caller's value and left the field at its default.
+    this.returnNulls = returnNulls;
+
+    this.primarySeparatorByte = primarySeparatorByte;
+    this.secondarySeparatorByte = secondarySeparatorByte;
+    this.rowSeparatorByte = rowSeparatorByte;
+    this.mapSeparatorByte = mapSeparatorByte;
+
+    this.innerTransport = trans;
+    this.bufferSize = bufferSize;
+
+    internalInitialize();
+  }
+
+
+  /**
+   * Sets the internal separator patterns and creates the internal tokenizer.
+   */
+  protected void internalInitialize() {
+    byte []primarySeparator = new byte[1];
+    byte []secondarySeparator = new byte[1];
+    primarySeparator[0] = primarySeparatorByte;
+    secondarySeparator[0] = secondarySeparatorByte;
+
+    primaryPattern = Pattern.compile(new String(primarySeparator));
+    secondaryPattern = Pattern.compile(new String(secondarySeparator));
+    // NOTE(review): "\\0" + byte appends the byte's DECIMAL digits after a
+    // regex octal escape (default separators give "\02|\03"); this only
+    // matches the intended characters when those digits form a valid octal
+    // escape -- confirm behavior for non-default separator values.
+    mapPattern = Pattern.compile("\\0" + secondarySeparatorByte + "|\\0" + mapSeparatorByte);
+
+    transportTokenizer = new SimpleTransportTokenizer(innerTransport, rowSeparatorByte, bufferSize);
+  }
+
+  /**
+   * Initialize the TProtocol, overriding separators, null handling and buffer
+   * size from the supplied properties.
+   * @param conf System properties
+   * @param tbl  table properties
+   * @throws TException
+   */
+  public void initialize(Configuration conf, Properties tbl) throws TException {
+    // NOTE(review): Byte.valueOf parses each property as a decimal number
+    // string (e.g. "9"), not as a literal character (e.g. "\t"); a literal
+    // delimiter string would throw NumberFormatException -- confirm callers.
+    primarySeparatorByte = Byte.valueOf(tbl.getProperty(Constants.FIELD_DELIM, String.valueOf(primarySeparatorByte))).byteValue();
+    LOG.debug("collections delim=<" + tbl.getProperty(Constants.COLLECTION_DELIM) + ">" );
+    secondarySeparatorByte = Byte.valueOf(tbl.getProperty(Constants.COLLECTION_DELIM, String.valueOf(secondarySeparatorByte))).byteValue();
+    rowSeparatorByte = Byte.valueOf(tbl.getProperty(Constants.LINE_DELIM, String.valueOf(rowSeparatorByte))).byteValue();
+    mapSeparatorByte = Byte.valueOf(tbl.getProperty(Constants.MAPKEY_DELIM, String.valueOf(mapSeparatorByte))).byteValue();
+    returnNulls = Boolean.valueOf(tbl.getProperty(ReturnNullsKey, String.valueOf(returnNulls))).booleanValue();
+    bufferSize =  Integer.valueOf(tbl.getProperty(BufferSizeKey, String.valueOf(bufferSize))).intValue();
+
+    // rebuild patterns and the tokenizer with the (possibly) new separators
+    internalInitialize();
+
+  }
+
+  public void writeMessageBegin(TMessage message) throws TException {
+  }
+
+  public void writeMessageEnd() throws TException {
+  }
+
+  public void writeStructBegin(TStruct struct) throws TException {
+    firstField = true;
+  }
+
+  public void writeStructEnd() throws TException {
+    // We don't write rowSeparatorByte because that should be handled by file format.
+  }
+
+  public void writeFieldBegin(TField field) throws TException {
+    if(! firstField) {
+      writeByte(primarySeparatorByte);
+    }
+    firstField = false;
+  }
+
+  public void writeFieldEnd() throws TException {
+  }
+
+  public void writeFieldStop() {
+  }
+
+  public void writeMapBegin(TMap map) throws TException {
+    // nesting not allowed!
+    if(map.keyType == TType.STRUCT ||
+       map.keyType == TType.MAP ||
+       map.keyType == TType.LIST ||
+       map.keyType == TType.SET) {
+      throw new TException("Not implemented: nested structures");
+    }
+    // nesting not allowed!
+    if(map.valueType == TType.STRUCT ||
+       map.valueType == TType.MAP ||
+       map.valueType == TType.LIST ||
+       map.valueType == TType.SET) {
+      throw new TException("Not implemented: nested structures");
+    }
+
+    firstInnerField = true;
+    isMap = true;
+    inner = true;
+    elemIndex = 0;
+  }
+
+  public void writeMapEnd() throws TException {
+    isMap = false;
+    inner = false;
+  }
+
+  public void writeListBegin(TList list) throws TException {
+    if(list.elemType == TType.STRUCT ||
+       list.elemType == TType.MAP ||
+       list.elemType == TType.LIST ||
+       list.elemType == TType.SET) {
+      throw new TException("Not implemented: nested structures");
+    }
+    firstInnerField = true;
+    inner = true;
+  }
+
+  public void writeListEnd() throws TException {
+    inner = false;
+  }
+  
+  /**
+   * Begins a set column; like lists and maps, sets of containers or structs
+   * are not supported by this flat encoding.
+   */
+  public void writeSetBegin(TSet set) throws TException {
+    final byte t = set.elemType;
+    if (t == TType.STRUCT || t == TType.MAP || t == TType.LIST || t == TType.SET) {
+      // nesting not allowed!
+      throw new TException("Not implemented: nested structures");
+    }
+    firstInnerField = true;
+    inner = true;
+  }
+
+  public void writeSetEnd() throws TException {
+    inner = false;
+  }
+
+  // Booleans are written in their string form ("true"/"false").
+  public void writeBool(boolean b) throws TException {
+    writeString(String.valueOf(b));
+  }
+
+  // for writing out single byte
+  // NOTE(review): this one-byte scratch buffer is shared across calls, so a
+  // protocol instance is presumably not safe for concurrent use — confirm
+  // callers never share an instance across threads.
+  private byte buf[] = new byte[1];
+  public void writeByte(byte b) throws TException {
+    buf[0] = b;
+    trans_.write(buf);
+  }
+
+  // All numeric types are written in their decimal string form; the
+  // surrounding field/element separators delimit them.
+  public void writeI16(short i16) throws TException {
+    writeString(String.valueOf(i16));
+  }
+
+  public void writeI32(int i32) throws TException {
+    writeString(String.valueOf(i32));
+  }
+
+  public void writeI64(long i64) throws TException {
+    writeString(String.valueOf(i64));
+  }
+
+  public void writeDouble(double dub) throws TException {
+    writeString(String.valueOf(dub));
+  }
+
+  /**
+   * Writes one primitive cell.  Inside a container, a separator is emitted
+   * before every element except the first: in a map, elements alternate
+   * key/value, and elemIndex tracks that alternation so keys are followed by
+   * the key/value separator and values by the element separator.
+   */
+  public void writeString(String str) throws TException {
+    if(inner) {
+      if(!firstInnerField) {
+        // super hack city notice the mod plus only happens after firstfield hit, so == 0 is right.
+        // i.e. elemIndex only starts advancing after the first element, so
+        // "% 2 == 0" means the element just completed was a key and the
+        // key/value separator is due next.
+        if(isMap && elemIndex++ % 2 == 0) {
+          writeByte(mapSeparatorByte);
+        } else {
+          writeByte(secondarySeparatorByte);
+        }
+      } else {
+        firstInnerField = false;
+      }
+    }
+    // NOTE(review): getBytes() uses the platform default charset here —
+    // presumably UTF-8 is intended; confirm against the readers.
+    final byte buf[] = str.getBytes();
+    trans_.write(buf, 0, buf.length);
+  }
+
+  // Raw binary cannot be embedded safely between control-byte separators.
+  public void writeBinary(byte[] bin) throws TException {
+    throw new TException("Ctl separated protocol cannot support writing Binary data!");
+  }
+
+  // Messages are not encoded; return an empty header.
+  public TMessage readMessageBegin() throws TException {
+    return new TMessage(); 
+  }
+
+  public void readMessageEnd() throws TException {
+  }
+
+ // Reads the next row from the transport and splits it into columns on the
+ // primary separator.  Returns null (rather than throwing) when the
+ // underlying tokenizer reaches end-of-file — callers must check for that.
+ public TStruct readStructBegin() throws TException {
+   assert(!inner);
+   try {
+     final String tmp = transportTokenizer.nextToken();
+     columns = primaryPattern.split(tmp);
+     index = 0;
+     return new TStruct();
+   } catch(EOFException e) {
+     return null;
+   }
+  }
+
+  public void readStructEnd() throws TException {
+    columns = null;
+  }
+
+  // Fields carry no ids in this encoding; they are consumed positionally.
+  public TField readFieldBegin() throws TException {
+    assert( !inner);
+    TField f = new TField();
+    // slight hack to communicate to DynamicSerDe that the field ids are not being set but things are ordered.
+    f.type = -1;
+    return  f;
+  }
+
+  public void readFieldEnd() throws TException {
+    fields = null;
+  }
+
+  // Splits the current column into alternating key/value tokens on the
+  // key/value separator and reports the pair count as the map size.
+  // NOTE(review): String.split never returns null, so the null branch below
+  // looks dead; also, an odd token count silently drops the trailing key
+  // (size = length/2) — confirm whether that is intended.
+  public TMap readMapBegin() throws TException {
+    assert( !inner);
+    TMap map = new TMap();
+    fields = mapPattern.split(columns[index++]);
+    if(fields != null) {
+      map.size = fields.length/2;
+    } else {
+      map.size = 0;
+    }
+    innerIndex = 0;
+    inner = true;
+    isMap = true;
+    return map;
+  }
+
+  public void readMapEnd() throws TException {
+    inner = false;
+    isMap = false;
+  }
+
+  // Splits the current column into elements on the secondary separator.
+  // NOTE(review): as in readMapBegin, String.split never returns null, so
+  // the null branches below appear to be dead code.
+  public TList readListBegin() throws TException {
+    assert( !inner);
+    TList list = new TList();
+    fields = secondaryPattern.split(columns[index++]);
+    if(fields != null) {
+      list.size = fields.length ;
+    } else {
+      list.size = 0;
+    }
+    innerIndex = 0;
+    inner = true;
+    return list;
+  }
+
+  public void readListEnd() throws TException {
+    inner = false;
+  }
+
+  // Sets share the list wire format: one column split on the secondary
+  // separator.
+  public TSet readSetBegin() throws TException {
+    assert( !inner);
+    TSet set = new TSet();
+    fields = secondaryPattern.split(columns[index++]);
+    if(fields != null) {
+      set.size = fields.length ;
+    } else {
+      set.size = 0;
+    }
+    inner = true;
+    innerIndex = 0;
+    return set;
+  }
+
+  public void readSetEnd() throws TException {
+    inner = false;
+  }
+  // The typed read methods parse the decimal/string form produced by the
+  // corresponding write methods.  Using the static parseXxx methods avoids
+  // creating an intermediate wrapper object per call; the parse semantics
+  // are identical to Xxx.valueOf(s).xxxValue().  An empty or non-numeric
+  // cell still raises NumberFormatException from the numeric parsers.
+  public boolean readBool() throws TException {
+    return Boolean.parseBoolean(readString());
+  }
+
+  public byte readByte() throws TException {
+    return Byte.parseByte(readString());
+  }
+
+  public short readI16() throws TException {
+    return Short.parseShort(readString());
+  }
+
+  public int readI32() throws TException {
+    return Integer.parseInt(readString());
+  }
+
+  public long readI64() throws TException {
+    return Long.parseLong(readString());
+  }
+
+  public double readDouble() throws TException {
+    return Double.parseDouble(readString());
+  }
+
+  // NOTE(review): curMapPair is never referenced in this class as visible
+  // here — confirm it is used by a subclass, otherwise it can be removed.
+  protected String [] curMapPair;
+
+  // Returns the next cell: the next column when at top level, or the next
+  // element of the current container when inside one.  Exhausted input
+  // yields "" unless returnNulls is set, in which case null is returned.
+  public String readString() throws TException {
+    String ret;
+    if(!inner) {
+      ret = columns != null && index < columns.length ? columns[index++] : null;
+    } else {
+      ret = fields != null && innerIndex < fields.length ? fields[innerIndex++] : null;
+    }
+    return ret == null && ! returnNulls ? "" : ret;
+  }
+
+  // Binary cells cannot be distinguished from separator bytes, so reading
+  // binary is unsupported.
+  public byte[] readBinary() throws TException {
+    throw new TException("Not implemented for control separated data");
+  }
+}

Modified: hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde/TestSerDeUtils.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde/TestSerDeUtils.java?rev=706704&r1=706703&r2=706704&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde/TestSerDeUtils.java (original)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde/TestSerDeUtils.java Tue Oct 21 11:11:05 2008
@@ -31,7 +31,7 @@
 
   public void testLookupSerDe() throws Exception {
     try {
-      String name = ThriftSerDe.shortName();
+      String name = ThriftSerDe.class.getName();
       SerDe s = SerDeUtils.lookupSerDe(name);
       assertTrue(s.getClass().getName().equals(org.apache.hadoop.hive.serde.thrift.ThriftSerDe.class.getName()));
     } catch(Exception e) {

Added: hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/TestTCTLSeparatedProtocol.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/TestTCTLSeparatedProtocol.java?rev=706704&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/TestTCTLSeparatedProtocol.java (added)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/TestTCTLSeparatedProtocol.java Tue Oct 21 11:11:05 2008
@@ -0,0 +1,245 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2;
+
+import junit.framework.TestCase;
+import java.io.*;
+import org.apache.hadoop.hive.serde2.*;
+import org.apache.hadoop.hive.serde2.thrift.*;
+import java.util.*;
+import com.facebook.thrift.TException;
+import com.facebook.thrift.transport.*;
+import com.facebook.thrift.*;
+import com.facebook.thrift.protocol.*;
+
+/**
+ * Tests for TCTLSeparatedProtocol: hand-built wire data read back through
+ * the protocol, and a full write/read round trip.
+ *
+ * Note: the original version wrapped each test body in
+ * "catch (Exception e) { e.printStackTrace(); }", which swallowed every
+ * failure and made the tests pass unconditionally.  The test methods already
+ * declare "throws Exception", so exceptions now propagate and are reported
+ * by JUnit as errors.
+ */
+public class TestTCTLSeparatedProtocol extends TestCase {
+
+  public TestTCTLSeparatedProtocol() throws Exception {
+  }
+
+  /**
+   * Builds a row by hand with the default control separators and checks that
+   * the protocol reads back each column, an empty column, and a map column.
+   */
+  public void testReads() throws Exception {
+    TMemoryBuffer trans = new TMemoryBuffer(1024);
+    String foo = "Hello";
+    String bar = "World!";
+
+    String key = "22";
+    String value = "TheValue";
+    String key2 = "24";
+    String value2 = "TheValueAgain";
+
+    byte columnSeparator [] = { 1 };
+    byte elementSeparator [] = { 2 };
+    byte kvSeparator [] = { 3 };
+
+    trans.write(foo.getBytes(), 0, foo.getBytes().length);
+    trans.write(columnSeparator, 0, 1);
+
+    // a deliberately empty column
+    trans.write(columnSeparator, 0, 1);
+
+    trans.write(bar.getBytes(), 0, bar.getBytes().length);
+    trans.write(columnSeparator, 0, 1);
+
+    trans.write(key.getBytes(), 0, key.getBytes().length);
+    trans.write(kvSeparator, 0, 1);
+    trans.write(value.getBytes(), 0, value.getBytes().length);
+    trans.write(elementSeparator, 0, 1);
+
+    trans.write(key2.getBytes(), 0, key2.getBytes().length);
+    trans.write(kvSeparator, 0, 1);
+    trans.write(value2.getBytes(), 0, value2.getBytes().length);
+
+    trans.flush();
+
+    // use 3 as the row buffer size to force lots of re-buffering.
+    TCTLSeparatedProtocol prot = new TCTLSeparatedProtocol(trans, 3);
+
+    prot.readStructBegin();
+
+    prot.readFieldBegin();
+    String hello = prot.readString();
+    prot.readFieldEnd();
+
+    assertTrue(hello.equals(foo));
+
+    // the empty column comes back as the empty string
+    prot.readFieldBegin();
+    assertTrue(prot.readString().equals(""));
+    prot.readFieldEnd();
+
+    prot.readFieldBegin();
+    assertTrue(prot.readString().equals(bar));
+    prot.readFieldEnd();
+
+    prot.readFieldBegin();
+    TMap mapHeader = prot.readMapBegin();
+    assertTrue(mapHeader.size == 2);
+
+    assertTrue(prot.readI32() == 22);
+    assertTrue(prot.readString().equals(value));
+    assertTrue(prot.readI32() == 24);
+    assertTrue(prot.readString().equals(value2));
+    prot.readMapEnd();
+    prot.readFieldEnd();
+
+    // reading past the last column should return the empty string
+    prot.readFieldBegin();
+    hello = prot.readString();
+    prot.readFieldEnd();
+    assertTrue(hello.length() == 0);
+
+    prot.readStructEnd();
+  }
+
+  /**
+   * Writes a full row through the protocol, checks the exact bytes produced,
+   * then reads the row back and verifies every value.
+   */
+  public void testWrites() throws Exception {
+    TMemoryBuffer trans = new TMemoryBuffer(1024);
+    TCTLSeparatedProtocol prot = new TCTLSeparatedProtocol(trans, 3);
+
+    prot.writeStructBegin(new TStruct());
+    prot.writeFieldBegin(new TField());
+    prot.writeI32(100);
+    prot.writeFieldEnd();
+
+    prot.writeFieldBegin(new TField());
+    prot.writeListBegin(new TList());
+    prot.writeDouble(348.55);
+    prot.writeDouble(234.22);
+    prot.writeListEnd();
+    prot.writeFieldEnd();
+
+    prot.writeFieldBegin(new TField());
+    prot.writeString("hello world!");
+    prot.writeFieldEnd();
+
+    prot.writeFieldBegin(new TField());
+    prot.writeMapBegin(new TMap());
+    prot.writeString("key1");
+    prot.writeString("val1");
+    prot.writeString("key2");
+    prot.writeString("val2");
+    prot.writeString("key3");
+    prot.writeString("val3");
+    prot.writeMapEnd();
+    prot.writeFieldEnd();
+
+    prot.writeFieldBegin(new TField());
+    prot.writeListBegin(new TList());
+    prot.writeString("elem1");
+    prot.writeString("elem2");
+    prot.writeListEnd();
+    prot.writeFieldEnd();
+
+    prot.writeFieldBegin(new TField());
+    prot.writeString("bye!");
+    prot.writeFieldEnd();
+
+    prot.writeStructEnd();
+    trans.flush();
+    byte b[] = new byte[3*1024];
+    int len = trans.read(b,0,b.length);
+    String test = new String(b, 0, len);
+
+    // expected bytes: \u0001 = column, \u0002 = element, \u0003 = key/value
+    String testRef = "100348.55234.22hello world!key1val1key2val2key3val3elem1elem2bye!";
+    assertTrue(test.equals(testRef));
+
+    trans = new TMemoryBuffer(1023);
+    trans.write(b, 0, len);
+
+    //
+    // read back!
+    //
+
+    prot = new TCTLSeparatedProtocol(trans, 10);
+
+    // 100 is the start
+    prot.readStructBegin();
+    prot.readFieldBegin();
+    assertTrue(prot.readI32() == 100);
+    prot.readFieldEnd();
+
+    // let's see if doubles work ok
+    prot.readFieldBegin();
+    TList l = prot.readListBegin();
+    assertTrue(l.size == 2);
+    assertTrue(prot.readDouble() == 348.55);
+    assertTrue(prot.readDouble() == 234.22);
+    prot.readListEnd();
+    prot.readFieldEnd();
+
+    // nice message
+    prot.readFieldBegin();
+    assertTrue(prot.readString().equals("hello world!"));
+    prot.readFieldEnd();
+
+    // 3 element map
+    prot.readFieldBegin();
+    TMap m = prot.readMapBegin();
+    assertTrue(m.size == 3);
+    assertTrue(prot.readString().equals("key1"));
+    assertTrue(prot.readString().equals("val1"));
+    assertTrue(prot.readString().equals("key2"));
+    assertTrue(prot.readString().equals("val2"));
+    assertTrue(prot.readString().equals("key3"));
+    assertTrue(prot.readString().equals("val3"));
+    prot.readMapEnd();
+    prot.readFieldEnd();
+
+    // the 2 element list
+    prot.readFieldBegin();
+    l = prot.readListBegin();
+    assertTrue(l.size == 2);
+    assertTrue(prot.readString().equals("elem1"));
+    assertTrue(prot.readString().equals("elem2"));
+    prot.readListEnd();
+    prot.readFieldEnd();
+
+    // final string
+    prot.readFieldBegin();
+    assertTrue(prot.readString().equals("bye!"));
+    prot.readFieldEnd();
+
+    // should return empty strings past the end
+    prot.readFieldBegin();
+    assertTrue(prot.readString().equals(""));
+    prot.readFieldEnd();
+
+    // should return empty strings past the end
+    prot.readFieldBegin();
+    assertTrue(prot.readString().equals(""));
+    prot.readFieldEnd();
+
+    prot.readStructEnd();
+  }
+
+}

Added: hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/dynamic_type/TestDynamicSerDe.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/dynamic_type/TestDynamicSerDe.java?rev=706704&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/dynamic_type/TestDynamicSerDe.java (added)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/dynamic_type/TestDynamicSerDe.java Tue Oct 21 11:11:05 2008
@@ -0,0 +1,204 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.dynamic_type;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Properties;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.serde.Constants;
+import org.apache.hadoop.hive.serde2.dynamic_type.DynamicSerDe;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol;
+import org.apache.hadoop.hive.serde.Constants;
+
+import junit.framework.TestCase;
+import org.apache.hadoop.io.BytesWritable;
+
+public class TestDynamicSerDe extends TestCase {
+
+  public void testDynamicSerDe() throws Throwable {
+    try {
+
+      // Try to construct an object
+      ArrayList<String> bye = new ArrayList<String>();
+      bye.add("firstString");
+      bye.add("secondString");
+      HashMap<String, Integer> another = new HashMap<String, Integer>();  
+      another.put("firstKey", 1);
+      another.put("secondKey", 2);
+      ArrayList<Object> struct = new ArrayList<Object>();
+      struct.add(Integer.valueOf(234));
+      struct.add(bye);
+      struct.add(another);
+      
+      // All protocols
+      ArrayList<String> protocols = new ArrayList<String>();
+      ArrayList<Boolean> isBinaries = new ArrayList<Boolean>();
+      
+      protocols.add(com.facebook.thrift.protocol.TBinaryProtocol.class.getName());
+      isBinaries.add(true);
+      
+      protocols.add(com.facebook.thrift.protocol.TJSONProtocol.class.getName());
+      isBinaries.add(false);
+
+      // TSimpleJSONProtocol does not support deserialization.
+      // protocols.add(com.facebook.thrift.protocol.TSimpleJSONProtocol.class.getName());
+      // isBinaries.add(false);
+      
+      // TCTLSeparatedProtocol is not done yet.
+      protocols.add(org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol.class.getName());
+      isBinaries.add(false);
+      
+      System.out.println("input struct = " + struct);
+        
+      for(int pp = 0; pp<protocols.size(); pp++) {
+        
+        String protocol = protocols.get(pp);
+        boolean isBinary = isBinaries.get(pp);
+        
+        System.out.println("Testing protocol: " + protocol);
+        Properties schema = new Properties();
+        schema.setProperty(Constants.SERIALIZATION_FORMAT, protocol);
+        schema.setProperty(org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_NAME, "test");
+        schema.setProperty(Constants.SERIALIZATION_DDL,
+            "struct test { i32 hello, list<string> bye, map<string,i32> another}");
+        schema.setProperty(Constants.SERIALIZATION_LIB, new DynamicSerDe().getClass().toString());
+  
+        DynamicSerDe serde = new DynamicSerDe();
+        serde.initialize(new Configuration(), schema);
+        
+        // Try getObjectInspector
+        ObjectInspector oi = serde.getObjectInspector();
+        System.out.println("TypeName = " + oi.getTypeName());
+
+        
+        // Try to serialize
+        BytesWritable bytes = (BytesWritable) serde.serialize(struct, oi);
+        
+        StringBuilder sb = new StringBuilder();
+        for (int i=0; i<bytes.getSize(); i++) {
+          byte b = bytes.get()[i];
+          int v = (b<0 ? 256 + b : b);
+          sb.append(String.format("x%02x", v));
+        }
+        System.out.println("bytes =" + sb);
+        
+        if (!isBinary) {
+          System.out.println("bytes in text =" + new String(bytes.get(), 0, bytes.getSize()));
+        }
+        
+        // Try to deserialize
+        Object o = serde.deserialize(bytes);
+        System.out.println("o class = " + o.getClass());
+        List<?> olist = (List<?>)o;
+        System.out.println("o size = " + olist.size());
+        System.out.println("o[0] class = " + olist.get(0).getClass());
+        System.out.println("o[1] class = " + olist.get(1).getClass());
+        System.out.println("o[2] class = " + olist.get(2).getClass());
+        System.out.println("o = " + o);
+        
+        assertEquals(o, struct);
+      }
+      
+    } catch (Throwable e) {
+      e.printStackTrace();
+      throw e;
+    }
+    
+  }  
+
+
+
+
+  public void testConfigurableTCTLSeparated() throws Throwable {
+    try {
+
+
+      // Try to construct an object
+      ArrayList<String> bye = new ArrayList<String>();
+      bye.add("firstString");
+      bye.add("secondString");
+      HashMap<String, Integer> another = new HashMap<String, Integer>();  
+      another.put("firstKey", 1);
+      another.put("secondKey", 2);
+      ArrayList<Object> struct = new ArrayList<Object>();
+      struct.add(Integer.valueOf(234));
+      struct.add(bye);
+      struct.add(another);
+
+      Properties schema = new Properties();
+      schema.setProperty(Constants.SERIALIZATION_FORMAT, org.apache.hadoop.hive.serde2.thrift.TCTLSeparatedProtocol.class.getName());
+      schema.setProperty(org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_NAME, "test");
+      schema.setProperty(Constants.SERIALIZATION_DDL,
+                         "struct test { i32 hello, list<string> bye, map<string,i32> another}");
+      schema.setProperty(Constants.SERIALIZATION_LIB, new DynamicSerDe().getClass().toString());
+
+      schema.setProperty(Constants.FIELD_DELIM, "9");
+      schema.setProperty(Constants.COLLECTION_DELIM, "1");
+      schema.setProperty(Constants.LINE_DELIM, "2");
+      schema.setProperty(Constants.MAPKEY_DELIM, "4");
+
+      DynamicSerDe serde = new DynamicSerDe();
+      serde.initialize(new Configuration(), schema);
+
+      TCTLSeparatedProtocol prot = (TCTLSeparatedProtocol)serde.oprot_;
+      assertTrue(prot.getPrimarySeparator() == 9);
+      
+      ObjectInspector oi = serde.getObjectInspector();
+
+      // Try to serialize
+      BytesWritable bytes = (BytesWritable) serde.serialize(struct, oi);
+        
+      StringBuilder sb = new StringBuilder();
+      for (int i=0; i<bytes.getSize(); i++) {
+        byte b = bytes.get()[i];
+        int v = (b<0 ? 256 + b : b);
+        sb.append(String.format("x%02x", v));
+      }
+      System.out.println("bytes =" + sb);
+
+      String compare = "234" + "\u0009" + "firstString" + "\u0001" + "secondString" + "\u0009" + "firstKey" + "\u0004" + "1" + "\u0001" + "secondKey" + "\u0004" + "2";
+
+      System.out.println("bytes in text =" + new String(bytes.get(), 0, bytes.getSize()) + ">");
+      System.out.println("compare to    =" + compare + ">");
+        
+      assertTrue(compare.equals( new String(bytes.get(), 0, bytes.getSize())));
+      
+      // Try to deserialize
+      Object o = serde.deserialize(bytes);
+      System.out.println("o class = " + o.getClass());
+      List<?> olist = (List<?>)o;
+      System.out.println("o size = " + olist.size());
+      System.out.println("o[0] class = " + olist.get(0).getClass());
+      System.out.println("o[1] class = " + olist.get(1).getClass());
+      System.out.println("o[2] class = " + olist.get(2).getClass());
+      System.out.println("o = " + o);
+        
+      assertEquals(o, struct);
+      
+    } catch (Throwable e) {
+      e.printStackTrace();
+      throw e;
+    }
+    
+  }  
+}

Modified: hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestObjectInspectorUtils.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestObjectInspectorUtils.java?rev=706704&r1=706703&r2=706704&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestObjectInspectorUtils.java (original)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestObjectInspectorUtils.java Tue Oct 21 11:11:05 2008
@@ -22,8 +22,8 @@
 import java.util.List;
 
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
-import org.apache.hadoop.hive.serde2.thrift_test.Complex;
-import org.apache.hadoop.hive.serde2.thrift_test.IntString;
+import org.apache.hadoop.hive.serde2.thrift.test.Complex;
+import org.apache.hadoop.hive.serde2.thrift.test.IntString;
 
 import junit.framework.TestCase;
 
@@ -50,14 +50,14 @@
       // real object
       Complex cc = new Complex();
       cc.aint = 1;
-      cc.astring = "test";
+      cc.aString = "test";
       List<Integer> c2 = Arrays.asList(new Integer[]{1,2,3}); 
       cc.lint =  c2;
       List<String> c3 = Arrays.asList(new String[]{"one", "two"});
-      cc.lstring =  c3;
+      cc.lString =  c3;
       List<IntString> c4 = new ArrayList<IntString>(); 
-      cc.lintstring = c4;
-      cc.mstringstring = null; 
+      cc.lintString = c4;
+      cc.mStringString = null; 
       // standard object
       Object c = ObjectInspectorUtils.getStandardObject(cc, oi1);
       

Modified: hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestThriftObjectInspectors.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestThriftObjectInspectors.java?rev=706704&r1=706703&r2=706704&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestThriftObjectInspectors.java (original)
+++ hadoop/core/trunk/src/contrib/hive/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestThriftObjectInspectors.java Tue Oct 21 11:11:05 2008
@@ -22,8 +22,8 @@
 import java.util.List;
 
 import junit.framework.TestCase;
-import org.apache.hadoop.hive.serde2.thrift_test.Complex;
-import org.apache.hadoop.hive.serde2.thrift_test.IntString;
+import org.apache.hadoop.hive.serde2.thrift.test.Complex;
+import org.apache.hadoop.hive.serde2.thrift.test.IntString;
 
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
 
@@ -53,14 +53,14 @@
       // real object
       Complex c = new Complex();
       c.aint = 1;
-      c.astring = "test";
+      c.aString = "test";
       List<Integer> c2 = Arrays.asList(new Integer[]{1,2,3}); 
       c.lint =  c2;
       List<String> c3 = Arrays.asList(new String[]{"one", "two"});
-      c.lstring =  c3;
+      c.lString =  c3;
       List<IntString> c4 = new ArrayList<IntString>(); 
-      c.lintstring = c4;
-      c.mstringstring = null; 
+      c.lintString = c4;
+      c.mStringString = null; 
       
       assertEquals(1, soi.getStructFieldData(c, fields.get(0)));
       assertEquals("test", soi.getStructFieldData(c, fields.get(1)));

Modified: hadoop/core/trunk/src/contrib/hive/testutils/run_tests
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/testutils/run_tests?rev=706704&r1=706703&r2=706704&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/testutils/run_tests (original)
+++ hadoop/core/trunk/src/contrib/hive/testutils/run_tests Tue Oct 21 11:11:05 2008
@@ -2,9 +2,11 @@
 d=`mktemp -d /tmp/hivetest_XXXX`
 for i in `find . -name Test\*\.* | grep -v svn | egrep "java$|vm$" | sed 's/.*\/Test/Test/g' | sed 's/\.java//g' | sed 's/\.vm//g' | sort`;
 do 
-  cmd="ant -lib ../../../lib -Dtestcase=$i clean-test test -logfile $d/$i.log"
-  echo $cmd;
-  $cmd;
+  if [ "$i" != "TestSerDe" ]; then
+    cmd="ant -lib ../../../lib -Dtestcase=$i clean-test test -logfile $d/$i.log"
+    echo $cmd;
+    $cmd;
+  fi
 done
 cat $d/*.log | grep junit | egrep "Running org|Tests run"
 echo Logs at $d



Mime
View raw message