hadoop-common-commits mailing list archives

From: zs...@apache.org
Subject: svn commit: r697291 [12/31] - in /hadoop/core/trunk: ./ src/contrib/hive/cli/src/java/org/apache/hadoop/hive/cli/ src/contrib/hive/metastore/if/ src/contrib/hive/metastore/src/gen-javabean/org/apache/hadoop/hive/metastore/api/ src/contrib/hive/metastor...
Date: Fri, 19 Sep 2008 23:56:35 GMT
Modified: hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java?rev=697291&r1=697290&r2=697291&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java (original)
+++ hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java Fri Sep 19 16:56:30 2008
@@ -19,12 +19,22 @@
 package org.apache.hadoop.hive.ql.exec;
 
 import java.io.*;
+import java.util.ArrayList;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.PlanUtils;
 import org.apache.hadoop.hive.ql.plan.exprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.reduceSinkDesc;
+import org.apache.hadoop.hive.ql.plan.tableDesc;
 import org.apache.hadoop.hive.ql.io.*;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.Serializer;
+import org.apache.hadoop.hive.serde2.objectinspector.InspectableObject;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import org.apache.hadoop.io.Text;
 
 /**
  * Reduce Sink Operator sends output to the reduce stage
@@ -34,10 +44,14 @@
   private static final long serialVersionUID = 1L;
   transient protected ExprNodeEvaluator[] keyEval;
   transient protected ExprNodeEvaluator[] valueEval;
-  transient WritableComparableHiveObject wcho;
-  transient WritableHiveObject who;
-  transient boolean keyIsSingleton, valueIsSingleton;
-
+  
+  // TODO: we use MetadataTypedColumnsetSerDe for now, till DynamicSerDe is ready
+  transient Serializer keySerializer;
+  transient Serializer valueSerializer;
+  transient int tag;
+  transient byte[] tagByte = new byte[1];
+  transient int numPartitionFields; 
+  
   public void initialize(Configuration hconf) throws HiveException {
     super.initialize(hconf);
     try {
@@ -46,61 +60,99 @@
       for(exprNodeDesc e: conf.getKeyCols()) {
         keyEval[i++] = ExprNodeEvaluatorFactory.get(e);
       }
-      keyIsSingleton = false; //(i == 1);
 
       valueEval = new ExprNodeEvaluator[conf.getValueCols().size()];
       i=0;
       for(exprNodeDesc e: conf.getValueCols()) {
         valueEval[i++] = ExprNodeEvaluatorFactory.get(e);
       }
-      valueIsSingleton = false; //(i == 1);
-
-      // TODO: Use NaiiveSerializer for now 
-      // Once we make sure CompositeHiveObject.getJavaObject() returns a Java List,
-      // we will use MetadataTypedSerDe to serialize the data, instead of using
-      // NaiiveSerializer.
-      int tag = conf.getTag();
-      if(tag == -1) {
-        who = new NoTagWritableHiveObject(null, new NaiiveSerializer());
-        wcho = new NoTagWritableComparableHiveObject(null, new NaiiveSerializer());
-        l4j.info("Using tag = -1");
-      } else {
-        l4j.info("Using tag = " + tag);
-        who = new WritableHiveObject(tag, null, new NaiiveSerializer());
-        wcho = new WritableComparableHiveObject(tag, null, new NaiiveSerializer());
-      }
 
+      tag = conf.getTag();
+      tagByte[0] = (byte)tag;
+      LOG.info("Using tag = " + tag);
+
+      tableDesc keyTableDesc = conf.getKeySerializeInfo();
+      keySerializer = (Serializer)keyTableDesc.getDeserializerClass().newInstance();
+      keySerializer.initialize(null, keyTableDesc.getProperties());
+      
+      tableDesc valueTableDesc = conf.getValueSerializeInfo();
+      valueSerializer = (Serializer)valueTableDesc.getDeserializerClass().newInstance();
+      valueSerializer.initialize(null, valueTableDesc.getProperties());
+      
       // Set the number of key fields to be used in the partitioner.
-      WritableComparableHiveObject.setNumPartitionFields(conf.getNumPartitionFields());
+      numPartitionFields = conf.getNumPartitionFields();
     } catch (Exception e) {
       e.printStackTrace();
       throw new RuntimeException(e);
     }
   }
 
-  public void process(HiveObject r) throws HiveException {
-    if(keyIsSingleton) {
-      wcho.setHo(keyEval[0].evaluate(r));
-    } else {
-      CompositeHiveObject nr = new CompositeHiveObject (keyEval.length);
+  transient InspectableObject tempInspectableObject = new InspectableObject();
+  transient HiveKey keyWritable = new HiveKey();
+  transient Text valueText;
+  
+  transient ObjectInspector keyObjectInspector;
+  transient ObjectInspector valueObjectInspector;
+  transient ArrayList<ObjectInspector> keyFieldsObjectInspectors = new ArrayList<ObjectInspector>();
+  transient ArrayList<ObjectInspector> valueFieldsObjectInspectors = new ArrayList<ObjectInspector>();
+  
+  public void process(Object row, ObjectInspector rowInspector) throws HiveException {
+    // TODO: use DynamicSerDe when that is ready
+    try {
+      // Generate hashCode for the tuple
+      int keyHashCode = 0;
+      if (numPartitionFields == -1) {
+        keyHashCode = (int)(Math.random() * Integer.MAX_VALUE);
+      }
+      ArrayList<Object> keys = new ArrayList<Object>(keyEval.length);
       for(ExprNodeEvaluator e: keyEval) {
-        nr.addHiveObject(e.evaluate(r));
+        e.evaluate(row, rowInspector, tempInspectableObject);
+        keys.add(tempInspectableObject.o);
+        if (numPartitionFields == keys.size()) {
+          keyHashCode = keys.hashCode();
+        }
+        if (keyObjectInspector == null) {
+          keyFieldsObjectInspectors.add(tempInspectableObject.oi);
+        }
       }
-      wcho.setHo(nr);
-    }
-
-    if(valueIsSingleton) {
-      who.setHo(valueEval[0].evaluate(r));
-    } else {
-      CompositeHiveObject nr = new CompositeHiveObject (valueEval.length);
+      if (numPartitionFields > keys.size()) {
+        keyHashCode = keys.hashCode();
+      }
+      if (keyObjectInspector == null) {
+        keyObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(
+            ObjectInspectorUtils.getIntegerArray(keyFieldsObjectInspectors.size()),
+            keyFieldsObjectInspectors);
+      }
+      Text key = (Text)keySerializer.serialize(keys, keyObjectInspector);
+      if (tag == -1) {
+        keyWritable.set(key.getBytes(), 0, key.getLength());
+      } else {
+        int keyLength = key.getLength();
+        keyWritable.setSize(keyLength+1);
+        System.arraycopy(key.getBytes(), 0, keyWritable.get(), 0, keyLength);
+        keyWritable.get()[keyLength] = tagByte[0];
+      }
+      keyWritable.setHashCode(keyHashCode);
+      
+      ArrayList<String> values = new ArrayList<String>(valueEval.length);
       for(ExprNodeEvaluator e: valueEval) {
-        nr.addHiveObject(e.evaluate(r));
+        e.evaluate(row, rowInspector, tempInspectableObject);
+        values.add(tempInspectableObject.o == null ? null : tempInspectableObject.o.toString());
+        if (valueObjectInspector == null) {
+          valueFieldsObjectInspectors.add(tempInspectableObject.oi);
+        }
+      }
+      if (valueObjectInspector == null) {
+        valueObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(
+            ObjectInspectorUtils.getIntegerArray(valueFieldsObjectInspectors.size()),
+            valueFieldsObjectInspectors);
       }
-      who.setHo(nr);
+      valueText = (Text)valueSerializer.serialize(values, valueObjectInspector);
+    } catch (SerDeException e) {
+      throw new HiveException(e);
     }
-
     try {
-      out.collect(wcho, who);
+      out.collect(keyWritable, valueText);
     } catch (IOException e) {
       throw new HiveException (e);
     }
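
The new ReduceSinkOperator serializes the key columns with a Serializer, appends a one-byte tag to the key when the plan assigns one (tag == -1 means untagged), and attaches an explicit hash code so the partitioner buckets on the first numPartitionFields key columns only. A minimal standalone sketch of that key layout and partitioning; class and method names are illustrative, not Hive's:

    import java.util.Arrays;

    public class TaggedKeySketch {
      // Copy the serialized key bytes and append the tag as a trailing byte;
      // tag == -1 means the key is left untagged, as in the operator above.
      static byte[] buildReduceKey(byte[] serializedKey, int keyLength, int tag) {
        if (tag == -1) {
          return Arrays.copyOf(serializedKey, keyLength);
        }
        byte[] tagged = Arrays.copyOf(serializedKey, keyLength + 1);
        tagged[keyLength] = (byte) tag;
        return tagged;
      }

      // Partition on a precomputed hash of the leading key columns
      // (the operator falls back to a random hash when numPartitionFields == -1).
      static int partition(int keyHashCode, int numReducers) {
        return (keyHashCode & Integer.MAX_VALUE) % numReducers;
      }

      public static void main(String[] args) {
        byte[] key = "k1\u0001k2".getBytes();
        byte[] reduceKey = buildReduceKey(key, key.length, 3);
        System.out.println("tagged key length: " + reduceKey.length);
        System.out.println("reducer: " + partition(Arrays.hashCode(key), 32));
      }
    }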

Modified: hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/ScriptOperator.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/ScriptOperator.java?rev=697291&r1=697290&r2=697291&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/ScriptOperator.java (original)
+++ hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/ScriptOperator.java Fri Sep 19 16:56:30 2008
@@ -25,10 +25,12 @@
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.ql.plan.scriptDesc;
-import org.apache.hadoop.hive.ql.plan.tableDesc;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.serde.*;
-import org.apache.hadoop.hive.ql.io.*;
+import org.apache.hadoop.hive.serde2.Deserializer;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.SerDe;
+import org.apache.hadoop.hive.serde2.Serializer;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.mapred.LineRecordReader.LineReader;
 import org.apache.hadoop.util.StringUtils;
@@ -50,8 +52,10 @@
   transient Thread errThread;
   transient Process scriptPid;
   transient Configuration hconf;
-  transient SerDe decoder;
-  transient HiveObjectSerializer hos;
+  // Input to the script
+  transient Serializer scriptInputSerializer;
+  // Output from the script
+  transient Deserializer scriptOutputDeserializer;
   transient volatile Throwable scriptError = null;
 
   /**
@@ -93,24 +97,18 @@
     try {
       this.hconf = hconf;
 
-      tableDesc td = conf.getScriptOutputInfo();
-      if(td == null) {
-        td = Utilities.defaultTabTd;
-      }
-      decoder = td.getSerdeClass().newInstance();
-      decoder.initialize(hconf, td.getProperties());
+      scriptOutputDeserializer = conf.getScriptOutputInfo().getDeserializerClass().newInstance();
+      scriptOutputDeserializer.initialize(hconf, conf.getScriptOutputInfo().getProperties());
 
-      hos = new NaiiveJSONSerializer();
-      Properties p = new Properties ();
-      p.setProperty(org.apache.hadoop.hive.serde.Constants.SERIALIZATION_FORMAT, ""+Utilities.tabCode);
-      hos.initialize(p);
+      scriptInputSerializer = (Serializer)conf.getScriptInputInfo().getDeserializerClass().newInstance();
+      scriptInputSerializer.initialize(hconf, conf.getScriptInputInfo().getProperties());
 
       String [] cmdArgs = splitArgs(conf.getScriptCmd());
       String [] wrappedCmdArgs = addWrapper(cmdArgs);
-      l4j.info("Executing " + Arrays.asList(wrappedCmdArgs));
-      l4j.info("tablename=" + hconf.get(HiveConf.ConfVars.HIVETABLENAME.varname));
-      l4j.info("partname=" + hconf.get(HiveConf.ConfVars.HIVEPARTITIONNAME.varname));
-      l4j.info("alias=" + alias);
+      LOG.info("Executing " + Arrays.asList(wrappedCmdArgs));
+      LOG.info("tablename=" + hconf.get(HiveConf.ConfVars.HIVETABLENAME.varname));
+      LOG.info("partname=" + hconf.get(HiveConf.ConfVars.HIVEPARTITIONNAME.varname));
+      LOG.info("alias=" + alias);
 
       ProcessBuilder pb = new ProcessBuilder(wrappedCmdArgs);
       Map<String, String> env = pb.environment();
@@ -121,7 +119,8 @@
       scriptOut = new DataOutputStream(new BufferedOutputStream(scriptPid.getOutputStream()));
       scriptIn = new DataInputStream(new BufferedInputStream(scriptPid.getInputStream()));
       scriptErr = new DataInputStream(new BufferedInputStream(scriptPid.getErrorStream()));
-      outThread = new StreamThread(scriptIn, new OutputStreamProcessor (), "OutputProcessor");
+      outThread = new StreamThread(scriptIn, new OutputStreamProcessor(
+          scriptOutputDeserializer.getObjectInspector()), "OutputProcessor");
       outThread.start();
       errThread = new StreamThread(scriptErr, new ErrorStreamProcessor (), "ErrorProcessor");
       errThread.start();
@@ -132,14 +131,22 @@
     }
   }
 
-  public void process(HiveObject r) throws HiveException {
+  Text text = new Text();
+  public void process(Object row, ObjectInspector rowInspector) throws HiveException {
     if(scriptError != null) {
       throw new HiveException(scriptError);
     }
     try {
-      hos.serialize(r, scriptOut);
+      text = (Text) scriptInputSerializer.serialize(row, rowInspector);
+      scriptOut.write(text.getBytes(), 0, text.getLength());
+      scriptOut.write(Utilities.newLineCode);
+    } catch (SerDeException e) {
+      LOG.error("Error in serializing the row: " + e.getMessage());
+      scriptError = e;
+      serialize_error_count.set(serialize_error_count.get() + 1);
+      throw new HiveException(e);
     } catch (IOException e) {
-      l4j.error("Error in writing to script: " + e.getMessage());
+      LOG.error("Error in writing to script: " + e.getMessage());
       scriptError = e;
       throw new HiveException(e);
     }
@@ -155,7 +162,7 @@
         scriptOut.close();
         int exitVal = scriptPid.waitFor();
         if (exitVal != 0) {
-          l4j.error("Script failed with code " + exitVal);
+          LOG.error("Script failed with code " + exitVal);
           new_abort = true;
         };
       } catch (IOException e) {
@@ -163,11 +170,6 @@
       } catch (InterruptedException e) { }
     }
 
-    // the underlying hive object serializer keeps track of serialization errors
-    if(hos != null) {
-      serialize_error_count.set(hos.getWriteErrorCount());
-    }
-
     try {
       // try these best effort
       outThread.join(0);
@@ -190,17 +192,19 @@
 
 
   class OutputStreamProcessor implements StreamProcessor {
-    public OutputStreamProcessor () {}
+    Object row;
+    ObjectInspector rowInspector;
+    public OutputStreamProcessor(ObjectInspector rowInspector) {
+      this.rowInspector = rowInspector;
+    }
     public void processLine(Text line) throws HiveException {
-      HiveObject ho;
       try {
-        Object ev = decoder.deserialize(line);
-        ho = new TableHiveObject(ev, decoder);
+        row = scriptOutputDeserializer.deserialize(line);
       } catch (SerDeException e) {
         deserialize_error_count.set(deserialize_error_count.get()+1);
         return;
       }
-      forward(ho);
+      forward(row, rowInspector);
     }
     public void close() {
     }
@@ -243,11 +247,11 @@
           }
           proc.processLine(row);
         }
-        l4j.info("StreamThread "+name+" done");
+        LOG.info("StreamThread "+name+" done");
 
       } catch (Throwable th) {
         scriptError = th;
-        l4j.warn(StringUtils.stringifyException(th));
+        LOG.warn(StringUtils.stringifyException(th));
       } finally {
         try {
           if(lineReader != null) {
@@ -256,8 +260,8 @@
           in.close();
           proc.close();
         } catch (Exception e) {
-          l4j.warn(name + ": error in closing ..");
-          l4j.warn(StringUtils.stringifyException(e));
+          LOG.warn(name + ": error in closing ..");
+          LOG.warn(StringUtils.stringifyException(e));
         }
       }
     }
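
The rewritten ScriptOperator serializes each input row, writes the bytes plus a newline to the child script's stdin, and deserializes every stdout line on a separate reader thread. A self-contained sketch of that pipe-and-thread arrangement, using /bin/cat as a stand-in for the user script (the command and names are illustrative):

    import java.io.*;

    public class ScriptPipeSketch {
      public static void main(String[] args) throws Exception {
        Process proc = new ProcessBuilder("/bin/cat").start();

        DataOutputStream scriptOut =
            new DataOutputStream(new BufferedOutputStream(proc.getOutputStream()));
        BufferedReader scriptIn =
            new BufferedReader(new InputStreamReader(proc.getInputStream()));

        // Reader thread: each line coming back from the script would be handed
        // to a Deserializer and forwarded downstream.
        Thread outThread = new Thread(() -> {
          try {
            String line;
            while ((line = scriptIn.readLine()) != null) {
              System.out.println("row from script: " + line);
            }
          } catch (IOException ignored) {
          }
        }, "OutputProcessor");
        outThread.start();

        // Writer side: serialized row bytes followed by a newline, as in process().
        for (String row : new String[] { "a\u0001b", "c\u0001d" }) {
          byte[] bytes = row.getBytes("UTF-8");
          scriptOut.write(bytes, 0, bytes.length);
          scriptOut.write('\n');
        }
        scriptOut.close();
        proc.waitFor();
        outThread.join();
      }
    }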

Modified: hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/SelectOperator.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/SelectOperator.java?rev=697291&r1=697290&r2=697291&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/SelectOperator.java (original)
+++ hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/SelectOperator.java Fri Sep 19 16:56:30 2008
@@ -19,10 +19,14 @@
 package org.apache.hadoop.hive.ql.exec;
 
 import java.io.*;
+import java.util.ArrayList;
 
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.plan.exprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.selectDesc;
+import org.apache.hadoop.hive.serde2.objectinspector.InspectableObject;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
 import org.apache.hadoop.conf.Configuration;
 
 /**
@@ -33,26 +37,54 @@
   private static final long serialVersionUID = 1L;
   transient protected ExprNodeEvaluator[] eval;
 
+  transient ArrayList<Object> output;
+  transient ArrayList<ObjectInspector> outputFieldObjectInspectors;
+  transient ObjectInspector outputObjectInspector;
+  transient InspectableObject tempInspectableObject;
+  
+  boolean firstRow;
+  
   public void initialize(Configuration hconf) throws HiveException {
     super.initialize(hconf);
     try {
-      eval = new ExprNodeEvaluator[conf.getColList().size()];
-      int i=0;
-      for(exprNodeDesc e: conf.getColList()) {
-        eval[i++] = ExprNodeEvaluatorFactory.get(e);
+      ArrayList<exprNodeDesc> colList = conf.getColList();
+      eval = new ExprNodeEvaluator[colList.size()];
+      for(int i=0; i<colList.size(); i++) {
+        assert(colList.get(i) != null);
+        eval[i] = ExprNodeEvaluatorFactory.get(colList.get(i));
       }
+      output = new ArrayList<Object>(eval.length);
+      outputFieldObjectInspectors = new ArrayList<ObjectInspector>(eval.length);
+      for(int j=0; j<eval.length; j++) {
+        output.add(null);
+        outputFieldObjectInspectors.add(null);
+      }
+      tempInspectableObject = new InspectableObject();      
+      firstRow = true;
     } catch (Exception e) {
       e.printStackTrace();
       throw new RuntimeException(e);
     }
   }
 
-  public void process(HiveObject r) throws HiveException {
-    CompositeHiveObject nr = new CompositeHiveObject (eval.length);
-    for(ExprNodeEvaluator e: eval) {
-      HiveObject ho = e.evaluate(r);
-      nr.addHiveObject(ho);
+  public void process(Object row, ObjectInspector rowInspector)
+      throws HiveException {
+    for(int i=0; i<eval.length; i++) {
+      eval[i].evaluate(row, rowInspector, tempInspectableObject);
+      output.set(i, tempInspectableObject.o);
+      if (firstRow) {
+        outputFieldObjectInspectors.set(i, tempInspectableObject.oi);
+      }
+    }
+    if (firstRow) {
+      firstRow = false;
+      ArrayList<String> fieldNames = new ArrayList<String>(eval.length);
+      for(int i=0; i<eval.length; i++) {
+        fieldNames.add(Integer.valueOf(i).toString());
+      }
+      outputObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(
+        fieldNames, outputFieldObjectInspectors);
     }
-    forward(nr);
+    forward(output, outputObjectInspector);
   }
 }
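
SelectOperator now evaluates its expressions into a reused output list and builds the output row schema lazily, on the first row it processes. A stripped-down sketch of that first-row pattern, with java.util.function.Function standing in for ExprNodeEvaluator (illustrative only):

    import java.util.ArrayList;
    import java.util.List;
    import java.util.function.Function;

    class LazySchemaSelect {
      private final List<Function<Object[], Object>> exprs;
      private final ArrayList<Object> output;   // reused for every row
      private List<String> fieldNames;          // schema, built once
      private boolean firstRow = true;

      LazySchemaSelect(List<Function<Object[], Object>> exprs) {
        this.exprs = exprs;
        this.output = new ArrayList<>(exprs.size());
        for (int i = 0; i < exprs.size(); i++) {
          output.add(null);
        }
      }

      List<Object> process(Object[] row) {
        for (int i = 0; i < exprs.size(); i++) {
          output.set(i, exprs.get(i).apply(row));
        }
        if (firstRow) {
          // Field names (and, in Hive, the ObjectInspectors) are only known
          // once the first row has been evaluated.
          firstRow = false;
          fieldNames = new ArrayList<>();
          for (int i = 0; i < exprs.size(); i++) {
            fieldNames.add(Integer.toString(i));
          }
        }
        return output;
      }

      List<String> schema() {
        return fieldNames;
      }
    }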

Modified: hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/TableHiveObject.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/TableHiveObject.java?rev=697291&r1=697290&r2=697291&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/TableHiveObject.java (original)
+++ hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/TableHiveObject.java Fri Sep 19 16:56:30 2008
@@ -1,167 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.exec;
-
-import java.util.*;
-
-import org.apache.hadoop.hive.serde.*;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-
-/**
- * A TableHiveObject is Hive encapsulation of Objects returned from a table
- * It allows navigation using the SerDe associated with the Table. They also
- * encapsulate the position relative to the row object that the current object
- * represents.
- */
-public class TableHiveObject extends HiveObject {
-
-  // References to the table and initial table Object
-  protected SerDe tableSerDe;
-  protected Object rowObject;
-  protected ArrayList<String> partCols;
-  protected ArrayList<SerDeField> partFields;
-
-  // References to the field of the row that the HiveObject refers to
-  protected SerDeField myField;
-
-  public TableHiveObject(Object javaObject, SerDe tableSerDe) {
-      if(javaObject == null) {
-          throw new RuntimeException("javaObject may not be null in TableHiveObject constructor");
-      }
-
-
-    this.javaObject = javaObject;
-    this.tableSerDe = tableSerDe;
-    this.rowObject = javaObject;
-    this.partCols = null;
-    this.partFields = null;
-  }
-
-
-  public TableHiveObject(Object javaObject, SerDe tableSerDe,
-                         ArrayList<String> partCols,
-                         ArrayList<SerDeField> partFields) {
-      if(javaObject == null) {
-          throw new RuntimeException("javaObject may not be null in TableHiveObject constructor");
-      }
-
-
-    this.javaObject = javaObject;
-    this.tableSerDe = tableSerDe;
-    this.rowObject = javaObject;
-    this.partCols = partCols;
-    this.partFields = partFields;
-  }
-
-  protected TableHiveObject(Object javaObject, SerDe tableSerDe,
-                            SerDeField myField, Object rowObject,
-                            ArrayList<String> partCols,
-                            ArrayList<SerDeField> partFields) {
-    if(javaObject == null) {
-      throw new RuntimeException("javaObject may not be null in TableHiveObject constructor");
-    }
-    this.javaObject = javaObject;
-    this.tableSerDe = tableSerDe;
-    this.myField = myField;
-    this.rowObject = rowObject;
-    this.partCols = partCols;
-    this.partFields = partFields;
-  }
-
-  public SerDeField getFieldFromExpression(String expr) throws HiveException {
-    try {
-      if(expr == null || expr.equals(""))
-        throw new RuntimeException("Need non empty expression");
-
-      // Check if this is a partition column
-      if (partCols != null) {
-        int pos = partCols.indexOf(expr);
-        if (pos != -1) {
-          return partFields.get(pos);
-        }
-      }
-
-      String realExpr;
-      if(myField != null) {
-        if (expr.charAt(0) == '[') {
-          realExpr = myField.getName() + expr;
-        } else {
-          realExpr = myField.getName() + "." + expr;
-        }
-      } else {
-        realExpr = expr;
-      }
-
-      if(!ExpressionUtils.isComplexExpression(realExpr)) {
-        return tableSerDe.getFieldFromExpression(null, realExpr);
-      } else {
-        return new ComplexSerDeField(null, realExpr, tableSerDe);
-      }
-    } catch (SerDeException e) {
-      e.printStackTrace();
-      throw new HiveException (e);
-    }
-  }
-
-  public HiveObject get(SerDeField field) throws HiveException {
-    try {
-      Object o = field.get(rowObject);
-      if (o == null) 
-        return new NullHiveObject();
-
-      if(field.isPrimitive())
-        return new PrimitiveHiveObject(o);
-      else 
-        return new TableHiveObject(o, tableSerDe, field, rowObject, partCols, partFields);
-    } catch (SerDeException e) {
-      throw new HiveException(e);
-    }
-  }
-
-  public List<SerDeField> getFields() throws HiveException {
-    try {
-      return tableSerDe.getFields(myField);
-    } catch (SerDeException e) {
-      e.printStackTrace();
-      throw new HiveException (e);
-    }
-  }
-
-  public boolean isPrimitive() { return false;}
-
-  
-  public String toString() {
-    try {
-      return tableSerDe.toJSONString(myField.get(rowObject), myField);
-    } catch (SerDeException e) {
-      throw new RuntimeException(e); 
-    }
-  }
-  
-  @Override
-  public boolean equals(Object other) {
-    throw new RuntimeException("not supported");
-  }
-
-  @Override
-  public int hashCode() {
-    throw new RuntimeException("not supported");
-  }
-
-}

Modified: hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/UDAF.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/UDAF.java?rev=697291&r1=697290&r2=697291&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/UDAF.java (original)
+++ hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/UDAF.java Fri Sep 19 16:56:30 2008
@@ -21,7 +21,6 @@
 
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 //import org.apache.hadoop.hive.serde.ReflectionSerDe;
-import org.apache.hadoop.hive.serde.SerDeException;
 
 /**
  * Base class for all User-defined Aggregation Function (UDAF) classes.

Modified: hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/UDF.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/UDF.java?rev=697291&r1=697290&r2=697291&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/UDF.java (original)
+++ hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/UDF.java Fri Sep 19 16:56:30 2008
@@ -18,8 +18,6 @@
 
 package org.apache.hadoop.hive.ql.exec;
 
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-
 /**
  * A dummy User-defined function (UDF) for the use with Hive.
  * 

Modified: hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java?rev=697291&r1=697290&r2=697291&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java (original)
+++ hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java Fri Sep 19 16:56:30 2008
@@ -32,7 +32,11 @@
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.hive.serde.simple_meta.MetadataTypedColumnsetSerDe;
+import org.apache.hadoop.io.compress.CompressionCodec;
+import org.apache.hadoop.io.SequenceFile.CompressionType;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.compress.DefaultCodec;
+import org.apache.hadoop.util.ReflectionUtils;
 
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.ql.plan.*;
@@ -40,8 +44,11 @@
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.metadata.Partition;
 import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.mapred.TextInputFormat;
-import org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.SequenceFileOutputFormat;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.commons.logging.LogFactory;
+import org.apache.commons.logging.Log;
 
 @SuppressWarnings("nls")
 public class Utilities {
@@ -49,9 +56,10 @@
   /**
    * The object in the reducer are composed of these top level fields
    */
-  public static enum ReduceField { KEY, VALUE, ALIAS };
 
+  public static enum ReduceField { KEY, VALUE, ALIAS };
   private static volatile mapredWork gWork = null;
+  static final private Log LOG = LogFactory.getLog("hive.ql.exec.Utilities");
 
   public static void clearMapRedWork (Configuration job) {
     try {
@@ -113,7 +121,7 @@
       protected Expression instantiate(Object oldInstance, Encoder out) {
       return new Expression(Enum.class,
                             "valueOf",
-                            new Object[] { oldInstance.getClass(), ((Enum) oldInstance).name() });
+                            new Object[] { oldInstance.getClass(), ((Enum<?>) oldInstance).name() });
     }
     protected boolean mutatesTo(Object oldInstance, Object newInstance) {
       return oldInstance == newInstance;
@@ -190,21 +198,13 @@
   public static tableDesc defaultTd;
   static {
     // by default we expect ^A separated strings
-    Properties p = new Properties ();
-    p.setProperty(org.apache.hadoop.hive.serde.Constants.SERIALIZATION_FORMAT, "" + Utilities.ctrlaCode);
-    defaultTd = new tableDesc(MetadataTypedColumnsetSerDe.class,
-        TextInputFormat.class, 
-        IgnoreKeyTextOutputFormat.class, p);
+    defaultTd = PlanUtils.getDefaultTableDesc("" + Utilities.ctrlaCode);
   }
 
   public static tableDesc defaultTabTd;
   static {
-    // by default we expect ^A separated strings
-    Properties p = new Properties ();
-    p.setProperty(org.apache.hadoop.hive.serde.Constants.SERIALIZATION_FORMAT, "" + Utilities.tabCode);
-    defaultTabTd = new tableDesc(MetadataTypedColumnsetSerDe.class,
-        TextInputFormat.class, 
-        IgnoreKeyTextOutputFormat.class, p);
+    // Default tab-separated tableDesc
+    defaultTabTd = PlanUtils.getDefaultTableDesc("" + Utilities.tabCode);
   }
   
   public final static int newLineCode = 10;
@@ -289,7 +289,7 @@
   }
 
   public static tableDesc getTableDesc(Table tbl) {
-    return (new tableDesc (tbl.getSerDe().getClass(), tbl.getInputFormatClass(), tbl.getOutputFormatClass(), tbl.getSchema()));
+    return (new tableDesc (tbl.getDeserializer().getClass(), tbl.getInputFormatClass(), tbl.getOutputFormatClass(), tbl.getSchema()));
   }
 
 
@@ -297,12 +297,12 @@
     return (new partitionDesc (getTableDesc(part.getTable()), part.getSpec()));
   }
 
-  public static void addMapWork(mapredWork mr, Table tbl, String alias, Operator work) {
+  public static void addMapWork(mapredWork mr, Table tbl, String alias, Operator<?> work) {
     mr.addMapWork(tbl.getDataLocation().getPath(), alias, work, 
                   new partitionDesc(getTableDesc(tbl), null));
   }
 
-  private static String getOpTreeSkel_helper(Operator op, String indent) {
+  private static String getOpTreeSkel_helper(Operator<?> op, String indent) {
     if (op == null)
       return "";
   
@@ -312,13 +312,13 @@
     sb.append("\n");
     if (op.getChildOperators() != null)
       for(Object child: op.getChildOperators()) {
-        sb.append(getOpTreeSkel_helper((Operator)child, indent + "  "));
+        sb.append(getOpTreeSkel_helper((Operator<?>)child, indent + "  "));
       }
 
     return sb.toString();
   }
 
-  public static String getOpTreeSkel(Operator op) {
+  public static String getOpTreeSkel(Operator<?> op) {
     return getOpTreeSkel_helper(op, "");
   }
 
@@ -380,4 +380,59 @@
 
     return prefix+suffix;
   }
+
+  public final static String NSTR = "";
+  public static enum streamStatus {EOF, TERMINATED, NORMAL}
+  public static streamStatus readColumn(DataInput in, OutputStream out) throws IOException {
+
+    while (true) {
+      int b;
+      try {
+        b = (int)in.readByte();
+      } catch (EOFException e) {
+        return streamStatus.EOF;
+      }
+
+      if (b == Utilities.newLineCode) {
+        return streamStatus.TERMINATED;
+      }
+
+      if (b == Utilities.ctrlaCode) {
+        return streamStatus.NORMAL;
+      }
+
+      out.write(b);
+    }
+    // Unreachable
+  }
+  
+  public static OutputStream createCompressedStream(JobConf jc,
+                                                    OutputStream out) throws IOException {
+    boolean isCompressed = FileOutputFormat.getCompressOutput(jc);
+    if(isCompressed) {
+      Class<? extends CompressionCodec> codecClass =
+        FileOutputFormat.getOutputCompressorClass(jc, DefaultCodec.class);
+      CompressionCodec codec = (CompressionCodec)
+        ReflectionUtils.newInstance(codecClass, jc);
+      return codec.createOutputStream(out);
+    } else {
+      return (out);
+    }
+  }
+
+  public static SequenceFile.Writer createSequenceWriter(JobConf jc, FileSystem fs,
+                                                         Path file, Class<?> keyClass,
+                                                         Class<?> valClass) throws IOException {
+    CompressionCodec codec = null;
+    CompressionType compressionType = CompressionType.NONE;
+    if (SequenceFileOutputFormat.getCompressOutput(jc)) {
+      compressionType = SequenceFileOutputFormat.getOutputCompressionType(jc);
+      Class codecClass = SequenceFileOutputFormat.getOutputCompressorClass(jc, DefaultCodec.class);
+      codec = (CompressionCodec) 
+        ReflectionUtils.newInstance(codecClass, jc);
+    }
+    return (SequenceFile.createWriter(fs, jc, file,
+                                      keyClass, valClass, compressionType, codec));
+
+  }
 }
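
The two new helpers at the end of Utilities decide purely from the JobConf whether plain and SequenceFile output streams are wrapped in a compression codec. A hypothetical usage sketch (the output paths and key/value classes are invented for illustration):

    import java.io.OutputStream;

    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hive.ql.exec.Utilities;
    import org.apache.hadoop.io.BytesWritable;
    import org.apache.hadoop.io.SequenceFile;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapred.FileOutputFormat;
    import org.apache.hadoop.mapred.JobConf;

    public class CompressedOutputSketch {
      public static void main(String[] args) throws Exception {
        JobConf jc = new JobConf();
        FileOutputFormat.setCompressOutput(jc, true);   // enable output compression

        FileSystem fs = FileSystem.get(jc);

        // Text-style output: wrapped in the configured codec when compression is on.
        OutputStream out = Utilities.createCompressedStream(
            jc, fs.create(new Path("/tmp/hive-out/part-00000")));
        out.write("a\u0001b\n".getBytes("UTF-8"));
        out.close();

        // SequenceFile output: compression type and codec also come from the JobConf.
        SequenceFile.Writer writer = Utilities.createSequenceWriter(
            jc, fs, new Path("/tmp/hive-out/part-00001"),
            BytesWritable.class, Text.class);
        writer.append(new BytesWritable("k".getBytes()), new Text("v"));
        writer.close();
      }
    }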

Modified: hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/io/HiveObjectComparator.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/io/HiveObjectComparator.java?rev=697291&r1=697290&r2=697291&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/io/HiveObjectComparator.java (original)
+++ hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/io/HiveObjectComparator.java Fri Sep 19 16:56:30 2008
@@ -1,96 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.io;
-
-import java.io.*;
-import java.util.*;
-
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.io.WritableComparable;
-import org.apache.hadoop.io.WritableComparator;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configurable;
-import org.apache.hadoop.hive.serde.*;
-import org.apache.hadoop.hive.ql.exec.HiveObject;
-import org.apache.hadoop.hive.ql.exec.Utilities;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.plan.mapredWork;
-
-/** 
- * Comparator for WritableComparableHiveObjects
- * 
- * We pass this in explicitly as a output key comparator so that we get a chance to
- * initialize the comparator using the job configuration/Hive plan. This allows us
- * to pick up the right deserializer (if we register with WritableComparator - we get
- * no such shot).
- *
- * This class provides a sort implementation only. The grouping implementation uses
- * the base compare() implementation - which just invokes compareTo() on the underlying
- * WritableComparableHiveObject.
- *
- */ 
-
-public class HiveObjectComparator extends WritableComparator implements Configurable {
-
-  // the serializer used to compare hive objects
-  protected HiveObjectSerializer hos;
-  protected boolean isTagged;
-
-  public Configuration getConf() {
-    throw new RuntimeException ("Unexpected invocation");
-  }
-
-  public void setConf(Configuration conf) {
-    mapredWork gWork = Utilities.getMapRedWork (conf);
-    isTagged = gWork.getNeedsTagging(); 
-    hos = new NaiiveSerializer();
-  }
-
-
-  public HiveObjectComparator(Class c) {
-    super(c);
-  }
-
-
-  public HiveObjectComparator () {
-    this(WritableComparableHiveObject.class);
-  }
-
-  /**
-   * This is the interface used to sort WritableComparableHiveObjects
-   * If the objects are not tagged - then it's simple - we just sort them (for now)
-   * based on the serialized object. If it is tagged - then we need to use the
-   * serialized object as the higher order bits to sort on (so that grouping is
-   * maintained) and then use the tag to break the tie (so that things are ordered
-   * by tag in the same co-group
-   */
-  public int compare(byte[] b1, int s1, int l1,
-                     byte[] b2, int s2, int l2) {
-    if(!isTagged) {
-      return (hos.compare(b1, s1, l1, b2, s2, l2));
-    } else {
-      int ret = hos.compare(b1, s1+1, l1-1, b2, s2+1, l2-1);
-      if(ret == 0) {
-        // use tag to break tie
-        ret = ((int)(b1[s1] & 0xff)) - ((int)(b2[s2] & 0xff));
-      }
-      return (ret);
-    }
-  }
-}
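
The removed comparator's javadoc describes the ordering it implemented: sort on the serialized key bytes, and for tagged keys (the tag occupying the first byte) break ties on the tag so that rows within one co-group stay together but come out ordered by tag. A standalone sketch of that tie-breaking scheme follows; it restates the old mechanism, not the replacement:

    import org.apache.hadoop.io.WritableComparator;

    public class TaggedBytesComparatorSketch {
      private final boolean isTagged;

      public TaggedBytesComparatorSketch(boolean isTagged) {
        this.isTagged = isTagged;
      }

      public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
        if (!isTagged) {
          // Untagged keys: plain lexicographic comparison of the serialized bytes.
          return WritableComparator.compareBytes(b1, s1, l1, b2, s2, l2);
        }
        // Tagged keys: group on everything after the tag byte first ...
        int ret = WritableComparator.compareBytes(b1, s1 + 1, l1 - 1, b2, s2 + 1, l2 - 1);
        if (ret == 0) {
          // ... then break ties on the tag so each co-group is ordered by source.
          ret = (b1[s1] & 0xff) - (b2[s2] & 0xff);
        }
        return ret;
      }
    }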

Modified: hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/io/HiveObjectSerializer.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/io/HiveObjectSerializer.java?rev=697291&r1=697290&r2=697291&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/io/HiveObjectSerializer.java (original)
+++ hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/io/HiveObjectSerializer.java Fri Sep 19 16:56:30 2008
@@ -1,43 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.io;
-
-import java.util.*;
-import java.io.*;
-
-import org.apache.hadoop.hive.ql.exec.HiveObject;
-import org.apache.hadoop.io.Writable;
-
-
-/**
- * A Serializer that is repeatedly invoked for Hive Objects that all share the
- * same schema.
- **/
-public interface HiveObjectSerializer <T extends Writable> {
-
-  public void initialize (Properties p);
-
-  public void serialize(HiveObject ho, DataOutput out) throws IOException;
-  public HiveObject deserialize(DataInput in)  throws IOException;
-
-  public int compare(byte [] b1, int s1, int l1, byte [] b2, int s2, int l2);
-
-  public long getReadErrorCount();
-  public long getWriteErrorCount();
-}

Modified: hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/io/NaiiveJSONSerializer.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/io/NaiiveJSONSerializer.java?rev=697291&r1=697290&r2=697291&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/io/NaiiveJSONSerializer.java (original)
+++ hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/io/NaiiveJSONSerializer.java Fri Sep 19 16:56:30 2008
@@ -1,220 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.io;
-
-import java.util.*;
-import java.io.*;
-
-import org.apache.hadoop.hive.ql.exec.HiveObject;
-import org.apache.hadoop.hive.ql.exec.CompositeHiveObject;
-import org.apache.hadoop.hive.ql.exec.PrimitiveHiveObject;
-import org.apache.hadoop.hive.ql.exec.Utilities;
-import org.apache.hadoop.io.WritableComparator;
-import org.apache.hadoop.hive.serde.*;
-import org.apache.hadoop.hive.utils.ByteStream;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-
-/**
- * Serializes and deserializes Hive Objects as a delimited strings.
- *
- * Lots of code shamelessly copied from NaiiveSerializer. 
- **/
-public class NaiiveJSONSerializer implements HiveObjectSerializer {
-
-  List<SerDeField> topLevelFields;
-
-  int separator = Utilities.ctrlaCode;
-  int terminator = Utilities.newLineCode;
-
-  long writeErrorCount = 0, readErrorCount = 0;
-  ByteStream.Output bos = new ByteStream.Output ();
-  int width = -1;
-  ArrayList<String> slist = new ArrayList<String> ();
-  boolean isPrimitive, isTopLevel = true;
-
-  private void setSeparator (int separator) {
-    this.separator = separator;
-  }
-
-  private void setTerminator (int terminator) {
-    this.terminator = terminator;
-  }
-
-  private void setIsTopLevel(boolean value) {
-    isTopLevel = value;
-  }
-
-  public NaiiveJSONSerializer () {}
-
-  public void initialize (Properties p) {
-    String separator = p.getProperty(org.apache.hadoop.hive.serde.Constants.SERIALIZATION_FORMAT);
-    if(separator != null) {
-      setSeparator(Integer.parseInt(separator));
-    }
-  }
-
-  public void serialize(HiveObject ho, DataOutput os) throws IOException {
-    try {
-      if(topLevelFields == null) {
-        try {
-          if(ho.isPrimitive()) {
-            topLevelFields = HiveObject.nlist;
-            isPrimitive = true;
-          } else {
-            topLevelFields = ho.getFields();
-            isPrimitive = false;
-
-            //System.err.println("Naiive: Hive Object has "+topLevelFields.size()+" fields");
-          }
-        } catch (HiveException e) {
-          throw new RuntimeException ("Cannot get Fields from HiveObject");
-        }
-      }
-
-      if(isPrimitive) {
-        os.write(ho.getJavaObject().toString().getBytes("UTF-8"));
-      } else {
-        boolean first = true;
-        int i = -1;
-        for(SerDeField onef: topLevelFields) {
-          i++;
-
-          if(!first) {
-            os.write(separator);
-          } else {
-            first = false;
-          }
-          HiveObject nho = ho.get(onef);
-          if(nho == null)
-            continue;
-
-          os.write(nho.toString().getBytes("UTF-8"));
-        }
-      }
-
-      if(isTopLevel) {
-        os.write(terminator);
-      }
-    } catch (HiveException e) {
-      writeErrorCount++;
-    }
-  }
-
-  private final static String NSTR = "";
-  private static enum streamStatus {EOF, TERMINATED, NORMAL}
-  public HiveObject deserialize (DataInput in) throws IOException {
-    boolean more = true;
-    CompositeHiveObject nr = null;
-    int entries = 0;
-
-    if(width != -1) {
-      nr = new CompositeHiveObject (width);
-    } else {
-      slist.clear();
-    }
-
-    do {
-      bos.reset();
-      streamStatus ss = readColumn(in, bos);
-      if((ss == streamStatus.EOF) ||
-         (ss == streamStatus.TERMINATED)) {
-        // read off entire row/file
-        more = false;
-      }
-
-      entries ++;
-      String col;
-      if(bos.getCount() > 0) {
-        col = new String(bos.getData(), 0, bos.getCount(), "UTF-8");
-      } else {
-        col = NSTR;
-      }
-
-      if(width == -1) {
-        slist.add(col);
-      } else {
-        if(entries <= width) {
-          try {
-            nr.addHiveObject(new PrimitiveHiveObject(col));
-          } catch (HiveException e) {
-            e.printStackTrace();
-            throw new IOException (e.getMessage());
-          }
-        }
-      }
-    } while (more);
-
-    if (width == -1) {
-      width = entries;
-      nr = new CompositeHiveObject (width);
-      for(String col: slist) {
-        try {
-          nr.addHiveObject(new PrimitiveHiveObject(col));
-        } catch (HiveException e) {
-          e.printStackTrace();
-          throw new IOException (e.getMessage());
-        }
-      }
-      return (nr);
-    }
-
-    if(width > entries) {
-      // skip and move on ..
-      readErrorCount++;
-      return null;
-    } else {
-      return nr;
-    }
-  }
-
-  public long getReadErrorCount() {
-    return readErrorCount;
-  }
-
-  public long getWriteErrorCount() {
-    return writeErrorCount;
-  }
-
-  private streamStatus readColumn(DataInput in, OutputStream out) throws IOException {
-    while (true) {
-      int b;
-      try {
-        b = (int)in.readByte();
-      } catch (EOFException e) {
-        return streamStatus.EOF;
-      }
-
-      if (b == terminator) {
-        return streamStatus.TERMINATED;
-      }
-
-      if (b == separator) {
-        return streamStatus.NORMAL;
-      }
-
-      out.write(b);
-    }
-    // Unreachable
-  }
-
-  public int compare(byte [] b1, int s1, int l1, byte [] b2, int s2, int l2) {
-    // Since all data is strings - we just use lexicographic ordering
-    return WritableComparator.compareBytes(b1, s1, l2, b2, s2, l2);
-  }
-}

Modified: hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/io/NaiiveSerializer.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/io/NaiiveSerializer.java?rev=697291&r1=697290&r2=697291&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/io/NaiiveSerializer.java (original)
+++ hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/io/NaiiveSerializer.java Fri Sep 19 16:56:30 2008
@@ -1,253 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.io;
-
-import java.util.*;
-import java.io.*;
-
-import org.apache.hadoop.hive.ql.exec.HiveObject;
-import org.apache.hadoop.hive.ql.exec.CompositeHiveObject;
-import org.apache.hadoop.hive.ql.exec.PrimitiveHiveObject;
-import org.apache.hadoop.hive.ql.exec.Utilities;
-import org.apache.hadoop.io.WritableComparator;
-import org.apache.hadoop.hive.serde.*;
-import org.apache.hadoop.hive.utils.ByteStream;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-
-/**
- * Serializes and deserializes Hive Objects as a delimited strings.
- **/
-public class NaiiveSerializer implements HiveObjectSerializer {
-
-  List<SerDeField> topLevelFields;
-  NaiiveSerializer [] topLevelSerializers;
-
-  int separator = Utilities.ctrlaCode;
-  int terminator = Utilities.newLineCode;
-  byte[] nullByteArray;
-
-  long writeErrorCount = 0, readErrorCount = 0;
-  ByteStream.Output bos = new ByteStream.Output ();
-  int width = -1;
-  ArrayList<String> slist = new ArrayList<String> ();
-  boolean isPrimitive, isTopLevel = true;
-
-  private void setSeparator (int separator) {
-    this.separator = separator;
-  }
-
-  private void setIsTopLevel(boolean value) {
-    isTopLevel = value;
-  }
-
-  private void setNullByteArray(byte[] nullByteArray) {
-    this.nullByteArray = nullByteArray;
-  }
-
-  public NaiiveSerializer () {
-    try {
-      setNullByteArray(Utilities.nullStringStorage.getBytes("UTF-8"));
-    } catch (UnsupportedEncodingException e) {
-      throw new RuntimeException("UTF-8 should be supported", e);
-    }
-  }
-
-  public void initialize (Properties p) {
-    String separator = p.getProperty(org.apache.hadoop.hive.serde.Constants.SERIALIZATION_FORMAT);
-    if(separator != null) {
-      setSeparator(Integer.parseInt(separator));
-    }
-    // Will make this configurable when DDL (CREATE TABLE) supports customized null string.
-    String nullString = null;
-    try {
-      if (nullString != null) {
-        setNullByteArray(nullString.getBytes("UTF-8"));
-      } else {
-        setNullByteArray(Utilities.nullStringStorage.getBytes("UTF-8"));
-      }
-    } catch (UnsupportedEncodingException e) {
-      throw new RuntimeException("UTF-8 should be supported", e);
-    }
-  }
-
-  public void serialize(HiveObject ho, DataOutput os) throws IOException {
-    try {
-      if (ho.getIsNull()) {
-        os.write(nullByteArray);
-      } else {
-        if(topLevelFields == null) {
-          try {
-            if(ho.isPrimitive()) {
-              topLevelFields = HiveObject.nlist;
-              isPrimitive = true;
-            } else {
-              topLevelFields = ho.getFields();
-              isPrimitive = false;
-              assert(topLevelFields != null);
-              topLevelSerializers = new NaiiveSerializer [topLevelFields.size()];
-              for(int i=0; i<topLevelFields.size(); i++) {
-                topLevelSerializers[i] = new NaiiveSerializer();
-                topLevelSerializers[i].setSeparator(separator+1);
-                topLevelSerializers[i].setIsTopLevel(false);
-                topLevelSerializers[i].setNullByteArray(nullByteArray);
-              }
-
-              //System.err.println("Naiive: Hive Object has "+topLevelFields.size()+" fields");
-            }
-          } catch (HiveException e) {
-            throw new RuntimeException ("Cannot get Fields from HiveObject", e);
-          }
-        }
-  
-        if(isPrimitive) {
-          os.write(ho.getJavaObject().toString().getBytes("UTF-8"));
-        } else {
-          boolean first = true;
-          int i = -1;
-          for(SerDeField onef: topLevelFields) {
-            i++;
-  
-            if(!first) {
-              os.write(separator);
-            } else {
-              first = false;
-            }
-            HiveObject nho = ho.get(onef);
-            if(nho == null)
-              continue;
-  
-            if(nho.isPrimitive()) {
-              os.write(nho.getJavaObject().toString().getBytes("UTF-8"));
-            } else {
-              topLevelSerializers[i].serialize(nho, os);
-            }
-          }
-        }
-      }
-      
-      if(isTopLevel) {
-        os.write(terminator);
-      }
-    } catch (HiveException e) {
-      writeErrorCount++;
-    }
-  }
-
-  private final static String NSTR = "";
-  private static enum streamStatus {EOF, TERMINATED, NORMAL}
-  public HiveObject deserialize (DataInput in) throws IOException {
-    boolean more = true;
-    CompositeHiveObject nr = null;
-    int entries = 0;
-
-    if(width != -1) {
-      nr = new CompositeHiveObject (width);
-    } else {
-      slist.clear();
-    }
-
-    do {
-      bos.reset();
-      streamStatus ss = readColumn(in, bos);
-      if((ss == streamStatus.EOF) ||
-         (ss == streamStatus.TERMINATED)) {
-        // read off entire row/file
-        more = false;
-      }
-
-      entries ++;
-      String col;
-      if(bos.getCount() > 0) {
-        col = new String(bos.getData(), 0, bos.getCount(), "UTF-8");
-      } else {
-        col = NSTR;
-      }
-
-      if(width == -1) {
-        slist.add(col);
-      } else {
-        if(entries <= width) {
-          try {
-            nr.addHiveObject(new PrimitiveHiveObject(col));
-          } catch (HiveException e) {
-            e.printStackTrace();
-            throw new IOException (e.getMessage());
-          }
-        }
-      }
-    } while (more);
-
-    if (width == -1) {
-      width = entries;
-      nr = new CompositeHiveObject (width);
-      for(String col: slist) {
-        try {
-          nr.addHiveObject(new PrimitiveHiveObject(col));
-        } catch (HiveException e) {
-          e.printStackTrace();
-          throw new IOException (e.getMessage());
-        }
-      }
-      return (nr);
-    }
-
-    if(width > entries) {
-      // skip and move on ..
-      readErrorCount++;
-      return null;
-    } else {
-      return nr;
-    }
-  }
-
-  public long getReadErrorCount() {
-    return readErrorCount;
-  }
-
-  public long getWriteErrorCount() {
-    return writeErrorCount;
-  }
-
-  private streamStatus readColumn(DataInput in, OutputStream out) throws IOException {
-    while (true) {
-      int b;
-      try {
-        b = (int)in.readByte();
-      } catch (EOFException e) {
-        return streamStatus.EOF;
-      }
-
-      if (b == terminator) {
-        return streamStatus.TERMINATED;
-      }
-
-      if (b == separator) {
-        return streamStatus.NORMAL;
-      }
-
-      out.write(b);
-    }
-    // Unreachable
-  }
-
-  public int compare(byte [] b1, int s1, int l1, byte [] b2, int s2, int l2) {
-    // Since all data is strings - we just use lexicographic ordering
-    return WritableComparator.compareBytes(b1, s1, l2, b2, s2, l2);
-  }
-}

Modified: hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/io/NoTagHiveObjectComparator.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/io/NoTagHiveObjectComparator.java?rev=697291&r1=697290&r2=697291&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/io/NoTagHiveObjectComparator.java (original)
+++ hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/io/NoTagHiveObjectComparator.java Fri Sep 19 16:56:30 2008
@@ -1,28 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.io;
-
-public class NoTagHiveObjectComparator extends HiveObjectComparator {
-  /**
-   * Need to initialize base class with right key type
-   */
-  public NoTagHiveObjectComparator () {
-    super(NoTagWritableComparableHiveObject.class);
-  }
-}

Modified: hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/io/NoTagWritableComparableHiveObject.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/io/NoTagWritableComparableHiveObject.java?rev=697291&r1=697290&r2=697291&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/io/NoTagWritableComparableHiveObject.java (original)
+++ hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/io/NoTagWritableComparableHiveObject.java Fri Sep 19 16:56:30 2008
@@ -1,64 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.io;
-
-import java.io.*;
-import java.util.*;
-
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.io.WritableComparable;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hive.serde.*;
-import org.apache.hadoop.hive.ql.exec.HiveObject;
-import org.apache.hadoop.hive.ql.exec.Utilities;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.plan.mapredWork;
-
-/**
- * Extension of WritableComparableHiveObject that does not tag objects
- *
- */
-
-public class NoTagWritableComparableHiveObject extends WritableComparableHiveObject implements WritableComparable {
-
-  /**
-   * Constructor called by Hive on map output
-   */
-  public NoTagWritableComparableHiveObject (HiveObject ho,  HiveObjectSerializer hos) {
-    super(-1, ho, hos);
-  }
-
-  /**
-   * Default constructor invoked when map-reduce is constructing this object
-   */
-  public NoTagWritableComparableHiveObject () {
-    super();
-  }
-
-  @Override
-  public void readFields(DataInput in) throws IOException {
-    ho = hos.deserialize(in);
-  }
-
-  @Override
-  public void write(DataOutput out) throws IOException {
-    hos.serialize(ho, out);
-  }
-
-}

Modified: hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/io/NoTagWritableHiveObject.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/io/NoTagWritableHiveObject.java?rev=697291&r1=697290&r2=697291&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/io/NoTagWritableHiveObject.java (original)
+++ hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/io/NoTagWritableHiveObject.java Fri Sep 19 16:56:30 2008
@@ -1,82 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.io;
-
-import java.io.*;
-import java.util.*;
-
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.conf.Configurable;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hive.serde.*;
-import org.apache.hadoop.hive.ql.exec.HiveObject;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.exec.Utilities;
-import org.apache.hadoop.hive.ql.plan.mapredWork;
-
-
-/**
- * A wrapper over hive objects that allows interfacing with Map-Reduce
- * serialization layer.
- *
- * NoTag Writable Hive Objects are deserialized only in the reduce phase. They are
- * used only when the 'value' fields in the reduce phase are homogeneous and do not
- * require tagging.
- * 
- */
-
-public class NoTagWritableHiveObject extends WritableHiveObject implements Writable, Configurable {
-
-  public NoTagWritableHiveObject () { super(); }
-
-  /**
-   * This constructor will be invoked by hive when creating writable  objects.
-   */
-  public NoTagWritableHiveObject (int tag, HiveObject ho,  HiveObjectSerializer hos) {
-    throw new RuntimeException ("NoTagWritables should not be initialized with tags");
-  }
-
-  public NoTagWritableHiveObject (HiveObject ho,  HiveObjectSerializer hos) {
-    super(-1, ho, hos);
-  }
-
-  @Override
-  public void setConf(Configuration conf) {
-    super.setConf(conf);
-    // we need just one deserializer. Get the first of them!
-    hos = mapredDeserializer[0];
-  }
-
-  @Override
-  public int getTag() {
-    return -1;
-  }
-
-  @Override
-  public void write(DataOutput out) throws IOException {
-    // don't serialize tag
-    hos.serialize(ho, out);
-  }
-
-  @Override
-  public void readFields(DataInput in) throws IOException {
-    // don't de-serialize tag
-    ho = hos.deserialize(in);
-  }
-}

Modified: hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/io/WritableComparableHiveObject.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/io/WritableComparableHiveObject.java?rev=697291&r1=697290&r2=697291&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/io/WritableComparableHiveObject.java (original)
+++ hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/io/WritableComparableHiveObject.java Fri Sep 19 16:56:30 2008
@@ -1,206 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.io;
-
-import java.io.*;
-import java.util.*;
-
-import org.apache.hadoop.io.WritableComparable;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hive.serde.*;
-import org.apache.hadoop.hive.ql.exec.HiveObject;
-import org.apache.hadoop.hive.ql.exec.Utilities;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.plan.mapredWork;
-
-/**
- * A wrapper over hive objects that allows interfacing with Map-Reduce
- * sorting/serialization layers. 
- *
- * WritableComparable Hive Objects are deserialized in both the map and reduce phases.
- * Because they have a fixed schema, we just need to initialize a single deserializer.
- * Either the serializer/deserializer is passed in at construction time (map output),
- * or it is obtained via the JobConf at construction time.
- *
- * The base version allows a tag to be serialized out alongside. But it is not used for
- * grouping/partitioning (only for sorting).
- *
- */
-
-public class WritableComparableHiveObject extends WritableHiveObject implements WritableComparable {
-
-  /**
-   * number of fields used in partition function.
-   * 0 - no fields will be used
-   * -1 - a random number will be used.
-   * Integer.MAX_VALUE - use all key fields.
-   */
-  static int numPartitionFields = Integer.MAX_VALUE;
-  static Random random = new Random();
-  
-  public static void setNumPartitionFields(int numPartitionFields) {
-    WritableComparableHiveObject.numPartitionFields = numPartitionFields;
-  }
-  
-  static List<SerDeField> topLevelFields = null;
-  static boolean isPrimitive;
-  static HiveObjectSerializer gHos;
-
-  /**
-   * Constructor called by Hive on map output
-   */
-  public WritableComparableHiveObject(int tag, HiveObject ho,  HiveObjectSerializer hos) {
-    super(tag, ho, hos);
-  }
-
-  /**
-   * Default constructor invoked when map-reduce is constructing this object
-   */
-  public WritableComparableHiveObject () {
-    super();
-  }
-
-  /**
-   * This function is invoked when map-reduce is constructing this object
-   * We construct one global deserializer for this case.
-   *
-   * TODO: how to do this initialization without making this configurable?
-   * Need to find a very early hook!
-   * 
-   * TODO: Replace NaiiveSerializer with MetadataTypedSerDe, and configure
-   * the MetadataTypedSerDe right here.
-   */
-  @Override
-  public void setConf(Configuration conf) {
-    if(gHos == null) {
-      mapredWork gWork = Utilities.getMapRedWork (conf);
-      gHos = new NaiiveSerializer();
-    }
-    hos = gHos;
-  }
-
-  /**
-   * Get tag out first like the base class - but use the same deserializer
-   * for getting the hive object (since the key schema is constant)
-   */
-  @Override
-  public void readFields(DataInput in) throws IOException {
-    tag = (int) in.readByte();
-    ho = hos.deserialize(in);
-  }
-
-
-  public boolean equals (Object o) {
-    // afaik - this is never called
-    throw new RuntimeException("Not invented here");
-  }
-
-  /**
-   * This api should only be called during the reduce to check for group equality
-   * This asserts default grouping behavior (group by all columns). Note we don't
-   * use tags for grouping. Tags are only used for sorting and this behavior is
-   * controlled by HiveObjectComparator
-   * TODO (low priority): Make it possible to specify the grouping columns.
-   */
-  public int compareTo(Object o) {
-    HiveObject ho_rhs = ((WritableComparableHiveObject)o).getHo();
-    if(topLevelFields == null) {
-      try {
-        if(ho.isPrimitive()) {
-          topLevelFields = HiveObject.nlist;
-          isPrimitive = true;
-        } else {
-          topLevelFields = ho.getFields();
-          isPrimitive = false;
-        }
-      } catch (HiveException e) {
-        throw new RuntimeException ("Cannot get Fields from HiveObject");
-      }
-    }
-    try {
-
-      if(isPrimitive) {
-        Comparable a = (Comparable)ho.getJavaObject();
-        Comparable b = (Comparable)ho_rhs.getJavaObject();
-        return  a.compareTo(b);
-      }
-
-      // There is an assumption that the number of fields is the same,
-      // and that all the constituent fields are Comparables.
-      // Assumption #1 has to be enforced by the deserializer.
-      // Assumption #2 has to be enforced by only allowing primitive comparable types
-      // as group fields.
-      for(SerDeField onef: topLevelFields) {
-        Comparable a = (Comparable)ho.get(onef).getJavaObject();
-        Comparable b = (Comparable)ho_rhs.get(onef).getJavaObject();
-        
-        int ret = a.compareTo(b);
-        if(ret != 0)
-          return (ret);
-      }
-    } catch (HiveException e) {
-      e.printStackTrace();
-      throw new RuntimeException ("HiveObject.get()/getJavaObject() methods failed");
-    }
-
-    // all fields are the same.
-    return (0);
-  }
-
-  public int hashCode() {
-    // This is a special case where we want the rows to be randomly distributed to
-    // reducers for load balancing.  In this case, we use a random number
-    // as the hashCode.
-    if (numPartitionFields == -1) {
-      return random.nextInt();
-    }
-    if(topLevelFields == null) {
-      try {
-        if(ho.isPrimitive()) {
-          topLevelFields = HiveObject.nlist;
-          isPrimitive = true;
-        } else {
-          topLevelFields = ho.getFields();
-          isPrimitive = false;
-        }
-      } catch (HiveException e) {
-        throw new RuntimeException ("Cannot get Fields from HiveObject");
-      }
-    }
-
-    int ret = 0;
-    try {
-      if(isPrimitive) {
-        return ho.getJavaObject().hashCode();
-      }
-      int numFields = 0;
-      for(SerDeField onef: topLevelFields) {
-        Object o = ho.get(onef).getJavaObject();
-        // TODO: replace with something smarter (borrowed from Text.java)
-        ret = ret * 31 + (o == null ? 0 : o.hashCode());
-        numFields ++;
-        if (numFields >= numPartitionFields) break;
-      }
-    } catch (HiveException e) {
-      e.printStackTrace();
-      throw new RuntimeException ("HiveObject.get()/getJavaObject() failed");
-    }
-    return (ret);
-  }
-}
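
The removed hashCode() above drives reduce-side partitioning: with numPartitionFields set to -1 a random hash spreads rows across reducers, and otherwise only the leading key fields contribute (Integer.MAX_VALUE meaning all of them). The sketch below restates that contract over a plain list of key fields; the class name is illustrative, and the handling of 0 fields follows the documented semantics rather than being part of the original sources.

    import java.util.List;
    import java.util.Random;

    // Illustrative restatement of the removed partition-hash contract.
    public class KeyPartitionHasher {
      private final int numPartitionFields;  // -1 = random, Integer.MAX_VALUE = all fields
      private final Random random = new Random();

      public KeyPartitionHasher(int numPartitionFields) {
        this.numPartitionFields = numPartitionFields;
      }

      public int hash(List<Object> keyFields) {
        if (numPartitionFields == -1) {
          // Spread rows randomly across reducers (load-balancing case).
          return random.nextInt();
        }
        int hash = 0;
        int used = 0;
        for (Object field : keyFields) {
          if (used >= numPartitionFields) {
            break;  // only the leading numPartitionFields fields participate
          }
          hash = hash * 31 + (field == null ? 0 : field.hashCode());
          used++;
        }
        return hash;
      }
    }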

Modified: hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/io/WritableHiveObject.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/io/WritableHiveObject.java?rev=697291&r1=697290&r2=697291&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/io/WritableHiveObject.java (original)
+++ hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/io/WritableHiveObject.java Fri Sep 19 16:56:30 2008
@@ -1,149 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.io;
-
-import java.io.*;
-import java.util.*;
-
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.conf.Configurable;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hive.serde.*;
-import org.apache.hadoop.hive.ql.exec.HiveObject;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.exec.Utilities;
-import org.apache.hadoop.hive.ql.plan.mapredWork;
-
-
-/**
- * A wrapper over hive objects that allows interfacing with Map-Reduce
- * serialization layer.
- *
- * Writable Hive Objects are deserialized only in the reduce phase. 'Value'
- * fields are encapsulated in WritableHiveObjects - and they are heterogeneous.
- * Hence we prefix a tag to the actual object to distinguish the different
- * kinds of objects.
- * 
- */
-
-public class WritableHiveObject implements Writable, Configurable {
-  protected int tag;
-  protected HiveObject ho;
-  protected HiveObjectSerializer hos;
-
-  protected static HiveObjectSerializer [] mapredDeserializer;
-
-
-  public WritableHiveObject () {}
-
-  /**
-   * Should not be called.
-   */
-  public Configuration getConf() {
-    throw new RuntimeException ("Unexpected invocation");
-  }
-
-  /**
-   * 
-   * 1. we come through this code path when being initialized by map-reduce
-   * 
- *    In this case we fall back on one common deserializer - but create an
- *    instance for each tag (since deserializers cannot deal with differing
- *    schemas).
- *
- *    It turns out that objects may be deserialized and serialized again. This
- *    is done unnecessarily in some code paths in 0.15 - but is legitimate
- *    in the case of the combiner.
-   *
-   * TODO: how to do this initialization without making this configurable? Need to 
-   * find a very early hook!
-   */
-  public void setConf(Configuration conf) {
-    if(mapredDeserializer == null) {
-      mapredWork gWork = Utilities.getMapRedWork (conf);
-      setSerialFormat();
-    }
-  }
-  
-  /**
-   * Meant to be accessed directly from test code only 
-   * 
-   * TODO: these deserializers need to be initialized with the schema
-   */
-  public static void setSerialFormat() {
-    mapredDeserializer = 
-      new HiveObjectSerializer [(int) Byte.MAX_VALUE];
-    
-    for(int i=0; i<Byte.MAX_VALUE; i++) {
-      // we initialize a deserializer for each tag. in future we will
-      // pass the schema in the deserializer as well. For now ..
-      // TODO: use MetadataTypedSerDe to replace NaiiveSerializer.
-      mapredDeserializer[i] =  new NaiiveSerializer();
-    }
-  }
-
-
-  /**
-   * 2. when map-reduce initializes the object - we just read data.
-   *    for each row we read the tag and then use the deserializer for
-   *    that tag.
-   *
-   */
-  public void readFields(DataInput in) throws IOException {
-    tag = (int) in.readByte();
-    // stash away the serializer in case we are written out
-    hos = mapredDeserializer[tag];
-    ho = hos.deserialize(in);
-  }
-
-  /**
-   * 1. this constructor will be invoked by hive when creating writable  objects
-   */
-  public WritableHiveObject (int tag, HiveObject ho,  HiveObjectSerializer hos) {
-    this.tag = tag;
-    this.ho = ho;
-    this.hos = hos;
-  }
-
-  /**
-   * 2. when Hive instantiates Writable objects - we will repeatedly set a new object
-   */
-  public void setHo(HiveObject ho) {
-    this.ho = ho;
-  }
-
-  /**
-   * 3. and ask for the object to be serialized out
-   */
-  public void write(DataOutput out) throws IOException {
-    out.write(tag);
-    hos.serialize(ho, out);
-  }
-
-
-  public HiveObject getHo() {
-    return (ho);
-  }
-
-  public int getTag() {
-    return tag;
-  }
-}
-
-
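
The removed value wrapper above frames each record as a one-byte tag followed by the serialized object, so the reduce side can look up the per-tag deserializer before reading the payload. Below is a minimal sketch of that framing, with a plain byte array standing in for the serialized HiveObject; the class name and the explicit length prefix are assumptions of this sketch (the original delegated payload framing to the HiveObjectSerializer).

    import java.io.DataInput;
    import java.io.DataOutput;
    import java.io.IOException;
    import org.apache.hadoop.io.Writable;

    // Illustrative only: one-byte tag followed by the payload, as in the removed
    // WritableHiveObject. A plain byte[] stands in for the serialized HiveObject.
    public class TaggedValue implements Writable {
      private int tag;
      private byte[] payload = new byte[0];

      public TaggedValue() {}

      public TaggedValue(int tag, byte[] payload) {
        this.tag = tag;
        this.payload = payload;
      }

      public int getTag() {
        return tag;
      }

      public void write(DataOutput out) throws IOException {
        out.writeByte(tag);           // tag first, so readers can pick a deserializer
        out.writeInt(payload.length); // length prefix is an assumption of this sketch
        out.write(payload);
      }

      public void readFields(DataInput in) throws IOException {
        tag = in.readByte();
        payload = new byte[in.readInt()];
        in.readFully(payload);
      }
    }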

Modified: hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java?rev=697291&r1=697290&r2=697291&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java (original)
+++ hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java Fri Sep 19 16:56:30 2008
@@ -37,7 +37,6 @@
 import org.apache.hadoop.hive.metastore.IMetaStoreClient;
 import org.apache.hadoop.hive.metastore.MetaStoreClient;
 import org.apache.hadoop.hive.metastore.MetaStoreUtils;
-import org.apache.hadoop.hive.metastore.Warehouse;
 import org.apache.hadoop.hive.metastore.api.AlreadyExistsException;
 import org.apache.hadoop.hive.metastore.api.Constants;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
@@ -158,7 +157,7 @@
     tbl.setOutputFormatClass(fileOutputFormat.getName());
 
     for (String col: columns) {
-      FieldSchema field = new FieldSchema(col, Constants.STRING_TYPE_NAME, "default string type");
+      FieldSchema field = new FieldSchema(col, org.apache.hadoop.hive.serde.Constants.STRING_TYPE_NAME, "default");
       tbl.getCols().add(field);
     }
 
@@ -166,11 +165,11 @@
       for (String partCol : partCols) {
         FieldSchema part = new FieldSchema();
         part.setName(partCol);
-        part.setType(Constants.STRING_TYPE_NAME); // default partition key
+        part.setType(org.apache.hadoop.hive.serde.Constants.STRING_TYPE_NAME); // default partition key
         tbl.getPartCols().add(part);
       }
     }
-    tbl.setSerializationLib(org.apache.hadoop.hive.serde.simple_meta.MetadataTypedColumnsetSerDe.shortName());
+    tbl.setSerializationLib(org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe.shortName());
     tbl.setNumBuckets(bucketCount);
     createTable(tbl);
   }
@@ -281,6 +280,8 @@
     try {
       // first get a schema (in key / vals)
       Properties p = MetaStoreUtils.getSchema(tTable);
+      // Map hive1 to hive3 class names, can be removed when migration is done.
+      p = MetaStoreUtils.hive1Tohive3ClassNames(p);
       table.setSchema(p);
       table.setTTable(tTable);
       table.setInputFormatClass((Class<? extends InputFormat<WritableComparable, Writable>>)
@@ -289,16 +290,18 @@
       table.setOutputFormatClass((Class<? extends OutputFormat<WritableComparable, Writable>>)
           Class.forName(table.getSchema().getProperty(org.apache.hadoop.hive.metastore.api.Constants.FILE_OUTPUT_FORMAT,
               org.apache.hadoop.mapred.SequenceFileOutputFormat.class.getName()))); 
-      table.setSerDe(MetaStoreUtils.getSerDe(getConf(), p));
+      table.setDeserializer(MetaStoreUtils.getDeserializer(getConf(), p));
       table.setDataLocation(new URI(tTable.getSd().getLocation()));
     } catch(Exception e) {
       LOG.error(StringUtils.stringifyException(e));
       throw new HiveException(e);
     }
-    String sf = table.getSerializationFormat();
-    char[] b = sf.toCharArray();
-    if ((b.length == 1) && (b[0] < 10)){ // ^A, ^B, ^C, ^D, \t
-      table.setSerializationFormat(Integer.toString(b[0]));
+    String sf = table.getSerdeParam(org.apache.hadoop.hive.serde.Constants.SERIALIZATION_FORMAT);
+    if(sf != null) {
+      char[] b = sf.toCharArray();
+      if ((b.length == 1) && (b[0] < 10)){ // ^A, ^B, ^C, ^D, \t
+        table.setSerdeParam(org.apache.hadoop.hive.serde.Constants.SERIALIZATION_FORMAT, Integer.toString(b[0]));
+      }
     }
     table.checkValidity();
     return table;
@@ -459,6 +462,17 @@
     }
     return new Partition(tbl, tpart);
   }
+  
+  public List<String> getPartitionNames(String dbName, String tblName, short max) throws HiveException {
+    List names = null;
+    try {
+      names = msc.listPartitionNames(dbName, tblName, max);
+    } catch (Exception e) {
+      LOG.error(StringUtils.stringifyException(e));
+      throw new HiveException(e);
+    }
+    return names;
+  }
 
   /**
    * get all the partitions that the table has
@@ -494,7 +508,7 @@
       // create an empty partition. 
      // HACK, HACK. SemanticAnalyzer code requires an empty partition when the table is not partitioned
       org.apache.hadoop.hive.metastore.api.Partition tPart = new org.apache.hadoop.hive.metastore.api.Partition();
-      tPart.setSd(tbl.getTTable().getSd());
+      tPart.setSd(tbl.getTTable().getSd()); // TODO: get a copy
       Partition part = new Partition(tbl, tPart);
       ArrayList<Partition> parts = new ArrayList<Partition>(1);
       parts.add(part);
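
Among the Hive.java changes above is the new getPartitionNames(dbName, tblName, max) method, which lists partition names through the metastore client without materializing full Partition objects. A small usage sketch follows; it assumes an already-initialized Hive client, and the database/table names and helper class are placeholders.

    import java.util.List;
    import org.apache.hadoop.hive.ql.metadata.Hive;
    import org.apache.hadoop.hive.ql.metadata.HiveException;

    // Illustrative only: enumerate up to 'max' partition names without
    // fetching the partitions themselves.
    public class PartitionNamePrinter {
      public static void printNames(Hive db, String dbName, String tblName) throws HiveException {
        List<String> names = db.getPartitionNames(dbName, tblName, (short) 100);
        for (String name : names) {
          System.out.println(name);
        }
      }
    }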

Modified: hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveUtils.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveUtils.java?rev=697291&r1=697290&r2=697291&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveUtils.java (original)
+++ hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveUtils.java Fri Sep 19 16:56:30 2008
@@ -18,13 +18,6 @@
 
 package org.apache.hadoop.hive.ql.metadata;
 
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-
-
-import org.apache.hadoop.hive.serde.*;
-
 /**
  * General collection of helper functions
  *
@@ -38,131 +31,6 @@
   public static final String LBRACE = "{";
   public static final String RBRACE = "}";
 
-  /**
-   * Common functionality for all SerDe libraries to handle list and primitive
-   * string serialization to JSON strings (who uses this?)
-   */
-  public static String toJSONString(Object obj, SerDeField hf, SerDe sd) throws HiveException {
-
-    if(hf.isList()) {
-      Class type = hf.getListElementType();
-      boolean is_string = (type == String.class);
-      boolean is_boolean = (type == Boolean.class);
-      boolean is_primitive = ReflectionSerDeField.isClassPrimitive(type);
-
-      Iterator iter = ((List)obj).iterator();
-      StringBuilder sb = new StringBuilder(LBRACKET);
-      String toPrefix = "";
-      boolean first = true;
-
-      while(iter.hasNext()) {
-        Object lobj = iter.next();
-
-        if(is_primitive) {
-          if(is_string) {
-            sb.append(toPrefix);
-            sb.append(QUOTE);
-            sb.append(escapeString((String)lobj));
-            sb.append(QUOTE);
-          } else if(is_boolean) {
-            sb.append(toPrefix+(((Boolean)lobj).booleanValue() ? "True" : "False"));
-          } else {
-            // it's a number - so doesn't need to be escaped.
-            sb.append(toPrefix+lobj.toString());
-          }
-        } else {
-          try {
-            sb.append(toPrefix+sd.toJSONString(lobj, null));
-          } catch(SerDeException e) {
-            throw new HiveException(e);
-          }
-        }
-
-        if(first) {
-          toPrefix = ",";
-          first = false;
-        }
-      }
-      sb.append(RBRACKET);
-      return (sb.toString());
-    } else if (hf.isMap()) {
-      Class keyType = hf.getMapKeyType();
-      Class valType = hf.getMapValueType();
-
-      boolean key_is_string = (keyType == String.class);
-      boolean key_is_boolean = (keyType == Boolean.class);
-      boolean key_is_primitive = ReflectionSerDeField.isClassPrimitive(keyType);
-
-      boolean val_is_string = (valType == String.class);
-      boolean val_is_boolean = (valType == Boolean.class);
-      boolean val_is_primitive = ReflectionSerDeField.isClassPrimitive(valType);
-
-
-
-      Iterator iter = ((Map)obj).keySet().iterator();
-      StringBuilder sb = new StringBuilder(LBRACE);
-      String toPrefix = "";
-      boolean first = true;
-
-      while(iter.hasNext()) {
-        Object lobj = iter.next();
-        Object robj = ((Map)obj).get(lobj);
-
-        // Emit key
-        if(key_is_primitive) {
-          if(key_is_string) {
-            sb.append(toPrefix);
-            sb.append(QUOTE);
-            sb.append(escapeString((String)lobj));
-            sb.append(QUOTE);
-          } else if(key_is_boolean) {
-            sb.append(toPrefix+(((Boolean)lobj).booleanValue() ? "True" : "False"));
-          } else {
-            // it's a number - so doesn't need to be escaped.
-            sb.append(toPrefix+lobj.toString());
-          }
-        } else {
-          try {
-            sb.append(toPrefix+sd.toJSONString(lobj, null));
-          } catch(SerDeException e) {
-            throw new HiveException(e);
-          }
-        }
-
-        sb.append(COLON);
-
-        // Emit val
-        if(val_is_primitive) {
-          if(val_is_string) {
-            sb.append(toPrefix);
-            sb.append(QUOTE);
-            sb.append(escapeString((String)robj));
-            sb.append(QUOTE);
-          } else if(val_is_boolean) {
-            sb.append(toPrefix+(((Boolean)robj).booleanValue() ? "True" : "False"));
-          } else {
-            // it's a number - so doesn't need to be escaped.
-            sb.append(toPrefix+robj.toString());
-          }
-        } else {
-          try {
-            sb.append(toPrefix+sd.toJSONString(robj, null));
-          } catch(SerDeException e) {
-            throw new HiveException(e);
-          }
-        }
-
-        if(first) {
-          toPrefix = ",";
-          first = false;
-        }
-      }
-      sb.append(RBRACE);
-      return (sb.toString());
-    } else {
-      throw new HiveException("HiveUtils.toJSONString only does lists");
-    }
-  }
 
   public static String escapeString(String str) {
     int length = str.length();
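
The toJSONString helper deleted above rendered lists and maps of mostly primitive values as JSON text, quoting and escaping only the string elements. A trimmed sketch of the list branch, restricted to String elements, is shown below; the class name and the minimal escape method are stand-ins for the original's escapeString and are not part of the sources.

    import java.util.List;

    // Illustrative only: the list branch of the removed toJSONString helper,
    // restricted to String elements.
    public class JsonListWriter {
      public static String toJsonList(List<String> values) {
        StringBuilder sb = new StringBuilder("[");
        boolean first = true;
        for (String v : values) {
          if (!first) {
            sb.append(",");
          }
          sb.append('"').append(escape(v)).append('"');
          first = false;
        }
        return sb.append("]").toString();
      }

      // Minimal escaping stand-in; the original delegated to HiveUtils.escapeString.
      private static String escape(String s) {
        return s.replace("\\", "\\\\").replace("\"", "\\\"");
      }
    }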

Modified: hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java?rev=697291&r1=697290&r2=697291&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java (original)
+++ hadoop/core/trunk/src/contrib/hive/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java Fri Sep 19 16:56:30 2008
@@ -22,6 +22,7 @@
 import java.net.URI;
 import java.net.URISyntaxException;
 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.regex.Matcher;
@@ -35,6 +36,8 @@
 import org.apache.hadoop.hive.metastore.Warehouse;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.hive.metastore.api.Order;
+import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
 
 /**
  * A Hive Table Partition: is a fundamental storage unit within a Table
@@ -46,6 +49,13 @@
 
     private Table table;
     private org.apache.hadoop.hive.metastore.api.Partition tPartition;
+    /**
+     * @return the tPartition
+     */
+    public org.apache.hadoop.hive.metastore.api.Partition getTPartition() {
+      return tPartition;
+    }
+
     private LinkedHashMap<String, String> spec;
     
     private Path partPath;
@@ -54,7 +64,7 @@
     Partition(Table tbl, org.apache.hadoop.hive.metastore.api.Partition tp) throws HiveException {
       this.table = tbl;
       this.tPartition = tp;
-      String partName = "";
+      partName = "";
       if(table.isPartitioned()) {
         try {
           partName = Warehouse.makePartName(tbl.getPartCols(), tp.getValues());
@@ -83,7 +93,14 @@
       this.table = tbl;
       // initialize the tPartition(thrift object) with the data from path and  table
       this.tPartition = new org.apache.hadoop.hive.metastore.api.Partition();
-      this.tPartition.setSd(tbl.getTTable().getSd());
+      this.tPartition.setDbName(tbl.getDbName());
+      this.tPartition.setTableName(tbl.getName());
+      StorageDescriptor sd = tbl.getTTable().getSd();
+      StorageDescriptor psd = new StorageDescriptor(
+          sd.getCols(), sd.getLocation(), sd.getInputFormat(), sd.getOutputFormat(),
+          sd.isCompressed(), sd.getNumBuckets(), sd.getSerdeInfo(), sd.getBucketCols(),
+          sd.getSortCols(), new HashMap<String, String>());
+      this.tPartition.setSd(psd);
       // change the partition location
       if(table.isPartitioned()) {
         this.partPath = path;
@@ -93,12 +110,17 @@
         this.partPath = table.getPath();
       }
       spec = makeSpecFromPath();
-      tPartition.getSd().setLocation(partPath.toString());
+      psd.setLocation(this.partPath.toString());
       List<String> partVals = new ArrayList<String> ();
       tPartition.setValues(partVals);
       for (FieldSchema field : tbl.getPartCols()) {
         partVals.add(spec.get(field.getName()));
       }
+      try {
+        this.partName = Warehouse.makePartName(tbl.getPartCols(), partVals);
+      } catch (MetaException e) {
+        throw new HiveException("Invalid partition key values", e);
+      }
     }
     
     static final Pattern pat = Pattern.compile("([^/]+)=([^/]+)");
@@ -162,8 +184,7 @@
     }
 
     public String getName() {
-        // starting to look really ugly now
-        return getPath()[0].toString();
+        return partName;
     }
 
     public Table getTable() {
@@ -242,6 +263,8 @@
      * mapping from a Path to the bucket number if any
      */
     private static Pattern bpattern = Pattern.compile("part-([0-9][0-9][0-9][0-9][0-9])");
+
+    private String partName;
     @SuppressWarnings("nls")
     public static int getBucketNum(Path p) {
         Matcher m = bpattern.matcher(p.getName());
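
The Partition.java change above stops sharing the table's StorageDescriptor and instead builds a per-partition copy before overwriting the location, so pointing a partition at its own directory no longer mutates the table's descriptor (the "TODO: get a copy" note in the Hive.java hunk points at the same issue). The sketch below mirrors the constructor call in the diff; the helper class and method names are illustrative.

    import java.util.HashMap;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hive.metastore.api.StorageDescriptor;

    // Illustrative only: defensive copy of the table's StorageDescriptor
    // before assigning the partition-specific location.
    public class PartitionSdCopy {
      public static StorageDescriptor copyForPartition(StorageDescriptor tableSd, Path partPath) {
        StorageDescriptor psd = new StorageDescriptor(
            tableSd.getCols(), tableSd.getLocation(), tableSd.getInputFormat(),
            tableSd.getOutputFormat(), tableSd.isCompressed(), tableSd.getNumBuckets(),
            tableSd.getSerdeInfo(), tableSd.getBucketCols(), tableSd.getSortCols(),
            new HashMap<String, String>());
        psd.setLocation(partPath.toString()); // mutates only the copy, never the table's descriptor
        return psd;
      }
    }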


