hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From br...@apache.org
Subject svn commit: r1620103 [12/27] - in /hive/branches/spark: ./ accumulo-handler/ common/src/java/org/apache/hadoop/hive/ant/ common/src/java/org/apache/hadoop/hive/common/type/ common/src/test/org/apache/hadoop/hive/common/type/ data/files/ hcatalog/stream...
Date Sun, 24 Aug 2014 03:43:57 GMT
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/AcidInputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/AcidInputFormat.java?rev=1620103&r1=1620102&r2=1620103&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/AcidInputFormat.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/AcidInputFormat.java Sun Aug 24 03:43:48 2014
@@ -22,7 +22,7 @@ import org.apache.hadoop.conf.Configurat
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.common.ValidTxnList;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.mapred.InputFormat;
 import org.apache.hadoop.mapred.InputSplit;
 import org.apache.hadoop.mapred.RecordReader;
@@ -86,11 +86,20 @@ import java.io.IOException;
  * <p>
  * To support transitions between non-ACID layouts to ACID layouts, the input
  * formats are expected to support both layouts and detect the correct one.
- *
- * @param <V> The row type
+ * <p>
+ *   A note on the KEY of this InputFormat.  
+ *   For row-at-a-time processing, KEY can conveniently pass RowId into the operator
+ *   pipeline.  For vectorized execution the KEY could perhaps represent a range in the batch.
+ *   Since {@link org.apache.hadoop.hive.ql.io.orc.OrcInputFormat} is declared to return
+ *   {@code NullWritable} key, {@link org.apache.hadoop.hive.ql.io.AcidRecordReader} is defined
+ *   to provide access to the RowId.  Other implementations of AcidInputFormat can use either
+ *   mechanism.
+ * </p>
+ * 
+ * @param <VALUE> The row type
  */
-public interface AcidInputFormat<V>
-    extends InputFormat<NullWritable, V>, InputFormatChecker {
+public interface AcidInputFormat<KEY extends WritableComparable, VALUE>
+    extends InputFormat<KEY, VALUE>, InputFormatChecker {
 
   /**
    * Options for controlling the record readers.
@@ -140,7 +149,7 @@ public interface AcidInputFormat<V>
    * @return a record reader
    * @throws IOException
    */
-  public RowReader<V> getReader(InputSplit split,
+  public RowReader<VALUE> getReader(InputSplit split,
                                 Options options) throws IOException;
 
   public static interface RawReader<V>
@@ -162,11 +171,18 @@ public interface AcidInputFormat<V>
    * @return a record reader
    * @throws IOException
    */
-   RawReader<V> getRawReader(Configuration conf,
+   RawReader<VALUE> getRawReader(Configuration conf,
                              boolean collapseEvents,
                              int bucket,
                              ValidTxnList validTxnList,
                              Path baseDirectory,
                              Path[] deltaDirectory
                              ) throws IOException;
+
+  /**
+   * RecordReader returned by AcidInputFormat working in row-at-a-time mode should AcidRecordReader.
+   */
+  public interface AcidRecordReader<K,V> extends RecordReader<K,V> {
+    RecordIdentifier getRecordIdentifier();
+  }
 }

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/AcidOutputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/AcidOutputFormat.java?rev=1620103&r1=1620102&r2=1620103&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/AcidOutputFormat.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/AcidOutputFormat.java Sun Aug 24 03:43:48 2014
@@ -23,7 +23,7 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.mapred.Reporter;
 
 import java.io.IOException;
@@ -34,7 +34,7 @@ import java.util.Properties;
  * An extension for OutputFormats that want to implement ACID transactions.
  * @param <V> the row type of the file
  */
-public interface AcidOutputFormat<V> extends HiveOutputFormat<NullWritable, V> {
+public interface AcidOutputFormat<K extends WritableComparable, V> extends HiveOutputFormat<K, V> {
 
   /**
    * Options to control how the files are written

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/HiveContextAwareRecordReader.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/HiveContextAwareRecordReader.java?rev=1620103&r1=1620102&r2=1620103&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/HiveContextAwareRecordReader.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/HiveContextAwareRecordReader.java Sun Aug 24 03:43:48 2014
@@ -20,17 +20,13 @@ package org.apache.hadoop.hive.ql.io;
 
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
-import java.util.Properties;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.common.ObjectPair;
 import org.apache.hadoop.hive.io.HiveIOExceptionHandlerUtil;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.exec.FooterBuffer;
@@ -42,16 +38,13 @@ import org.apache.hadoop.hive.ql.udf.gen
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan;
-import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.WritableComparable;
-import org.apache.hadoop.io.WritableUtils;
 import org.apache.hadoop.mapred.FileSplit;
 import org.apache.hadoop.mapred.InputSplit;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.RecordReader;
-import org.apache.hadoop.util.ReflectionUtils;
 
 /** This class prepares an IOContext, and provides the ability to perform a binary search on the
   * data.  The binary search can be used by setting the value of inputFormatSorted in the
@@ -119,7 +112,18 @@ public abstract class HiveContextAwareRe
     }
     updateIOContext();
     try {
-      return doNext(key, value);
+      boolean retVal = doNext(key, value);
+      if(retVal) {
+        if(key instanceof RecordIdentifier) {
+          //supports AcidInputFormat which uses the KEY pass ROW__ID info
+          ioCxtRef.ri = (RecordIdentifier)key;
+        }
+        else if(recordReader instanceof AcidInputFormat.AcidRecordReader) {
+          //supports AcidInputFormat which do not use the KEY pass ROW__ID info
+          ioCxtRef.ri = ((AcidInputFormat.AcidRecordReader) recordReader).getRecordIdentifier();
+        }
+      }
+      return retVal;
     } catch (IOException e) {
       ioCxtRef.setIOExceptions(true);
       throw e;

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/IOContext.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/IOContext.java?rev=1620103&r1=1620102&r2=1620103&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/IOContext.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/IOContext.java Sun Aug 24 03:43:48 2014
@@ -61,6 +61,10 @@ public class IOContext {
   Comparison comparison = null;
   // The class name of the generic UDF being used by the filter
   String genericUDFClassName = null;
+  /**
+   * supports {@link org.apache.hadoop.hive.ql.metadata.VirtualColumn#ROWID}
+   */
+  public RecordIdentifier ri;
 
   public static enum Comparison {
     GREATER,

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/RecordIdentifier.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/RecordIdentifier.java?rev=1620103&r1=1620102&r2=1620103&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/RecordIdentifier.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/RecordIdentifier.java Sun Aug 24 03:43:48 2014
@@ -19,16 +19,81 @@
 package org.apache.hadoop.hive.ql.io;
 
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 import org.apache.hadoop.io.WritableComparable;
 
 import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
 
 /**
- * Gives the Record identifer information for the current record.
+ * Gives the Record identifier information for the current record.
  */
 public class RecordIdentifier implements WritableComparable<RecordIdentifier> {
+  /**
+   * This is in support of {@link org.apache.hadoop.hive.ql.metadata.VirtualColumn#ROWID}
+   * Contains metadata about each field in RecordIdentifier that needs to be part of ROWID
+   * which is represented as a struct {@link org.apache.hadoop.hive.ql.io.RecordIdentifier.StructInfo}.
+   * Each field of RecordIdentifier which should be part of ROWID should be in this enum... which 
+   * really means that it should be part of VirtualColumn (so make a subclass for rowid).
+   */
+  public static enum Field {
+    //note the enum names match field names in the struct
+    transactionId(TypeInfoFactory.longTypeInfo,
+      PrimitiveObjectInspectorFactory.javaLongObjectInspector),
+    bucketId(TypeInfoFactory.intTypeInfo, PrimitiveObjectInspectorFactory.javaIntObjectInspector),
+    rowId(TypeInfoFactory.longTypeInfo, PrimitiveObjectInspectorFactory.javaLongObjectInspector);
+    public final TypeInfo fieldType;
+    public final ObjectInspector fieldOI;
+    Field(TypeInfo fieldType, ObjectInspector fieldOI) {
+      this.fieldType = fieldType;
+      this.fieldOI = fieldOI;
+    }
+  }
+  /**
+   * RecordIdentifier is passed along the operator tree as a struct.  This class contains a few
+   * utilities for that.
+   */
+  public static final class StructInfo {
+    private static final List<String> fieldNames = new ArrayList<String>(Field.values().length);
+    private static final List<TypeInfo> fieldTypes = new ArrayList<TypeInfo>(fieldNames.size());
+    private static final List<ObjectInspector> fieldOis = 
+      new ArrayList<ObjectInspector>(fieldNames.size());
+    static {
+      for(Field f : Field.values()) {
+        fieldNames.add(f.name());
+        fieldTypes.add(f.fieldType);
+        fieldOis.add(f.fieldOI);
+      }
+    }
+    public static final TypeInfo typeInfo = 
+      TypeInfoFactory.getStructTypeInfo(fieldNames, fieldTypes);
+    public static final ObjectInspector oi = 
+      ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOis);
+
+    /**
+     * Copies relevant fields from {@code ri} to {@code struct}
+     * @param ri
+     * @param struct must be of size Field.values().size()
+     */
+    public static void toArray(RecordIdentifier ri, Object[] struct) {
+      assert struct != null && struct.length == Field.values().length;
+      if(ri == null) {
+        Arrays.fill(struct, null);
+        return;
+      }
+      struct[Field.transactionId.ordinal()] = ri.getTransactionId();
+      struct[Field.bucketId.ordinal()] = ri.getBucketId();
+      struct[Field.rowId.ordinal()] = ri.getRowId();
+    }
+  }
+  
   private long transactionId;
   private int bucketId;
   private long rowId;

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroGenericRecordReader.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroGenericRecordReader.java?rev=1620103&r1=1620102&r2=1620103&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroGenericRecordReader.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroGenericRecordReader.java Sun Aug 24 03:43:48 2014
@@ -127,7 +127,7 @@ public class AvroGenericRecordReader imp
     String s = job.get(AvroSerdeUtils.AVRO_SERDE_SCHEMA);
     if(s != null) {
       LOG.info("Found the avro schema in the job: " + s);
-      return Schema.parse(s);
+      return AvroSerdeUtils.getSchemaFor(s);
     }
     // No more places to get the schema from. Give up.  May have to re-encode later.
     return null;

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java?rev=1620103&r1=1620102&r2=1620103&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java Sun Aug 24 03:43:48 2014
@@ -98,7 +98,7 @@ import com.google.common.util.concurrent
  */
 public class OrcInputFormat  implements InputFormat<NullWritable, OrcStruct>,
   InputFormatChecker, VectorizedInputFormatInterface,
-    AcidInputFormat<OrcStruct> {
+    AcidInputFormat<NullWritable, OrcStruct> {
 
   private static final Log LOG = LogFactory.getLog(OrcInputFormat.class);
   static final HadoopShims SHIMS = ShimLoader.getHadoopShims();
@@ -989,7 +989,7 @@ public class OrcInputFormat  implements 
     boolean vectorMode = Utilities.isVectorMode(conf);
 
     // if HiveCombineInputFormat gives us FileSplits instead of OrcSplits,
-    // we know it is not ACID.
+    // we know it is not ACID. (see a check in CombineHiveInputFormat.getSplits() that assures this)
     if (inputSplit.getClass() == FileSplit.class) {
       if (vectorMode) {
         return createVectorizedReader(inputSplit, conf, reporter);
@@ -998,62 +998,75 @@ public class OrcInputFormat  implements 
           ((FileSplit) inputSplit).getPath(),
           OrcFile.readerOptions(conf)), conf, (FileSplit) inputSplit);
     }
-
+    
     OrcSplit split = (OrcSplit) inputSplit;
     reporter.setStatus(inputSplit.toString());
 
-    // if we are strictly old-school, just use the old code
+    Options options = new Options(conf).reporter(reporter);
+    final RowReader<OrcStruct> inner = getReader(inputSplit, options);
+    
+    
+    /*Even though there are no delta files, we still need to produce row ids so that an
+    * UPDATE or DELETE statement would work on a table which didn't have any previous updates*/
     if (split.isOriginal() && split.getDeltas().isEmpty()) {
       if (vectorMode) {
         return createVectorizedReader(inputSplit, conf, reporter);
       } else {
-        return new OrcRecordReader(OrcFile.createReader(split.getPath(),
-            OrcFile.readerOptions(conf)), conf, split);
+        return new NullKeyRecordReader(inner, conf);
       }
     }
 
-    Options options = new Options(conf).reporter(reporter);
-    final RowReader<OrcStruct> inner = getReader(inputSplit, options);
     if (vectorMode) {
       return (org.apache.hadoop.mapred.RecordReader)
           new VectorizedOrcAcidRowReader(inner, conf, (FileSplit) inputSplit);
     }
-    final RecordIdentifier id = inner.createKey();
-
-    // Return a RecordReader that is compatible with the Hive 0.12 reader
-    // with NullWritable for the key instead of RecordIdentifier.
-    return new org.apache.hadoop.mapred.RecordReader<NullWritable, OrcStruct>(){
-      @Override
-      public boolean next(NullWritable nullWritable,
-                          OrcStruct orcStruct) throws IOException {
-        return inner.next(id, orcStruct);
-      }
+    return new NullKeyRecordReader(inner, conf);
+  }
+  /**
+   * Return a RecordReader that is compatible with the Hive 0.12 reader
+   * with NullWritable for the key instead of RecordIdentifier.
+   */
+  public static final class NullKeyRecordReader implements AcidRecordReader<NullWritable, OrcStruct> {
+    private final RecordIdentifier id;
+    private final RowReader<OrcStruct> inner;
+
+    public RecordIdentifier getRecordIdentifier() {
+      return id;
+    }
+    private NullKeyRecordReader(RowReader<OrcStruct> inner, Configuration conf) {
+      this.inner = inner;
+      id = inner.createKey();
+    }
+    @Override
+    public boolean next(NullWritable nullWritable,
+                        OrcStruct orcStruct) throws IOException {
+      return inner.next(id, orcStruct);
+    }
 
-      @Override
-      public NullWritable createKey() {
-        return NullWritable.get();
-      }
+    @Override
+    public NullWritable createKey() {
+      return NullWritable.get();
+    }
 
-      @Override
-      public OrcStruct createValue() {
-        return inner.createValue();
-      }
+    @Override
+    public OrcStruct createValue() {
+      return inner.createValue();
+    }
 
-      @Override
-      public long getPos() throws IOException {
-        return inner.getPos();
-      }
+    @Override
+    public long getPos() throws IOException {
+      return inner.getPos();
+    }
 
-      @Override
-      public void close() throws IOException {
-        inner.close();
-      }
+    @Override
+    public void close() throws IOException {
+      inner.close();
+    }
 
-      @Override
-      public float getProgress() throws IOException {
-        return inner.getProgress();
-      }
-    };
+    @Override
+    public float getProgress() throws IOException {
+      return inner.getProgress();
+    }
   }
 
 

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java?rev=1620103&r1=1620102&r2=1620103&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java Sun Aug 24 03:43:48 2014
@@ -50,7 +50,7 @@ import java.util.Properties;
  * A Hive OutputFormat for ORC files.
  */
 public class OrcOutputFormat extends FileOutputFormat<NullWritable, OrcSerdeRow>
-                        implements AcidOutputFormat<OrcSerdeRow> {
+                        implements AcidOutputFormat<NullWritable, OrcSerdeRow> {
 
   private static class OrcRecordWriter
       implements RecordWriter<NullWritable, OrcSerdeRow>,

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java?rev=1620103&r1=1620102&r2=1620103&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java Sun Aug 24 03:43:48 2014
@@ -88,6 +88,9 @@ public class OrcRecordUpdater implements
   private final IntWritable bucket = new IntWritable();
   private final LongWritable rowId = new LongWritable();
   private long insertedRows = 0;
+  // This records how many rows have been inserted or deleted.  It is separate from insertedRows
+  // because that is monotonically increasing to give new unique row ids.
+  private long rowCountDelta = 0;
   private final KeyIndexBuilder indexBuilder = new KeyIndexBuilder();
 
   static class AcidStats {
@@ -263,6 +266,7 @@ public class OrcRecordUpdater implements
     }
     addEvent(INSERT_OPERATION, currentTransaction, currentTransaction,
         insertedRows++, row);
+    rowCountDelta++;
   }
 
   @Override
@@ -283,6 +287,7 @@ public class OrcRecordUpdater implements
     }
     addEvent(DELETE_OPERATION, currentTransaction, originalTransaction, rowId,
         null);
+    rowCountDelta--;
   }
 
   @Override
@@ -317,7 +322,11 @@ public class OrcRecordUpdater implements
 
   @Override
   public SerDeStats getStats() {
-    return null;
+    SerDeStats stats = new SerDeStats();
+    stats.setRowCount(rowCountDelta);
+    // Don't worry about setting raw data size diff.  I have no idea how to calculate that
+    // without finding the row we are updating or deleting, which would be a mess.
+    return stats;
   }
 
   @VisibleForTesting

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java?rev=1620103&r1=1620102&r2=1620103&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java Sun Aug 24 03:43:48 2014
@@ -21,6 +21,7 @@ import static org.apache.hadoop.hive.con
 
 import java.io.EOFException;
 import java.io.IOException;
+import java.math.BigDecimal;
 import java.math.BigInteger;
 import java.nio.ByteBuffer;
 import java.sql.Timestamp;
@@ -1292,8 +1293,9 @@ class RecordReaderImpl implements Record
             BigInteger bInt = SerializationUtils.readBigInteger(valueStream);
             result.vector[i].update(bInt, (short) scratchScaleVector.vector[i]);
 
-            // Change the scale to match the schema if the scale in data is different.
-            if (scale != scratchScaleVector.vector[i]) {
+            // Change the scale to match the schema if the scale is less than in data.
+            // (HIVE-7373) If scale is bigger, then it leaves the original trailing zeros
+            if (scale < scratchScaleVector.vector[i]) {
               result.vector[i].changeScaleDestructive((short) scale);
             }
           }
@@ -2410,6 +2412,9 @@ class RecordReaderImpl implements Record
 
   private static Object getBaseObjectForComparison(Object predObj, Object statsObj) {
     if (predObj != null) {
+      if (predObj instanceof ExprNodeConstantDesc) {
+        predObj = ((ExprNodeConstantDesc) predObj).getValue();
+      }
       // following are implicitly convertible
       if (statsObj instanceof Long) {
         if (predObj instanceof Double) {
@@ -2428,10 +2433,6 @@ class RecordReaderImpl implements Record
           return Double.valueOf(predObj.toString());
         }
       } else if (statsObj instanceof String) {
-        // Ex: where d = date '1970-02-01' will be ExprNodeConstantDesc
-        if (predObj instanceof ExprNodeConstantDesc) {
-          return ((ExprNodeConstantDesc) predObj).getValue().toString();
-        }
         return predObj.toString();
       } else if (statsObj instanceof HiveDecimal) {
         if (predObj instanceof Long) {
@@ -2440,6 +2441,8 @@ class RecordReaderImpl implements Record
           return HiveDecimal.create(predObj.toString());
         } else if (predObj instanceof String) {
           return HiveDecimal.create(predObj.toString());
+        } else if (predObj instanceof BigDecimal) {
+          return HiveDecimal.create((BigDecimal)predObj);
         }
       }
     }

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java?rev=1620103&r1=1620102&r2=1620103&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerWriterV2.java Sun Aug 24 03:43:48 2014
@@ -142,9 +142,9 @@ class RunLengthIntegerWriterV2 implement
   private final boolean signed;
   private EncodingType encoding;
   private int numLiterals;
-  private long[] zigzagLiterals;
-  private long[] baseRedLiterals;
-  private long[] adjDeltas;
+  private final long[] zigzagLiterals = new long[MAX_SCOPE];
+  private final long[] baseRedLiterals = new long[MAX_SCOPE];
+  private final long[] adjDeltas = new long[MAX_SCOPE];
   private long fixedDelta;
   private int zzBits90p;
   private int zzBits100p;
@@ -252,8 +252,11 @@ class RunLengthIntegerWriterV2 implement
       // store the first value as delta value using zigzag encoding
       utils.writeVslong(output, adjDeltas[0]);
 
-      // adjacent delta values are bit packed
-      utils.writeInts(adjDeltas, 1, adjDeltas.length - 1, fb, output);
+      // adjacent delta values are bit packed. The length of adjDeltas array is
+      // always one less than the number of literals (delta difference for n
+      // elements is n-1). We have already written one element, write the
+      // remaining numLiterals - 2 elements here
+      utils.writeInts(adjDeltas, 1, numLiterals - 2, fb, output);
     }
   }
 
@@ -323,7 +326,7 @@ class RunLengthIntegerWriterV2 implement
     // base reduced literals are bit packed
     int closestFixedBits = utils.getClosestFixedBits(fb);
 
-    utils.writeInts(baseRedLiterals, 0, baseRedLiterals.length, closestFixedBits,
+    utils.writeInts(baseRedLiterals, 0, numLiterals, closestFixedBits,
         output);
 
     // write patch list
@@ -372,7 +375,7 @@ class RunLengthIntegerWriterV2 implement
     output.write(headerSecondByte);
 
     // bit packing the zigzag encoded literals
-    utils.writeInts(zigzagLiterals, 0, zigzagLiterals.length, fb, output);
+    utils.writeInts(zigzagLiterals, 0, numLiterals, fb, output);
 
     // reset run length
     variableRunLength = 0;
@@ -414,14 +417,6 @@ class RunLengthIntegerWriterV2 implement
   }
 
   private void determineEncoding() {
-    // used for direct encoding
-    zigzagLiterals = new long[numLiterals];
-
-    // used for patched base encoding
-    baseRedLiterals = new long[numLiterals];
-
-    // used for delta encoding
-    adjDeltas = new long[numLiterals - 1];
 
     int idx = 0;
 
@@ -530,10 +525,10 @@ class RunLengthIntegerWriterV2 implement
     // is not significant then we can use direct or delta encoding
 
     double p = 0.9;
-    zzBits90p = utils.percentileBits(zigzagLiterals, p);
+    zzBits90p = utils.percentileBits(zigzagLiterals, 0, numLiterals, p);
 
     p = 1.0;
-    zzBits100p = utils.percentileBits(zigzagLiterals, p);
+    zzBits100p = utils.percentileBits(zigzagLiterals, 0, numLiterals, p);
 
     int diffBitsLH = zzBits100p - zzBits90p;
 
@@ -543,18 +538,18 @@ class RunLengthIntegerWriterV2 implement
         && isFixedDelta == false) {
       // patching is done only on base reduced values.
       // remove base from literals
-      for(int i = 0; i < zigzagLiterals.length; i++) {
+      for(int i = 0; i < numLiterals; i++) {
         baseRedLiterals[i] = literals[i] - min;
       }
 
       // 95th percentile width is used to determine max allowed value
       // after which patching will be done
       p = 0.95;
-      brBits95p = utils.percentileBits(baseRedLiterals, p);
+      brBits95p = utils.percentileBits(baseRedLiterals, 0, numLiterals, p);
 
       // 100th percentile is used to compute the max patch width
       p = 1.0;
-      brBits100p = utils.percentileBits(baseRedLiterals, p);
+      brBits100p = utils.percentileBits(baseRedLiterals, 0, numLiterals, p);
 
       // after base reducing the values, if the difference in bits between
       // 95th percentile and 100th percentile value is zero then there
@@ -592,7 +587,7 @@ class RunLengthIntegerWriterV2 implement
 
     // since we are considering only 95 percentile, the size of gap and
     // patch array can contain only be 5% values
-    patchLength = (int) Math.ceil((baseRedLiterals.length * 0.05));
+    patchLength = (int) Math.ceil((numLiterals * 0.05));
 
     int[] gapList = new int[patchLength];
     long[] patchList = new long[patchLength];
@@ -616,7 +611,7 @@ class RunLengthIntegerWriterV2 implement
     int gap = 0;
     int maxGap = 0;
 
-    for(int i = 0; i < baseRedLiterals.length; i++) {
+    for(int i = 0; i < numLiterals; i++) {
       // if value is above mask then create the patch and record the gap
       if (baseRedLiterals[i] > mask) {
         gap = i - prev;
@@ -694,9 +689,6 @@ class RunLengthIntegerWriterV2 implement
     numLiterals = 0;
     encoding = null;
     prevDelta = 0;
-    zigzagLiterals = null;
-    baseRedLiterals = null;
-    adjDeltas = null;
     fixedDelta = 0;
     zzBits90p = 0;
     zzBits100p = 0;

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/SerializationUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/SerializationUtils.java?rev=1620103&r1=1620102&r2=1620103&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/SerializationUtils.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/SerializationUtils.java Sun Aug 24 03:43:48 2014
@@ -244,7 +244,7 @@ final class SerializationUtils {
    * @param p - percentile value (>=0.0 to <=1.0)
    * @return pth percentile bits
    */
-  int percentileBits(long[] data, double p) {
+  int percentileBits(long[] data, int offset, int length, double p) {
     if ((p > 1.0) || (p <= 0.0)) {
       return -1;
     }
@@ -254,13 +254,12 @@ final class SerializationUtils {
     int[] hist = new int[32];
 
     // compute the histogram
-    for(long l : data) {
-      int idx = encodeBitWidth(findClosestNumBits(l));
+    for(int i = offset; i < (offset + length); i++) {
+      int idx = encodeBitWidth(findClosestNumBits(data[i]));
       hist[idx] += 1;
     }
 
-    int len = data.length;
-    int perLen = (int) (len * (1.0 - p));
+    int perLen = (int) (length * (1.0 - p));
 
     // return the bits required by pth percentile length
     for(int i = hist.length - 1; i >= 0; i--) {

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ProjectionPusher.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ProjectionPusher.java?rev=1620103&r1=1620102&r2=1620103&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ProjectionPusher.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ProjectionPusher.java Sun Aug 24 03:43:48 2014
@@ -146,7 +146,7 @@ public class ProjectionPusher {
     if ((part != null) && (part.getTableDesc() != null)) {
       Utilities.copyTableJobPropertiesToConf(part.getTableDesc(), cloneJobConf);
     }
-    pushProjectionsAndFilters(cloneJobConf, path.toString(), path.toUri().toString());
+    pushProjectionsAndFilters(cloneJobConf, path.toString(), path.toUri().getPath());
     return cloneJobConf;
   }
 }

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ArrayWritableGroupConverter.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ArrayWritableGroupConverter.java?rev=1620103&r1=1620102&r2=1620103&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ArrayWritableGroupConverter.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ArrayWritableGroupConverter.java Sun Aug 24 03:43:48 2014
@@ -13,6 +13,9 @@
  */
 package org.apache.hadoop.hive.ql.io.parquet.convert;
 
+import java.util.List;
+
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.io.ArrayWritable;
 import org.apache.hadoop.io.Writable;
 
@@ -30,7 +33,7 @@ public class ArrayWritableGroupConverter
   private Writable[] mapPairContainer;
 
   public ArrayWritableGroupConverter(final GroupType groupType, final HiveGroupConverter parent,
-      final int index) {
+      final int index, List<TypeInfo> hiveSchemaTypeInfos) {
     this.parent = parent;
     this.index = index;
     int count = groupType.getFieldCount();
@@ -40,7 +43,8 @@ public class ArrayWritableGroupConverter
     isMap = count == 2;
     converters = new Converter[count];
     for (int i = 0; i < count; i++) {
-      converters[i] = getConverterFromDescription(groupType.getType(i), i, this);
+      converters[i] = getConverterFromDescription(groupType.getType(i), i, this,
+          hiveSchemaTypeInfos);
     }
   }
 

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableGroupConverter.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableGroupConverter.java?rev=1620103&r1=1620102&r2=1620103&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableGroupConverter.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableGroupConverter.java Sun Aug 24 03:43:48 2014
@@ -16,6 +16,7 @@ package org.apache.hadoop.hive.ql.io.par
 import java.util.ArrayList;
 import java.util.List;
 
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.io.ArrayWritable;
 import org.apache.hadoop.io.Writable;
 
@@ -36,19 +37,21 @@ public class DataWritableGroupConverter 
   private final Object[] currentArr;
   private Writable[] rootMap;
 
-  public DataWritableGroupConverter(final GroupType requestedSchema, final GroupType tableSchema) {
-    this(requestedSchema, null, 0, tableSchema);
+  public DataWritableGroupConverter(final GroupType requestedSchema, final GroupType tableSchema,
+      final List<TypeInfo> hiveSchemaTypeInfos) {
+    this(requestedSchema, null, 0, tableSchema, hiveSchemaTypeInfos);
     final int fieldCount = tableSchema.getFieldCount();
     this.rootMap = new Writable[fieldCount];
   }
 
   public DataWritableGroupConverter(final GroupType groupType, final HiveGroupConverter parent,
-      final int index) {
-    this(groupType, parent, index, groupType);
+      final int index, final List<TypeInfo> hiveSchemaTypeInfos) {
+    this(groupType, parent, index, groupType, hiveSchemaTypeInfos);
   }
 
   public DataWritableGroupConverter(final GroupType selectedGroupType,
-      final HiveGroupConverter parent, final int index, final GroupType containingGroupType) {
+      final HiveGroupConverter parent, final int index, final GroupType containingGroupType,
+      final List<TypeInfo> hiveSchemaTypeInfos) {
     this.parent = parent;
     this.index = index;
     final int totalFieldCount = containingGroupType.getFieldCount();
@@ -62,7 +65,8 @@ public class DataWritableGroupConverter 
       Type subtype = selectedFields.get(i);
       if (containingGroupType.getFields().contains(subtype)) {
         converters[i] = getConverterFromDescription(subtype,
-            containingGroupType.getFieldIndex(subtype.getName()), this);
+            containingGroupType.getFieldIndex(subtype.getName()), this,
+            hiveSchemaTypeInfos);
       } else {
         throw new IllegalStateException("Group type [" + containingGroupType +
             "] does not contain requested field: " + subtype);

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableRecordConverter.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableRecordConverter.java?rev=1620103&r1=1620102&r2=1620103&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableRecordConverter.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableRecordConverter.java Sun Aug 24 03:43:48 2014
@@ -13,6 +13,9 @@
  */
 package org.apache.hadoop.hive.ql.io.parquet.convert;
 
+import java.util.List;
+
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.io.ArrayWritable;
 
 import parquet.io.api.GroupConverter;
@@ -28,8 +31,10 @@ public class DataWritableRecordConverter
 
   private final DataWritableGroupConverter root;
 
-  public DataWritableRecordConverter(final GroupType requestedSchema, final GroupType tableSchema) {
-    this.root = new DataWritableGroupConverter(requestedSchema, tableSchema);
+  public DataWritableRecordConverter(final GroupType requestedSchema, final GroupType tableSchema,
+      final List<TypeInfo> hiveColumnTypeInfos) {
+    this.root = new DataWritableGroupConverter(requestedSchema, tableSchema,
+        hiveColumnTypeInfos);
   }
 
   @Override
@@ -41,4 +46,4 @@ public class DataWritableRecordConverter
   public GroupConverter getRootConverter() {
     return root;
   }
-}
+}
\ No newline at end of file

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java?rev=1620103&r1=1620102&r2=1620103&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java Sun Aug 24 03:43:48 2014
@@ -16,12 +16,19 @@ package org.apache.hadoop.hive.ql.io.par
 import java.math.BigDecimal;
 import java.sql.Timestamp;
 import java.util.ArrayList;
+import java.util.List;
 
+import org.apache.hadoop.hive.common.type.HiveChar;
+import org.apache.hadoop.hive.common.type.HiveVarchar;
 import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTime;
 import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTimeUtils;
+import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.io.HiveCharWritable;
 import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
 import org.apache.hadoop.hive.serde2.io.TimestampWritable;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.io.BooleanWritable;
 import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.FloatWritable;
@@ -145,6 +152,32 @@ public enum ETypeConverter {
         }
       };
     }
+  },
+  ECHAR_CONVERTER(HiveCharWritable.class) {
+    @Override
+    Converter getConverter(final PrimitiveType type, final int index, final HiveGroupConverter parent) {
+      return new BinaryConverter<HiveCharWritable>(type, parent, index) {
+        @Override
+        protected HiveCharWritable convert(Binary binary) {
+          HiveChar hiveChar = new HiveChar();
+          hiveChar.setValue(binary.toStringUsingUTF8());
+          return new HiveCharWritable(hiveChar);
+        }
+      };
+    }
+  },
+  EVARCHAR_CONVERTER(HiveVarcharWritable.class) {
+    @Override
+    Converter getConverter(final PrimitiveType type, final int index, final HiveGroupConverter parent) {
+      return new BinaryConverter<HiveVarcharWritable>(type, parent, index) {
+        @Override
+        protected HiveVarcharWritable convert(Binary binary) {
+          HiveVarchar hiveVarchar = new HiveVarchar();
+          hiveVarchar.setValue(binary.toStringUsingUTF8());
+          return new HiveVarcharWritable(hiveVarchar);
+        }
+      };
+    }
   };
 
   final Class<?> _type;
@@ -159,7 +192,8 @@ public enum ETypeConverter {
 
   abstract Converter getConverter(final PrimitiveType type, final int index, final HiveGroupConverter parent);
 
-  public static Converter getNewConverter(final PrimitiveType type, final int index, final HiveGroupConverter parent) {
+  public static Converter getNewConverter(final PrimitiveType type, final int index,
+      final HiveGroupConverter parent, List<TypeInfo> hiveSchemaTypeInfos) {
     if (type.isPrimitive() && (type.asPrimitiveType().getPrimitiveTypeName().equals(PrimitiveType.PrimitiveTypeName.INT96))) {
       //TODO- cleanup once parquet support Timestamp type annotation.
       return ETypeConverter.ETIMESTAMP_CONVERTER.getConverter(type, index, parent);
@@ -167,7 +201,15 @@ public enum ETypeConverter {
     if (OriginalType.DECIMAL == type.getOriginalType()) {
       return EDECIMAL_CONVERTER.getConverter(type, index, parent);
     } else if (OriginalType.UTF8 == type.getOriginalType()) {
-      return ESTRING_CONVERTER.getConverter(type, index, parent);
+      if (hiveSchemaTypeInfos.get(index).getTypeName()
+          .startsWith(serdeConstants.CHAR_TYPE_NAME)) {
+        return ECHAR_CONVERTER.getConverter(type, index, parent);
+      } else if (hiveSchemaTypeInfos.get(index).getTypeName()
+          .startsWith(serdeConstants.VARCHAR_TYPE_NAME)) {
+        return EVARCHAR_CONVERTER.getConverter(type, index, parent);
+      } else if (type.isPrimitive()) {
+        return ESTRING_CONVERTER.getConverter(type, index, parent);
+      }
     }
 
     Class<?> javaType = type.getPrimitiveTypeName().javaType;

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveGroupConverter.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveGroupConverter.java?rev=1620103&r1=1620102&r2=1620103&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveGroupConverter.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveGroupConverter.java Sun Aug 24 03:43:48 2014
@@ -13,6 +13,9 @@
  */
 package org.apache.hadoop.hive.ql.io.parquet.convert;
 
+import java.util.List;
+
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.io.Writable;
 
 import parquet.io.api.Converter;
@@ -23,17 +26,20 @@ import parquet.schema.Type.Repetition;
 public abstract class HiveGroupConverter extends GroupConverter {
 
   protected static Converter getConverterFromDescription(final Type type, final int index,
-      final HiveGroupConverter parent) {
+      final HiveGroupConverter parent, List<TypeInfo> hiveSchemaTypeInfos) {
     if (type == null) {
       return null;
     }
     if (type.isPrimitive()) {
-      return ETypeConverter.getNewConverter(type.asPrimitiveType(), index, parent);
+      return ETypeConverter.getNewConverter(type.asPrimitiveType(), index, parent,
+          hiveSchemaTypeInfos);
     } else {
       if (type.asGroupType().getRepetition() == Repetition.REPEATED) {
-        return new ArrayWritableGroupConverter(type.asGroupType(), parent, index);
+        return new ArrayWritableGroupConverter(type.asGroupType(), parent, index,
+            hiveSchemaTypeInfos);
       } else {
-        return new DataWritableGroupConverter(type.asGroupType(), parent, index);
+        return new DataWritableGroupConverter(type.asGroupType(), parent, index,
+            hiveSchemaTypeInfos);
       }
     }
   }
@@ -42,4 +48,4 @@ public abstract class HiveGroupConverter
 
   protected abstract void add(int index, Writable value);
 
-}
+}
\ No newline at end of file

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java?rev=1620103&r1=1620102&r2=1620103&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java Sun Aug 24 03:43:48 2014
@@ -16,6 +16,7 @@ package org.apache.hadoop.hive.ql.io.par
 import java.util.List;
 
 import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe;
+import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
 import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
@@ -25,7 +26,6 @@ import org.apache.hadoop.hive.serde2.typ
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 
 import parquet.schema.ConversionPatterns;
-import parquet.schema.DecimalMetadata;
 import parquet.schema.GroupType;
 import parquet.schema.MessageType;
 import parquet.schema.OriginalType;
@@ -81,6 +81,14 @@ public class HiveSchemaConverter {
         return new PrimitiveType(repetition, PrimitiveTypeName.INT96, name);
       } else if (typeInfo.equals(TypeInfoFactory.voidTypeInfo)) {
         throw new UnsupportedOperationException("Void type not implemented");
+      } else if (typeInfo.getTypeName().toLowerCase().startsWith(
+          serdeConstants.CHAR_TYPE_NAME)) {
+        return Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8)
+            .named(name);
+      } else if (typeInfo.getTypeName().toLowerCase().startsWith(
+          serdeConstants.VARCHAR_TYPE_NAME)) {
+        return Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8)
+            .named(name);
       } else if (typeInfo instanceof DecimalTypeInfo) {
         DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) typeInfo;
         int prec = decimalTypeInfo.precision();

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java?rev=1620103&r1=1620102&r2=1620103&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java Sun Aug 24 03:43:48 2014
@@ -14,6 +14,7 @@
 package org.apache.hadoop.hive.ql.io.parquet.read;
 
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -23,6 +24,8 @@ import org.apache.hadoop.hive.ql.io.IOCo
 import org.apache.hadoop.hive.ql.io.parquet.convert.DataWritableRecordConverter;
 import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
 import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 import org.apache.hadoop.io.ArrayWritable;
 import org.apache.hadoop.util.StringUtils;
 
@@ -60,6 +63,28 @@ public class DataWritableReadSupport ext
     return (List<String>) VirtualColumn.
         removeVirtualColumns(StringUtils.getStringCollection(columns));
   }
+
+  private static List<TypeInfo> getColumnTypes(Configuration configuration) {
+
+    List<String> columnNames;
+    String columnNamesProperty = configuration.get(IOConstants.COLUMNS);
+    if (columnNamesProperty.length() == 0) {
+      columnNames = new ArrayList<String>();
+    } else {
+      columnNames = Arrays.asList(columnNamesProperty.split(","));
+    }
+    List<TypeInfo> columnTypes;
+    String columnTypesProperty = configuration.get(IOConstants.COLUMNS_TYPES);
+    if (columnTypesProperty.length() == 0) {
+      columnTypes = new ArrayList<TypeInfo>();
+    } else {
+      columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypesProperty);
+    }
+
+    columnTypes = VirtualColumn.removeVirtualColumnTypes(columnNames, columnTypes);
+    return columnTypes;
+  }
+
   /**
    *
    * It creates the readContext for Parquet side with the requested schema during the init phase.
@@ -100,20 +125,22 @@ public class DataWritableReadSupport ext
       final List<Type> typeListWanted = new ArrayList<Type>();
       final boolean indexAccess = configuration.getBoolean(PARQUET_COLUMN_INDEX_ACCESS, false);
       for (final Integer idx : indexColumnsWanted) {
-        String col = listColumns.get(idx);
-        if (indexAccess) {
-          typeListWanted.add(tableSchema.getType(col));
-        } else {
-          col = col.toLowerCase();
-          if (lowerCaseFileSchemaColumns.containsKey(col)) {
-            typeListWanted.add(tableSchema.getType(lowerCaseFileSchemaColumns.get(col)));
+        if (idx < listColumns.size()) {
+          String col = listColumns.get(idx);
+          if (indexAccess) {
+            typeListWanted.add(tableSchema.getType(col));
           } else {
-            // should never occur?
-            String msg = "Column " + col + " at index " + idx + " does not exist in " +
+            col = col.toLowerCase();
+            if (lowerCaseFileSchemaColumns.containsKey(col)) {
+              typeListWanted.add(tableSchema.getType(lowerCaseFileSchemaColumns.get(col)));
+            } else {
+              // should never occur?
+              String msg = "Column " + col + " at index " + idx + " does not exist in " +
               lowerCaseFileSchemaColumns;
-            throw new IllegalStateException(msg);
+              throw new IllegalStateException(msg);
+            }
           }
-        }
+	}
       }
       requestedSchemaByUser = resolveSchemaAccess(new MessageType(fileSchema.getName(),
               typeListWanted), fileSchema, configuration);
@@ -146,7 +173,8 @@ public class DataWritableReadSupport ext
     }
     final MessageType tableSchema = resolveSchemaAccess(MessageTypeParser.
         parseMessageType(metadata.get(HIVE_SCHEMA_KEY)), fileSchema, configuration);
-    return new DataWritableRecordConverter(readContext.getRequestedSchema(), tableSchema);
+    return new DataWritableRecordConverter(readContext.getRequestedSchema(), tableSchema,
+        getColumnTypes(configuration));
   }
 
   /**

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ArrayWritableObjectInspector.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ArrayWritableObjectInspector.java?rev=1620103&r1=1620102&r2=1620103&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ArrayWritableObjectInspector.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ArrayWritableObjectInspector.java Sun Aug 24 03:43:48 2014
@@ -25,12 +25,14 @@ import org.apache.hadoop.hive.serde2.obj
 import org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
 import org.apache.hadoop.io.ArrayWritable;
 
 /**
@@ -102,12 +104,10 @@ public class ArrayWritableObjectInspecto
       return PrimitiveObjectInspectorFactory.writableTimestampObjectInspector;
     } else if (typeInfo.equals(TypeInfoFactory.dateTypeInfo)) {
       throw new UnsupportedOperationException("Parquet does not support date. See HIVE-6384");
-    } else if (typeInfo.getTypeName().toLowerCase().startsWith(serdeConstants.DECIMAL_TYPE_NAME)) {
-      throw new UnsupportedOperationException("Parquet does not support decimal. See HIVE-6384");
     } else if (typeInfo.getTypeName().toLowerCase().startsWith(serdeConstants.CHAR_TYPE_NAME)) {
-      throw new UnsupportedOperationException("Parquet does not support char. See HIVE-6384");
+      return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector((CharTypeInfo) typeInfo);
     } else if (typeInfo.getTypeName().toLowerCase().startsWith(serdeConstants.VARCHAR_TYPE_NAME)) {
-      throw new UnsupportedOperationException("Parquet does not support varchar. See HIVE-6384");
+      return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector((VarcharTypeInfo) typeInfo);
     } else {
       throw new UnsupportedOperationException("Unknown field type: " + typeInfo);
     }

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java?rev=1620103&r1=1620102&r2=1620103&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java Sun Aug 24 03:43:48 2014
@@ -42,6 +42,8 @@ import org.apache.hadoop.hive.serde2.obj
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveCharObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector;
@@ -60,6 +62,7 @@ import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Writable;
+import parquet.io.api.Binary;
 
 /**
  *
@@ -280,6 +283,12 @@ public class ParquetHiveSerDe extends Ab
       return new BytesWritable(tgt);
     case TIMESTAMP:
       return new TimestampWritable(((TimestampObjectInspector) inspector).getPrimitiveJavaObject(obj));
+    case CHAR:
+      String strippedValue = ((HiveCharObjectInspector) inspector).getPrimitiveJavaObject(obj).getStrippedValue();
+      return new BytesWritable(Binary.fromString(strippedValue).getBytes());
+    case VARCHAR:
+      String value = ((HiveVarcharObjectInspector) inspector).getPrimitiveJavaObject(obj).getValue();
+        return new BytesWritable(Binary.fromString(value).getBytes());
     default:
       throw new SerDeException("Unknown primitive : " + inspector.getPrimitiveCategory());
     }

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java?rev=1620103&r1=1620102&r2=1620103&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java Sun Aug 24 03:43:48 2014
@@ -18,14 +18,6 @@
 
 package org.apache.hadoop.hive.ql.io.sarg;
 
-import java.util.ArrayDeque;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Deque;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
 import com.esotericsoftware.kryo.Kryo;
 import com.esotericsoftware.kryo.io.Input;
 import com.esotericsoftware.kryo.io.Output;
@@ -57,6 +49,15 @@ import org.apache.hadoop.hive.serde2.obj
 import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 
+import java.math.BigDecimal;
+import java.util.ArrayDeque;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Deque;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
 /**
  * The implementation of SearchArguments.
  */
@@ -947,7 +948,8 @@ final class SearchArgumentImpl implement
           literal instanceof Long ||
           literal instanceof Double ||
           literal instanceof DateWritable ||
-          literal instanceof HiveDecimal) {
+          literal instanceof HiveDecimal ||
+          literal instanceof BigDecimal) {
         return literal;
       } else if (literal instanceof HiveChar ||
           literal instanceof HiveVarchar) {
@@ -979,7 +981,8 @@ final class SearchArgumentImpl implement
         return PredicateLeaf.Type.FLOAT;
       } else if (literal instanceof DateWritable) {
         return PredicateLeaf.Type.DATE;
-      } else if (literal instanceof HiveDecimal) {
+      } else if (literal instanceof HiveDecimal ||
+          literal instanceof BigDecimal) {
         return PredicateLeaf.Type.DECIMAL;
       }
       throw new IllegalArgumentException("Unknown type for literal " + literal);

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java?rev=1620103&r1=1620102&r2=1620103&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java Sun Aug 24 03:43:48 2014
@@ -165,13 +165,13 @@ public class DbTxnManager extends HiveTx
           break;
 
         case TABLE:
+        case DUMMYPARTITION:   // in case of dynamic partitioning lock the table
           t = output.getTable();
           compBuilder.setDbName(t.getDbName());
           compBuilder.setTableName(t.getTableName());
           break;
 
         case PARTITION:
-        case DUMMYPARTITION:
           compBuilder.setPartitionName(output.getPartition().getName());
           t = output.getPartition().getTable();
           compBuilder.setDbName(t.getDbName());
@@ -301,7 +301,10 @@ public class DbTxnManager extends HiveTx
     try {
       if (txnId > 0) rollbackTxn();
       if (lockMgr != null) lockMgr.close();
+      if (client != null) client.close();
     } catch (Exception e) {
+      LOG.error("Caught exception " + e.getClass().getName() + " with message <" + e.getMessage()
+      + ">, swallowing as there is nothing we can do with it.");
       // Not much we can do about it here.
     }
   }

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java?rev=1620103&r1=1620102&r2=1620103&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java Sun Aug 24 03:43:48 2014
@@ -89,6 +89,7 @@ import org.apache.hadoop.hive.metastore.
 import org.apache.hadoop.hive.metastore.api.Role;
 import org.apache.hadoop.hive.metastore.api.RolePrincipalGrant;
 import org.apache.hadoop.hive.metastore.api.SerDeInfo;
+import org.apache.hadoop.hive.metastore.api.SetPartitionsStatsRequest;
 import org.apache.hadoop.hive.metastore.api.ShowCompactResponse;
 import org.apache.hadoop.hive.metastore.api.SkewedInfo;
 import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
@@ -2553,6 +2554,15 @@ private void constructOneLBLocationMap(F
       throw new HiveException(e);
     }
   }
+  
+  public boolean setPartitionColumnStatistics(SetPartitionsStatsRequest request) throws HiveException {
+    try {
+      return getMSC().setPartitionColumnStatistics(request);
+    } catch (Exception e) {
+      LOG.debug(StringUtils.stringifyException(e));
+      throw new HiveException(e);
+    }
+  }
 
   public List<ColumnStatisticsObj> getTableColumnStatistics(
       String dbName, String tableName, List<String> colNames) throws HiveException {

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java?rev=1620103&r1=1620102&r2=1620103&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java Sun Aug 24 03:43:48 2014
@@ -233,6 +233,10 @@ public class Partition implements Serial
     return ret;
   }
 
+  public Path getPartitionPath() {
+    return getDataLocation();
+  }
+
   public Path getDataLocation() {
     if (table.isPartitioned()) {
       return new Path(tPartition.getSd().getLocation());

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java?rev=1620103&r1=1620102&r2=1620103&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java Sun Aug 24 03:43:48 2014
@@ -249,6 +249,8 @@ public class SessionHiveMetaStoreClient 
               + " is not a directory or unable to create one");
         }
       }
+      // Make sure location string is in proper format
+      tbl.getSd().setLocation(tblPath.toString());
     }
 
     // Add temp table info to current session

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java?rev=1620103&r1=1620102&r2=1620103&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java Sun Aug 24 03:43:48 2014
@@ -22,25 +22,36 @@ import java.io.Serializable;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.List;
+import java.util.ListIterator;
 
+import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Iterables;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.common.classification.InterfaceAudience;
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.io.RecordIdentifier;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 
+@InterfaceAudience.Private
 public class VirtualColumn implements Serializable {
 
   private static final long serialVersionUID = 1L;
 
-  public static VirtualColumn FILENAME = new VirtualColumn("INPUT__FILE__NAME", (PrimitiveTypeInfo)TypeInfoFactory.stringTypeInfo);
-  public static VirtualColumn BLOCKOFFSET = new VirtualColumn("BLOCK__OFFSET__INSIDE__FILE", (PrimitiveTypeInfo)TypeInfoFactory.longTypeInfo);
-  public static VirtualColumn ROWOFFSET = new VirtualColumn("ROW__OFFSET__INSIDE__BLOCK", (PrimitiveTypeInfo)TypeInfoFactory.longTypeInfo);
+  public static final VirtualColumn FILENAME = new VirtualColumn("INPUT__FILE__NAME", (PrimitiveTypeInfo)TypeInfoFactory.stringTypeInfo);
+  public static final VirtualColumn BLOCKOFFSET = new VirtualColumn("BLOCK__OFFSET__INSIDE__FILE", (PrimitiveTypeInfo)TypeInfoFactory.longTypeInfo);
+  public static final VirtualColumn ROWOFFSET = new VirtualColumn("ROW__OFFSET__INSIDE__BLOCK", (PrimitiveTypeInfo)TypeInfoFactory.longTypeInfo);
 
-  public static VirtualColumn RAWDATASIZE = new VirtualColumn("RAW__DATA__SIZE", (PrimitiveTypeInfo)TypeInfoFactory.longTypeInfo);
+  public static final VirtualColumn RAWDATASIZE = new VirtualColumn("RAW__DATA__SIZE", (PrimitiveTypeInfo)TypeInfoFactory.longTypeInfo);
+  /**
+   * {@link org.apache.hadoop.hive.ql.io.RecordIdentifier} 
+   */
+  public static final VirtualColumn ROWID = new VirtualColumn("ROW__ID", RecordIdentifier.StructInfo.typeInfo, true, RecordIdentifier.StructInfo.oi);
 
   /**
    * GROUPINGID is used with GROUP BY GROUPINGS SETS, ROLLUP and CUBE.
@@ -49,27 +60,28 @@ public class VirtualColumn implements Se
    * set if that column has been aggregated in that row. Otherwise the
    * value is "0".  Returns the decimal representation of the bit vector.
    */
-  public static VirtualColumn GROUPINGID =
+  public static final VirtualColumn GROUPINGID =
       new VirtualColumn("GROUPING__ID", (PrimitiveTypeInfo) TypeInfoFactory.intTypeInfo);
 
-  public static VirtualColumn[] VIRTUAL_COLUMNS =
-      new VirtualColumn[] {FILENAME, BLOCKOFFSET, ROWOFFSET, RAWDATASIZE, GROUPINGID};
-
-  private String name;
-  private PrimitiveTypeInfo typeInfo;
-  private boolean isHidden = true;
+  public static ImmutableSet<String> VIRTUAL_COLUMN_NAMES =
+      ImmutableSet.of(FILENAME.getName(), BLOCKOFFSET.getName(), ROWOFFSET.getName(),
+          RAWDATASIZE.getName(), GROUPINGID.getName(), ROWID.getName());
 
-  public VirtualColumn() {
-  }
+  private final String name;
+  private final TypeInfo typeInfo;
+  private final boolean isHidden;
+  private final ObjectInspector oi;
 
-  public VirtualColumn(String name, PrimitiveTypeInfo typeInfo) {
-    this(name, typeInfo, true);
+  private VirtualColumn(String name, PrimitiveTypeInfo typeInfo) {
+    this(name, typeInfo, true, 
+      PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(typeInfo));
   }
 
-  VirtualColumn(String name, PrimitiveTypeInfo typeInfo, boolean isHidden) {
+  private VirtualColumn(String name, TypeInfo typeInfo, boolean isHidden, ObjectInspector oi) {
     this.name = name;
     this.typeInfo = typeInfo;
     this.isHidden = isHidden;
+    this.oi = oi;
   }
 
   public static List<VirtualColumn> getStatsRegistry(Configuration conf) {
@@ -87,26 +99,19 @@ public class VirtualColumn implements Se
     if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEROWOFFSET)) {
       l.add(ROWOFFSET);
     }
+    l.add(ROWID);
 
     return l;
   }
 
-  public PrimitiveTypeInfo getTypeInfo() {
+  public TypeInfo getTypeInfo() {
     return typeInfo;
   }
 
-  public void setTypeInfo(PrimitiveTypeInfo typeInfo) {
-    this.typeInfo = typeInfo;
-  }
-
   public String getName() {
     return this.name;
   }
 
-  public void setName(String name) {
-    this.name = name;
-  }
-
   public boolean isHidden() {
     return isHidden;
   }
@@ -115,37 +120,58 @@ public class VirtualColumn implements Se
     return isHidden;
   }
 
-  public void setIsHidden(boolean isHidden) {
-    this.isHidden = isHidden;
+  public ObjectInspector getObjectInspector() {
+    return oi;
   }
 
   @Override
   public boolean equals(Object o) {
-    if (o == null) {
-      return false;
-    }
     if (this == o) {
       return true;
     }
+    if(!(o instanceof VirtualColumn)) {
+      return false;
+    }
     VirtualColumn c = (VirtualColumn) o;
     return this.name.equals(c.name)
         && this.typeInfo.getTypeName().equals(c.getTypeInfo().getTypeName());
   }
-
+  @Override
+  public int hashCode() {
+    int c = 19;
+    c = 31 * name.hashCode() + c;
+    return  31 * typeInfo.getTypeName().hashCode() + c;
+  }
   public static Collection<String> removeVirtualColumns(final Collection<String> columns) {
-    for(VirtualColumn vcol : VIRTUAL_COLUMNS) {
-      columns.remove(vcol.getName());
-    }
+    Iterables.removeAll(columns, VIRTUAL_COLUMN_NAMES);
     return columns;
   }
 
+  public static List<TypeInfo> removeVirtualColumnTypes(final List<String> columnNames,
+      final List<TypeInfo> columnTypes) {
+    if (columnNames.size() != columnTypes.size()) {
+      throw new IllegalArgumentException("Number of column names in configuration " +
+          columnNames.size() + " differs from column types " + columnTypes.size());
+    }
+
+    int i = 0;
+    ListIterator<TypeInfo> it = columnTypes.listIterator();
+    while(it.hasNext()) {
+      it.next();
+      if (VIRTUAL_COLUMN_NAMES.contains(columnNames.get(i))) {
+        it.remove();
+      }
+      ++i;
+    }
+    return columnTypes;
+  }
+
   public static StructObjectInspector getVCSObjectInspector(List<VirtualColumn> vcs) {
     List<String> names = new ArrayList<String>(vcs.size());
     List<ObjectInspector> inspectors = new ArrayList<ObjectInspector>(vcs.size());
     for (VirtualColumn vc : vcs) {
       names.add(vc.getName());
-      inspectors.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
-          vc.getTypeInfo()));
+      inspectors.add(vc.oi);
     }
     return ObjectInspectorFactory.getStandardStructObjectInspector(names, inspectors);
   }

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java?rev=1620103&r1=1620102&r2=1620103&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java Sun Aug 24 03:43:48 2014
@@ -927,11 +927,9 @@ public class Vectorizer implements Physi
     if (desc instanceof ExprNodeColumnDesc) {
       ExprNodeColumnDesc c = (ExprNodeColumnDesc) desc;
       // Currently, we do not support vectorized virtual columns (see HIVE-5570).
-      for (VirtualColumn vc : VirtualColumn.VIRTUAL_COLUMNS) {
-        if (c.getColumn().equals(vc.getName())) {
-            LOG.info("Cannot vectorize virtual column " + c.getColumn());
-            return false;
-        }
+      if (VirtualColumn.VIRTUAL_COLUMN_NAMES.contains(c.getColumn())) {
+        LOG.info("Cannot vectorize virtual column " + c.getColumn());
+        return false;
       }
     }
     String typeName = desc.getTypeInfo().getTypeName();
@@ -1076,10 +1074,8 @@ public class Vectorizer implements Physi
 
     // Not using method column.getIsVirtualCol() because partitioning columns are also
     // treated as virtual columns in ColumnInfo.
-    for (VirtualColumn vc : VirtualColumn.VIRTUAL_COLUMNS) {
-      if (column.getInternalName().equals(vc.getName())) {
+    if (VirtualColumn.VIRTUAL_COLUMN_NAMES.contains(column.getInternalName())) {
         return true;
-      }
     }
     return false;
   }

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java?rev=1620103&r1=1620102&r2=1620103&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java Sun Aug 24 03:43:48 2014
@@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.optimi
 
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
+
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.conf.HiveConf;
@@ -67,8 +68,10 @@ import org.apache.hadoop.hive.ql.udf.gen
 import org.apache.hadoop.hive.serde.serdeConstants;
 
 import java.util.Collections;
+import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 import java.util.Stack;
 
 public class StatsRulesProcFactory {
@@ -803,12 +806,13 @@ public class StatsRulesProcFactory {
           // statistics object that is combination of statistics from all
           // relations involved in JOIN
           Statistics stats = new Statistics();
-          List<Long> rowCountParents = Lists.newArrayList();
+          Map<String, Long> rowCountParents = new HashMap<String, Long>();
           List<Long> distinctVals = Lists.newArrayList();
 
           // 2 relations, multiple attributes
           boolean multiAttr = false;
           int numAttr = 1;
+          int numParent = parents.size();
 
           Map<String, ColStatistics> joinedColStats = Maps.newHashMap();
           Map<Integer, List<String>> joinKeys = Maps.newHashMap();
@@ -818,9 +822,20 @@ public class StatsRulesProcFactory {
             ReduceSinkOperator parent = (ReduceSinkOperator) jop.getParentOperators().get(pos);
 
             Statistics parentStats = parent.getStatistics();
-            rowCountParents.add(parentStats.getNumRows());
             List<ExprNodeDesc> keyExprs = parent.getConf().getKeyCols();
 
+            // Parent RS may have column statistics from multiple parents.
+            // Populate table alias to row count map, this will be used later to
+            // scale down/up column statistics based on new row count
+            // NOTE: JOIN with UNION as parent of RS will not have table alias
+            // propagated properly. UNION operator does not propagate the table
+            // alias of subqueries properly to expression nodes. Hence union20.q
+            // will have wrong number of rows.
+            Set<String> tableAliases = StatsUtils.getAllTableAlias(parent.getColumnExprMap());
+            for (String tabAlias : tableAliases) {
+              rowCountParents.put(tabAlias, parentStats.getNumRows());
+            }
+
             // multi-attribute join key
             if (keyExprs.size() > 1) {
               multiAttr = true;
@@ -860,12 +875,19 @@ public class StatsRulesProcFactory {
                   perAttrDVs.add(cs.getCountDistint());
                 }
               }
+
               distinctVals.add(getDenominator(perAttrDVs));
               perAttrDVs.clear();
             }
 
-            for (Long l : distinctVals) {
-              denom *= l;
+            if (numAttr > numParent) {
+              // To avoid denominator getting larger and aggressively reducing
+              // number of rows, we will ease out denominator.
+              denom = getEasedOutDenominator(distinctVals);
+            } else {
+              for (Long l : distinctVals) {
+                denom *= l;
+              }
             }
           } else {
             for (List<String> jkeys : joinKeys.values()) {
@@ -890,6 +912,7 @@ public class StatsRulesProcFactory {
           Map<String, ExprNodeDesc> colExprMap = jop.getColumnExprMap();
           RowSchema rs = jop.getSchema();
           List<ColStatistics> outColStats = Lists.newArrayList();
+          Map<String, String> outInTabAlias = new HashMap<String, String>();
           for (ColumnInfo ci : rs.getSignature()) {
             String key = ci.getInternalName();
             ExprNodeDesc end = colExprMap.get(key);
@@ -901,6 +924,7 @@ public class StatsRulesProcFactory {
               ColStatistics cs = joinedColStats.get(fqColName);
               String outColName = key;
               String outTabAlias = ci.getTabAlias();
+              outInTabAlias.put(outTabAlias, tabAlias);
               if (cs != null) {
                 cs.setColumnName(outColName);
                 cs.setTableAlias(outTabAlias);
@@ -911,7 +935,8 @@ public class StatsRulesProcFactory {
 
           // update join statistics
           stats.setColumnStats(outColStats);
-          long newRowCount = computeNewRowCount(rowCountParents, denom);
+          long newRowCount = computeNewRowCount(
+              Lists.newArrayList(rowCountParents.values()), denom);
 
           if (newRowCount <= 0 && LOG.isDebugEnabled()) {
             newRowCount = 0;
@@ -920,7 +945,8 @@ public class StatsRulesProcFactory {
                 + " #Rows of parents: " + rowCountParents.toString() + ". Denominator: " + denom);
           }
 
-          updateStatsForJoinType(stats, newRowCount, true, jop.getConf());
+          updateStatsForJoinType(stats, newRowCount, jop.getConf(),
+              rowCountParents, outInTabAlias);
           jop.setStatistics(stats);
 
           if (LOG.isDebugEnabled()) {
@@ -966,37 +992,54 @@ public class StatsRulesProcFactory {
       return null;
     }
 
+    private Long getEasedOutDenominator(List<Long> distinctVals) {
+      // Exponential back-off for NDVs.
+      // 1) Descending order sort of NDVs
+      // 2) denominator = NDV1 * (NDV2 ^ (1/2)) * (NDV3 ^ (1/4))) * ....
+      Collections.sort(distinctVals, Collections.reverseOrder());
+
+      long denom = distinctVals.get(0);
+      for (int i = 1; i < distinctVals.size(); i++) {
+        denom = (long) (denom * Math.pow(distinctVals.get(i), 1.0 / (1 << i)));
+      }
+
+      return denom;
+    }
+
     private void updateStatsForJoinType(Statistics stats, long newNumRows,
-        boolean useColStats, JoinDesc conf) {
-      long oldRowCount = stats.getNumRows();
-      double ratio = (double) newNumRows / (double) oldRowCount;
+        JoinDesc conf, Map<String, Long> rowCountParents,
+        Map<String, String> outInTabAlias) {
       stats.setNumRows(newNumRows);
 
-      if (useColStats) {
-        List<ColStatistics> colStats = stats.getColumnStats();
-        for (ColStatistics cs : colStats) {
-          long oldDV = cs.getCountDistint();
-          long newDV = oldDV;
-
-          // if ratio is greater than 1, then number of rows increases. This can happen
-          // when some operators like GROUPBY duplicates the input rows in which case
-          // number of distincts should not change. Update the distinct count only when
-          // the output number of rows is less than input number of rows.
-          if (ratio <= 1.0) {
-            newDV = (long) Math.ceil(ratio * oldDV);
-          }
-          // Assumes inner join
-          // TODO: HIVE-5579 will handle different join types
-          cs.setNumNulls(0);
-          cs.setCountDistint(newDV);
-        }
-        stats.setColumnStats(colStats);
-        long newDataSize = StatsUtils.getDataSizeFromColumnStats(newNumRows, colStats);
-        stats.setDataSize(newDataSize);
-      } else {
-        long newDataSize = (long) (ratio * stats.getDataSize());
-        stats.setDataSize(newDataSize);
+      // scale down/up the column statistics based on the changes in number of
+      // rows from each parent. For ex: If there are 2 parents for JOIN operator
+      // with 1st parent having 200 rows and 2nd parent having 2000 rows. Now if
+      // the new number of rows after applying join rule is 10, then the column
+      // stats for columns from 1st parent should be scaled down by 200/10 = 20x
+      // and stats for columns from 2nd parent should be scaled down by 200x
+      List<ColStatistics> colStats = stats.getColumnStats();
+      for (ColStatistics cs : colStats) {
+        long oldRowCount = rowCountParents.get(outInTabAlias.get(cs.getTableAlias()));
+        double ratio = (double) newNumRows / (double) oldRowCount;
+        long oldDV = cs.getCountDistint();
+        long newDV = oldDV;
+
+        // if ratio is greater than 1, then number of rows increases. This can happen
+        // when some operators like GROUPBY duplicates the input rows in which case
+        // number of distincts should not change. Update the distinct count only when
+        // the output number of rows is less than input number of rows.
+        if (ratio <= 1.0) {
+          newDV = (long) Math.ceil(ratio * oldDV);
+        }
+        // Assumes inner join
+        // TODO: HIVE-5579 will handle different join types
+        cs.setNumNulls(0);
+        cs.setCountDistint(newDV);
       }
+      stats.setColumnStats(colStats);
+      long newDataSize = StatsUtils
+          .getDataSizeFromColumnStats(newNumRows, colStats);
+      stats.setDataSize(newDataSize);
     }
 
     private long computeNewRowCount(List<Long> rowCountParents, long denom) {

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g?rev=1620103&r1=1620102&r2=1620103&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g Sun Aug 24 03:43:48 2014
@@ -144,7 +144,7 @@ fromSource
 @init { gParent.pushMsg("from source", state); }
 @after { gParent.popMsg(state); }
     :
-    ((Identifier LPAREN)=> partitionedTableFunction | tableSource | subQuerySource) (lateralView^)*
+    ((Identifier LPAREN)=> partitionedTableFunction | tableSource | subQuerySource | virtualTableSource) (lateralView^)*
     ;
 
 tableBucketSample
@@ -256,3 +256,46 @@ searchCondition
     ;
 
 //-----------------------------------------------------------------------------------
+
+//-------- Row Constructor ----------------------------------------------------------
+//in support of SELECT * FROM (VALUES(1,2,3),(4,5,6),...) as FOO(a,b,c) and
+// INSERT INTO <table> (col1,col2,...) VALUES(...),(...),...
+// INSERT INTO <table> (col1,col2,...) SELECT * FROM (VALUES(1,2,3),(4,5,6),...) as Foo(a,b,c)
+valueRowConstructor
+    :
+    LPAREN atomExpression (COMMA atomExpression)* RPAREN -> ^(TOK_VALUE_ROW atomExpression+)
+    ;
+
+valuesTableConstructor
+    :
+    valueRowConstructor (COMMA valueRowConstructor)* -> ^(TOK_VALUES_TABLE valueRowConstructor+)
+    ;
+
+/*
+VALUES(1),(2) means 2 rows, 1 column each.
+VALUES(1,2),(3,4) means 2 rows, 2 columns each.
+VALUES(1,2,3) means 1 row, 3 columns
+*/
+valuesClause
+    :
+    KW_VALUES valuesTableConstructor -> valuesTableConstructor
+    ;
+
+/*
+This represents a clause like this:
+(VALUES(1,2),(2,3)) as VirtTable(col1,col2)
+*/
+virtualTableSource
+   	:
+   	LPAREN valuesClause RPAREN tableNameColList -> ^(TOK_VIRTUAL_TABLE tableNameColList valuesClause)
+   	;
+/*
+e.g. as VirtTable(col1,col2)
+Note that we only want literals as column names
+*/
+tableNameColList
+    :
+    KW_AS? identifier LPAREN identifier (COMMA identifier)* RPAREN -> ^(TOK_VIRTUAL_TABREF ^(TOK_TABNAME identifier) ^(TOK_COL_NAME identifier+))
+    ;
+
+//-----------------------------------------------------------------------------------
\ No newline at end of file

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/FunctionSemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/FunctionSemanticAnalyzer.java?rev=1620103&r1=1620102&r2=1620103&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/FunctionSemanticAnalyzer.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/FunctionSemanticAnalyzer.java Sun Aug 24 03:43:48 2014
@@ -169,6 +169,7 @@ public class FunctionSemanticAnalyzer ex
       try {
         String[] qualifiedNameParts = FunctionUtils.getQualifiedFunctionNameParts(functionName);
         String dbName = qualifiedNameParts[0];
+        functionName = qualifiedNameParts[1];
         database = getDatabase(dbName);
       } catch (HiveException e) {
         LOG.error(e);

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g?rev=1620103&r1=1620102&r2=1620103&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g Sun Aug 24 03:43:48 2014
@@ -292,6 +292,7 @@ KW_TRANSACTIONS: 'TRANSACTIONS';
 KW_REWRITE : 'REWRITE';
 KW_AUTHORIZATION: 'AUTHORIZATION';
 KW_CONF: 'CONF';
+KW_VALUES: 'VALUES';
 
 // Operators
 // NOTE: if you add a new function/operator, add it to sysFuncNames so that describe function _FUNC_ will work.



Mime
View raw message