hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From heyongqi...@apache.org
Subject svn commit: r1038439 - in /hive/trunk: ./ ql/src/java/org/apache/hadoop/hive/ql/io/
Date Wed, 24 Nov 2010 01:54:57 GMT
Author: heyongqiang
Date: Wed Nov 24 01:54:56 2010
New Revision: 1038439

URL: http://svn.apache.org/viewvc?rev=1038439&view=rev
Log:
HIVE-1801 HiveInputFormat/CombineHiveInputFormat sync RCFile twice (Siying Dong via He Yongqiang)

Modified:
    hive/trunk/CHANGES.txt
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveRecordReader.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveContextAwareRecordReader.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/RCFileRecordReader.java

Modified: hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hive/trunk/CHANGES.txt?rev=1038439&r1=1038438&r2=1038439&view=diff
==============================================================================
--- hive/trunk/CHANGES.txt (original)
+++ hive/trunk/CHANGES.txt Wed Nov 24 01:54:56 2010
@@ -268,6 +268,9 @@ Trunk -  Unreleased
     HIVE-1787 optimize the code path when there are no outer joins
     (Siying Dong via namit)
 
+    HIVE-1801 HiveInputFormat or CombineHiveInputFormat always sync blocks of RCFile twice
+    (Siying Dong via He Yongqiang)
+
   OPTIMIZATIONS
 
   BUG FIXES

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveRecordReader.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveRecordReader.java?rev=1038439&r1=1038438&r2=1038439&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveRecordReader.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveRecordReader.java Wed Nov 24 01:54:56 2010
@@ -66,9 +66,10 @@ public class CombineHiveRecordReader<K e
         .getLocations());
 
     this.recordReader = inputFormat.getRecordReader(fsplit, job, reporter);
-    this.initIOContext(fsplit, job, inputFormatClass);
+    this.initIOContext(fsplit, job, inputFormatClass, this.recordReader);
   }
 
+  @Override
   public void doClose() throws IOException {
     recordReader.close();
   }
@@ -89,6 +90,7 @@ public class CombineHiveRecordReader<K e
     return recordReader.getProgress();
   }
 
+  @Override
   public boolean doNext(K key, V value) throws IOException {
     if (ExecMapper.getDone()) {
       return false;

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveContextAwareRecordReader.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveContextAwareRecordReader.java?rev=1038439&r1=1038438&r2=1038439&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveContextAwareRecordReader.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveContextAwareRecordReader.java Wed Nov 24 01:54:56 2010
@@ -105,6 +105,11 @@ public abstract class HiveContextAwareRe
 
   public void initIOContext(FileSplit split, JobConf job,
       Class inputFormatClass) throws IOException {
+    this.initIOContext(split, job, inputFormatClass, null);
+  }
+
+  public void initIOContext(FileSplit split, JobConf job,
+      Class inputFormatClass, RecordReader recordReader) throws IOException {
     boolean blockPointer = false;
     long blockStart = -1;
     FileSplit fileSplit = (FileSplit) split;
@@ -116,9 +121,12 @@ public abstract class HiveContextAwareRe
       in.sync(fileSplit.getStart());
       blockStart = in.getPosition();
       in.close();
+    } else if (recordReader instanceof RCFileRecordReader) {
+      blockPointer = true;
+      blockStart = ((RCFileRecordReader) recordReader).getStart();
     } else if (inputFormatClass.getName().contains("RCFile")) {
-      RCFile.Reader in = new RCFile.Reader(fs, path, job);
       blockPointer = true;
+      RCFile.Reader in = new RCFile.Reader(fs, path, job);
       in.sync(fileSplit.getStart());
       blockStart = in.getPosition();
       in.close();

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java?rev=1038439&r1=1038438&r2=1038439&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java Wed Nov 24 01:54:56 2010
@@ -33,9 +33,7 @@ import org.apache.commons.logging.LogFac
 import org.apache.hadoop.conf.Configurable;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.ql.exec.ColumnInfo;
 import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.exec.RowSchema;
 import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
@@ -185,7 +183,7 @@ public class HiveInputFormat<K extends W
 
   public static InputFormat<WritableComparable, Writable> getInputFormatFromCache(
     Class inputFormatClass, JobConf job) throws IOException {
-  
+
     if (inputFormats == null) {
       inputFormats = new HashMap<Class, InputFormat<WritableComparable, Writable>>();
     }
@@ -229,10 +227,12 @@ public class HiveInputFormat<K extends W
     if ((part != null) && (part.getTableDesc() != null)) {
       Utilities.copyTableJobPropertiesToConf(part.getTableDesc(), cloneJobConf);
     }
-    
-    HiveRecordReader<K,V> rr = new HiveRecordReader(inputFormat.getRecordReader(inputSplit,
-        cloneJobConf, reporter));
-    rr.initIOContext(hsplit, job, inputFormatClass);
+
+    RecordReader innerReader = inputFormat.getRecordReader(inputSplit,
+        cloneJobConf, reporter);
+
+    HiveRecordReader<K,V> rr = new HiveRecordReader(innerReader);
+    rr.initIOContext(hsplit, job, inputFormatClass, innerReader);
     return rr;
   }
 
@@ -274,7 +274,7 @@ public class HiveInputFormat<K extends W
           pushFilters(newjob, tableScan);
         }
       }
-      
+
       FileInputFormat.setInputPaths(newjob, dir);
       newjob.setInputFormat(inputFormat.getClass());
       InputSplit[] iss = inputFormat.getSplits(newjob, numSplits / dirs.length);
@@ -353,7 +353,7 @@ public class HiveInputFormat<K extends W
       TableScanDesc.FILTER_EXPR_CONF_STR,
       filterExprSerialized);
   }
-  
+
   protected void pushProjectionsAndFilters(JobConf jobConf, Class inputFormatClass,
       String splitPath, String splitPathWithNoSchema) {
     if (this.mrwork == null) {

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/RCFileRecordReader.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/RCFileRecordReader.java?rev=1038439&r1=1038438&r2=1038439&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/RCFileRecordReader.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/RCFileRecordReader.java Wed Nov 24 01:54:56 2010
@@ -110,7 +110,7 @@ public class RCFileRecordReader<K extend
 
   /**
    * Return the progress within the input split.
-   * 
+   *
    * @return 0.0 to 1.0 of the input byte range
    */
   public float getProgress() throws IOException {
@@ -129,6 +129,10 @@ public class RCFileRecordReader<K extend
     in.seek(pos);
   }
 
+  public long getStart() {
+    return start;
+  }
+
   public void close() throws IOException {
     in.close();
   }



Mime
View raw message