avro-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From cutt...@apache.org
Subject svn commit: r998356 - in /avro/branches/branch-1.4: ./ lang/java/src/java/org/apache/avro/file/ lang/java/src/java/org/apache/avro/mapred/ lang/java/src/java/org/apache/avro/tool/ lang/java/src/test/java/org/apache/avro/mapred/
Date Fri, 17 Sep 2010 22:25:35 GMT
Author: cutting
Date: Fri Sep 17 22:25:35 2010
New Revision: 998356

URL: http://svn.apache.org/viewvc?rev=998356&view=rev
Log:
Merge r998354 from trunk to 1.4 branch.  Fixes: AVRO-662.

Added:
    avro/branches/branch-1.4/lang/java/src/java/org/apache/avro/file/FileReader.java
      - copied unchanged from r998354, avro/trunk/lang/java/src/java/org/apache/avro/file/FileReader.java
    avro/branches/branch-1.4/lang/java/src/java/org/apache/avro/mapred/SequenceFileInputFormat.java
      - copied unchanged from r998354, avro/trunk/lang/java/src/java/org/apache/avro/mapred/SequenceFileInputFormat.java
    avro/branches/branch-1.4/lang/java/src/java/org/apache/avro/mapred/SequenceFileReader.java
      - copied unchanged from r998354, avro/trunk/lang/java/src/java/org/apache/avro/mapred/SequenceFileReader.java
    avro/branches/branch-1.4/lang/java/src/java/org/apache/avro/mapred/SequenceFileRecordReader.java
      - copied unchanged from r998354, avro/trunk/lang/java/src/java/org/apache/avro/mapred/SequenceFileRecordReader.java
    avro/branches/branch-1.4/lang/java/src/test/java/org/apache/avro/mapred/TestSequenceFileReader.java
      - copied unchanged from r998354, avro/trunk/lang/java/src/test/java/org/apache/avro/mapred/TestSequenceFileReader.java
Modified:
    avro/branches/branch-1.4/   (props changed)
    avro/branches/branch-1.4/CHANGES.txt
    avro/branches/branch-1.4/lang/java/src/java/org/apache/avro/file/DataFileReader.java
    avro/branches/branch-1.4/lang/java/src/java/org/apache/avro/mapred/AvroJob.java
    avro/branches/branch-1.4/lang/java/src/java/org/apache/avro/mapred/AvroRecordReader.java
    avro/branches/branch-1.4/lang/java/src/java/org/apache/avro/tool/DataFileReadTool.java

Propchange: avro/branches/branch-1.4/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Fri Sep 17 22:25:35 2010
@@ -1 +1 @@
-/avro/trunk:990852,990860,990867,990871,990878,991031,991423,992146,992149,992167,996640,996642,996649,998347
+/avro/trunk:990852,990860,990867,990871,990878,991031,991423,992146,992149,992167,996640,996642,996649,998347,998354

Modified: avro/branches/branch-1.4/CHANGES.txt
URL: http://svn.apache.org/viewvc/avro/branches/branch-1.4/CHANGES.txt?rev=998356&r1=998355&r2=998356&view=diff
==============================================================================
--- avro/branches/branch-1.4/CHANGES.txt (original)
+++ avro/branches/branch-1.4/CHANGES.txt Fri Sep 17 22:25:35 2010
@@ -6,6 +6,9 @@ Avro 1.4.1 (unreleased)
 
     AVRO-641. Java: Add SASL security for socket-based RPC. (cutting)
 
+    AVRO-634. Java: Add support for reading Hadoop sequence files as
+    Avro data to MapReduce API. (cutting)
+
   IMPROVEMENTS
 
     AVRO-655. Change build so that 'dist' target no longer also runs C

Modified: avro/branches/branch-1.4/lang/java/src/java/org/apache/avro/file/DataFileReader.java
URL: http://svn.apache.org/viewvc/avro/branches/branch-1.4/lang/java/src/java/org/apache/avro/file/DataFileReader.java?rev=998356&r1=998355&r2=998356&view=diff
==============================================================================
--- avro/branches/branch-1.4/lang/java/src/java/org/apache/avro/file/DataFileReader.java (original)
+++ avro/branches/branch-1.4/lang/java/src/java/org/apache/avro/file/DataFileReader.java Fri Sep 17 22:25:35 2010
@@ -29,7 +29,8 @@ import static org.apache.avro.file.DataF
 /** Random access to files written with {@link DataFileWriter}.
  * @see DataFileWriter
  */
-public class DataFileReader<D> extends DataFileStream<D> {
+public class DataFileReader<D>
+  extends DataFileStream<D> implements FileReader<D> {
   private SeekableInputStream sin;
   private long blockStart;
 
@@ -105,6 +106,8 @@ public class DataFileReader<D> extends D
     return ((blockStart >= position+SYNC_SIZE)||(blockStart >= sin.length()));
   }
 
+  @Override public long tell() throws IOException { return sin.tell(); }
+
   private static class SeekableInputStream extends InputStream 
   implements SeekableInput {
     private final byte[] oneByte = new byte[1];

Modified: avro/branches/branch-1.4/lang/java/src/java/org/apache/avro/mapred/AvroJob.java
URL: http://svn.apache.org/viewvc/avro/branches/branch-1.4/lang/java/src/java/org/apache/avro/mapred/AvroJob.java?rev=998356&r1=998355&r2=998356&view=diff
==============================================================================
--- avro/branches/branch-1.4/lang/java/src/java/org/apache/avro/mapred/AvroJob.java (original)
+++ avro/branches/branch-1.4/lang/java/src/java/org/apache/avro/mapred/AvroJob.java Fri Sep 17 22:25:35 2010
@@ -95,6 +95,11 @@ public class AvroJob {
     }
   }
 
+  /** Indicate that a job's input files are in SequenceFile format.*/
+  public static void setInputSequenceFile(JobConf job) {
+    job.setInputFormat(SequenceFileInputFormat.class);
+  }
+
   /** Return a job's output key schema. */
   public static Schema getOutputSchema(Configuration job) {
     return Schema.parse(job.get(OUTPUT_SCHEMA));

Modified: avro/branches/branch-1.4/lang/java/src/java/org/apache/avro/mapred/AvroRecordReader.java
URL: http://svn.apache.org/viewvc/avro/branches/branch-1.4/lang/java/src/java/org/apache/avro/mapred/AvroRecordReader.java?rev=998356&r1=998355&r2=998356&view=diff
==============================================================================
--- avro/branches/branch-1.4/lang/java/src/java/org/apache/avro/mapred/AvroRecordReader.java (original)
+++ avro/branches/branch-1.4/lang/java/src/java/org/apache/avro/mapred/AvroRecordReader.java Fri Sep 17 22:25:35 2010
@@ -25,7 +25,7 @@ import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.FileSplit;
 import org.apache.hadoop.mapred.RecordReader;
 
-import org.apache.avro.Schema;
+import org.apache.avro.file.FileReader;
 import org.apache.avro.file.DataFileReader;
 import org.apache.avro.specific.SpecificDatumReader;
 
@@ -33,20 +33,23 @@ import org.apache.avro.specific.Specific
 public class AvroRecordReader<T>
   implements RecordReader<AvroWrapper<T>, NullWritable> {
 
-  private FsInput in;
-  private DataFileReader<T> reader;
+  private FileReader<T> reader;
   private long start;
   private long end;
 
   public AvroRecordReader(JobConf job, FileSplit split)
     throws IOException {
-    this.in = new FsInput(split.getPath(), job);
-
-    Schema s = AvroJob.getInputSchema(job);
-    this.reader = new DataFileReader<T>(in, new SpecificDatumReader<T>(s));
+    this(new DataFileReader<T>
+         (new FsInput(split.getPath(), job),
+          new SpecificDatumReader<T>(AvroJob.getInputSchema(job))),
+         split);
+  }
 
+  protected AvroRecordReader(FileReader<T> reader, FileSplit split)
+    throws IOException {
+    this.reader = reader;
     reader.sync(split.getStart());                    // sync to start
-    this.start = in.tell();
+    this.start = reader.tell();
     this.end = split.getStart() + split.getLength();
   }
 
@@ -68,12 +71,12 @@ public class AvroRecordReader<T>
     if (end == start) {
       return 0.0f;
     } else {
-      return Math.min(1.0f, (in.tell() - start) / (float)(end - start));
+      return Math.min(1.0f, (getPos() - start) / (float)(end - start));
     }
   }
   
   public long getPos() throws IOException {
-    return in.tell();
+    return reader.tell();
   }
 
   public void close() throws IOException { reader.close(); }

Modified: avro/branches/branch-1.4/lang/java/src/java/org/apache/avro/tool/DataFileReadTool.java
URL: http://svn.apache.org/viewvc/avro/branches/branch-1.4/lang/java/src/java/org/apache/avro/tool/DataFileReadTool.java?rev=998356&r1=998355&r2=998356&view=diff
==============================================================================
--- avro/branches/branch-1.4/lang/java/src/java/org/apache/avro/tool/DataFileReadTool.java (original)
+++ avro/branches/branch-1.4/lang/java/src/java/org/apache/avro/tool/DataFileReadTool.java Fri Sep 17 22:25:35 2010
@@ -23,6 +23,7 @@ import java.io.PrintStream;
 import java.util.List;
 
 import org.apache.avro.Schema;
+import org.apache.avro.file.FileReader;
 import org.apache.avro.file.DataFileReader;
 import org.apache.avro.io.DatumWriter;
 import org.apache.avro.generic.GenericDatumReader;
@@ -55,7 +56,7 @@ public class DataFileReadTool implements
     }
 
     GenericDatumReader<Object> reader = new GenericDatumReader<Object>();
-    DataFileReader<Object> fileReader =
+    FileReader<Object> fileReader =
       new DataFileReader<Object>(new File(args.get(0)), reader);
     try {
       Schema schema = fileReader.getSchema();



Mime
View raw message