flink-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From rmetz...@apache.org
Subject [2/4] The Hadoop Compatibility has been refactored and extended to support the new Java API.
Date Sat, 21 Jun 2014 09:45:26 GMT
http://git-wip-us.apache.org/repos/asf/incubator-flink/blob/a65b7591/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/HadoopInputFormat.java
----------------------------------------------------------------------
diff --git a/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/HadoopInputFormat.java b/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/HadoopInputFormat.java
new file mode 100644
index 0000000..882f4a3
--- /dev/null
+++ b/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/HadoopInputFormat.java
@@ -0,0 +1,287 @@
+/***********************************************************************************************************************
+ * Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ **********************************************************************************************************************/
+
+package eu.stratosphere.hadoopcompatibility.mapred;
+
+import java.io.IOException;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+import java.util.ArrayList;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordReader;
+import org.apache.hadoop.util.ReflectionUtils;
+
+import eu.stratosphere.api.common.io.FileInputFormat.FileBaseStatistics;
+import eu.stratosphere.api.common.io.InputFormat;
+import eu.stratosphere.api.common.io.statistics.BaseStatistics;
+import eu.stratosphere.api.java.tuple.Tuple2;
+import eu.stratosphere.api.java.typeutils.ResultTypeQueryable;
+import eu.stratosphere.api.java.typeutils.TupleTypeInfo;
+import eu.stratosphere.api.java.typeutils.WritableTypeInfo;
+import eu.stratosphere.configuration.Configuration;
+import eu.stratosphere.core.fs.FileStatus;
+import eu.stratosphere.core.fs.FileSystem;
+import eu.stratosphere.core.fs.Path;
+import eu.stratosphere.hadoopcompatibility.mapred.utils.HadoopUtils;
+import eu.stratosphere.hadoopcompatibility.mapred.wrapper.HadoopDummyReporter;
+import eu.stratosphere.hadoopcompatibility.mapred.wrapper.HadoopInputSplit;
+import eu.stratosphere.types.TypeInformation;
+
+public class HadoopInputFormat<K extends Writable, V extends Writable> implements InputFormat<Tuple2<K,V>, HadoopInputSplit>, ResultTypeQueryable<Tuple2<K,V>> {
+	
+	private static final long serialVersionUID = 1L;
+	
+	private static final Log LOG = LogFactory.getLog(HadoopInputFormat.class);
+	
+	private org.apache.hadoop.mapred.InputFormat<K, V> mapredInputFormat;
+	private Class<K> keyClass;
+	private Class<V> valueClass;
+	private JobConf jobConf;
+	
+	public transient K key;
+	public transient V value;
+	
+	public RecordReader<K, V> recordReader;
+	private transient boolean fetched = false;
+	private transient boolean hasNext;
+	
+	public HadoopInputFormat() {
+		super();
+	}
+	
+	public HadoopInputFormat(org.apache.hadoop.mapred.InputFormat<K,V> mapredInputFormat, Class<K> key, Class<V> value, JobConf job) {
+		super();
+		this.mapredInputFormat = mapredInputFormat;
+		this.keyClass = key;
+		this.valueClass = value;
+		HadoopUtils.mergeHadoopConf(job);
+		this.jobConf = job;
+	}
+	
+	public void setJobConf(JobConf job) {
+		this.jobConf = job;
+	}
+	
+	public org.apache.hadoop.mapred.InputFormat<K,V> getHadoopInputFormat() {
+		return mapredInputFormat;
+	}
+	
+	public void setHadoopInputFormat(org.apache.hadoop.mapred.InputFormat<K,V> mapredInputFormat) {
+		this.mapredInputFormat = mapredInputFormat;
+	}
+	
+	public JobConf getJobConf() {
+		return jobConf;
+	}
+	
+	// --------------------------------------------------------------------------------------------
+	//  InputFormat
+	// --------------------------------------------------------------------------------------------
+	
+	@Override
+	public void configure(Configuration parameters) {
+		// nothing to do
+	}
+	
+	/** Gathers file statistics for file-based Hadoop input formats; returns null if none are available. */
+	@Override
+	public BaseStatistics getStatistics(BaseStatistics cachedStats) throws IOException {
+		// only gather base statistics for FileInputFormats
+		if(!(mapredInputFormat instanceof FileInputFormat)) {
+			return null;
+		}
+		
+		// instanceof evaluates to false for null, so no separate null check is required
+		final FileBaseStatistics cachedFileStats = (cachedStats instanceof FileBaseStatistics) ? (FileBaseStatistics) cachedStats : null;
+		
+		try {
+			final org.apache.hadoop.fs.Path[] paths = FileInputFormat.getInputPaths(this.jobConf);
+			
+			return getFileStats(cachedFileStats, paths, new ArrayList<FileStatus>(1));
+		} catch (IOException ioex) {
+			if (LOG.isWarnEnabled()) {
+				LOG.warn("Could not determine statistics due to an io error: "
+						+ ioex.getMessage());
+			}
+		} catch (Throwable t) {
+			if (LOG.isErrorEnabled()) {
+				LOG.error("Unexpected problem while getting the file statistics: "
+						+ t.getMessage(), t);
+			}
+		}
+		
+		// no statistics available
+		return null;
+	}
+	
+	@Override
+	public HadoopInputSplit[] createInputSplits(int minNumSplits)
+			throws IOException {
+		org.apache.hadoop.mapred.InputSplit[] splitArray = mapredInputFormat.getSplits(jobConf, minNumSplits);
+		HadoopInputSplit[] hiSplit = new HadoopInputSplit[splitArray.length];
+		for(int i=0;i<splitArray.length;i++){
+			hiSplit[i] = new HadoopInputSplit(splitArray[i], jobConf);
+		}
+		return hiSplit;
+	}
+	
+	@Override
+	public Class<? extends HadoopInputSplit> getInputSplitType() {
+		return HadoopInputSplit.class;
+	}
+	
+	@Override
+	public void open(HadoopInputSplit split) throws IOException {
+		this.recordReader = this.mapredInputFormat.getRecordReader(split.getHadoopInputSplit(), jobConf, new HadoopDummyReporter());
+		key = this.recordReader.createKey();
+		value = this.recordReader.createValue();
+		this.fetched = false;
+	}
+	
+	@Override
+	public boolean reachedEnd() throws IOException {
+		if(!fetched) {
+			fetchNext();
+		}
+		return !hasNext;
+	}
+	
+	private void fetchNext() throws IOException {
+		hasNext = this.recordReader.next(key, value);
+		fetched = true;
+	}
+	
+	@Override
+	public Tuple2<K, V> nextRecord(Tuple2<K, V> record) throws IOException {
+		if(!fetched) {
+			fetchNext();
+		}
+		if(!hasNext) {
+			return null;
+		}
+		record.f0 = key;
+		record.f1 = value;
+		fetched = false;
+		return record;
+	}
+	
+	@Override
+	public void close() throws IOException {
+		this.recordReader.close();
+	}
+	
+	// --------------------------------------------------------------------------------------------
+	//  Helper methods
+	// --------------------------------------------------------------------------------------------
+	
+	private FileBaseStatistics getFileStats(FileBaseStatistics cachedStats, org.apache.hadoop.fs.Path[] hadoopFilePaths,
+			ArrayList<FileStatus> files) throws IOException {
+		
+		long latestModTime = 0L;
+		
+		// get the file info and check whether the cached statistics are still valid.
+		for(org.apache.hadoop.fs.Path hadoopPath : hadoopFilePaths) {
+			
+			final Path filePath = new Path(hadoopPath.toUri());
+			final FileSystem fs = FileSystem.get(filePath.toUri());
+			
+			final FileStatus file = fs.getFileStatus(filePath);
+			latestModTime = Math.max(latestModTime, file.getModificationTime());
+			
+			// enumerate all files and check their modification time stamp.
+			if (file.isDir()) {
+				FileStatus[] fss = fs.listStatus(filePath);
+				files.ensureCapacity(files.size() + fss.length);
+				
+				for (FileStatus s : fss) {
+					if (!s.isDir()) {
+						files.add(s);
+						latestModTime = Math.max(s.getModificationTime(), latestModTime);
+					}
+				}
+			} else {
+				files.add(file);
+			}
+		}
+		
+		// check whether the cached statistics are still valid, if we have any
+		if (cachedStats != null && latestModTime <= cachedStats.getLastModificationTime()) {
+			return cachedStats;
+		}
+		
+		// calculate the whole length
+		long len = 0;
+		for (FileStatus s : files) {
+			len += s.getLen();
+		}
+		
+		// sanity check
+		if (len <= 0) {
+			len = BaseStatistics.SIZE_UNKNOWN;
+		}
+		
+		return new FileBaseStatistics(latestModTime, len, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN);
+	}
+	
+	// --------------------------------------------------------------------------------------------
+	//  Custom serialization methods
+	// --------------------------------------------------------------------------------------------
+	
+	private void writeObject(ObjectOutputStream out) throws IOException {
+		out.writeUTF(mapredInputFormat.getClass().getName());
+		out.writeUTF(keyClass.getName());
+		out.writeUTF(valueClass.getName());
+		jobConf.write(out);
+	}
+	
+	@SuppressWarnings("unchecked")
+	private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
+		String hadoopInputFormatClassName = in.readUTF();
+		String keyClassName = in.readUTF();
+		String valueClassName = in.readUTF();
+		if(jobConf == null) {
+			jobConf = new JobConf();
+		}
+		jobConf.readFields(in);
+		try {
+			this.mapredInputFormat = (org.apache.hadoop.mapred.InputFormat<K,V>) Class.forName(hadoopInputFormatClassName, true, Thread.currentThread().getContextClassLoader()).newInstance();
+		} catch (Exception e) {
+			throw new RuntimeException("Unable to instantiate the hadoop input format", e);
+		}
+		try {
+			this.keyClass = (Class<K>) Class.forName(keyClassName, true, Thread.currentThread().getContextClassLoader());
+		} catch (Exception e) {
+			throw new RuntimeException("Unable to find key class.", e);
+		}
+		try {
+			this.valueClass = (Class<V>) Class.forName(valueClassName, true, Thread.currentThread().getContextClassLoader());
+		} catch (Exception e) {
+			throw new RuntimeException("Unable to find value class.", e);
+		}
+		ReflectionUtils.setConf(mapredInputFormat, jobConf);
+	}
+	
+	// --------------------------------------------------------------------------------------------
+	//  ResultTypeQueryable
+	// --------------------------------------------------------------------------------------------
+	
+	@Override
+	public TypeInformation<Tuple2<K,V>> getProducedType() {
+		return new TupleTypeInfo<Tuple2<K,V>>(new WritableTypeInfo<K>((Class<K>) keyClass), new WritableTypeInfo<V>((Class<V>) valueClass));
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-flink/blob/a65b7591/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/HadoopOutputFormat.java
----------------------------------------------------------------------
diff --git a/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/HadoopOutputFormat.java b/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/HadoopOutputFormat.java
new file mode 100644
index 0000000..849c701
--- /dev/null
+++ b/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/HadoopOutputFormat.java
@@ -0,0 +1,164 @@
+/***********************************************************************************************************************
+ * Copyright (C) 2010-2014 by the Stratosphere project (http://stratosphere.eu)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ **********************************************************************************************************************/
+
+package eu.stratosphere.hadoopcompatibility.mapred;
+
+import java.io.IOException;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapred.FileOutputCommitter;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.JobContext;
+import org.apache.hadoop.mapred.JobID;
+import org.apache.hadoop.mapred.RecordWriter;
+import org.apache.hadoop.mapred.TaskAttemptContext;
+import org.apache.hadoop.mapred.TaskAttemptID;
+import org.apache.hadoop.util.ReflectionUtils;
+
+import eu.stratosphere.api.common.io.OutputFormat;
+import eu.stratosphere.api.java.tuple.Tuple2;
+import eu.stratosphere.configuration.Configuration;
+import eu.stratosphere.hadoopcompatibility.mapred.utils.HadoopUtils;
+import eu.stratosphere.hadoopcompatibility.mapred.wrapper.HadoopDummyProgressable;
+import eu.stratosphere.hadoopcompatibility.mapred.wrapper.HadoopDummyReporter;
+
+
+public class HadoopOutputFormat<K extends Writable,V extends Writable> implements OutputFormat<Tuple2<K, V>> {
+	
+	private static final long serialVersionUID = 1L;
+	
+	public JobConf jobConf;	
+	public org.apache.hadoop.mapred.OutputFormat<K,V> mapredOutputFormat;	
+	public transient RecordWriter<K,V> recordWriter;	
+	public transient FileOutputCommitter fileOutputCommitter;
+	private transient TaskAttemptContext context;
+	private transient JobContext jobContext;
+	
+	public HadoopOutputFormat(org.apache.hadoop.mapred.OutputFormat<K,V> mapredOutputFormat, JobConf job) {
+		super();
+		this.mapredOutputFormat = mapredOutputFormat;
+		HadoopUtils.mergeHadoopConf(job);
+		this.jobConf = job;
+	}
+	
+	public void setJobConf(JobConf job) {
+		this.jobConf = job;
+	}
+	
+	public JobConf getJobConf() {
+		return jobConf;
+	}
+	
+	public org.apache.hadoop.mapred.OutputFormat<K,V> getHadoopOutputFormat() {
+		return mapredOutputFormat;
+	}
+	
+	public void setHadoopOutputFormat(org.apache.hadoop.mapred.OutputFormat<K,V> mapredOutputFormat) {
+		this.mapredOutputFormat = mapredOutputFormat;
+	}
+	
+	// --------------------------------------------------------------------------------------------
+	//  OutputFormat
+	// --------------------------------------------------------------------------------------------
+	
+	@Override
+	public void configure(Configuration parameters) {
+		// nothing to do
+	}
+	
+	/**
+	 * Sets up the Hadoop task attempt, job context and output committer, then opens the RecordWriter for the temporary output file.
+	 * @param taskNumber The number of the parallel instance.
+	 * @param numTasks The number of parallel tasks.
+	 * @throws IOException if the one-based task number has more than six digits
+	 */
+	@Override
+	public void open(int taskNumber, int numTasks) throws IOException {
+		final String taskId = Integer.toString(taskNumber + 1);
+		if (taskId.length() > 6) {
+			throw new IOException("Task id too large.");
+		}
+		
+		// Zero-pad the id to six digits. "%06d" also covers ids that already have six digits,
+		// for which the previous width-0 "%0s" pattern threw an IllegalFormatException.
+		TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
+				+ String.format("%06d", taskNumber + 1) + "_0");
+		
+		try {
+			this.context = HadoopUtils.instantiateTaskAttemptContext(this.jobConf, taskAttemptID);
+		} catch (Exception e) {
+			throw new RuntimeException(e);
+		}
+		
+		this.jobConf.set("mapred.task.id", taskAttemptID.toString());
+		// for hadoop 2.2
+		this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());
+		
+		this.fileOutputCommitter = new FileOutputCommitter();
+		
+		try {
+			this.jobContext = HadoopUtils.instantiateJobContext(this.jobConf, new JobID());
+		} catch (Exception e) {
+			throw new RuntimeException(e);
+		}
+		
+		this.fileOutputCommitter.setupJob(jobContext);
+		
+		this.recordWriter = this.mapredOutputFormat.getRecordWriter(null, this.jobConf, taskId, new HadoopDummyProgressable());
+	}
+	
+	@Override
+	public void writeRecord(Tuple2<K, V> record) throws IOException {
+		this.recordWriter.write(record.f0, record.f1);
+	}
+	
+	/**
+	 * Closes the record writer and commits the task by moving the output file out of the temporary directory.
+	 * @throws IOException
+	 */
+	@Override
+	public void close() throws IOException {
+		this.recordWriter.close(new HadoopDummyReporter());
+		
+		if (this.fileOutputCommitter.needsTaskCommit(this.context)) {
+			this.fileOutputCommitter.commitTask(this.context);
+		}
+		this.fileOutputCommitter.commitJob(this.jobContext);
+	}
+	
+	// --------------------------------------------------------------------------------------------
+	//  Custom serialization methods
+	// --------------------------------------------------------------------------------------------
+	
+	private void writeObject(ObjectOutputStream out) throws IOException {
+		out.writeUTF(mapredOutputFormat.getClass().getName());
+		jobConf.write(out);
+	}
+	
+	@SuppressWarnings("unchecked")
+	private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
+		String hadoopOutputFormatName = in.readUTF();
+		if(jobConf == null) {
+			jobConf = new JobConf();
+		}
+		jobConf.readFields(in);
+		try {
+			this.mapredOutputFormat = (org.apache.hadoop.mapred.OutputFormat<K,V>) Class.forName(hadoopOutputFormatName, true, Thread.currentThread().getContextClassLoader()).newInstance();
+		} catch (Exception e) {
+			throw new RuntimeException("Unable to instantiate the hadoop output format", e);
+		}
+		ReflectionUtils.setConf(mapredOutputFormat, jobConf);
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-flink/blob/a65b7591/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/example/WordCount.java
----------------------------------------------------------------------
diff --git a/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/example/WordCount.java b/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/example/WordCount.java
new file mode 100644
index 0000000..54160bf
--- /dev/null
+++ b/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/example/WordCount.java
@@ -0,0 +1,115 @@
+/***********************************************************************************************************************
+ * Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ **********************************************************************************************************************/
+package eu.stratosphere.hadoopcompatibility.mapred.example;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.TextInputFormat;
+import org.apache.hadoop.mapred.TextOutputFormat;
+
+import eu.stratosphere.api.java.DataSet;
+import eu.stratosphere.api.java.ExecutionEnvironment;
+import eu.stratosphere.api.java.aggregation.Aggregations;
+import eu.stratosphere.api.java.functions.FlatMapFunction;
+import eu.stratosphere.api.java.functions.MapFunction;
+import eu.stratosphere.api.java.tuple.Tuple2;
+import eu.stratosphere.hadoopcompatibility.mapred.HadoopInputFormat;
+import eu.stratosphere.hadoopcompatibility.mapred.HadoopOutputFormat;
+import eu.stratosphere.util.Collector;
+
+
+
+/**
+ * Implements a word count which takes the input file and counts the number of
+ * occurrences of each word in the file and writes the result back to disk.
+ * 
+ * This example shows how to use Hadoop Input Formats, how to convert Hadoop Writables to 
+ * common Java types for better usage in a Stratosphere job and how to use Hadoop Output Formats.
+ */
+@SuppressWarnings("serial")
+public class WordCount {
+	
+	public static void main(String[] args) throws Exception {
+		if (args.length < 2) {
+			System.err.println("Usage: WordCount <input path> <result path>");
+			return;
+		}
+		
+		final String inputPath = args[0];
+		final String outputPath = args[1];
+		
+		final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
+		env.setDegreeOfParallelism(1);
+		
+		// Set up the Hadoop Input Format
+		HadoopInputFormat<LongWritable, Text> hadoopInputFormat = new HadoopInputFormat<LongWritable, Text>(new TextInputFormat(), LongWritable.class, Text.class, new JobConf());
+		TextInputFormat.addInputPath(hadoopInputFormat.getJobConf(), new Path(inputPath));
+		
+		// Create a Stratosphere job with it
+		DataSet<Tuple2<LongWritable, Text>> text = env.createInput(hadoopInputFormat);
+		
+		// Tokenize the line and convert from Writable "Text" to String for better handling
+		DataSet<Tuple2<String, Integer>> words = text.flatMap(new Tokenizer());
+		
+		// Sum up the words
+		DataSet<Tuple2<String, Integer>> result = words.groupBy(0).aggregate(Aggregations.SUM, 1);
+		
+		// Convert String back to Writable "Text" for use with Hadoop Output Format
+		DataSet<Tuple2<Text, IntWritable>> hadoopResult = result.map(new HadoopDatatypeMapper());
+		
+		// Set up Hadoop Output Format
+		HadoopOutputFormat<Text, IntWritable> hadoopOutputFormat = new HadoopOutputFormat<Text, IntWritable>(new TextOutputFormat<Text, IntWritable>(), new JobConf());
+		hadoopOutputFormat.getJobConf().set("mapred.textoutputformat.separator", " ");
+		TextOutputFormat.setOutputPath(hadoopOutputFormat.getJobConf(), new Path(outputPath));
+		
+		// Output & Execute
+		hadoopResult.output(hadoopOutputFormat);
+		env.execute("Word Count");
+	}
+	
+	/**
+	 * Splits a line into words and converts Hadoop Writables into normal Java data types.
+	 */
+	public static final class Tokenizer extends FlatMapFunction<Tuple2<LongWritable, Text>, Tuple2<String, Integer>> {
+		
+		@Override
+		public void flatMap(Tuple2<LongWritable, Text> value, Collector<Tuple2<String, Integer>> out) {
+			// normalize and split the line
+			String line = value.f1.toString();
+			String[] tokens = line.toLowerCase().split("\\W+");
+			
+			// emit the pairs
+			for (String token : tokens) {
+				if (token.length() > 0) {
+					out.collect(new Tuple2<String, Integer>(token, 1));
+				}
+			}
+		}
+	}
+	
+	/**
+	 * Converts Java data types to Hadoop Writables.
+	 */
+	public static final class HadoopDatatypeMapper extends MapFunction<Tuple2<String, Integer>, Tuple2<Text, IntWritable>> {
+		
+		@Override
+		public Tuple2<Text, IntWritable> map(Tuple2<String, Integer> value) throws Exception {
+			return new Tuple2<Text, IntWritable>(new Text(value.f0), new IntWritable(value.f1));
+		}
+		
+	}
+	
+}

http://git-wip-us.apache.org/repos/asf/incubator-flink/blob/a65b7591/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/record/HadoopDataSink.java
----------------------------------------------------------------------
diff --git a/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/record/HadoopDataSink.java b/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/record/HadoopDataSink.java
new file mode 100644
index 0000000..2d0e052
--- /dev/null
+++ b/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/record/HadoopDataSink.java
@@ -0,0 +1,102 @@
+/***********************************************************************************************************************
+ * Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ **********************************************************************************************************************/
+
+package eu.stratosphere.hadoopcompatibility.mapred.record;
+
+import java.util.List;
+
+import eu.stratosphere.types.Record;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.OutputFormat;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+
+import eu.stratosphere.api.java.record.operators.GenericDataSink;
+import eu.stratosphere.api.common.operators.Operator;
+import eu.stratosphere.compiler.contextcheck.Validatable;
+import eu.stratosphere.hadoopcompatibility.mapred.record.datatypes.DefaultStratosphereTypeConverter;
+import eu.stratosphere.hadoopcompatibility.mapred.record.datatypes.StratosphereTypeConverter;
+
+/**
+ * The HadoopDataSink is a generic wrapper for all Hadoop OutputFormats.
+ *
+ * Example usage:
+ * <pre>
+ * 		HadoopDataSink out = new HadoopDataSink(new org.apache.hadoop.mapred.TextOutputFormat<Text, IntWritable>(), new JobConf(), "Hadoop TextOutputFormat",reducer, Text.class,IntWritable.class);
+ *		org.apache.hadoop.mapred.TextOutputFormat.setOutputPath(out.getJobConf(), new Path(output));
+ * </pre>
+ *
+ * Note that it is possible to provide a custom data type converter.
+ *
+ * The HadoopDataSink provides a default converter: {@link eu.stratosphere.hadoopcompatibility.mapred.record.datatypes.DefaultStratosphereTypeConverter}
+ **/
+public class HadoopDataSink<K,V> extends GenericDataSink implements Validatable {
+
+	private static String DEFAULT_NAME = "<Unnamed Hadoop Data Sink>";
+
+	private JobConf jobConf;
+
+	public HadoopDataSink(OutputFormat<K,V> hadoopFormat, JobConf jobConf, String name, Operator<Record> input, StratosphereTypeConverter<K,V> conv, Class<K> keyClass, Class<V> valueClass) {
+		this(hadoopFormat, jobConf, name, ImmutableList.<Operator<Record>>of(input), conv, keyClass, valueClass);
+	}
+
+	public HadoopDataSink(OutputFormat<K,V> hadoopFormat, JobConf jobConf, String name, Operator<Record> input, Class<K> keyClass, Class<V> valueClass) {
+		this(hadoopFormat, jobConf, name, input, new DefaultStratosphereTypeConverter<K, V>(keyClass, valueClass), keyClass, valueClass);
+	}
+
+	public HadoopDataSink(OutputFormat<K,V> hadoopFormat, JobConf jobConf, Operator<Record> input, Class<K> keyClass, Class<V> valueClass) {
+		this(hadoopFormat, jobConf, DEFAULT_NAME, input, new DefaultStratosphereTypeConverter<K, V>(keyClass, valueClass), keyClass, valueClass);
+	}
+
+	public HadoopDataSink(OutputFormat<K,V> hadoopFormat, Operator<Record> input, Class<K> keyClass, Class<V> valueClass) {
+		this(hadoopFormat, new JobConf(), DEFAULT_NAME, input, new DefaultStratosphereTypeConverter<K, V>(keyClass, valueClass), keyClass, valueClass);
+	}
+
+
+
+	@SuppressWarnings("deprecation")
+	public HadoopDataSink(OutputFormat<K,V> hadoopFormat, JobConf jobConf, String name, List<Operator<Record>> input, StratosphereTypeConverter<K,V> conv, Class<K> keyClass, Class<V> valueClass) {
+		super(new HadoopRecordOutputFormat<K,V>(hadoopFormat, jobConf, conv),input, name);
+		Preconditions.checkNotNull(hadoopFormat);
+		Preconditions.checkNotNull(jobConf);
+		this.name = name;
+		this.jobConf = jobConf;
+		jobConf.setOutputKeyClass(keyClass);
+		jobConf.setOutputValueClass(valueClass);
+	}
+
+	public HadoopDataSink(OutputFormat<K,V> hadoopFormat, JobConf jobConf, String name, List<Operator<Record>> input, Class<K> keyClass, Class<V> valueClass) {
+		this(hadoopFormat, jobConf, name, input, new DefaultStratosphereTypeConverter<K, V>(keyClass, valueClass), keyClass, valueClass);
+	}
+
+	public HadoopDataSink(OutputFormat<K,V> hadoopFormat, JobConf jobConf, List<Operator<Record>> input, Class<K> keyClass, Class<V> valueClass) {
+		this(hadoopFormat, jobConf, DEFAULT_NAME, input, new DefaultStratosphereTypeConverter<K, V>(keyClass, valueClass), keyClass, valueClass);
+	}
+
+	public HadoopDataSink(OutputFormat<K,V> hadoopFormat, List<Operator<Record>> input, Class<K> keyClass, Class<V> valueClass) {
+		this(hadoopFormat, new JobConf(), DEFAULT_NAME, input, new DefaultStratosphereTypeConverter<K, V>(keyClass, valueClass), keyClass, valueClass);
+	}
+
+	public JobConf getJobConf() {
+		return this.jobConf;
+	}
+
+	@Override
+	public void check() {
+		// see for more details https://github.com/stratosphere/stratosphere/pull/531
+		Preconditions.checkNotNull(FileOutputFormat.getOutputPath(jobConf), "The HadoopDataSink currently expects a correct outputPath.");
+	}
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-flink/blob/a65b7591/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/record/HadoopDataSource.java
----------------------------------------------------------------------
diff --git a/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/record/HadoopDataSource.java b/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/record/HadoopDataSource.java
new file mode 100644
index 0000000..f8f2120
--- /dev/null
+++ b/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/record/HadoopDataSource.java
@@ -0,0 +1,81 @@
+/***********************************************************************************************************************
+ * Copyright (C) 2010-2014 by the Stratosphere project (http://stratosphere.eu)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ **********************************************************************************************************************/
+
+package eu.stratosphere.hadoopcompatibility.mapred.record;
+
+
+import org.apache.hadoop.mapred.InputFormat;
+import org.apache.hadoop.mapred.JobConf;
+
+import com.google.common.base.Preconditions;
+
+import eu.stratosphere.api.java.record.operators.GenericDataSource;
+import eu.stratosphere.hadoopcompatibility.mapred.record.datatypes.DefaultHadoopTypeConverter;
+import eu.stratosphere.hadoopcompatibility.mapred.record.datatypes.HadoopTypeConverter;
+
+
+
+/**
+ * The HadoopDataSource is a generic wrapper for all Hadoop InputFormats.
+ * 
+ * Example usage:
+ * <pre>
+ * 		HadoopDataSource source = new HadoopDataSource(new org.apache.hadoop.mapred.TextInputFormat(), new JobConf(), "Input Lines");
+ *		org.apache.hadoop.mapred.TextInputFormat.addInputPath(source.getJobConf(), new Path(dataInput));
+ * </pre>
+ * 
+ * Note that it is possible to provide custom data type converter.
+ * 
+ * The HadoopDataSource provides two different standard converters:
+ * * WritableWrapperConverter: Converts Hadoop types to a record that contains a WritableComparableWrapper (key) and a WritableWrapper (value)
+ * * DefaultHadoopTypeConverter: Converts the standard Hadoop types (LongWritable, Text) to Stratosphere's standard types.
+ *
+ */
+public class HadoopDataSource<K,V> extends GenericDataSource<HadoopRecordInputFormat<K,V>> {
+
+	/** Name used when the caller does not supply one. */
+	private static final String DEFAULT_NAME = "<Unnamed Hadoop Data Source>";
+
+	/** The Hadoop job configuration that is also handed to the wrapped input format. */
+	private JobConf jobConf;
+
+	/**
+	 * Creates a data source that reads through the given Hadoop input format and turns
+	 * each key/value pair into a record using the given converter.
+	 *
+	 * @param hadoopFormat Implementation of a Hadoop input format
+	 * @param jobConf JobConf object (Hadoop)
+	 * @param name Name of the DataSource
+	 * @param conv Type converter to use, e.g. a {@link DefaultHadoopTypeConverter} or a custom implementation
+	 * @throws NullPointerException if hadoopFormat, jobConf or conv is null
+	 */
+	public HadoopDataSource(InputFormat<K,V> hadoopFormat, JobConf jobConf, String name, HadoopTypeConverter<K,V> conv) {
+		// super(...) has to be the first statement, so the arguments are validated inline.
+		// Checking them afterwards would only run once the values had already been used.
+		super(new HadoopRecordInputFormat<K,V>(
+				Preconditions.checkNotNull(hadoopFormat, "The Hadoop input format must not be null."),
+				Preconditions.checkNotNull(jobConf, "The JobConf must not be null."),
+				Preconditions.checkNotNull(conv, "The type converter must not be null.")), name);
+		this.name = name;
+		this.jobConf = jobConf;
+	}
+
+	/**
+	 * Creates a named data source that uses a {@link DefaultHadoopTypeConverter}.
+	 *
+	 * @param hadoopFormat Implementation of a Hadoop input format
+	 * @param jobConf JobConf object (Hadoop)
+	 * @param name Name of the DataSource
+	 */
+	public HadoopDataSource(InputFormat<K,V> hadoopFormat, JobConf jobConf, String name) {
+		this(hadoopFormat, jobConf, name, new DefaultHadoopTypeConverter<K,V>() );
+	}
+
+	/**
+	 * Creates a data source with the default name that uses a {@link DefaultHadoopTypeConverter}.
+	 *
+	 * @param hadoopFormat Implementation of a Hadoop input format
+	 * @param jobConf JobConf object (Hadoop)
+	 */
+	public HadoopDataSource(InputFormat<K,V> hadoopFormat, JobConf jobConf) {
+		this(hadoopFormat, jobConf, DEFAULT_NAME);
+	}
+
+	/**
+	 * Creates a data source with the default name, a freshly created {@link JobConf}
+	 * and a {@link DefaultHadoopTypeConverter}.
+	 *
+	 * @param hadoopFormat Implementation of a Hadoop input format
+	 */
+	public HadoopDataSource(InputFormat<K,V> hadoopFormat) {
+		this(hadoopFormat, new JobConf(), DEFAULT_NAME);
+	}
+
+	/**
+	 * Returns the Hadoop job configuration held by this data source.
+	 *
+	 * @return the JobConf passed to (or created by) the constructor
+	 */
+	public JobConf getJobConf() {
+		return this.jobConf;
+	}
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-flink/blob/a65b7591/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/record/HadoopRecordInputFormat.java
----------------------------------------------------------------------
diff --git a/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/record/HadoopRecordInputFormat.java b/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/record/HadoopRecordInputFormat.java
new file mode 100644
index 0000000..7c47aa8
--- /dev/null
+++ b/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/record/HadoopRecordInputFormat.java
@@ -0,0 +1,167 @@
+/***********************************************************************************************************************
+ * Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ **********************************************************************************************************************/
+
+package eu.stratosphere.hadoopcompatibility.mapred.record;
+
+import java.io.IOException;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordReader;
+import org.apache.hadoop.util.ReflectionUtils;
+
+import eu.stratosphere.api.common.io.InputFormat;
+import eu.stratosphere.api.common.io.statistics.BaseStatistics;
+import eu.stratosphere.configuration.Configuration;
+import eu.stratosphere.hadoopcompatibility.mapred.record.datatypes.HadoopTypeConverter;
+import eu.stratosphere.hadoopcompatibility.mapred.utils.HadoopUtils;
+import eu.stratosphere.hadoopcompatibility.mapred.wrapper.HadoopDummyReporter;
+import eu.stratosphere.hadoopcompatibility.mapred.wrapper.HadoopInputSplit;
+import eu.stratosphere.types.Record;
+
+public class HadoopRecordInputFormat<K, V> implements InputFormat<Record, HadoopInputSplit> {
+
+	private static final long serialVersionUID = 1L;
+
+	public org.apache.hadoop.mapred.InputFormat<K, V> hadoopInputFormat;
+	public HadoopTypeConverter<K,V> converter;
+	private String hadoopInputFormatName;
+	public JobConf jobConf;
+	public transient K key;
+	public transient V value;
+	public RecordReader<K, V> recordReader;
+	private boolean fetched = false;
+	private boolean hasNext;
+		
+	public HadoopRecordInputFormat() {
+		super();
+	}
+	
+	public HadoopRecordInputFormat(org.apache.hadoop.mapred.InputFormat<K,V> hadoopInputFormat, JobConf job, HadoopTypeConverter<K,V> conv) {
+		super();
+		this.hadoopInputFormat = hadoopInputFormat;
+		this.hadoopInputFormatName = hadoopInputFormat.getClass().getName();
+		this.converter = conv;
+		HadoopUtils.mergeHadoopConf(job);
+		this.jobConf = job;
+	}
+
+	@Override
+	public void configure(Configuration parameters) {
+		
+	}
+
+	@Override
+	public BaseStatistics getStatistics(BaseStatistics cachedStatistics) throws IOException {
+		return null;
+	}
+
+	@Override
+	public HadoopInputSplit[] createInputSplits(int minNumSplits)
+			throws IOException {
+		org.apache.hadoop.mapred.InputSplit[] splitArray = hadoopInputFormat.getSplits(jobConf, minNumSplits);
+		HadoopInputSplit[] hiSplit = new HadoopInputSplit[splitArray.length];
+		for(int i=0;i<splitArray.length;i++){
+			hiSplit[i] = new HadoopInputSplit(splitArray[i], jobConf);
+		}
+		return hiSplit;
+	}
+
+	@Override
+	public Class<? extends HadoopInputSplit> getInputSplitType() {
+		return HadoopInputSplit.class;
+	}
+
+	@Override
+	public void open(HadoopInputSplit split) throws IOException {
+		this.recordReader = this.hadoopInputFormat.getRecordReader(split.getHadoopInputSplit(), jobConf, new HadoopDummyReporter());
+		key = this.recordReader.createKey();
+		value = this.recordReader.createValue();
+		this.fetched = false;
+	}
+
+	private void fetchNext() throws IOException {
+		hasNext = this.recordReader.next(key, value);
+		fetched = true;
+	}
+	
+	@Override
+	public boolean reachedEnd() throws IOException {
+		if(!fetched) {
+			fetchNext();
+		}
+		return !hasNext;
+	}
+
+	@Override
+	public Record nextRecord(Record record) throws IOException {
+		if(!fetched) {
+			fetchNext();
+		}
+		if(!hasNext) {
+			return null;
+		}
+		converter.convert(record, key, value);
+		fetched = false;
+		return record;
+	}
+
+	@Override
+	public void close() throws IOException {
+		this.recordReader.close();
+	}
+	
+	/**
+	 * Custom serialization methods.
+	 *  @see http://docs.oracle.com/javase/7/docs/api/java/io/Serializable.html
+	 */
+	private void writeObject(ObjectOutputStream out) throws IOException {
+		out.writeUTF(hadoopInputFormatName);
+		jobConf.write(out);
+		out.writeObject(converter);
+	}
+
+	@SuppressWarnings("unchecked")
+	private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
+		hadoopInputFormatName = in.readUTF();
+		if(jobConf == null) {
+			jobConf = new JobConf();
+		}
+		jobConf.readFields(in);
+		try {
+			this.hadoopInputFormat = (org.apache.hadoop.mapred.InputFormat<K,V>) Class.forName(this.hadoopInputFormatName).newInstance();
+		} catch (Exception e) {
+			throw new RuntimeException("Unable to instantiate the hadoop input format", e);
+		}
+		ReflectionUtils.setConf(hadoopInputFormat, jobConf);
+		converter = (HadoopTypeConverter<K,V>) in.readObject();
+	}
+	
+	public void setJobConf(JobConf job) {
+		this.jobConf = job;
+	}
+		
+
+	public org.apache.hadoop.mapred.InputFormat<K,V> getHadoopInputFormat() {
+		return hadoopInputFormat;
+	}
+	
+	public void setHadoopInputFormat(org.apache.hadoop.mapred.InputFormat<K,V> hadoopInputFormat) {
+		this.hadoopInputFormat = hadoopInputFormat;
+	}
+	
+	public JobConf getJobConf() {
+		return jobConf;
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-flink/blob/a65b7591/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/record/HadoopRecordOutputFormat.java
----------------------------------------------------------------------
diff --git a/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/record/HadoopRecordOutputFormat.java b/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/record/HadoopRecordOutputFormat.java
new file mode 100644
index 0000000..edd02a1
--- /dev/null
+++ b/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/record/HadoopRecordOutputFormat.java
@@ -0,0 +1,151 @@
+/***********************************************************************************************************************
+ * Copyright (C) 2010-2014 by the Stratosphere project (http://stratosphere.eu)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ **********************************************************************************************************************/
+
+package eu.stratosphere.hadoopcompatibility.mapred.record;
+
+import java.io.IOException;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordWriter;
+import org.apache.hadoop.mapred.TaskAttemptID;
+import org.apache.hadoop.util.ReflectionUtils;
+
+import eu.stratosphere.api.common.io.OutputFormat;
+import eu.stratosphere.configuration.Configuration;
+import eu.stratosphere.hadoopcompatibility.mapred.record.datatypes.HadoopFileOutputCommitter;
+import eu.stratosphere.hadoopcompatibility.mapred.record.datatypes.StratosphereTypeConverter;
+import eu.stratosphere.hadoopcompatibility.mapred.utils.HadoopUtils;
+import eu.stratosphere.hadoopcompatibility.mapred.wrapper.HadoopDummyProgressable;
+import eu.stratosphere.hadoopcompatibility.mapred.wrapper.HadoopDummyReporter;
+import eu.stratosphere.types.Record;
+
+
+/**
+ * Stratosphere output format that writes {@link Record}s through a wrapped Hadoop
+ * (mapred API) output format. Each record is split into a Hadoop key and value by a
+ * {@link StratosphereTypeConverter}; task output is committed with a
+ * {@link HadoopFileOutputCommitter}.
+ *
+ * Serialization is customized: only the class name of the Hadoop output format, the
+ * JobConf, the converter and the committer are written; the Hadoop output format is
+ * re-instantiated through its no-argument constructor when the object is read back.
+ */
+public class HadoopRecordOutputFormat<K,V> implements OutputFormat<Record> {
+
+	private static final long serialVersionUID = 1L;
+
+	/** Number of digits of the task part of the attempt id written by open(). */
+	private static final int TASK_ID_DIGITS = 6;
+
+	public JobConf jobConf;
+
+	public org.apache.hadoop.mapred.OutputFormat<K,V> hadoopOutputFormat;
+
+	private String hadoopOutputFormatName;
+
+	public RecordWriter<K,V> recordWriter;
+
+	public StratosphereTypeConverter<K,V> converter;
+
+	public HadoopFileOutputCommitter fileOutputCommitterWrapper;
+
+	/**
+	 * @param hadoopFormat the Hadoop output format to wrap
+	 * @param job the Hadoop job configuration; it is passed to HadoopUtils.mergeHadoopConf before being stored
+	 * @param conv converter that extracts the Hadoop key and value from a record
+	 */
+	public HadoopRecordOutputFormat(org.apache.hadoop.mapred.OutputFormat<K,V> hadoopFormat, JobConf job, StratosphereTypeConverter<K,V> conv) {
+		super();
+		this.hadoopOutputFormat = hadoopFormat;
+		this.hadoopOutputFormatName = hadoopFormat.getClass().getName();
+		this.converter = conv;
+		this.fileOutputCommitterWrapper = new HadoopFileOutputCommitter();
+		HadoopUtils.mergeHadoopConf(job);
+		this.jobConf = job;
+	}
+
+	/**
+	 * Intentionally empty: all settings are carried by the JobConf.
+	 */
+	@Override
+	public void configure(Configuration parameters) {
+	}
+
+	/**
+	 * create the temporary output file for hadoop RecordWriter.
+	 * @param taskNumber The number of the parallel instance.
+	 * @param numTasks The number of parallel tasks.
+	 * @throws IOException if taskNumber + 1 has more than six characters, or if Hadoop fails
+	 */
+	@Override
+	public void open(int taskNumber, int numTasks) throws IOException {
+		this.fileOutputCommitterWrapper.setupJob(this.jobConf);
+
+		final String taskIdText = Integer.toString(taskNumber + 1);
+		if (taskIdText.length() > TASK_ID_DIGITS) {
+			throw new IOException("task id too large");
+		}
+		// Left-pad with zeros by hand. The previous String.format("%<n>s") approach built
+		// the format "%0s" for a six-digit id, which String.format rejects with an exception.
+		final StringBuilder paddedTaskId = new StringBuilder(TASK_ID_DIGITS);
+		for (int i = taskIdText.length(); i < TASK_ID_DIGITS; i++) {
+			paddedTaskId.append('0');
+		}
+		paddedTaskId.append(taskIdText);
+
+		final String attemptId = "attempt__0000_r_" + paddedTaskId + "_0";
+		this.jobConf.set("mapred.task.id", attemptId);
+		//compatible for hadoop 2.2.0, the temporary output directory is different from hadoop 1.2.1
+		this.jobConf.set("mapreduce.task.output.dir", this.fileOutputCommitterWrapper.getTempTaskOutputPath(this.jobConf, TaskAttemptID.forName(attemptId)).toString());
+
+		this.recordWriter = this.hadoopOutputFormat.getRecordWriter(null, this.jobConf, taskIdText, new HadoopDummyProgressable());
+	}
+
+
+	/**
+	 * Converts the record into a Hadoop key and value and hands both to the record writer.
+	 */
+	@Override
+	public void writeRecord(Record record) throws IOException {
+		K key = this.converter.convertKey(record);
+		V value = this.converter.convertValue(record);
+		this.recordWriter.write(key, value);
+	}
+
+	/**
+	 * commit the task by moving the output file out from the temporary directory.
+	 * @throws IOException
+	 */
+	@Override
+	public void close() throws IOException {
+		this.recordWriter.close(new HadoopDummyReporter());
+		// The attempt id is the one stored in the JobConf by open().
+		final TaskAttemptID attemptId = TaskAttemptID.forName(this.jobConf.get("mapred.task.id"));
+		if (this.fileOutputCommitterWrapper.needsTaskCommit(this.jobConf, attemptId)) {
+			this.fileOutputCommitterWrapper.commitTask(this.jobConf, attemptId);
+		}
+	//TODO: commitjob when all the tasks are finished
+	}
+
+
+	/**
+	 * Custom serialization methods.
+	 *  @see <a href="http://docs.oracle.com/javase/7/docs/api/java/io/Serializable.html">java.io.Serializable</a>
+	 */
+	private void writeObject(ObjectOutputStream out) throws IOException {
+		out.writeUTF(hadoopOutputFormatName);
+		jobConf.write(out);
+		out.writeObject(converter);
+		out.writeObject(fileOutputCommitterWrapper);
+	}
+
+	/**
+	 * Counterpart of writeObject: reads the fields in the same order and re-creates the
+	 * Hadoop output format from its class name via the no-argument constructor.
+	 */
+	@SuppressWarnings("unchecked")
+	private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
+		hadoopOutputFormatName = in.readUTF();
+		if(jobConf == null) {
+			jobConf = new JobConf();
+		}
+		jobConf.readFields(in);
+		try {
+			this.hadoopOutputFormat = (org.apache.hadoop.mapred.OutputFormat<K,V>) Class.forName(this.hadoopOutputFormatName).newInstance();
+		} catch (Exception e) {
+			throw new RuntimeException("Unable to instantiate the hadoop output format", e);
+		}
+		ReflectionUtils.setConf(hadoopOutputFormat, jobConf);
+		converter = (StratosphereTypeConverter<K,V>) in.readObject();
+		fileOutputCommitterWrapper = (HadoopFileOutputCommitter) in.readObject();
+	}
+
+
+	public void setJobConf(JobConf job) {
+		this.jobConf = job;
+	}
+
+	public JobConf getJobConf() {
+		return jobConf;
+	}
+
+	public org.apache.hadoop.mapred.OutputFormat<K,V> getHadoopOutputFormat() {
+		return hadoopOutputFormat;
+	}
+
+	public void setHadoopOutputFormat(org.apache.hadoop.mapred.OutputFormat<K,V> hadoopOutputFormat) {
+		this.hadoopOutputFormat = hadoopOutputFormat;
+	}
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-flink/blob/a65b7591/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/record/datatypes/DefaultHadoopTypeConverter.java
----------------------------------------------------------------------
diff --git a/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/record/datatypes/DefaultHadoopTypeConverter.java b/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/record/datatypes/DefaultHadoopTypeConverter.java
new file mode 100644
index 0000000..3832772
--- /dev/null
+++ b/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/record/datatypes/DefaultHadoopTypeConverter.java
@@ -0,0 +1,78 @@
+/***********************************************************************************************************************
+ * Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ **********************************************************************************************************************/
+
+package eu.stratosphere.hadoopcompatibility.mapred.record.datatypes;
+
+import org.apache.hadoop.io.BooleanWritable;
+import org.apache.hadoop.io.ByteWritable;
+import org.apache.hadoop.io.DoubleWritable;
+import org.apache.hadoop.io.FloatWritable;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Text;
+
+import eu.stratosphere.types.BooleanValue;
+import eu.stratosphere.types.ByteValue;
+import eu.stratosphere.types.DoubleValue;
+import eu.stratosphere.types.FloatValue;
+import eu.stratosphere.types.IntValue;
+import eu.stratosphere.types.LongValue;
+import eu.stratosphere.types.NullValue;
+import eu.stratosphere.types.Record;
+import eu.stratosphere.types.StringValue;
+import eu.stratosphere.types.Value;
+
+
+/**
+ * Converter for the default hadoop writables.
+ * Key will be in field 0, Value in field 1 of a Stratosphere Record.
+ */
+public class DefaultHadoopTypeConverter<K, V> implements HadoopTypeConverter<K, V> {
+	private static final long serialVersionUID = 1L;
+
+	/**
+	 * Stores the converted key in field 0 and the converted value in field 1 of the record.
+	 *
+	 * @param stratosphereRecord the record to fill
+	 * @param hadoopKey the Hadoop key to convert
+	 * @param hadoopValue the Hadoop value to convert
+	 */
+	@Override
+	public void convert(Record stratosphereRecord, K hadoopKey, V hadoopValue) {
+		stratosphereRecord.setField(0, convert(hadoopKey));
+		stratosphereRecord.setField(1, convert(hadoopValue));
+	}
+
+	/**
+	 * Maps a single Hadoop writable to the corresponding Stratosphere value type.
+	 *
+	 * @param hadoopType the Hadoop object to convert
+	 * @return a new Stratosphere value holding the same content, or the NullValue instance for a NullWritable
+	 * @throws NullPointerException if hadoopType is null
+	 * @throws RuntimeException if the type of hadoopType is not one of the supported writables
+	 */
+	protected Value convert(Object hadoopType) {
+		if (hadoopType == null) {
+			// A null input used to fail while the "unable to convert" message was being built,
+			// leaving a NullPointerException without any hint. Same exception type, clear message.
+			throw new NullPointerException("Unable to convert a null Hadoop key or value to Stratosphere.");
+		}
+		if(hadoopType instanceof org.apache.hadoop.io.LongWritable ) {
+			return new LongValue(((LongWritable)hadoopType).get());
+		}
+		if(hadoopType instanceof org.apache.hadoop.io.Text) {
+			return new StringValue(((Text)hadoopType).toString());
+		}
+		if(hadoopType instanceof org.apache.hadoop.io.IntWritable) {
+			return new IntValue(((IntWritable)hadoopType).get());
+		}
+		if(hadoopType instanceof org.apache.hadoop.io.FloatWritable) {
+			return new FloatValue(((FloatWritable)hadoopType).get());
+		}
+		if(hadoopType instanceof org.apache.hadoop.io.DoubleWritable) {
+			return new DoubleValue(((DoubleWritable)hadoopType).get());
+		}
+		if(hadoopType instanceof org.apache.hadoop.io.BooleanWritable) {
+			return new BooleanValue(((BooleanWritable)hadoopType).get());
+		}
+		if(hadoopType instanceof org.apache.hadoop.io.ByteWritable) {
+			return new ByteValue(((ByteWritable)hadoopType).get());
+		}
+		if (hadoopType instanceof NullWritable) {
+			return NullValue.getInstance();
+		}
+
+		throw new RuntimeException("Unable to convert Hadoop type ("+hadoopType.getClass().getCanonicalName()+") to Stratosphere.");
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-flink/blob/a65b7591/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/record/datatypes/DefaultStratosphereTypeConverter.java
----------------------------------------------------------------------
diff --git a/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/record/datatypes/DefaultStratosphereTypeConverter.java b/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/record/datatypes/DefaultStratosphereTypeConverter.java
new file mode 100644
index 0000000..a1850cc
--- /dev/null
+++ b/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/record/datatypes/DefaultStratosphereTypeConverter.java
@@ -0,0 +1,91 @@
+/***********************************************************************************************************************
+ * Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ **********************************************************************************************************************/
+
+package eu.stratosphere.hadoopcompatibility.mapred.record.datatypes;
+
+import org.apache.hadoop.io.BooleanWritable;
+import org.apache.hadoop.io.ByteWritable;
+import org.apache.hadoop.io.DoubleWritable;
+import org.apache.hadoop.io.FloatWritable;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+
+import eu.stratosphere.types.BooleanValue;
+import eu.stratosphere.types.ByteValue;
+import eu.stratosphere.types.DoubleValue;
+import eu.stratosphere.types.FloatValue;
+import eu.stratosphere.types.IntValue;
+import eu.stratosphere.types.LongValue;
+import eu.stratosphere.types.Record;
+import eu.stratosphere.types.StringValue;
+
+/**
+ * Converter Stratosphere Record into the default hadoop writables.
+ *
+ */
+public class DefaultStratosphereTypeConverter<K,V> implements StratosphereTypeConverter<K,V> {
+	private static final long serialVersionUID = 1L;
+
+	/** Hadoop class that field 0 of a record is converted to. */
+	private final Class<K> keyClass;
+	/** Hadoop class that field 1 of a record is converted to. */
+	private final Class<V> valueClass;
+
+	/**
+	 * @param keyClass Hadoop writable class to produce for the key
+	 * @param valueClass Hadoop writable class to produce for the value
+	 */
+	public DefaultStratosphereTypeConverter(Class<K> keyClass, Class<V> valueClass) {
+		this.keyClass= keyClass;
+		this.valueClass = valueClass;
+	}
+
+	/**
+	 * Converts field 0 of the record into the key class.
+	 *
+	 * @return the Hadoop key, or null if the record has no fields
+	 */
+	@Override
+	public K convertKey(Record stratosphereRecord) {
+		if(stratosphereRecord.getNumFields() > 0) {
+			return convert(stratosphereRecord, 0, this.keyClass);
+		} else {
+			return null;
+		}
+	}
+
+	/**
+	 * Converts field 1 of the record into the value class.
+	 *
+	 * @return the Hadoop value, or null if the record has fewer than two fields
+	 */
+	@Override
+	public V convertValue(Record stratosphereRecord) {
+		if(stratosphereRecord.getNumFields() > 1) {
+			return convert(stratosphereRecord, 1, this.valueClass);
+		} else {
+			return null;
+		}
+	}
+
+	/**
+	 * Reads the field at the given position with the Stratosphere type that corresponds
+	 * to the requested Hadoop class and wraps its content in a new Hadoop writable.
+	 *
+	 * @param stratosphereType the record to read from
+	 * @param pos position of the field inside the record
+	 * @param hadoopType the Hadoop writable class to produce
+	 * @return a new writable of the requested class (the shared instance for NullWritable)
+	 * @throws RuntimeException if hadoopType is not one of the supported writable classes
+	 */
+	@SuppressWarnings("unchecked")
+	private<T> T convert(Record stratosphereType, int pos, Class<T> hadoopType) {
+		if(hadoopType == LongWritable.class ) {
+			return (T) new LongWritable((stratosphereType.getField(pos, LongValue.class)).getValue());
+		}
+		if(hadoopType == org.apache.hadoop.io.Text.class) {
+			return (T) new Text((stratosphereType.getField(pos, StringValue.class)).getValue());
+		}
+		if(hadoopType == org.apache.hadoop.io.IntWritable.class) {
+			return (T) new IntWritable((stratosphereType.getField(pos, IntValue.class)).getValue());
+		}
+		if(hadoopType == org.apache.hadoop.io.FloatWritable.class) {
+			return (T) new FloatWritable((stratosphereType.getField(pos, FloatValue.class)).getValue());
+		}
+		if(hadoopType == org.apache.hadoop.io.DoubleWritable.class) {
+			return (T) new DoubleWritable((stratosphereType.getField(pos, DoubleValue.class)).getValue());
+		}
+		if(hadoopType == org.apache.hadoop.io.BooleanWritable.class) {
+			return (T) new BooleanWritable((stratosphereType.getField(pos, BooleanValue.class)).getValue());
+		}
+		if(hadoopType == org.apache.hadoop.io.ByteWritable.class) {
+			return (T) new ByteWritable((stratosphereType.getField(pos, ByteValue.class)).getValue());
+		}
+		if(hadoopType == org.apache.hadoop.io.NullWritable.class) {
+			// NullWritable carries no data, so the field content is irrelevant.
+			// Handled here for symmetry with DefaultHadoopTypeConverter.
+			return (T) org.apache.hadoop.io.NullWritable.get();
+		}
+
+		// Report the requested Hadoop class: the class of the record itself is always
+		// Record and says nothing about what could not be converted.
+		final String requestedType = (hadoopType == null) ? "null" : hadoopType.getCanonicalName();
+		throw new RuntimeException("Unable to convert field " + pos + " of a Stratosphere record to Hadoop type (" + requestedType + ").");
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-flink/blob/a65b7591/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/record/datatypes/HadoopFileOutputCommitter.java
----------------------------------------------------------------------
diff --git a/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/record/datatypes/HadoopFileOutputCommitter.java b/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/record/datatypes/HadoopFileOutputCommitter.java
new file mode 100644
index 0000000..8f46c00
--- /dev/null
+++ b/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/record/datatypes/HadoopFileOutputCommitter.java
@@ -0,0 +1,191 @@
+/***********************************************************************************************************************
+ * Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ **********************************************************************************************************************/
+
+package eu.stratosphere.hadoopcompatibility.mapred.record.datatypes;
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.net.URI;
+
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapred.FileOutputCommitter;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.TaskAttemptID;
+import org.apache.hadoop.util.StringUtils;
+
+/**
+ * Hadoop 1.2.1 {@link org.apache.hadoop.mapred.FileOutputCommitter} takes {@link org.apache.hadoop.mapred.JobContext}
+ * as input parameter. However JobContext class is package private, and in Hadoop 2.2.0 it's public.
+ * This class takes {@link org.apache.hadoop.mapred.JobConf} as input instead of JobContext in order to setup and commit tasks.
+ */
+public class HadoopFileOutputCommitter extends FileOutputCommitter implements Serializable {
+
+	private static final long serialVersionUID = 1L;
+	
+	static final String SUCCESSFUL_JOB_OUTPUT_DIR_MARKER =
+		"mapreduce.fileoutputcommitter.marksuccessfuljobs";
+
+	public void setupJob(JobConf conf) throws IOException {
+		Path outputPath = FileOutputFormat.getOutputPath(conf);
+		if (outputPath != null) {
+			Path tmpDir = new Path(outputPath, FileOutputCommitter.TEMP_DIR_NAME);
+			FileSystem fileSys = tmpDir.getFileSystem(conf);
+			if (!fileSys.mkdirs(tmpDir)) {
+				LOG.error("Mkdirs failed to create " + tmpDir.toString());
+			}
+		}
+	}
+
+	private static boolean getOutputDirMarking(JobConf conf) {
+		return conf.getBoolean(SUCCESSFUL_JOB_OUTPUT_DIR_MARKER,
+			true);
+	}
+
+	private void markSuccessfulOutputDir(JobConf conf)
+		throws IOException {
+		Path outputPath = FileOutputFormat.getOutputPath(conf);
+		if (outputPath != null) {
+			FileSystem fileSys = outputPath.getFileSystem(conf);
+			// create a file in the folder to mark it
+			if (fileSys.exists(outputPath)) {
+				Path filePath = new Path(outputPath, SUCCEEDED_FILE_NAME);
+				fileSys.create(filePath).close();
+			}
+		}
+	}
+
+	private Path getFinalPath(Path jobOutputDir, Path taskOutput,
+							Path taskOutputPath) throws IOException {
+		URI taskOutputUri = taskOutput.toUri();
+		URI relativePath = taskOutputPath.toUri().relativize(taskOutputUri);
+		if (taskOutputUri == relativePath) {//taskOutputPath is not a parent of taskOutput
+			throw new IOException("Can not get the relative path: base = " +
+				taskOutputPath + " child = " + taskOutput);
+		}
+		if (relativePath.getPath().length() > 0) {
+			return new Path(jobOutputDir, relativePath.getPath());
+		} else {
+			return jobOutputDir;
+		}
+	}
+	private void moveTaskOutputs(JobConf conf, TaskAttemptID taskAttemptID,
+								FileSystem fs,
+								Path jobOutputDir,
+								Path taskOutput)
+		throws IOException {
+		if (fs.isFile(taskOutput)) {
+			Path finalOutputPath = getFinalPath(jobOutputDir, taskOutput,
+				getTempTaskOutputPath(conf, taskAttemptID));
+			if (!fs.rename(taskOutput, finalOutputPath)) {
+				if (!fs.delete(finalOutputPath, true)) {
+					throw new IOException("Failed to delete earlier output of task: " +
+						taskAttemptID);
+				}
+				if (!fs.rename(taskOutput, finalOutputPath)) {
+					throw new IOException("Failed to save output of task: " +
+						taskAttemptID);
+				}
+			}
+			LOG.debug("Moved " + taskOutput + " to " + finalOutputPath);
+		} else if(fs.getFileStatus(taskOutput).isDir()) {
+			FileStatus[] paths = fs.listStatus(taskOutput);
+			Path finalOutputPath = getFinalPath(jobOutputDir, taskOutput,
+				getTempTaskOutputPath(conf, taskAttemptID));
+			fs.mkdirs(finalOutputPath);
+			if (paths != null) {
+				for (FileStatus path : paths) {
+					moveTaskOutputs(conf,taskAttemptID, fs, jobOutputDir, path.getPath());
+				}
+			}
+		}
+	}
+
+	public void commitTask(JobConf conf, TaskAttemptID taskAttemptID)
+		throws IOException {
+		Path taskOutputPath = getTempTaskOutputPath(conf, taskAttemptID);
+		if (taskOutputPath != null) {
+			FileSystem fs = taskOutputPath.getFileSystem(conf);
+			if (fs.exists(taskOutputPath)) {
+				Path jobOutputPath = taskOutputPath.getParent().getParent();
+				// Move the task outputs to their final place
+				moveTaskOutputs(conf,taskAttemptID, fs, jobOutputPath, taskOutputPath);
+				// Delete the temporary task-specific output directory
+				if (!fs.delete(taskOutputPath, true)) {
+					LOG.info("Failed to delete the temporary output" +
+						" directory of task: " + taskAttemptID + " - " + taskOutputPath);
+				}
+				LOG.info("Saved output of task '" + taskAttemptID + "' to " +
+					jobOutputPath);
+			}
+		}
+	}
+	public boolean needsTaskCommit(JobConf conf, TaskAttemptID taskAttemptID)
+		throws IOException {
+		try {
+			Path taskOutputPath = getTempTaskOutputPath(conf, taskAttemptID);
+			if (taskOutputPath != null) {
+				// Get the file-system for the task output directory
+				FileSystem fs = taskOutputPath.getFileSystem(conf);
+				// since task output path is created on demand,
+				// if it exists, task needs a commit
+				if (fs.exists(taskOutputPath)) {
+					return true;
+				}
+			}
+		} catch (IOException  ioe) {
+			throw ioe;
+		}
+		return false;
+	}
+
+	public Path getTempTaskOutputPath(JobConf conf, TaskAttemptID taskAttemptID) {
+		Path outputPath = FileOutputFormat.getOutputPath(conf);
+		if (outputPath != null) {
+			Path p = new Path(outputPath,
+				(FileOutputCommitter.TEMP_DIR_NAME + Path.SEPARATOR +
+					"_" + taskAttemptID.toString()));
+			try {
+				FileSystem fs = p.getFileSystem(conf);
+				return p.makeQualified(fs);
+			} catch (IOException ie) {
+				LOG.warn(StringUtils.stringifyException(ie));
+				return p;
+			}
+		}
+		return null;
+	}
+	public void cleanupJob(JobConf conf) throws IOException {
+		// do the clean up of temporary directory
+		Path outputPath = FileOutputFormat.getOutputPath(conf);
+		if (outputPath != null) {
+			Path tmpDir = new Path(outputPath, FileOutputCommitter.TEMP_DIR_NAME);
+			FileSystem fileSys = tmpDir.getFileSystem(conf);
+			if (fileSys.exists(tmpDir)) {
+				fileSys.delete(tmpDir, true);
+			}
+		} else {
+			LOG.warn("Output path is null in cleanup");
+		}
+	}
+
+	public void commitJob(JobConf conf) throws IOException {
+		cleanupJob(conf);
+		if (getOutputDirMarking(conf)) {
+			markSuccessfulOutputDir(conf);
+		}
+	}
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-flink/blob/a65b7591/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/record/datatypes/HadoopTypeConverter.java
----------------------------------------------------------------------
diff --git a/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/record/datatypes/HadoopTypeConverter.java b/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/record/datatypes/HadoopTypeConverter.java
new file mode 100644
index 0000000..83c14e6
--- /dev/null
+++ b/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/record/datatypes/HadoopTypeConverter.java
@@ -0,0 +1,36 @@
+/***********************************************************************************************************************
+ * Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ **********************************************************************************************************************/
+
+package eu.stratosphere.hadoopcompatibility.mapred.record.datatypes;
+
+import java.io.Serializable;
+
+import eu.stratosphere.types.Record;
+
+
+/**
+ * Contract for classes that convert a Hadoop key/value pair
+ * into Stratosphere's Record model.
+ *
+ * The converter must be Serializable.
+ *
+ * Stratosphere provides a DefaultHadoopTypeConverter. Custom implementations should
+ * chain the type converters.
+ *
+ * @param <K> the type of the Hadoop key
+ * @param <V> the type of the Hadoop value
+ */
+public interface HadoopTypeConverter<K, V> extends Serializable {
+
+	/**
+	 * Convert a Hadoop type to a Stratosphere type.
+	 *
+	 * @param stratosphereRecord the record passed in alongside the Hadoop pair; the method
+	 *        returns nothing, so the conversion result has to be stored in this record
+	 * @param hadoopKey the Hadoop key to convert
+	 * @param hadoopValue the Hadoop value to convert
+	 */
+	void convert(Record stratosphereRecord, K hadoopKey, V hadoopValue);
+}

http://git-wip-us.apache.org/repos/asf/incubator-flink/blob/a65b7591/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/record/datatypes/StratosphereTypeConverter.java
----------------------------------------------------------------------
diff --git a/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/record/datatypes/StratosphereTypeConverter.java b/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/record/datatypes/StratosphereTypeConverter.java
new file mode 100644
index 0000000..27d710d
--- /dev/null
+++ b/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/record/datatypes/StratosphereTypeConverter.java
@@ -0,0 +1,37 @@
+/***********************************************************************************************************************
+ * Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ **********************************************************************************************************************/
+
+package eu.stratosphere.hadoopcompatibility.mapred.record.datatypes;
+
+import java.io.Serializable;
+
+import eu.stratosphere.types.Record;
+
+/**
+ * Contract for classes that convert a Stratosphere Record
+ * into a Hadoop key and a Hadoop value.
+ *
+ * The converter must be Serializable.
+ *
+ * Stratosphere provides a DefaultStratosphereTypeConverter. Custom implementations should
+ * chain the type converters.
+ *
+ * @param <K> the type of the Hadoop key
+ * @param <V> the type of the Hadoop value
+ */
+public interface StratosphereTypeConverter<K, V> extends Serializable {
+
+	/**
+	 * Convert a Stratosphere type to a Hadoop type: derives the Hadoop key from the record.
+	 *
+	 * @param stratosphereRecord the record to convert
+	 * @return the Hadoop key for the given record
+	 */
+	K convertKey(Record stratosphereRecord);
+
+	/**
+	 * Convert a Stratosphere type to a Hadoop type: derives the Hadoop value from the record.
+	 *
+	 * @param stratosphereRecord the record to convert
+	 * @return the Hadoop value for the given record
+	 */
+	V convertValue(Record stratosphereRecord);
+}

http://git-wip-us.apache.org/repos/asf/incubator-flink/blob/a65b7591/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/record/datatypes/WritableComparableWrapper.java
----------------------------------------------------------------------
diff --git a/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/record/datatypes/WritableComparableWrapper.java b/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/record/datatypes/WritableComparableWrapper.java
new file mode 100644
index 0000000..767f539
--- /dev/null
+++ b/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/record/datatypes/WritableComparableWrapper.java
@@ -0,0 +1,35 @@
+/***********************************************************************************************************************
+ * Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ **********************************************************************************************************************/
+
+package eu.stratosphere.hadoopcompatibility.mapred.record.datatypes;
+
+import org.apache.hadoop.io.WritableComparable;
+
+import eu.stratosphere.types.Key;
+
+/**
+ * A {@link WritableWrapper} for Hadoop {@link WritableComparable} types that can be used
+ * as a Stratosphere {@link Key}. Ordering is delegated to the wrapped objects.
+ *
+ * @param <T> the wrapped Hadoop type
+ */
+public class WritableComparableWrapper<T extends WritableComparable<T>> extends WritableWrapper<T> implements Key<WritableComparableWrapper<T>> {
+	private static final long serialVersionUID = 1L;
+
+	/** Creates an empty wrapper. */
+	public WritableComparableWrapper() {
+		super();
+	}
+
+	/**
+	 * Creates a wrapper around the given object.
+	 *
+	 * @param toWrap the Hadoop object to wrap
+	 */
+	public WritableComparableWrapper(T toWrap) {
+		super(toWrap);
+	}
+
+	/**
+	 * Compares the wrapped object of this wrapper with the wrapped object of the other wrapper.
+	 *
+	 * @param o the wrapper to compare with
+	 * @return the result of <code>compareTo</code> on the wrapped objects
+	 */
+	@Override
+	public int compareTo(WritableComparableWrapper<T> o) {
+		final T mine = super.value();
+		final T theirs = o.value();
+		return mine.compareTo(theirs);
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-flink/blob/a65b7591/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/record/datatypes/WritableWrapper.java
----------------------------------------------------------------------
diff --git a/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/record/datatypes/WritableWrapper.java b/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/record/datatypes/WritableWrapper.java
new file mode 100644
index 0000000..d74eb74
--- /dev/null
+++ b/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/record/datatypes/WritableWrapper.java
@@ -0,0 +1,66 @@
+/***********************************************************************************************************************
+ * Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ **********************************************************************************************************************/
+
+package eu.stratosphere.hadoopcompatibility.mapred.record.datatypes;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.io.Writable;
+
+import eu.stratosphere.types.Value;
+import eu.stratosphere.util.InstantiationUtil;
+
+/**
+ * A Stratosphere {@link Value} that carries a Hadoop {@link Writable}.
+ *
+ * The serialized form is the class name of the wrapped object (written as a UTF string)
+ * followed by whatever the wrapped object's own <code>write</code> method emits. On
+ * <code>read</code> the class is looked up by that name, instantiated and asked to read
+ * its fields.
+ *
+ * @param <T> the wrapped Hadoop type
+ */
+public class WritableWrapper<T extends Writable> implements Value {
+	private static final long serialVersionUID = 2L;
+	
+	private T wrapped;
+	private String wrappedType;
+	// looked up lazily in read(); a class loader is runtime state and must never be serialized
+	private transient ClassLoader cl;
+	
+	/**
+	 * Creates an empty wrapper. It holds no object until {@link #read(DataInput)} is called.
+	 */
+	public WritableWrapper() {
+	}
+	
+	/**
+	 * Creates a wrapper around the given object.
+	 *
+	 * @param toWrap the Hadoop object to wrap, must not be <code>null</code>
+	 */
+	public WritableWrapper(T toWrap) {
+		wrapped = toWrap;
+		// Use the binary name: read() resolves it with Class.forName, which cannot load
+		// the canonical name of a nested class ("Outer.Inner" instead of "Outer$Inner").
+		// The canonical name is also null for anonymous and local classes.
+		wrappedType = toWrap.getClass().getName();
+	}
+
+	/**
+	 * @return the wrapped Hadoop object, or <code>null</code> if nothing has been wrapped or read yet
+	 */
+	public T value() {
+		return wrapped;
+	}
+	
+	/**
+	 * Writes the class name of the wrapped object followed by the object itself.
+	 *
+	 * @param out the output to write to
+	 * @throws IOException if writing fails
+	 */
+	@Override
+	public void write(DataOutput out) throws IOException {
+		out.writeUTF(wrappedType);
+		wrapped.write(out);
+	}
+
+	/**
+	 * Reads a class name, creates a new instance of that class and lets it read its fields.
+	 * The class is loaded with the context class loader of the thread that performs the
+	 * first read on this wrapper.
+	 *
+	 * @param in the input to read from
+	 * @throws IOException if reading fails
+	 * @throws RuntimeException if the class with the read name cannot be found
+	 */
+	@Override
+	public void read(DataInput in) throws IOException {
+		if(cl == null) {
+			cl = Thread.currentThread().getContextClassLoader();
+		}
+		wrappedType = in.readUTF();
+		try {
+			@SuppressWarnings("unchecked")
+			Class<T> wrClass = (Class<T>) Class.forName(wrappedType, true, cl).asSubclass(Writable.class);
+			wrapped = InstantiationUtil.instantiate(wrClass, Writable.class);
+		} catch (ClassNotFoundException e) {
+			throw new RuntimeException("Error creating the WritableWrapper", e);
+		}
+		wrapped.readFields(in);
+	}
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-flink/blob/a65b7591/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/record/datatypes/WritableWrapperConverter.java
----------------------------------------------------------------------
diff --git a/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/record/datatypes/WritableWrapperConverter.java b/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/record/datatypes/WritableWrapperConverter.java
new file mode 100644
index 0000000..2a42c51
--- /dev/null
+++ b/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/record/datatypes/WritableWrapperConverter.java
@@ -0,0 +1,40 @@
+/***********************************************************************************************************************
+ * Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ **********************************************************************************************************************/
+
+package eu.stratosphere.hadoopcompatibility.mapred.record.datatypes;
+
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
+
+import eu.stratosphere.types.Record;
+import eu.stratosphere.types.Value;
+
+/**
+ * A {@link HadoopTypeConverter} that does not translate the Hadoop types but wraps them:
+ * the key is stored as a {@link WritableComparableWrapper} in field 0 and the value
+ * as a {@link WritableWrapper} in field 1 of the record.
+ *
+ * @param <K> the type of the Hadoop key
+ * @param <V> the type of the Hadoop value
+ */
+@SuppressWarnings("rawtypes")
+public class WritableWrapperConverter<K extends WritableComparable, V extends Writable> implements HadoopTypeConverter<K,V> {
+	private static final long serialVersionUID = 1L;
+
+	/**
+	 * Wraps the Hadoop key and value and stores them in fields 0 and 1 of the record.
+	 *
+	 * @param stratosphereRecord the record that receives the wrapped key and value
+	 * @param hadoopKey the Hadoop key to wrap
+	 * @param hadoopValue the Hadoop value to wrap
+	 */
+	@SuppressWarnings("unchecked")
+	@Override
+	public void convert(Record stratosphereRecord, K hadoopKey, V hadoopValue) {
+		// the key is wrapped and stored before the value is touched
+		final Value wrappedKey = new WritableComparableWrapper(hadoopKey);
+		stratosphereRecord.setField(0, wrappedKey);
+
+		final Value wrappedValue = new WritableWrapper<V>(hadoopValue);
+		stratosphereRecord.setField(1, wrappedValue);
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-flink/blob/a65b7591/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/record/example/WordCount.java
----------------------------------------------------------------------
diff --git a/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/record/example/WordCount.java b/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/record/example/WordCount.java
new file mode 100644
index 0000000..e135ed5
--- /dev/null
+++ b/stratosphere-addons/hadoop-compatibility/src/main/java/eu/stratosphere/hadoopcompatibility/mapred/record/example/WordCount.java
@@ -0,0 +1,179 @@
+/***********************************************************************************************************************
+ * Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ **********************************************************************************************************************/
+
+package eu.stratosphere.hadoopcompatibility.mapred.record.example;
+
+import java.io.Serializable;
+import java.util.Iterator;
+import java.util.StringTokenizer;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.TextInputFormat;
+
+import eu.stratosphere.api.common.Plan;
+import eu.stratosphere.api.common.Program;
+import eu.stratosphere.api.common.ProgramDescription;
+import eu.stratosphere.api.java.record.functions.FunctionAnnotation.ConstantFields;
+import eu.stratosphere.api.java.record.functions.MapFunction;
+import eu.stratosphere.api.java.record.functions.ReduceFunction;
+import eu.stratosphere.api.java.record.io.CsvOutputFormat;
+import eu.stratosphere.api.java.record.operators.FileDataSink;
+import eu.stratosphere.api.java.record.operators.MapOperator;
+import eu.stratosphere.api.java.record.operators.ReduceOperator;
+import eu.stratosphere.api.java.record.operators.ReduceOperator.Combinable;
+import eu.stratosphere.client.LocalExecutor;
+import eu.stratosphere.hadoopcompatibility.mapred.record.HadoopDataSource;
+import eu.stratosphere.hadoopcompatibility.mapred.record.datatypes.WritableWrapperConverter;
+import eu.stratosphere.types.IntValue;
+import eu.stratosphere.types.Record;
+import eu.stratosphere.types.StringValue;
+import eu.stratosphere.util.Collector;
+
+/**
+ * Implements a word count which takes the input file and counts the number of
+ * the occurrences of each word in the file. 
+ * 
+ * <br /><br />
+ * 
+ * <b>Note</b>: This example uses the out dated Record API.
+ * It is recommended to use the new Java API.
+ * 
+ * @see eu.stratosphere.hadoopcompatibility.mapred.record.example.example.WordCount
+ */
+public class WordCount implements Program, ProgramDescription {
+	
+	private static final long serialVersionUID = 1L;
+
+
+	/**
+	 * Converts a Record containing one string in to multiple string/integer pairs.
+	 * The string is tokenized by whitespaces. For each token a new record is emitted,
+	 * where the token is the first field and an Integer(1) is the second field.
+	 */
+	public static class TokenizeLine extends MapFunction implements Serializable {
+		private static final long serialVersionUID = 1L;
+		
+		@Override
+		public void map(Record record, Collector<Record> collector) {
+			// get the first field (as type StringValue) from the record
+			String line = record.getField(1, StringValue.class).getValue();
+			// normalize the line
+			line = line.replaceAll("\\W+", " ").toLowerCase();
+			
+			// tokenize the line
+			StringTokenizer tokenizer = new StringTokenizer(line);
+			while (tokenizer.hasMoreTokens()) {
+				String word = tokenizer.nextToken();
+				
+				// we emit a (word, 1) pair 
+				collector.collect(new Record(new StringValue(word), new IntValue(1)));
+			}
+		}
+	}
+
+	/**
+	 * Sums up the counts for a certain given key. The counts are assumed to be at position <code>1</code>
+	 * in the record. The other fields are not modified.
+	 */
+	@Combinable
+	@ConstantFields(0)
+	public static class CountWords extends ReduceFunction implements Serializable {
+		
+		private static final long serialVersionUID = 1L;
+		
+		@Override
+		public void reduce(Iterator<Record> records, Collector<Record> out) throws Exception {
+			Record element = null;
+			int sum = 0;
+			while (records.hasNext()) {
+				element = records.next();
+				int cnt = element.getField(1, IntValue.class).getValue();
+				sum += cnt;
+			}
+
+			element.setField(1, new IntValue(sum));
+			out.collect(element);
+		}
+		
+		@Override
+		public void combine(Iterator<Record> records, Collector<Record> out) throws Exception {
+			// the logic is the same as in the reduce function, so simply call the reduce method
+			reduce(records, out);
+		}
+	}
+
+
+	@SuppressWarnings({ "rawtypes", "unchecked", "unused" })
+	@Override
+	public Plan getPlan(String... args) {
+		// parse job parameters
+		int numSubTasks   = (args.length > 0 ? Integer.parseInt(args[0]) : 1);
+		String dataInput = (args.length > 1 ? args[1] : "");
+		String output    = (args.length > 2 ? args[2] : "");
+		
+		
+		HadoopDataSource source = new HadoopDataSource(new TextInputFormat(), new JobConf(), "Input Lines");
+		TextInputFormat.addInputPath(source.getJobConf(), new Path(dataInput));
+		
+		// Example with Wrapper Converter
+		HadoopDataSource<LongWritable,Text> sourceHadoopType = new HadoopDataSource<LongWritable, Text>(
+				new TextInputFormat(), new JobConf(), "Input Lines", new WritableWrapperConverter<LongWritable, Text>());
+		TextInputFormat.addInputPath(source.getJobConf(), new Path(dataInput));
+		
+		MapOperator mapper = MapOperator.builder(new TokenizeLine())
+			.input(source)
+			.name("Tokenize Lines")
+			.build();
+		ReduceOperator reducer = ReduceOperator.builder(CountWords.class, StringValue.class, 0)
+			.input(mapper)
+			.name("Count Words")
+			.build();
+		FileDataSink out = new FileDataSink(new CsvOutputFormat(), output, reducer, "Word Counts");
+		CsvOutputFormat.configureRecordFormat(out)
+			.recordDelimiter('\n')
+			.fieldDelimiter(' ')
+			.field(StringValue.class, 0)
+			.field(IntValue.class, 1);
+		
+		Plan plan = new Plan(out, "WordCount Example");
+		plan.setDefaultParallelism(numSubTasks);
+		return plan;
+	}
+
+
+	@Override
+	public String getDescription() {
+		return "Parameters: [numSubStasks] [input] [output]";
+	}
+
+	
+	public static void main(String[] args) throws Exception {
+		WordCount wc = new WordCount();
+		
+		if (args.length < 3) {
+			System.err.println(wc.getDescription());
+			System.exit(1);
+		}
+		
+		Plan plan = wc.getPlan(args);
+		
+		// This will execute the word-count embedded in a local context. replace this line by the commented
+		// succeeding line to send the job to a local installation or to a cluster for execution
+		LocalExecutor.execute(plan);
+//		PlanExecutor ex = new RemoteExecutor("localhost", 6123, "target/pact-examples-0.4-SNAPSHOT-WordCount.jar");
+//		ex.executePlan(plan);
+	}
+}


Mime
View raw message