jena-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From rve...@apache.org
Subject [36/50] [abbrv] Get Jena Hadoop RDF Tools building
Date Mon, 20 Oct 2014 14:47:58 GMT
http://git-wip-us.apache.org/repos/asf/jena/blob/92fb810a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/BlockedNQuadsReader.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/BlockedNQuadsReader.java b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/BlockedNQuadsReader.java
new file mode 100644
index 0000000..57fc262
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/BlockedNQuadsReader.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.io.input.readers;
+
+import org.apache.jena.riot.Lang;
+
+/**
+ * A record record for NQuads
+ * <p>
+ * This is a hybrid of the {@link NQuadsReader} and the
+ * {@link WholeFileNQuadsReader} in that it does not process individual lines
+ * rather it processes the inputs in blocks of lines parsing the whole block
+ * rather than individual lines. This provides a compromise between the higher
+ * parser setup of creating more parsers and the benefit of being able to split
+ * input files over multiple mappers.
+ * </p>
+ * 
+ * 
+ * 
+ */
+public class BlockedNQuadsReader extends AbstractBlockBasedQuadReader {
+
+    @Override
+    protected Lang getRdfLanguage() {
+        return Lang.NQUADS;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/92fb810a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/BlockedNTriplesReader.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/BlockedNTriplesReader.java b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/BlockedNTriplesReader.java
new file mode 100644
index 0000000..9d4efcd
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/BlockedNTriplesReader.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.io.input.readers;
+
+import org.apache.jena.riot.Lang;
+
+/**
+ * A record record for NTriples
+ * <p>
+ * This is a hybrid of the {@link NTriplesReader} and the
+ * {@link WholeFileNTriplesReader} in that it does not process individual lines
+ * rather it processes the inputs in blocks of lines parsing the whole block
+ * rather than individual lines. This provides a compromise between the higher
+ * parser setup of creating more parsers and the benefit of being able to split
+ * input files over multiple mappers.
+ * </p>
+ * 
+ * 
+ * 
+ */
+public class BlockedNTriplesReader extends AbstractBlockBasedTripleReader {
+
+    @Override
+    protected Lang getRdfLanguage() {
+        return Lang.NTRIPLES;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/92fb810a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/NQuadsReader.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/NQuadsReader.java b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/NQuadsReader.java
new file mode 100644
index 0000000..5c13bc2
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/NQuadsReader.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.io.input.readers;
+
+import java.util.Iterator;
+
+import org.apache.jena.riot.lang.LangNQuads;
+import org.apache.jena.riot.system.ParserProfile;
+import org.apache.jena.riot.tokens.Tokenizer;
+import org.apache.jena.riot.tokens.TokenizerFactory;
+
+import com.hp.hpl.jena.sparql.core.Quad;
+
+/**
+ * A record reader for NQuads
+ * 
+ * 
+ * 
+ */
+public class NQuadsReader extends AbstractLineBasedQuadReader {
+
+    @Override
+    protected Tokenizer getTokenizer(String line) {
+        return TokenizerFactory.makeTokenizerString(line);
+    }
+
+    @Override
+    protected Iterator<Quad> getQuadsIterator(Tokenizer tokenizer, ParserProfile profile) {
+        return new LangNQuads(tokenizer, profile, null);
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/92fb810a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/NTriplesReader.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/NTriplesReader.java b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/NTriplesReader.java
new file mode 100644
index 0000000..bc46a8f
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/NTriplesReader.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.io.input.readers;
+
+import java.util.Iterator;
+
+import org.apache.jena.riot.lang.LangNTriples;
+import org.apache.jena.riot.system.ParserProfile;
+import org.apache.jena.riot.tokens.Tokenizer;
+import org.apache.jena.riot.tokens.TokenizerFactory;
+
+import com.hp.hpl.jena.graph.Triple;
+
+/**
+ * A record reader for NTriples
+ * 
+ * 
+ * 
+ */
+public class NTriplesReader extends AbstractLineBasedTripleReader {
+
+    @Override
+    protected Iterator<Triple> getTriplesIterator(Tokenizer tokenizer, ParserProfile profile) {
+        return new LangNTriples(tokenizer, profile, null);
+    }
+
+    @Override
+    protected Tokenizer getTokenizer(String line) {
+        return TokenizerFactory.makeTokenizerString(line);
+    }
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/92fb810a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/QuadsReader.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/QuadsReader.java b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/QuadsReader.java
new file mode 100644
index 0000000..d198997
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/QuadsReader.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.io.input.readers;
+
+import java.io.IOException;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.jena.hadoop.rdf.types.QuadWritable;
+import org.apache.jena.riot.Lang;
+import org.apache.jena.riot.RDFLanguages;
+
+import com.hp.hpl.jena.sparql.core.Quad;
+
+/**
+ * A record reader that reads triples from any RDF quads format
+ * 
+ * 
+ * 
+ */
+public class QuadsReader extends AbstractRdfReader<Quad, QuadWritable> {
+
+    @Override
+    protected RecordReader<LongWritable, QuadWritable> selectRecordReader(Lang lang) throws IOException {
+        if (!RDFLanguages.isQuads(lang))
+            throw new IOException(lang.getLabel() + " is not a RDF quads format, perhaps you wanted TriplesInputFormat or TriplesOrQuadsInputFormat instead?");
+
+        if (lang.equals(Lang.NQ) || lang.equals(Lang.NQUADS)) {
+            return new WholeFileNQuadsReader();
+        } else if (lang.equals(Lang.TRIG)) {
+            return new TriGReader();
+        }
+        throw new IOException(lang.getLabel() + " has no associated RecordReader implementation");
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/92fb810a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/RdfJsonReader.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/RdfJsonReader.java b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/RdfJsonReader.java
new file mode 100644
index 0000000..7a8f1d3
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/RdfJsonReader.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.io.input.readers;
+
+import org.apache.jena.riot.Lang;
+
+/**
+ * A record reader for RDF/JSON files
+ * 
+ * 
+ * 
+ */
+public class RdfJsonReader extends AbstractWholeFileTripleReader {
+
+    @Override
+    protected Lang getRdfLanguage() {
+        return Lang.RDFJSON;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/92fb810a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/RdfXmlReader.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/RdfXmlReader.java b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/RdfXmlReader.java
new file mode 100644
index 0000000..7ff4ab2
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/RdfXmlReader.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.io.input.readers;
+
+import org.apache.jena.riot.Lang;
+
+/**
+ * A record reader for RDF/XML files
+ * 
+ * 
+ * 
+ */
+public class RdfXmlReader extends AbstractWholeFileTripleReader {
+
+    @Override
+    protected Lang getRdfLanguage() {
+        return Lang.RDFXML;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/92fb810a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/TriGReader.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/TriGReader.java b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/TriGReader.java
new file mode 100644
index 0000000..cccf6ea
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/TriGReader.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.io.input.readers;
+
+import org.apache.jena.riot.Lang;
+
+/**
+ * A record reader for TriG files
+ * 
+ * 
+ * 
+ */
+public class TriGReader extends AbstractWholeFileQuadReader {
+
+    @Override
+    protected Lang getRdfLanguage() {
+        return Lang.TRIG;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/92fb810a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/TriplesOrQuadsReader.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/TriplesOrQuadsReader.java b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/TriplesOrQuadsReader.java
new file mode 100644
index 0000000..6cbd311
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/TriplesOrQuadsReader.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.io.input.readers;
+
+import java.io.IOException;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.jena.hadoop.rdf.types.QuadWritable;
+import org.apache.jena.riot.Lang;
+import org.apache.jena.riot.RDFLanguages;
+
+import com.hp.hpl.jena.graph.Node;
+import com.hp.hpl.jena.sparql.core.Quad;
+
+/**
+ * A record reader that reads RDF from any triples/quads format. Triples are
+ * converted into quads in the default graph. This behaviour can be changed by
+ * deriving from this class and overriding the {@link #getGraphNode()} method
+ * 
+ * 
+ * 
+ */
+@SuppressWarnings("javadoc")
+public class TriplesOrQuadsReader extends AbstractRdfReader<Quad, QuadWritable> {
+
+    @Override
+    protected RecordReader<LongWritable, QuadWritable> selectRecordReader(Lang lang) throws IOException {
+        if (!RDFLanguages.isQuads(lang) && !RDFLanguages.isTriples(lang))
+            throw new IOException(lang.getLabel() + " is not a RDF triples/quads format");
+
+        if (lang.equals(Lang.NQ) || lang.equals(Lang.NQUADS)) {
+            return new WholeFileNQuadsReader();
+        } else if (lang.equals(Lang.TRIG)) {
+            return new TriGReader();
+        } else if (lang.equals(Lang.NTRIPLES) || lang.equals(Lang.NT)) {
+            return new TriplesToQuadsReader(new WholeFileNTriplesReader());
+        } else if (lang.equals(Lang.TTL) || lang.equals(Lang.TURTLE) || lang.equals(Lang.N3)) {
+            return new TriplesToQuadsReader(new TurtleReader());
+        } else if (lang.equals(Lang.RDFXML)) {
+            return new TriplesToQuadsReader(new RdfXmlReader());
+        } else if (lang.equals(Lang.RDFJSON)) {
+            return new TriplesToQuadsReader(new RdfJsonReader());
+        }
+        throw new IOException(lang.getLabel() + " has no associated RecordReader implementation");
+    }
+
+    /**
+     * Gets the graph node which represents the graph into which triples will be
+     * indicated to belong to when they are converting into quads.
+     * <p>
+     * Defaults to {@link Quad#defaultGraphNodeGenerated} which represents the
+     * default graph
+     * </p>
+     * 
+     * @return Graph node
+     */
+    protected Node getGraphNode() {
+        return Quad.defaultGraphNodeGenerated;
+    }
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/92fb810a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/TriplesReader.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/TriplesReader.java b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/TriplesReader.java
new file mode 100644
index 0000000..5a16de3
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/TriplesReader.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.io.input.readers;
+
+import java.io.IOException;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.jena.hadoop.rdf.types.TripleWritable;
+import org.apache.jena.riot.Lang;
+import org.apache.jena.riot.RDFLanguages;
+
+import com.hp.hpl.jena.graph.Triple;
+
+/**
+ * A record reader that reads triples from any RDF triples format
+ * 
+ *
+ */
+public class TriplesReader extends AbstractRdfReader<Triple, TripleWritable> {
+
+    @Override
+    protected RecordReader<LongWritable, TripleWritable> selectRecordReader(Lang lang) throws IOException {
+        if (!RDFLanguages.isTriples(lang))
+            throw new IOException(lang.getLabel() + " is not a RDF triples format, perhaps you wanted QuadsInputFormat or TriplesOrQuadsInputFormat instead?");
+
+        if (lang.equals(Lang.NTRIPLES) || lang.equals(Lang.NT)) {
+            return new WholeFileNTriplesReader();
+        } else if (lang.equals(Lang.TTL) || lang.equals(Lang.TURTLE) || lang.equals(Lang.N3)) {
+            return new TurtleReader();
+        } else if (lang.equals(Lang.RDFXML)) {
+            return new RdfXmlReader();
+        } else if (lang.equals(Lang.RDFJSON)) {
+            return new RdfJsonReader();
+        }
+        throw new IOException(lang.getLabel() + " has no associated RecordReader implementation");
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/92fb810a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/TriplesToQuadsReader.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/TriplesToQuadsReader.java b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/TriplesToQuadsReader.java
new file mode 100644
index 0000000..a388f0e
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/TriplesToQuadsReader.java
@@ -0,0 +1,102 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.io.input.readers;
+
+import java.io.IOException;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.jena.hadoop.rdf.types.QuadWritable;
+import org.apache.jena.hadoop.rdf.types.TripleWritable;
+
+import com.hp.hpl.jena.graph.Node;
+import com.hp.hpl.jena.sparql.core.Quad;
+
+/**
+ * A record reader that converts triples into quads by wrapping a
+ * {@code RecordReader<LongWritable, TripleWritable>} implementation
+ * 
+ * 
+ * 
+ */
+public class TriplesToQuadsReader extends RecordReader<LongWritable, QuadWritable> {
+
+    private final RecordReader<LongWritable, TripleWritable> reader;
+    private Node graph;
+
+    /**
+     * Creates a new reader
+     * 
+     * @param reader
+     *            Triple reader
+     */
+    public TriplesToQuadsReader(RecordReader<LongWritable, TripleWritable> reader) {
+        this(reader, Quad.defaultGraphNodeGenerated);
+    }
+
+    /**
+     * Creates a new reader
+     * 
+     * @param reader
+     *            Triple reader
+     * @param graphNode
+     *            Graph node
+     */
+    public TriplesToQuadsReader(RecordReader<LongWritable, TripleWritable> reader, Node graphNode) {
+        if (reader == null)
+            throw new NullPointerException("reader cannot be null");
+        if (graphNode == null)
+            throw new NullPointerException("Graph node cannot be null");
+        this.reader = reader;
+        this.graph = graphNode;
+    }
+
+    @Override
+    public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
+        this.reader.initialize(split, context);
+    }
+
+    @Override
+    public final boolean nextKeyValue() throws IOException, InterruptedException {
+        return this.reader.nextKeyValue();
+    }
+
+    @Override
+    public final LongWritable getCurrentKey() throws IOException, InterruptedException {
+        return this.reader.getCurrentKey();
+    }
+
+    @Override
+    public final QuadWritable getCurrentValue() throws IOException, InterruptedException {
+        TripleWritable t = this.reader.getCurrentValue();
+        return new QuadWritable(new Quad(this.graph, t.get()));
+    }
+
+    @Override
+    public final float getProgress() throws IOException, InterruptedException {
+        return this.reader.getProgress();
+    }
+
+    @Override
+    public final void close() throws IOException {
+        this.reader.close();
+    }
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/92fb810a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/TurtleReader.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/TurtleReader.java b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/TurtleReader.java
new file mode 100644
index 0000000..766717f
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/TurtleReader.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.io.input.readers;
+
+import org.apache.jena.riot.Lang;
+
+/**
+ * A record reader for Turtle files
+ * 
+ * 
+ * 
+ */
+public class TurtleReader extends AbstractWholeFileTripleReader {
+
+    @Override
+    protected Lang getRdfLanguage() {
+        return Lang.TURTLE;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/92fb810a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/WholeFileNQuadsReader.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/WholeFileNQuadsReader.java b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/WholeFileNQuadsReader.java
new file mode 100644
index 0000000..1df4a90
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/WholeFileNQuadsReader.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.io.input.readers;
+
+import org.apache.jena.riot.Lang;
+
+/**
+ * A record record for NQuads
+ * <p>
+ * Unlike the {@link NQuadsReader} this processes files as a whole rather than
+ * individual lines. This has the advantage of less parser setup overhead but
+ * the disadvantage that the input cannot be split between multiple mappers.
+ * </p>
+ * 
+ * 
+ * 
+ */
+public class WholeFileNQuadsReader extends AbstractWholeFileQuadReader {
+
+    @Override
+    protected Lang getRdfLanguage() {
+        return Lang.NQUADS;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/92fb810a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/WholeFileNTriplesReader.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/WholeFileNTriplesReader.java b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/WholeFileNTriplesReader.java
new file mode 100644
index 0000000..d975c58
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/readers/WholeFileNTriplesReader.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.io.input.readers;
+
+import org.apache.jena.riot.Lang;
+
+/**
+ * A record record for NTriples
+ * <p>
+ * Unlike the {@link NTriplesReader} this processes files as a whole rather than
+ * individual lines. This has the advantage of less parser setup overhead but
+ * the disadvantage that the input cannot be split between multiple mappers.
+ * </p>
+ * 
+ * 
+ * 
+ */
+public class WholeFileNTriplesReader extends AbstractWholeFileTripleReader {
+
+    @Override
+    protected Lang getRdfLanguage() {
+        return Lang.NTRIPLES;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/92fb810a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/BlockInputStream.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/BlockInputStream.java b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/BlockInputStream.java
new file mode 100644
index 0000000..a9e692e
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/BlockInputStream.java
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.io.input.util;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * A block input stream which can is a wrapper around another input stream which
+ * restricts reading to a specific number of bytes and can report the number of
+ * bytes read
+ * <p>
+ * The class assumes that the underlying input stream has already been seeked to
+ * the appropriate start point
+ * </p>
+ * 
+ * 
+ * 
+ */
+public final class BlockInputStream extends TrackedInputStream {
+
+    private long limit = Long.MAX_VALUE;
+
+    /**
+     * Creates a new tracked input stream
+     * 
+     * @param input
+     *            Input stream to track
+     * @param limit
+     *            Maximum number of bytes to read from the stream
+     */
+    public BlockInputStream(InputStream input, long limit) {
+        super(input);
+        if (limit < 0)
+            throw new IllegalArgumentException("limit must be >= 0");
+        this.limit = limit;
+    }
+
+    @Override
+    public int read() throws IOException {
+        if (this.bytesRead >= this.limit) {
+            return -1;
+        }
+        return super.read();
+    }
+
+    @Override
+    public int available() throws IOException {
+        if (this.bytesRead >= this.limit) {
+            return 0;
+        }
+        return super.available();
+    }
+
+    @Override
+    public int read(byte[] b, int off, int len) throws IOException {
+        if (len == 0) {
+            return 0;
+        } else if (this.bytesRead >= this.limit) {
+            return -1;
+        } else if (len > this.limit - this.bytesRead) {
+            len = (int) (this.limit - this.bytesRead);
+        }
+        return super.read(b, off, len);
+    }
+
+    @Override
+    public long skip(long n) throws IOException {
+        if (n == 0) {
+            return 0;
+        } else if (this.bytesRead >= this.limit) {
+            return -1;
+        } else if (n > this.limit - this.bytesRead) {
+            n = this.limit - this.bytesRead;
+        }
+        return super.skip(n);
+    }
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/92fb810a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/RdfIOUtils.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/RdfIOUtils.java b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/RdfIOUtils.java
new file mode 100644
index 0000000..f1c9dfe
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/RdfIOUtils.java
@@ -0,0 +1,91 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.io.input.util;
+
+import java.util.UUID;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.jena.hadoop.rdf.io.HadoopIOConstants;
+import org.apache.jena.riot.lang.LabelToNode;
+import org.apache.jena.riot.system.ErrorHandlerFactory;
+import org.apache.jena.riot.system.IRIResolver;
+import org.apache.jena.riot.system.ParserProfile;
+import org.apache.jena.riot.system.ParserProfileBase;
+import org.apache.jena.riot.system.Prologue;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * RDF IO utility functions
+ * 
+ * 
+ * 
+ */
+public class RdfIOUtils {
+    private static final Logger log = LoggerFactory.getLogger(RdfIOUtils.class);
+
+    /**
+     * Private constructor prevents instantiation
+     */
+    private RdfIOUtils() {
+    }
+
+    /**
+     * Creates a parser profile for the given job context
+     * 
+     * @param context
+     *            Context
+     * @param path
+     *            File path
+     * @return Parser profile
+     */
+    public static ParserProfile createParserProfile(JobContext context, Path path) {
+        Prologue prologue = new Prologue(null, IRIResolver.createNoResolve());
+        UUID seed = RdfIOUtils.getSeed(context, path);
+        LabelToNode labelMapping = LabelToNode.createScopeByDocumentHash(seed);
+        return new ParserProfileBase(prologue, ErrorHandlerFactory.errorHandlerStd, labelMapping);
+    }
+
+    /**
+     * Selects a seed for use in generating blank node identifiers
+     * 
+     * @param context
+     *            Job Context
+     * @param path
+     *            File path
+     * @return Seed
+     */
+    public static UUID getSeed(JobContext context, Path path) {
+        // This is to ensure that blank node allocation policy is constant when
+        // subsequent MapReduce jobs need that
+        String jobId = context.getJobID().toString();
+        if (jobId == null) {
+            jobId = String.valueOf(System.currentTimeMillis());
+            log.warn(
+                    "Job ID was not set, using current milliseconds of {}. Sequence of MapReduce jobs must handle carefully blank nodes.",
+                    jobId);
+        }
+        log.debug("MapReduceAllocator({}, {})", jobId, path);
+
+        // Form a reproducible seed for the run
+        return new UUID(jobId.hashCode(), path.hashCode());
+    }
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/92fb810a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/TrackableInputStream.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/TrackableInputStream.java b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/TrackableInputStream.java
new file mode 100644
index 0000000..92e2df5
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/TrackableInputStream.java
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.io.input.util;
+
+import java.io.InputStream;
+
+/**
+ * An input stream that tracks the number of bytes read
+ * 
+ * 
+ * 
+ */
+public abstract class TrackableInputStream extends InputStream {
+
+    /**
+     * Gets the number of bytes read
+     * 
+     * @return Number of bytes read
+     */
+    public abstract long getBytesRead();
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/jena/blob/92fb810a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/TrackedInputStream.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/TrackedInputStream.java b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/TrackedInputStream.java
new file mode 100644
index 0000000..e51a866
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/TrackedInputStream.java
@@ -0,0 +1,124 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.io.input.util;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * A tracked input stream which can is a wrapper around another input stream and
+ * can report the number of bytes read
+ * 
+ * 
+ * 
+ */
+public class TrackedInputStream extends TrackableInputStream {
+
+    protected InputStream input;
+    protected long bytesRead = 0, lastMark;
+
+    /**
+     * Creates a new tracked input stream
+     * 
+     * @param input
+     *            Input stream to track
+     */
+    public TrackedInputStream(InputStream input) {
+        if (input == null)
+            throw new NullPointerException("Input cannot be null");
+        this.input = input;
+    }
+
+    @Override
+    public int read() throws IOException {
+        int read = this.input.read();
+        if (read >= 0)
+            this.bytesRead++;
+        return read;
+    }
+
+    @Override
+    public long getBytesRead() {
+        return this.bytesRead;
+    }
+
+    @Override
+    public void close() throws IOException {
+        this.input.close();
+    }
+
+    @Override
+    public int available() throws IOException {
+        return this.input.available();
+    }
+
+    @Override
+    public synchronized void mark(int readlimit) {
+        this.input.mark(readlimit);
+        this.lastMark = this.bytesRead;
+    }
+
+    @Override
+    public boolean markSupported() {
+        return this.input.markSupported();
+    }
+
+    @Override
+    public int read(byte[] b, int off, int len) throws IOException {
+        if (len == 0) return 0;
+        int read = this.input.read(b, off, len);
+        if (read > 0)
+            this.bytesRead += read;
+        return read;
+    }
+
+    @Override
+    public int read(byte[] b) throws IOException {
+        return this.read(b, 0, b.length);
+    }
+
+    @Override
+    public synchronized void reset() throws IOException {
+        this.input.reset();
+        this.bytesRead = this.lastMark;
+    }
+
+    @Override
+    public long skip(long n) throws IOException {
+        if (n == 0)
+            return 0;
+        long skipped = 0;
+        byte[] buffer = new byte[16];
+        int readSize = Math.min(buffer.length, n > Integer.MAX_VALUE ? Integer.MAX_VALUE : (int) n);
+        int read;
+        do {
+            if (n - skipped > readSize) {
+                read = this.input.read(buffer, 0, readSize);
+            } else {
+                read = this.input.read(buffer, 0, (int) (n - skipped));
+            }
+            if (read > 0) {
+                this.bytesRead += read;
+                skipped += read;
+            }
+        } while (skipped < n && read >= 0);
+
+        return skipped;
+    }
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/92fb810a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/TrackedPipedQuadsStream.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/TrackedPipedQuadsStream.java b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/TrackedPipedQuadsStream.java
new file mode 100644
index 0000000..845c709
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/TrackedPipedQuadsStream.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.io.input.util;
+
+import org.apache.jena.riot.lang.PipedRDFIterator;
+
+import com.hp.hpl.jena.graph.Triple;
+import com.hp.hpl.jena.sparql.core.Quad;
+
+/**
+ * A tracked piped quads stream
+ * 
+ * 
+ * 
+ */
+public class TrackedPipedQuadsStream extends TrackedPipedRDFStream<Quad> {
+
+    /**
+     * Creates a new stream
+     * 
+     * @param sink
+     *            Sink
+     * @param input
+     *            Input stream
+     */
+    public TrackedPipedQuadsStream(PipedRDFIterator<Quad> sink, TrackableInputStream input) {
+        super(sink, input);
+    }
+
+    @Override
+    public void triple(Triple triple) {
+        // Triples are discarded
+    }
+
+    @Override
+    public void quad(Quad quad) {
+        this.receive(quad);
+    }
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/92fb810a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/TrackedPipedRDFStream.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/TrackedPipedRDFStream.java b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/TrackedPipedRDFStream.java
new file mode 100644
index 0000000..6e910be
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/TrackedPipedRDFStream.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.io.input.util;
+
+import java.util.LinkedList;
+import java.util.Queue;
+
+import org.apache.jena.riot.lang.PipedRDFIterator;
+import org.apache.jena.riot.lang.PipedRDFStream;
+
+/**
+ * A tracked piped RDF stream
+ * 
+ * 
+ * 
+ * @param <T>
+ *            Type corresponding to a supported RDF primitive
+ */
+public abstract class TrackedPipedRDFStream<T> extends PipedRDFStream<T> {
+
+    private TrackableInputStream input;
+    private Queue<Long> positions = new LinkedList<Long>();
+
+    protected TrackedPipedRDFStream(PipedRDFIterator<T> sink, TrackableInputStream input) {
+        super(sink);
+        this.input = input;
+    }
+
+    @Override
+    protected void receive(T t) {
+        // Track positions the input stream is at as we receive inputs
+        synchronized (this.positions) {
+            this.positions.add(this.input.getBytesRead());
+        }
+        super.receive(t);
+    }
+
+    /**
+     * Gets the next position
+     * 
+     * @return Position
+     */
+    public Long getPosition() {
+        synchronized (this.positions) {
+            return this.positions.poll();
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/92fb810a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/TrackedPipedTriplesStream.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/TrackedPipedTriplesStream.java b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/TrackedPipedTriplesStream.java
new file mode 100644
index 0000000..2040c4f
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/input/util/TrackedPipedTriplesStream.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.io.input.util;
+
+import org.apache.jena.riot.lang.PipedRDFIterator;
+
+import com.hp.hpl.jena.graph.Triple;
+import com.hp.hpl.jena.sparql.core.Quad;
+
+/**
+ * A tracked piped triples stream
+ * 
+ * 
+ * 
+ */
+public class TrackedPipedTriplesStream extends TrackedPipedRDFStream<Triple> {
+
+    /**
+     * Creates a tracked triples stream
+     * 
+     * @param sink
+     *            Sink
+     * @param input
+     *            Input stream
+     */
+    public TrackedPipedTriplesStream(PipedRDFIterator<Triple> sink, TrackableInputStream input) {
+        super(sink, input);
+    }
+
+    @Override
+    public void triple(Triple triple) {
+        receive(triple);
+    }
+
+    @Override
+    public void quad(Quad quad) {
+        // Quads are discarded
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/92fb810a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/AbstractBatchedNodeTupleOutputFormat.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/AbstractBatchedNodeTupleOutputFormat.java b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/AbstractBatchedNodeTupleOutputFormat.java
new file mode 100644
index 0000000..ac4ea2b
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/AbstractBatchedNodeTupleOutputFormat.java
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.io.output;
+
+import java.io.Writer;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.jena.hadoop.rdf.io.RdfIOConstants;
+import org.apache.jena.hadoop.rdf.io.output.writers.AbstractBatchedNodeTupleWriter;
+import org.apache.jena.hadoop.rdf.types.AbstractNodeTupleWritable;
+
+
+/**
+ * Abstract output format for formats that use a
+ * {@link AbstractBatchedNodeTupleWriter} as their writer
+ * 
+ * 
+ * 
+ * @param <TKey>
+ *            Key type
+ * @param <TTuple>
+ *            Tuple type
+ * @param <TValue>
+ *            Writable tuple type i.e. the value type
+ */
+public abstract class AbstractBatchedNodeTupleOutputFormat<TKey, TTuple, TValue extends AbstractNodeTupleWritable<TTuple>> extends
+        AbstractNodeTupleOutputFormat<TKey, TTuple, TValue> {
+
+    @Override
+    protected RecordWriter<TKey, TValue> getRecordWriter(Writer writer, Configuration config) {
+        long batchSize = config.getLong(RdfIOConstants.OUTPUT_BATCH_SIZE, RdfIOConstants.DEFAULT_OUTPUT_BATCH_SIZE);
+        return this.getRecordWriter(writer, batchSize);
+    }
+    
+    protected abstract RecordWriter<TKey, TValue> getRecordWriter(Writer writer, long batchSize);
+
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/92fb810a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/AbstractNodeOutputFormat.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/AbstractNodeOutputFormat.java b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/AbstractNodeOutputFormat.java
new file mode 100644
index 0000000..cfc98bd
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/AbstractNodeOutputFormat.java
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.io.output;
+
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.compress.CompressionCodec;
+import org.apache.hadoop.io.compress.GzipCodec;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.util.ReflectionUtils;
+import org.apache.jena.hadoop.rdf.types.NodeWritable;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Abstract output format which takes pairs with Node keys and arbitrary values
+ * and writes them as a simple line based text file
+ * 
+ * 
+ * 
+ * @param <TValue> Value type
+ */
+public abstract class AbstractNodeOutputFormat<TValue> extends FileOutputFormat<NodeWritable, TValue> {
+    
+    private static final Logger LOG = LoggerFactory.getLogger(AbstractNodeOutputFormat.class);
+
+    @Override
+    public RecordWriter<NodeWritable, TValue> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException {
+        Configuration config = context.getConfiguration();
+        boolean isCompressed = getCompressOutput(context);
+        CompressionCodec codec = null;
+        String extension = this.getFileExtension();
+        if (isCompressed) {
+            Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(context, GzipCodec.class);
+            codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, config);
+            extension += codec.getDefaultExtension();
+        }
+        Path file = getDefaultWorkFile(context, extension);
+        LOG.info("Writing output to file " + file);
+        FileSystem fs = file.getFileSystem(config);
+        if (!isCompressed) {
+            FSDataOutputStream fileOut = fs.create(file, false);
+            return this.getRecordWriter(new OutputStreamWriter(fileOut), config);
+        } else {
+            FSDataOutputStream fileOut = fs.create(file, false);
+            return this.getRecordWriter(new OutputStreamWriter(codec.createOutputStream(fileOut)), config);
+        }
+    }
+
+    /**
+     * Gets the file extension to use for output
+     * 
+     * @return File extension including the '.'
+     */
+    protected String getFileExtension() {
+        return ".nodes";
+    }
+
+    /**
+     * Gets the record writer to use
+     * 
+     * @param writer
+     *            Writer to write output to
+     * @param config
+     *            Configuration
+     * @return Record writer
+     */
+    protected abstract RecordWriter<NodeWritable, TValue> getRecordWriter(Writer writer, Configuration config);
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/92fb810a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/AbstractNodeTupleOutputFormat.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/AbstractNodeTupleOutputFormat.java b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/AbstractNodeTupleOutputFormat.java
new file mode 100644
index 0000000..1944bda
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/AbstractNodeTupleOutputFormat.java
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.io.output;
+
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.compress.CompressionCodec;
+import org.apache.hadoop.io.compress.GzipCodec;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.util.ReflectionUtils;
+import org.apache.jena.hadoop.rdf.types.AbstractNodeTupleWritable;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * An abstract implementation of an output format for line based tuple formats
+ * where the key is ignored and only the tuple values will be output
+ * 
+ * 
+ * @param <TKey>
+ *            Key type
+ * @param <TValue>
+ *            Tuple value type
+ * @param <T>
+ *            Writable node tuple type
+ * 
+ */
+public abstract class AbstractNodeTupleOutputFormat<TKey, TValue, T extends AbstractNodeTupleWritable<TValue>> extends
+        FileOutputFormat<TKey, T> {
+
+    private static final Logger LOG = LoggerFactory.getLogger(AbstractNodeTupleOutputFormat.class);
+
+    @Override
+    public RecordWriter<TKey, T> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException {
+        Configuration config = context.getConfiguration();
+        boolean isCompressed = getCompressOutput(context);
+        CompressionCodec codec = null;
+        String extension = this.getFileExtension();
+        if (isCompressed) {
+            Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(context, GzipCodec.class);
+            codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, config);
+            extension += codec.getDefaultExtension();
+        }
+        Path file = getDefaultWorkFile(context, extension);
+        LOG.info("Writing output to file " + file);
+        FileSystem fs = file.getFileSystem(config);
+        if (!isCompressed) {
+            FSDataOutputStream fileOut = fs.create(file, false);
+            return this.getRecordWriter(new OutputStreamWriter(fileOut), config);
+        } else {
+            FSDataOutputStream fileOut = fs.create(file, false);
+            return this.getRecordWriter(new OutputStreamWriter(codec.createOutputStream(fileOut)), config);
+        }
+    }
+
+    /**
+     * Gets the file extension to use for output
+     * 
+     * @return File extension including the '.'
+     */
+    protected abstract String getFileExtension();
+
+    /**
+     * Gets the record writer to use
+     * 
+     * @param writer
+     *            Writer to write output to
+     * @param config
+     *            Configuration
+     * @return Record writer
+     */
+    protected abstract RecordWriter<TKey, T> getRecordWriter(Writer writer, Configuration config);
+
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/92fb810a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/AbstractStreamRdfNodeTupleOutputFormat.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/AbstractStreamRdfNodeTupleOutputFormat.java b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/AbstractStreamRdfNodeTupleOutputFormat.java
new file mode 100644
index 0000000..54be421
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/AbstractStreamRdfNodeTupleOutputFormat.java
@@ -0,0 +1,58 @@
+package org.apache.jena.hadoop.rdf.io.output;
+
+import java.io.Writer;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.jena.hadoop.rdf.types.AbstractNodeTupleWritable;
+import org.apache.jena.riot.system.StreamRDF;
+
+/**
+ * Abstract output format for formats that use the RIOT {@link StreamRDF} API to
+ * stream the writes
+ * 
+ * 
+ * 
+ * @param <TKey>
+ *            Key type
+ * @param <TTuple>
+ *            Tuple type
+ * @param <TValue>
+ *            Writable tuple type i.e. the value type
+ */
+public abstract class AbstractStreamRdfNodeTupleOutputFormat<TKey, TTuple, TValue extends AbstractNodeTupleWritable<TTuple>>
+		extends AbstractNodeTupleOutputFormat<TKey, TTuple, TValue> {
+
+	@Override
+	protected RecordWriter<TKey, TValue> getRecordWriter(Writer writer,
+			Configuration config) {
+		return getRecordWriter(getStream(writer, config), writer, config);
+	}
+
+	/**
+	 * Gets a writer which provides a bridge between the {@link RecordWriter}
+	 * and {@link StreamRDF} APIs
+	 * 
+	 * @param stream
+	 *            RDF Stream
+	 * @param writer
+	 *            Writer
+	 * @param config
+	 *            Configuration
+	 * @return Record Writer
+	 */
+	protected abstract RecordWriter<TKey, TValue> getRecordWriter(
+			StreamRDF stream, Writer writer, Configuration config);
+
+	/**
+	 * Gets a {@link StreamRDF} to which the tuples to be output should be
+	 * passed
+	 * 
+	 * @param writer
+	 *            Writer
+	 * @param config
+	 *            Configuration
+	 * @return RDF Stream
+	 */
+	protected abstract StreamRDF getStream(Writer writer, Configuration config);
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/92fb810a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/BatchedTriGOutputFormat.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/BatchedTriGOutputFormat.java b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/BatchedTriGOutputFormat.java
new file mode 100644
index 0000000..8cb28fe
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/BatchedTriGOutputFormat.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.io.output;
+
+import java.io.Writer;
+
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.jena.hadoop.rdf.io.output.writers.BatchedTriGWriter;
+import org.apache.jena.hadoop.rdf.types.QuadWritable;
+
+import com.hp.hpl.jena.sparql.core.Quad;
+
+/**
+ * Output format for TriG that uses a batched approach, note that this will
+ * produce invalid data where blank nodes span batches so it is typically better
+ * to use the {@link TriGOutputFormat} instead
+ * 
+ * 
+ * 
+ * @param <TKey>
+ *            Key type
+ */
+public class BatchedTriGOutputFormat<TKey> extends
+		AbstractBatchedNodeTupleOutputFormat<TKey, Quad, QuadWritable> {
+
+	@Override
+	protected RecordWriter<TKey, QuadWritable> getRecordWriter(Writer writer,
+			long batchSize) {
+		return new BatchedTriGWriter<TKey>(writer, batchSize);
+	}
+
+	@Override
+	protected String getFileExtension() {
+		return ".trig";
+	}
+
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/92fb810a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/BatchedTurtleOutputFormat.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/BatchedTurtleOutputFormat.java b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/BatchedTurtleOutputFormat.java
new file mode 100644
index 0000000..4ad9549
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/BatchedTurtleOutputFormat.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.io.output;
+
+import java.io.Writer;
+
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.jena.hadoop.rdf.io.output.writers.BatchedTurtleWriter;
+import org.apache.jena.hadoop.rdf.types.TripleWritable;
+
+import com.hp.hpl.jena.graph.Triple;
+
+/**
+ * Output format for Turtle that uses a batched approach, note that this will
+ * produce invalid data where blank nodes span batches so it is typically better
+ * to use the {@link TurtleOutputFormat} instead
+ * 
+ * 
+ * 
+ * @param <TKey>
+ *            Key type
+ */
+public class BatchedTurtleOutputFormat<TKey> extends
+		AbstractBatchedNodeTupleOutputFormat<TKey, Triple, TripleWritable> {
+
+	@Override
+	protected RecordWriter<TKey, TripleWritable> getRecordWriter(Writer writer,
+			long batchSize) {
+		return new BatchedTurtleWriter<TKey>(writer, batchSize);
+	}
+
+	@Override
+	protected String getFileExtension() {
+		return ".ttl";
+	}
+
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/92fb810a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/NQuadsOutputFormat.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/NQuadsOutputFormat.java b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/NQuadsOutputFormat.java
new file mode 100644
index 0000000..f7f35df
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/NQuadsOutputFormat.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.io.output;
+
+import java.io.Writer;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.jena.hadoop.rdf.io.output.writers.NQuadsWriter;
+import org.apache.jena.hadoop.rdf.types.QuadWritable;
+
+import com.hp.hpl.jena.sparql.core.Quad;
+
+/**
+ * NQuads output format
+ * 
+ * 
+ * 
+ * @param <TKey>
+ *            Key type
+ */
+public class NQuadsOutputFormat<TKey> extends AbstractNodeTupleOutputFormat<TKey, Quad, QuadWritable> {
+
+    @Override
+    protected RecordWriter<TKey, QuadWritable> getRecordWriter(Writer writer, Configuration config) {
+        return new NQuadsWriter<TKey>(writer);
+    }
+
+    @Override
+    protected String getFileExtension() {
+        return ".nq";
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/92fb810a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/NTriplesNodeOutputFormat.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/NTriplesNodeOutputFormat.java b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/NTriplesNodeOutputFormat.java
new file mode 100644
index 0000000..14bac22
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/NTriplesNodeOutputFormat.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.io.output;
+
+import java.io.Writer;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.jena.hadoop.rdf.io.output.writers.NTriplesNodeWriter;
+import org.apache.jena.hadoop.rdf.types.NodeWritable;
+
+
+/**
+ * NTriples based node output format
+ * 
+ * 
+ * 
+ * @param <TValue>
+ *            Value type
+ */
+public class NTriplesNodeOutputFormat<TValue> extends AbstractNodeOutputFormat<TValue> {
+
+    @Override
+    protected RecordWriter<NodeWritable, TValue> getRecordWriter(Writer writer, Configuration config) {
+        return new NTriplesNodeWriter<TValue>(writer);
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/92fb810a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/NTriplesOutputFormat.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/NTriplesOutputFormat.java b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/NTriplesOutputFormat.java
new file mode 100644
index 0000000..1309852
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/NTriplesOutputFormat.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.io.output;
+
+import java.io.Writer;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.jena.hadoop.rdf.io.output.writers.NTriplesWriter;
+import org.apache.jena.hadoop.rdf.types.TripleWritable;
+
+import com.hp.hpl.jena.graph.Triple;
+
+/**
+ * NTriples output format
+ * 
+ * 
+ * @param <TKey> 
+ * 
+ */
+public class NTriplesOutputFormat<TKey> extends AbstractNodeTupleOutputFormat<TKey, Triple, TripleWritable> {
+
+    @Override
+    protected RecordWriter<TKey, TripleWritable> getRecordWriter(Writer writer, Configuration config) {
+        return new NTriplesWriter<TKey>(writer);
+    }
+
+    @Override
+    protected String getFileExtension() {
+        return ".nt";
+    }
+
+
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/92fb810a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/RdfJsonOutputFormat.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/RdfJsonOutputFormat.java b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/RdfJsonOutputFormat.java
new file mode 100644
index 0000000..6d79132
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/RdfJsonOutputFormat.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.io.output;
+
+import java.io.Writer;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.jena.hadoop.rdf.io.output.writers.RdfJsonWriter;
+import org.apache.jena.hadoop.rdf.types.TripleWritable;
+
+import com.hp.hpl.jena.graph.Triple;
+
+/**
+ * RDF/JSON output format
+ * 
+ * 
+ * 
+ * @param <TKey>
+ *            Key type
+ */
+public class RdfJsonOutputFormat<TKey> extends AbstractNodeTupleOutputFormat<TKey, Triple, TripleWritable> {
+
+    @Override
+    protected String getFileExtension() {
+        return ".rj";
+    }
+
+    @Override
+    protected RecordWriter<TKey, TripleWritable> getRecordWriter(Writer writer, Configuration config) {
+        return new RdfJsonWriter<TKey>(writer);
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/92fb810a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/RdfXmlOutputFormat.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/RdfXmlOutputFormat.java b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/RdfXmlOutputFormat.java
new file mode 100644
index 0000000..4bd37ca
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/RdfXmlOutputFormat.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.io.output;
+
+import java.io.Writer;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.jena.hadoop.rdf.io.output.writers.RdfXmlWriter;
+import org.apache.jena.hadoop.rdf.types.TripleWritable;
+
+import com.hp.hpl.jena.graph.Triple;
+
+/**
+ * RDF/XML output format
+ * 
+ * 
+ * 
+ * @param <TKey>
+ *            Key type
+ */
+public class RdfXmlOutputFormat<TKey> extends AbstractNodeTupleOutputFormat<TKey, Triple, TripleWritable> {
+
+    @Override
+    protected String getFileExtension() {
+        return ".rdf";
+    }
+
+    @Override
+    protected RecordWriter<TKey, TripleWritable> getRecordWriter(Writer writer, Configuration config) {
+        return new RdfXmlWriter<TKey>(writer);
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/92fb810a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/TriGOutputFormat.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/TriGOutputFormat.java b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/TriGOutputFormat.java
new file mode 100644
index 0000000..0d0d429
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-io/src/main/java/org/apache/jena/hadoop/rdf/io/output/TriGOutputFormat.java
@@ -0,0 +1,41 @@
+package org.apache.jena.hadoop.rdf.io.output;
+
+import java.io.Writer;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.jena.hadoop.rdf.io.output.writers.StreamRdfQuadWriter;
+import org.apache.jena.hadoop.rdf.types.QuadWritable;
+import org.apache.jena.riot.system.StreamRDF;
+import org.apache.jena.riot.writer.WriterStreamRDFBlocks;
+
+import com.hp.hpl.jena.sparql.core.Quad;
+
+/**
+ * Output format for TriG
+ * 
+ * 
+ * 
+ * @param <TKey>
+ *            Key type
+ */
+public class TriGOutputFormat<TKey> extends
+		AbstractStreamRdfNodeTupleOutputFormat<TKey, Quad, QuadWritable> {
+
+	@Override
+	protected RecordWriter<TKey, QuadWritable> getRecordWriter(
+			StreamRDF stream, Writer writer, Configuration config) {
+		return new StreamRdfQuadWriter<TKey>(stream, writer);
+	}
+
+	@Override
+	protected StreamRDF getStream(Writer writer, Configuration config) {
+		return new WriterStreamRDFBlocks(writer);
+	}
+
+	@Override
+	protected String getFileExtension() {
+		return ".trig";
+	}
+
+}


Mime
View raw message