jena-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From rve...@apache.org
Subject svn commit: r1583942 [4/9] - in /jena/Experimental/hadoop-rdf: ./ hadoop-rdf-common/ hadoop-rdf-common/src/ hadoop-rdf-common/src/main/ hadoop-rdf-common/src/main/java/ hadoop-rdf-common/src/main/java/com/ hadoop-rdf-common/src/main/java/com/yarcdata/ ...
Date Wed, 02 Apr 2014 09:20:58 GMT
Added: jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/main/java/com/yarcdata/urika/hadoop/rdf/io/output/writers/AbstractLineBasedNodeTupleWriter.java
URL: http://svn.apache.org/viewvc/jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/main/java/com/yarcdata/urika/hadoop/rdf/io/output/writers/AbstractLineBasedNodeTupleWriter.java?rev=1583942&view=auto
==============================================================================
--- jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/main/java/com/yarcdata/urika/hadoop/rdf/io/output/writers/AbstractLineBasedNodeTupleWriter.java (added)
+++ jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/main/java/com/yarcdata/urika/hadoop/rdf/io/output/writers/AbstractLineBasedNodeTupleWriter.java Wed Apr  2 09:20:51 2014
@@ -0,0 +1,138 @@
+/*
+ * Copyright 2013 YarcData LLC All Rights Reserved.
+ */
+
+package com.yarcdata.urika.hadoop.rdf.io.output.writers;
+
+import java.io.IOException;
+import java.io.Writer;
+
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.jena.atlas.io.AWriter;
+import org.apache.jena.atlas.io.Writer2;
+import org.apache.jena.riot.out.NodeFormatter;
+import org.apache.jena.riot.out.NodeFormatterNT;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.hp.hpl.jena.graph.Node;
+import com.yarcdata.urika.hadoop.rdf.types.AbstractNodeTupleWritable;
+
+/**
+ * An abstract implementation of a record writer that writes records to line
+ * based tuple formats such as NTriples and NQuads.
+ * <p>
+ * The implementation only writes the value portion of the key value pair since
+ * it is the value portion that is used to convey the node tuples
+ * </p>
+ * 
+ * @author rvesse
+ * @param <TKey>
+ *            Key type
+ * @param <TValue>
+ *            Tuple type
+ * @param <T>
+ *            Writable node tuple type
+ * 
+ */
+public abstract class AbstractLineBasedNodeTupleWriter<TKey, TValue, T extends AbstractNodeTupleWritable<TValue>> extends
+        RecordWriter<TKey, T> {
+    /**
+     * Default separator written between nodes
+     */
+    public static final String DEFAULT_SEPARATOR = " ";
+    /**
+     * Default terminator written at the end of each line
+     */
+    public static final String DEFAULT_TERMINATOR = " .";
+
+    private static final Logger log = LoggerFactory.getLogger(AbstractLineBasedNodeTupleWriter.class);
+
+    // Underlying output wrapped as a Jena AWriter so NodeFormatter can write to it
+    private AWriter writer;
+    private NodeFormatter formatter;
+
+    /**
+     * Creates a new tuple writer using the default NTriples node formatter
+     * 
+     * @param writer
+     *            Writer
+     */
+    public AbstractLineBasedNodeTupleWriter(Writer writer) {
+        this(writer, new NodeFormatterNT());
+    }
+
+    /**
+     * Creates a new tuple writer
+     * 
+     * @param writer
+     *            Writer, must not be null
+     * @param formatter
+     *            Node formatter, must not be null
+     * @throws NullPointerException
+     *             Thrown if the writer and/or formatter is null
+     */
+    public AbstractLineBasedNodeTupleWriter(Writer writer, NodeFormatter formatter) {
+        if (writer == null)
+            throw new NullPointerException("writer cannot be null");
+        if (formatter == null)
+            throw new NullPointerException("formatter cannot be null");
+        this.formatter = formatter;
+        this.writer = Writer2.wrap(writer);
+    }
+
+    @Override
+    public void write(TKey key, T value) throws IOException, InterruptedException {
+        log.debug("write({}={})", key, value);
+
+        // Only the value is used; the key is ignored (see class Javadoc)
+        Node[] ns = this.getNodes(value);
+        String sep = this.getSeparator();
+        NodeFormatter formatter = this.getNodeFormatter();
+        for (int i = 0; i < ns.length; i++) {
+            formatter.format(this.writer, ns[i]);
+            // NOTE(review): the separator is printed after every node,
+            // including the last, so each line carries a trailing separator
+            // immediately before the terminator (e.g. "s p o  ." with the
+            // defaults). This is still valid NTriples/NQuads syntax.
+            this.writer.print(sep);
+        }
+        this.writer.println(this.getTerminator());
+        // Flush after every tuple so output becomes visible record by record
+        this.writer.flush();
+    }
+
+    /**
+     * Gets the nodes of the tuple in the order they should be written
+     * 
+     * @param tuple
+     *            Tuple
+     * @return Nodes
+     */
+    protected abstract Node[] getNodes(T tuple);
+
+    /**
+     * Gets the node formatter to use for formatting nodes
+     * 
+     * @return Node formatter
+     */
+    protected NodeFormatter getNodeFormatter() {
+        return this.formatter;
+    }
+
+    /**
+     * Gets the separator that is written between nodes, defaults to
+     * {@link #DEFAULT_SEPARATOR}
+     * 
+     * @return Separator
+     */
+    protected String getSeparator() {
+        return DEFAULT_SEPARATOR;
+    }
+
+    /**
+     * Gets the terminator that is written at the end of each tuple, defaults to
+     * {@link #DEFAULT_TERMINATOR}
+     * 
+     * @return Terminator
+     */
+    protected String getTerminator() {
+        return DEFAULT_TERMINATOR;
+    }
+
+    @Override
+    public void close(TaskAttemptContext context) throws IOException, InterruptedException {
+        log.debug("close({})", context);
+        // Closes the underlying writer supplied at construction time
+        writer.close();
+    }
+}
\ No newline at end of file

Added: jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/main/java/com/yarcdata/urika/hadoop/rdf/io/output/writers/AbstractLineBasedQuadWriter.java
URL: http://svn.apache.org/viewvc/jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/main/java/com/yarcdata/urika/hadoop/rdf/io/output/writers/AbstractLineBasedQuadWriter.java?rev=1583942&view=auto
==============================================================================
--- jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/main/java/com/yarcdata/urika/hadoop/rdf/io/output/writers/AbstractLineBasedQuadWriter.java (added)
+++ jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/main/java/com/yarcdata/urika/hadoop/rdf/io/output/writers/AbstractLineBasedQuadWriter.java Wed Apr  2 09:20:51 2014
@@ -0,0 +1,57 @@
+/*
+ * Copyright 2013 YarcData LLC All Rights Reserved.
+ */
+
+package com.yarcdata.urika.hadoop.rdf.io.output.writers;
+
+import java.io.Writer;
+
+import org.apache.jena.riot.out.NodeFormatter;
+import org.apache.jena.riot.out.NodeFormatterNT;
+
+import com.hp.hpl.jena.graph.Node;
+import com.hp.hpl.jena.sparql.core.Quad;
+import com.yarcdata.urika.hadoop.rdf.types.QuadWritable;
+
+/**
+ * An abstract writer for line based quad formats
+ * 
+ * @author rvesse
+ * @param <TKey>
+ *            Key type
+ * 
+ */
+public abstract class AbstractLineBasedQuadWriter<TKey> extends AbstractLineBasedNodeTupleWriter<TKey, Quad, QuadWritable> {
+
+    /**
+     * Creates a new writer using the default NTriples node formatter
+     * 
+     * @param writer
+     *            Writer
+     */
+    public AbstractLineBasedQuadWriter(Writer writer) {
+        this(writer, new NodeFormatterNT());
+    }
+
+    /**
+     * Creates a new writer using the specified node formatter
+     * 
+     * @param writer
+     *            Writer
+     * @param formatter
+     *            Node formatter
+     */
+    public AbstractLineBasedQuadWriter(Writer writer, NodeFormatter formatter) {
+        super(writer, formatter);
+    }
+
+    @Override
+    protected Node[] getNodes(QuadWritable tuple) {
+        Quad q = tuple.get();
+        // Quads in the default graph are written as plain triples (no graph
+        // node); otherwise the graph node is written last, giving the
+        // subject, predicate, object, graph ordering used by NQuads
+        if (q.isDefaultGraph()) {
+            return new Node[] { q.getSubject(), q.getPredicate(), q.getObject() };
+        } else {
+            return new Node[] { q.getSubject(), q.getPredicate(), q.getObject(), q.getGraph() };
+        }
+    }
+
+}

Added: jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/main/java/com/yarcdata/urika/hadoop/rdf/io/output/writers/AbstractLineBasedTripleWriter.java
URL: http://svn.apache.org/viewvc/jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/main/java/com/yarcdata/urika/hadoop/rdf/io/output/writers/AbstractLineBasedTripleWriter.java?rev=1583942&view=auto
==============================================================================
--- jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/main/java/com/yarcdata/urika/hadoop/rdf/io/output/writers/AbstractLineBasedTripleWriter.java (added)
+++ jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/main/java/com/yarcdata/urika/hadoop/rdf/io/output/writers/AbstractLineBasedTripleWriter.java Wed Apr  2 09:20:51 2014
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2013 YarcData LLC All Rights Reserved.
+ */
+
+package com.yarcdata.urika.hadoop.rdf.io.output.writers;
+
+import java.io.Writer;
+
+import org.apache.jena.riot.out.NodeFormatter;
+import org.apache.jena.riot.out.NodeFormatterNT;
+
+import com.hp.hpl.jena.graph.Node;
+import com.hp.hpl.jena.graph.Triple;
+import com.yarcdata.urika.hadoop.rdf.types.TripleWritable;
+
+/**
+ * An abstract writer for line based triple formats
+ * 
+ * @author rvesse
+ * @param <TKey>
+ *            Key type
+ * 
+ */
+public abstract class AbstractLineBasedTripleWriter<TKey> extends AbstractLineBasedNodeTupleWriter<TKey, Triple, TripleWritable> {
+
+    /**
+     * Creates a new writer using the default NTriples node formatter
+     * 
+     * @param writer
+     *            Writer
+     */
+    public AbstractLineBasedTripleWriter(Writer writer) {
+        this(writer, new NodeFormatterNT());
+    }
+
+    /**
+     * Creates a new writer using the specified node formatter
+     * 
+     * @param writer
+     *            Writer
+     * @param formatter
+     *            Node formatter
+     */
+    public AbstractLineBasedTripleWriter(Writer writer, NodeFormatter formatter) {
+        super(writer, formatter);
+    }
+
+    @Override
+    protected Node[] getNodes(TripleWritable tuple) {
+        Triple t = tuple.get();
+        // Standard subject, predicate, object ordering used by NTriples
+        return new Node[] { t.getSubject(), t.getPredicate(), t.getObject() };
+    }
+
+}

Added: jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/main/java/com/yarcdata/urika/hadoop/rdf/io/output/writers/AbstractNodeWriter.java
URL: http://svn.apache.org/viewvc/jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/main/java/com/yarcdata/urika/hadoop/rdf/io/output/writers/AbstractNodeWriter.java?rev=1583942&view=auto
==============================================================================
--- jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/main/java/com/yarcdata/urika/hadoop/rdf/io/output/writers/AbstractNodeWriter.java (added)
+++ jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/main/java/com/yarcdata/urika/hadoop/rdf/io/output/writers/AbstractNodeWriter.java Wed Apr  2 09:20:51 2014
@@ -0,0 +1,123 @@
+/*
+ * Copyright 2013 YarcData LLC All Rights Reserved.
+ */
+
+package com.yarcdata.urika.hadoop.rdf.io.output.writers;
+
+import java.io.IOException;
+import java.io.Writer;
+
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.jena.atlas.io.AWriter;
+import org.apache.jena.atlas.io.Writer2;
+import org.apache.jena.riot.out.NodeFormatter;
+import org.apache.jena.riot.out.NodeFormatterNT;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.hp.hpl.jena.graph.Node;
+import com.yarcdata.urika.hadoop.rdf.types.NodeWritable;
+
+/**
+ * Abstract implementation of a record writer which writes pairs of nodes and
+ * arbitrary values to text based files
+ * 
+ * @author rvesse
+ * 
+ * @param <TValue>
+ *            Value type
+ */
+public abstract class AbstractNodeWriter<TValue> extends RecordWriter<NodeWritable, TValue> {
+
+    /**
+     * Default separator written between nodes and their associated values
+     */
+    public static final String DEFAULT_SEPARATOR = "\t";
+
+    private static final Logger log = LoggerFactory.getLogger(AbstractNodeWriter.class);
+
+    // Underlying output wrapped as a Jena AWriter so NodeFormatter can write to it
+    protected AWriter writer;
+    private NodeFormatter formatter;
+
+    /**
+     * Creates a new tuple writer using the default NTriples node formatter
+     * 
+     * @param writer
+     *            Writer
+     */
+    public AbstractNodeWriter(Writer writer) {
+        this(writer, new NodeFormatterNT());
+    }
+
+    /**
+     * Creates a new tuple writer
+     * 
+     * @param writer
+     *            Writer, must not be null
+     * @param formatter
+     *            Node formatter, must not be null
+     * @throws NullPointerException
+     *             Thrown if the writer and/or formatter is null
+     */
+    public AbstractNodeWriter(Writer writer, NodeFormatter formatter) {
+        if (writer == null)
+            throw new NullPointerException("writer cannot be null");
+        if (formatter == null)
+            throw new NullPointerException("formatter cannot be null");
+        this.formatter = formatter;
+        this.writer = Writer2.wrap(writer);
+    }
+    
+    @Override
+    public final void write(NodeWritable key, TValue value) throws IOException, InterruptedException {
+        // Writes key, separator, value and a newline. Note that no explicit
+        // flush is performed here; output may remain buffered until
+        // close(TaskAttemptContext) is called
+        this.writeKey(key);
+        this.writer.write(this.getSeparator());
+        this.writeValue(value);
+        this.writer.write('\n');
+    }
+
+    /**
+     * Writes the given key using the configured node formatter
+     * 
+     * @param key
+     *            Key
+     */
+    protected void writeKey(NodeWritable key) {
+        Node n = key.get();
+        this.getNodeFormatter().format(this.writer, n);
+    }
+
+    /**
+     * Writes the given value via its {@code toString()}; {@link NullWritable}
+     * values produce no output
+     * 
+     * @param value
+     *            Value
+     */
+    protected void writeValue(TValue value) {
+        if (value instanceof NullWritable)
+            return;
+        this.writer.write(value.toString());
+    }
+
+    @Override
+    public void close(TaskAttemptContext context) throws IOException, InterruptedException {
+        log.debug("close({})", context);
+        // Closes the underlying writer supplied at construction time
+        writer.close();
+    }
+
+    /**
+     * Gets the separator that is written between the key node and its value,
+     * defaults to {@link #DEFAULT_SEPARATOR}
+     * 
+     * @return Separator
+     */
+    protected String getSeparator() {
+        return DEFAULT_SEPARATOR;
+    }
+}

Added: jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/main/java/com/yarcdata/urika/hadoop/rdf/io/output/writers/AbstractWholeFileNodeTupleWriter.java
URL: http://svn.apache.org/viewvc/jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/main/java/com/yarcdata/urika/hadoop/rdf/io/output/writers/AbstractWholeFileNodeTupleWriter.java?rev=1583942&view=auto
==============================================================================
--- jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/main/java/com/yarcdata/urika/hadoop/rdf/io/output/writers/AbstractWholeFileNodeTupleWriter.java (added)
+++ jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/main/java/com/yarcdata/urika/hadoop/rdf/io/output/writers/AbstractWholeFileNodeTupleWriter.java Wed Apr  2 09:20:51 2014
@@ -0,0 +1,82 @@
+/*
+ * Copyright 2013 YarcData LLC All Rights Reserved.
+ */
+
+package com.yarcdata.urika.hadoop.rdf.io.output.writers;
+
+import java.io.IOException;
+import java.io.Writer;
+
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.yarcdata.urika.hadoop.rdf.types.AbstractNodeTupleWritable;
+
+/**
+ * An abstract implementation of a record writer that writes records to whole
+ * file formats.
+ * <p>
+ * It is important to note that the writer does not actually write any output
+ * until the {@link #close(TaskAttemptContext)} method is called as it must
+ * write the entire output in one go otherwise the output would be invalid. Also
+ * writing in one go increases the chances that the writer will be able to
+ * effectively use the syntax compressions of the RDF serialization being used.
+ * </p>
+ * <p>
+ * The implementation only writes the value portion of the key value pair since
+ * it is the value portion that is used to convey the node tuples
+ * </p>
+ * 
+ * @author rvesse
+ * 
+ * @param <TKey>
+ *            Key type
+ * @param <TValue>
+ *            Tuple type
+ * @param <T>
+ *            Writable node tuple type
+ */
+public abstract class AbstractWholeFileNodeTupleWriter<TKey, TValue, T extends AbstractNodeTupleWritable<TValue>> extends
+        RecordWriter<TKey, T> {
+    private static final Logger LOG = LoggerFactory.getLogger(AbstractWholeFileNodeTupleWriter.class);
+
+    // Set to null once close() has run so repeated close calls are no-ops
+    private Writer writer;
+
+    protected AbstractWholeFileNodeTupleWriter(Writer writer) {
+        if (writer == null)
+            throw new NullPointerException("writer cannot be null");
+        this.writer = writer;
+    }
+
+    @Override
+    public final void write(TKey key, T value) throws IOException, InterruptedException {
+        LOG.debug("write({}={})", key, value);
+        // Tuples are only cached here; actual output happens in close()
+        this.add(value);
+    }
+
+    /**
+     * Adds the tuple to the cache of tuples that will be written when the
+     * {@link #close(TaskAttemptContext)} method is called
+     * 
+     * @param value
+     *            Tuple
+     */
+    protected abstract void add(T value);
+
+    @Override
+    public void close(TaskAttemptContext context) throws IOException, InterruptedException {
+        if (this.writer != null) {
+            // NOTE(review): if writeOutput() throws, the underlying writer is
+            // never closed - consider a try/finally here
+            this.writeOutput(writer);
+            this.writer.close();
+            this.writer = null;
+        }
+    }
+
+    /**
+     * Writes the cached tuples to the writer, the writer should not be closed
+     * by this method implementation
+     * 
+     * @param writer
+     *            Writer
+     */
+    protected abstract void writeOutput(Writer writer);
+
+}

Added: jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/main/java/com/yarcdata/urika/hadoop/rdf/io/output/writers/AbstractWholeFileQuadWriter.java
URL: http://svn.apache.org/viewvc/jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/main/java/com/yarcdata/urika/hadoop/rdf/io/output/writers/AbstractWholeFileQuadWriter.java?rev=1583942&view=auto
==============================================================================
--- jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/main/java/com/yarcdata/urika/hadoop/rdf/io/output/writers/AbstractWholeFileQuadWriter.java (added)
+++ jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/main/java/com/yarcdata/urika/hadoop/rdf/io/output/writers/AbstractWholeFileQuadWriter.java Wed Apr  2 09:20:51 2014
@@ -0,0 +1,52 @@
+/*
+ * Copyright 2013 YarcData LLC All Rights Reserved.
+ */
+
+package com.yarcdata.urika.hadoop.rdf.io.output.writers;
+
+import java.io.Writer;
+
+import org.apache.jena.riot.Lang;
+import org.apache.jena.riot.RDFDataMgr;
+import org.apache.jena.riot.RDFWriterRegistry;
+
+import com.hp.hpl.jena.sparql.core.DatasetGraph;
+import com.hp.hpl.jena.sparql.core.DatasetGraphFactory;
+import com.hp.hpl.jena.sparql.core.Quad;
+import com.yarcdata.urika.hadoop.rdf.types.QuadWritable;
+
+/**
+ * An abstract record writer for whole file quad formats
+ * 
+ * @author rvesse
+ * 
+ * @param <TKey>
+ *            Key type
+ */
+public abstract class AbstractWholeFileQuadWriter<TKey> extends AbstractWholeFileNodeTupleWriter<TKey, Quad, QuadWritable> {
+
+    // All quads are accumulated in this in-memory dataset until close(), so
+    // memory usage grows with the number of quads written
+    private DatasetGraph g = DatasetGraphFactory.createMem();
+
+    protected AbstractWholeFileQuadWriter(Writer writer) {
+        super(writer);
+    }
+
+    @Override
+    protected final void add(QuadWritable value) {
+        this.g.add(value.get());
+    }
+
+    // Deprecation suppressed because the Writer-based RDFDataMgr.write()
+    // overload is deprecated in RIOT
+    @SuppressWarnings("deprecation")
+    @Override
+    protected void writeOutput(Writer writer) {
+        // Serializes the whole accumulated dataset in one go using the default
+        // serialization for the format reported by getRdfLanguage()
+        RDFDataMgr.write(writer, this.g, RDFWriterRegistry.defaultSerialization(this.getRdfLanguage()));
+    }
+
+    /**
+     * Gets the RDF language to write the output in
+     * 
+     * @return RDF language
+     */
+    protected abstract Lang getRdfLanguage();
+
+}

Added: jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/main/java/com/yarcdata/urika/hadoop/rdf/io/output/writers/AbstractWholeFileTripleWriter.java
URL: http://svn.apache.org/viewvc/jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/main/java/com/yarcdata/urika/hadoop/rdf/io/output/writers/AbstractWholeFileTripleWriter.java?rev=1583942&view=auto
==============================================================================
--- jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/main/java/com/yarcdata/urika/hadoop/rdf/io/output/writers/AbstractWholeFileTripleWriter.java (added)
+++ jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/main/java/com/yarcdata/urika/hadoop/rdf/io/output/writers/AbstractWholeFileTripleWriter.java Wed Apr  2 09:20:51 2014
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2013 YarcData LLC All Rights Reserved.
+ */
+
+package com.yarcdata.urika.hadoop.rdf.io.output.writers;
+
+import java.io.Writer;
+
+import org.apache.jena.riot.Lang;
+import org.apache.jena.riot.RDFDataMgr;
+
+import com.hp.hpl.jena.graph.Graph;
+import com.hp.hpl.jena.graph.Triple;
+import com.hp.hpl.jena.sparql.graph.GraphFactory;
+import com.yarcdata.urika.hadoop.rdf.types.TripleWritable;
+
+/**
+ * An abstract record writer for whole file triple formats
+ * 
+ * @author rvesse
+ * 
+ * @param <TKey>
+ *            Key type
+ */
+public abstract class AbstractWholeFileTripleWriter<TKey> extends AbstractWholeFileNodeTupleWriter<TKey, Triple, TripleWritable> {
+
+    // All triples are accumulated in this in-memory graph until close(), so
+    // memory usage grows with the number of triples written
+    private Graph g = GraphFactory.createDefaultGraph();
+
+    protected AbstractWholeFileTripleWriter(Writer writer) {
+        super(writer);
+    }
+
+    @Override
+    protected final void add(TripleWritable value) {
+        this.g.add(value.get());
+    }
+
+    // Deprecation suppressed because the Writer-based RDFDataMgr.write()
+    // overload is deprecated in RIOT
+    @SuppressWarnings("deprecation")
+    @Override
+    protected final void writeOutput(Writer writer) {
+        // Serializes the whole accumulated graph in one go in the format
+        // reported by getRdfLanguage()
+        RDFDataMgr.write(writer, this.g, this.getRdfLanguage());
+    }
+
+    /**
+     * Gets the RDF language to write the output in
+     * 
+     * @return RDF language
+     */
+    protected abstract Lang getRdfLanguage();
+
+}

Added: jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/main/java/com/yarcdata/urika/hadoop/rdf/io/output/writers/NQuadsWriter.java
URL: http://svn.apache.org/viewvc/jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/main/java/com/yarcdata/urika/hadoop/rdf/io/output/writers/NQuadsWriter.java?rev=1583942&view=auto
==============================================================================
--- jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/main/java/com/yarcdata/urika/hadoop/rdf/io/output/writers/NQuadsWriter.java (added)
+++ jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/main/java/com/yarcdata/urika/hadoop/rdf/io/output/writers/NQuadsWriter.java Wed Apr  2 09:20:51 2014
@@ -0,0 +1,42 @@
+/*
+ * Copyright 2013 YarcData LLC All Rights Reserved.
+ */
+
+package com.yarcdata.urika.hadoop.rdf.io.output.writers;
+
+import java.io.Writer;
+
+import org.apache.jena.riot.out.CharSpace;
+import org.apache.jena.riot.out.NodeFormatterNT;
+
+/**
+ * A record writer for NQuads
+ * 
+ * @author rvesse
+ * 
+ * @param <TKey>
+ *            Key type
+ */
+public class NQuadsWriter<TKey> extends AbstractLineBasedQuadWriter<TKey> {
+
+    /**
+     * Creates a new writer
+     * 
+     * @param writer
+     *            Writer
+     */
+    public NQuadsWriter(Writer writer) {
+        // NQuads uses the same node syntax as NTriples so the NTriples
+        // formatter is reused here
+        super(writer, new NodeFormatterNT());
+    }
+
+    /**
+     * Creates a new writer using the given character space
+     * 
+     * @param writer
+     *            Writer
+     * @param charSpace
+     *            Character space, controls how non-ASCII characters are output
+     */
+    public NQuadsWriter(Writer writer, CharSpace charSpace) {
+        super(writer, new NodeFormatterNT(charSpace));
+    }
+}

Added: jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/main/java/com/yarcdata/urika/hadoop/rdf/io/output/writers/NTriplesNodeWriter.java
URL: http://svn.apache.org/viewvc/jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/main/java/com/yarcdata/urika/hadoop/rdf/io/output/writers/NTriplesNodeWriter.java?rev=1583942&view=auto
==============================================================================
--- jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/main/java/com/yarcdata/urika/hadoop/rdf/io/output/writers/NTriplesNodeWriter.java (added)
+++ jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/main/java/com/yarcdata/urika/hadoop/rdf/io/output/writers/NTriplesNodeWriter.java Wed Apr  2 09:20:51 2014
@@ -0,0 +1,44 @@
+/*
+ * Copyright 2013 YarcData LLC All Rights Reserved.
+ */
+
+package com.yarcdata.urika.hadoop.rdf.io.output.writers;
+
+import java.io.Writer;
+
+import org.apache.jena.riot.out.CharSpace;
+import org.apache.jena.riot.out.NodeFormatterNT;
+
+/**
+ * A NTriples based node writer
+ * 
+ * @author rvesse
+ * 
+ * @param <TValue>
+ *            Value type
+ */
+public class NTriplesNodeWriter<TValue> extends AbstractNodeWriter<TValue> {
+
+    /**
+     * Creates a new writer using the default NTriples node formatter
+     * 
+     * @param writer
+     *            Writer
+     */
+    public NTriplesNodeWriter(Writer writer) {
+        super(writer);
+    }
+
+    /**
+     * Creates a new writer
+     * 
+     * @param writer
+     *            Writer
+     * @param charSpace
+     *            Character space to use, controls how non-ASCII characters are
+     *            output
+     */
+    public NTriplesNodeWriter(Writer writer, CharSpace charSpace) {
+        super(writer, new NodeFormatterNT(charSpace));
+    }
+
+}

Added: jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/main/java/com/yarcdata/urika/hadoop/rdf/io/output/writers/NTriplesWriter.java
URL: http://svn.apache.org/viewvc/jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/main/java/com/yarcdata/urika/hadoop/rdf/io/output/writers/NTriplesWriter.java?rev=1583942&view=auto
==============================================================================
--- jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/main/java/com/yarcdata/urika/hadoop/rdf/io/output/writers/NTriplesWriter.java (added)
+++ jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/main/java/com/yarcdata/urika/hadoop/rdf/io/output/writers/NTriplesWriter.java Wed Apr  2 09:20:51 2014
@@ -0,0 +1,43 @@
+/*
+ * Copyright 2013 YarcData LLC All Rights Reserved.
+ */
+
+package com.yarcdata.urika.hadoop.rdf.io.output.writers;
+
+import java.io.Writer;
+
+import org.apache.jena.riot.out.CharSpace;
+import org.apache.jena.riot.out.NodeFormatterNT;
+
+/**
+ * A record writer for NTriples
+ * 
+ * @author rvesse
+ * @param <TKey>
+ *            Key type
+ * 
+ */
+public class NTriplesWriter<TKey> extends AbstractLineBasedTripleWriter<TKey> {
+
+    /**
+     * Creates a new writer
+     * 
+     * @param writer
+     *            Writer
+     */
+    public NTriplesWriter(Writer writer) {
+        super(writer, new NodeFormatterNT());
+    }
+
+    /**
+     * Creates a new writer using the given character space
+     * 
+     * @param writer
+     *            Writer
+     * @param charSpace
+     *            Character space, controls how non-ASCII characters are output
+     */
+    public NTriplesWriter(Writer writer, CharSpace charSpace) {
+        super(writer, new NodeFormatterNT(charSpace));
+    }
+}

Added: jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/main/java/com/yarcdata/urika/hadoop/rdf/io/output/writers/RdfJsonWriter.java
URL: http://svn.apache.org/viewvc/jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/main/java/com/yarcdata/urika/hadoop/rdf/io/output/writers/RdfJsonWriter.java?rev=1583942&view=auto
==============================================================================
--- jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/main/java/com/yarcdata/urika/hadoop/rdf/io/output/writers/RdfJsonWriter.java (added)
+++ jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/main/java/com/yarcdata/urika/hadoop/rdf/io/output/writers/RdfJsonWriter.java Wed Apr  2 09:20:51 2014
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2013 YarcData LLC All Rights Reserved.
+ */
+
+package com.yarcdata.urika.hadoop.rdf.io.output.writers;
+
+import java.io.Writer;
+
+import org.apache.jena.riot.Lang;
+
+/**
+ * A record writer for RDF/JSON, a whole file format so output is buffered in
+ * memory and only written out when the writer is closed
+ * 
+ * @author rvesse
+ * @param <TKey>
+ *            Key type
+ * 
+ */
+public class RdfJsonWriter<TKey> extends AbstractWholeFileTripleWriter<TKey> {
+
+    /**
+     * Creates a new record writer
+     * 
+     * @param writer
+     *            Writer
+     */
+    public RdfJsonWriter(Writer writer) {
+        super(writer);
+    }
+
+    @Override
+    protected Lang getRdfLanguage() {
+        return Lang.RDFJSON;
+    }
+
+}

Added: jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/main/java/com/yarcdata/urika/hadoop/rdf/io/output/writers/RdfXmlWriter.java
URL: http://svn.apache.org/viewvc/jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/main/java/com/yarcdata/urika/hadoop/rdf/io/output/writers/RdfXmlWriter.java?rev=1583942&view=auto
==============================================================================
--- jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/main/java/com/yarcdata/urika/hadoop/rdf/io/output/writers/RdfXmlWriter.java (added)
+++ jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/main/java/com/yarcdata/urika/hadoop/rdf/io/output/writers/RdfXmlWriter.java Wed Apr  2 09:20:51 2014
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2013 YarcData LLC All Rights Reserved.
+ */
+
+package com.yarcdata.urika.hadoop.rdf.io.output.writers;
+
+import java.io.Writer;
+
+import org.apache.jena.riot.Lang;
+
+/**
+ * A record writer for RDF/XML, a whole file format so output is buffered in
+ * memory and only written out when the writer is closed
+ * 
+ * @author rvesse
+ * @param <TKey>
+ *            Key type
+ * 
+ */
+public class RdfXmlWriter<TKey> extends AbstractWholeFileTripleWriter<TKey> {
+
+    /**
+     * Creates a new record writer
+     * 
+     * @param writer
+     *            Writer
+     */
+    public RdfXmlWriter(Writer writer) {
+        super(writer);
+    }
+
+    @Override
+    protected Lang getRdfLanguage() {
+        return Lang.RDFXML;
+    }
+
+}

Added: jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/main/java/com/yarcdata/urika/hadoop/rdf/io/output/writers/TriGWriter.java
URL: http://svn.apache.org/viewvc/jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/main/java/com/yarcdata/urika/hadoop/rdf/io/output/writers/TriGWriter.java?rev=1583942&view=auto
==============================================================================
--- jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/main/java/com/yarcdata/urika/hadoop/rdf/io/output/writers/TriGWriter.java (added)
+++ jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/main/java/com/yarcdata/urika/hadoop/rdf/io/output/writers/TriGWriter.java Wed Apr  2 09:20:51 2014
@@ -0,0 +1,38 @@
+/*
+ * Copyright 2013 YarcData LLC All Rights Reserved.
+ */
+
+package com.yarcdata.urika.hadoop.rdf.io.output.writers;
+
+import java.io.Writer;
+
+import org.apache.jena.riot.Lang;
+
+/**
+ * A record writer for TriG
+ * 
+ * @author rvesse
+ * 
+ * @param <TKey>
+ *            Key type
+ */
+public class TriGWriter<TKey> extends AbstractBatchedQuadWriter<TKey> {
+
+    /**
+     * Creates a new record writer
+     * 
+     * @param writer
+     *            Writer
+     * @param batchSize
+     *            Batch size, presumably the number of quads buffered before a
+     *            batch is flushed to the writer (see AbstractBatchedQuadWriter)
+     */
+    public TriGWriter(Writer writer, long batchSize) {
+        super(writer, batchSize);
+    }
+
+    @Override
+    protected Lang getRdfLanguage() {
+        return Lang.TRIG;
+    }
+
+}

Added: jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/main/java/com/yarcdata/urika/hadoop/rdf/io/output/writers/TurtleWriter.java
URL: http://svn.apache.org/viewvc/jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/main/java/com/yarcdata/urika/hadoop/rdf/io/output/writers/TurtleWriter.java?rev=1583942&view=auto
==============================================================================
--- jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/main/java/com/yarcdata/urika/hadoop/rdf/io/output/writers/TurtleWriter.java (added)
+++ jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/main/java/com/yarcdata/urika/hadoop/rdf/io/output/writers/TurtleWriter.java Wed Apr  2 09:20:51 2014
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2013 YarcData LLC All Rights Reserved.
+ */
+
+package com.yarcdata.urika.hadoop.rdf.io.output.writers;
+
+import java.io.Writer;
+
+import org.apache.jena.riot.Lang;
+
+/**
+ * A record writer for Turtle
+ * 
+ * @author rvesse
+ * 
+ * @param <TKey>
+ *            Key type
+ */
+public class TurtleWriter<TKey> extends AbstractBatchedTripleWriter<TKey> {
+
+    /**
+     * Creates a new record writer
+     * 
+     * @param writer
+     *            Writer
+     * @param batchSize
+     *            Batch size, presumably the number of triples buffered before a
+     *            batch is flushed to the writer (see AbstractBatchedTripleWriter)
+     */
+    public TurtleWriter(Writer writer, long batchSize) {
+        super(writer, batchSize);
+    }
+
+    @Override
+    protected Lang getRdfLanguage() {
+        return Lang.TURTLE;
+    }
+
+}

Added: jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/RdfTriplesInputTestMapper.java
URL: http://svn.apache.org/viewvc/jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/RdfTriplesInputTestMapper.java?rev=1583942&view=auto
==============================================================================
--- jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/RdfTriplesInputTestMapper.java (added)
+++ jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/RdfTriplesInputTestMapper.java Wed Apr  2 09:20:51 2014
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2013 YarcData LLC All Rights Reserved.
+ */ 
+
+package com.yarcdata.urika.hadoop.rdf.io;
+
+import java.io.IOException;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.log4j.Logger;
+
+import com.yarcdata.urika.hadoop.rdf.types.TripleWritable;
+
+/**
+ * A test mapper which takes in line based RDF triple input and just produces triples
+ * @author rvesse
+ *
+ */
+public class RdfTriplesInputTestMapper extends Mapper<LongWritable, TripleWritable, NullWritable, TripleWritable> {
+    
+    private static final Logger LOG = Logger.getLogger(RdfTriplesInputTestMapper.class);
+
+    @Override
+    protected void map(LongWritable key, TripleWritable value, Context context)
+            throws IOException, InterruptedException {
+        LOG.info("Line " + key.toString() + " => " + value.toString());
+        context.write(NullWritable.get(), value);
+    }
+
+    
+}

Added: jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/AbstractBlockedQuadInputFormatTests.java
URL: http://svn.apache.org/viewvc/jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/AbstractBlockedQuadInputFormatTests.java?rev=1583942&view=auto
==============================================================================
--- jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/AbstractBlockedQuadInputFormatTests.java (added)
+++ jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/AbstractBlockedQuadInputFormatTests.java Wed Apr  2 09:20:51 2014
@@ -0,0 +1,19 @@
/*
 * Copyright 2013 YarcData LLC All Rights Reserved.
 */

package com.yarcdata.urika.hadoop.rdf.io.input;

/**
 * Abstract tests for blocked quad input formats
 * 
 * @author rvesse
 * 
 */
public abstract class AbstractBlockedQuadInputFormatTests extends AbstractWholeFileQuadInputFormatTests {

    @Override
    protected boolean canSplitInputs() {
        // Unlike the whole file formats they derive from, blocked formats
        // support splitting their inputs
        return true;
    }
}

Added: jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/AbstractBlockedTripleInputFormatTests.java
URL: http://svn.apache.org/viewvc/jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/AbstractBlockedTripleInputFormatTests.java?rev=1583942&view=auto
==============================================================================
--- jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/AbstractBlockedTripleInputFormatTests.java (added)
+++ jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/AbstractBlockedTripleInputFormatTests.java Wed Apr  2 09:20:51 2014
@@ -0,0 +1,19 @@
+/*
+ * Copyright 2013 YarcData LLC All Rights Reserved.
+ */
+
+package com.yarcdata.urika.hadoop.rdf.io.input;
+
+/**
+ * Abstract tests for blocked triple input formats
+ * 
+ * @author rvesse
+ * 
+ */
+public abstract class AbstractBlockedTripleInputFormatTests extends AbstractWholeFileTripleInputFormatTests {
+
+    @Override
+    protected boolean canSplitInputs() {
+        return true;
+    }
+}

Added: jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/AbstractNodeTupleInputFormatTests.java
URL: http://svn.apache.org/viewvc/jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/AbstractNodeTupleInputFormatTests.java?rev=1583942&view=auto
==============================================================================
--- jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/AbstractNodeTupleInputFormatTests.java (added)
+++ jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/AbstractNodeTupleInputFormatTests.java Wed Apr  2 09:20:51 2014
@@ -0,0 +1,592 @@
+/*
+ * Copyright 2013 YarcData LLC All Rights Reserved.
+ */
+
+package com.yarcdata.urika.hadoop.rdf.io.input;
+
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.Writer;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.TaskAttemptID;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.input.FileSplit;
+import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat;
+import org.apache.hadoop.mapreduce.task.JobContextImpl;
+import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Assume;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.yarcdata.urika.hadoop.rdf.io.HadoopIOConstants;
+import com.yarcdata.urika.hadoop.rdf.io.RdfIOConstants;
+import com.yarcdata.urika.hadoop.rdf.types.AbstractNodeTupleWritable;
+
+/**
+ * Abstract node tuple input format tests
+ * 
+ * @author rvesse
+ * 
+ * @param <TValue>
+ * @param <T>
+ */
+public abstract class AbstractNodeTupleInputFormatTests<TValue, T extends AbstractNodeTupleWritable<TValue>> {
+
+    private static final Logger LOG = LoggerFactory.getLogger(AbstractNodeTupleInputFormatTests.class);
+
+    protected static final int EMPTY_SIZE = 0, SMALL_SIZE = 100, LARGE_SIZE = 10000, BAD_SIZE = 100, MIXED_SIZE = 100;
+    protected static final String EMPTY = "empty";
+    protected static final String SMALL = "small";
+    protected static final String LARGE = "large";
+    protected static final String BAD = "bad";
+    protected static final String MIXED = "mixed";
+
+    /**
+     * Temporary folder for the tests
+     */
+    @Rule
+    public TemporaryFolder folder = new TemporaryFolder();
+
+    protected File empty, small, large, bad, mixed;
+
+    /**
+     * Prepares the inputs for the tests
+     * 
+     * @throws IOException
+     */
+    @Before
+    public void beforeTest() throws IOException {
+        this.prepareInputs();
+    }
+
+    /**
+     * Cleans up the inputs after each test
+     */
+    @After
+    public void afterTest() {
+        // Should be unnecessary since JUnit will clean up the temporary folder
+        // anyway but best to do this regardless
+        empty.delete();
+        small.delete();
+        large.delete();
+        bad.delete();
+        mixed.delete();
+    }
+
+    /**
+     * Prepares a fresh configuration
+     * 
+     * @return Configuration
+     */
+    protected Configuration prepareConfiguration() {
+        Configuration config = new Configuration(true);
+        // Nothing else to do
+        return config;
+    }
+
+    /**
+     * Prepares the inputs
+     * 
+     * @throws IOException
+     */
+    protected void prepareInputs() throws IOException {
+        String ext = this.getFileExtension();
+        empty = folder.newFile(EMPTY + ext);
+        this.generateTuples(empty, EMPTY_SIZE);
+        small = folder.newFile(SMALL + ext);
+        this.generateTuples(small, SMALL_SIZE);
+        large = folder.newFile(LARGE + ext);
+        this.generateTuples(large, LARGE_SIZE);
+        bad = folder.newFile(BAD + ext);
+        this.generateBadTuples(bad, BAD_SIZE);
+        mixed = folder.newFile(MIXED + ext);
+        this.generateMixedTuples(mixed, MIXED_SIZE);
+    }
+
+    /**
+     * Gets the extra file extension to add to the filenames
+     * 
+     * @return File extension
+     */
+    protected abstract String getFileExtension();
+
+    /**
+     * Generates tuples used for tests
+     * 
+     * @param f
+     *            File
+     * @param num
+     *            Number of tuples to generate
+     * @throws IOException
+     */
+    protected final void generateTuples(File f, int num) throws IOException {
+        this.generateTuples(this.getWriter(f), num);
+    }
+
+    /**
+     * Gets the writer to use for generating tuples
+     * 
+     * @param f
+     *            File
+     * @return Writer
+     * @throws IOException
+     */
+    protected Writer getWriter(File f) throws IOException {
+        return new FileWriter(f, false);
+    }
+
+    /**
+     * Generates tuples used for tests
+     * 
+     * @param writer
+     *            Writer to write to
+     * @param num
+     *            Number of tuples to generate
+     * @throws IOException
+     */
+    protected abstract void generateTuples(Writer writer, int num) throws IOException;
+
+    /**
+     * Generates bad tuples used for tests
+     * 
+     * @param f
+     *            File
+     * @param num
+     *            Number of bad tuples to generate
+     * @throws IOException
+     */
+    protected final void generateBadTuples(File f, int num) throws IOException {
+        this.generateBadTuples(this.getWriter(f), num);
+    }
+
+    /**
+     * Generates bad tuples used for tests
+     * 
+     * @param writer
+     *            Writer to write to
+     * @param num
+     *            Number of bad tuples to generate
+     * @throws IOException
+     */
+    protected abstract void generateBadTuples(Writer writer, int num) throws IOException;
+
+    /**
+     * Generates a mixture of good and bad tuples used for tests
+     * 
+     * @param f
+     *            File
+     * @param num
+     *            Number of tuples to generate, they should be a 50/50 mix of
+     *            good and bad tuples
+     * @throws IOException
+     */
+    protected final void generateMixedTuples(File f, int num) throws IOException {
+        this.generateMixedTuples(this.getWriter(f), num);
+    }
+
+    /**
+     * Generates a mixture of good and bad tuples used for tests
+     * 
+     * @param writer
+     *            Writer to write to
+     * @param num
+     *            Number of tuples to generate, they should be a 50/50 mix of
+     *            good and bad tuples
+     * @throws IOException
+     */
+    protected abstract void generateMixedTuples(Writer write, int num) throws IOException;
+
+    /**
+     * Adds an input path to the job configuration
+     * 
+     * @param f
+     *            File
+     * @param config
+     *            Configuration
+     * @param job
+     *            Job
+     * @throws IOException
+     */
+    protected void addInputPath(File f, Configuration config, Job job) throws IOException {
+        FileSystem fs = FileSystem.getLocal(config);
+        Path inputPath = fs.makeQualified(new Path(f.getAbsolutePath()));
+        FileInputFormat.addInputPath(job, inputPath);
+    }
+
+    protected final int countTuples(RecordReader<LongWritable, T> reader) throws IOException, InterruptedException {
+        int count = 0;
+
+        // Check initial progress
+        LOG.info(String.format("Initial Reported Progress %f", reader.getProgress()));
+        float progress = reader.getProgress();
+        if (Float.compare(0.0f, progress) == 0) {
+            Assert.assertEquals(0.0d, reader.getProgress(), 0.0d);
+        } else if (Float.compare(1.0f, progress) == 0) {
+            // If reader is reported 1.0 straight away then we expect there to
+            // be no key values
+            Assert.assertEquals(1.0d, reader.getProgress(), 0.0d);
+            Assert.assertFalse(reader.nextKeyValue());
+        } else {
+            Assert.fail(String.format(
+                    "Expected progress of 0.0 or 1.0 before reader has been accessed for first time but got %f", progress));
+        }
+
+        // Count tuples
+        boolean debug = LOG.isDebugEnabled();
+        while (reader.nextKeyValue()) {
+            count++;
+            progress = reader.getProgress();
+            if (debug)
+                LOG.debug(String.format("Current Reported Progress %f", progress));
+            Assert.assertTrue(String.format("Progress should be in the range 0.0 < p <= 1.0 but got %f", progress),
+                    progress > 0.0f && progress <= 1.0f);
+        }
+        reader.close();
+        LOG.info(String.format("Got %d tuples from this record reader", count));
+
+        // Check final progress
+        LOG.info(String.format("Final Reported Progress %f", reader.getProgress()));
+        Assert.assertEquals(1.0d, reader.getProgress(), 0.0d);
+
+        return count;
+    }
+
+    protected final void checkTuples(RecordReader<LongWritable, T> reader, int expected) throws IOException, InterruptedException {
+        Assert.assertEquals(expected, this.countTuples(reader));
+    }
+
+    /**
+     * Runs a test with a single input
+     * 
+     * @param input
+     *            Input
+     * @param expectedTuples
+     *            Expected tuples
+     * @throws IOException
+     * @throws InterruptedException
+     */
+    protected final void testSingleInput(File input, int expectedSplits, int expectedTuples) throws IOException,
+            InterruptedException {
+        // Prepare configuration
+        Configuration config = this.prepareConfiguration();
+        this.testSingleInput(config, input, expectedSplits, expectedTuples);
+    }
+
+    /**
+     * Runs a test with a single input
+     * 
+     * @param config
+     *            Configuration
+     * @param input
+     *            Input
+     * @param expectedTuples
+     *            Expected tuples
+     * @throws IOException
+     * @throws InterruptedException
+     */
+    protected final void testSingleInput(Configuration config, File input, int expectedSplits, int expectedTuples)
+            throws IOException, InterruptedException {
+        // Set up fake job
+        InputFormat<LongWritable, T> inputFormat = this.getInputFormat();
+        Job job = Job.getInstance(config);
+        job.setInputFormatClass(inputFormat.getClass());
+        this.addInputPath(input, job.getConfiguration(), job);
+        JobContext context = new JobContextImpl(job.getConfiguration(), job.getJobID());
+        Assert.assertEquals(1, FileInputFormat.getInputPaths(context).length);
+        NLineInputFormat.setNumLinesPerSplit(job, LARGE_SIZE);
+
+        // Check splits
+        List<InputSplit> splits = inputFormat.getSplits(context);
+        Assert.assertEquals(expectedSplits, splits.size());
+
+        // Check tuples
+        for (InputSplit split : splits) {
+            TaskAttemptContext taskContext = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
+            RecordReader<LongWritable, T> reader = inputFormat.createRecordReader(split, taskContext);
+            reader.initialize(split, taskContext);
+            this.checkTuples(reader, expectedTuples);
+        }
+    }
+
+    protected abstract InputFormat<LongWritable, T> getInputFormat();
+
+    /**
+     * Basic tuples input test
+     * 
+     * @throws IOException
+     * @throws ClassNotFoundException
+     * @throws InterruptedException
+     */
+    @Test
+    public final void single_input_01() throws IOException, InterruptedException, ClassNotFoundException {
+        testSingleInput(empty, this.canSplitInputs() ? 0 : 1, EMPTY_SIZE);
+    }
+
+    /**
+     * Basic tuples input test
+     * 
+     * @throws IOException
+     * @throws ClassNotFoundException
+     * @throws InterruptedException
+     */
+    @Test
+    public final void single_input_02() throws IOException, InterruptedException, ClassNotFoundException {
+        testSingleInput(small, 1, SMALL_SIZE);
+    }
+
+    /**
+     * Basic tuples input test
+     * 
+     * @throws IOException
+     * @throws ClassNotFoundException
+     * @throws InterruptedException
+     */
+    @Test
+    public final void single_input_03() throws IOException, InterruptedException, ClassNotFoundException {
+        testSingleInput(large, 1, LARGE_SIZE);
+    }
+
+    /**
+     * Basic tuples input test
+     * 
+     * @throws IOException
+     * @throws ClassNotFoundException
+     * @throws InterruptedException
+     */
+    @Test
+    public final void single_input_04() throws IOException, InterruptedException, ClassNotFoundException {
+        testSingleInput(bad, 1, 0);
+    }
+
+    /**
+     * Basic tuples input test
+     * 
+     * @throws IOException
+     * @throws ClassNotFoundException
+     * @throws InterruptedException
+     */
+    @Test
+    public final void single_input_05() throws IOException, InterruptedException, ClassNotFoundException {
+        testSingleInput(mixed, 1, MIXED_SIZE / 2);
+    }
+
+    /**
+     * Tests behaviour when ignoring bad tuples is disabled
+     * 
+     * @throws InterruptedException
+     * @throws IOException
+     */
+    @Test(expected = IOException.class)
+    public final void fail_on_bad_input_01() throws IOException, InterruptedException {
+        Configuration config = this.prepareConfiguration();
+        config.setBoolean(RdfIOConstants.INPUT_IGNORE_BAD_TUPLES, false);
+        Assert.assertFalse(config.getBoolean(RdfIOConstants.INPUT_IGNORE_BAD_TUPLES, true));
+        testSingleInput(config, bad, 1, 0);
+    }
+
+    /**
+     * Tests behaviour when ignoring bad tuples is disabled
+     * 
+     * @throws InterruptedException
+     * @throws IOException
+     */
+    @Test(expected = IOException.class)
+    public final void fail_on_bad_input_02() throws IOException, InterruptedException {
+        Configuration config = this.prepareConfiguration();
+        config.setBoolean(RdfIOConstants.INPUT_IGNORE_BAD_TUPLES, false);
+        Assert.assertFalse(config.getBoolean(RdfIOConstants.INPUT_IGNORE_BAD_TUPLES, true));
+        testSingleInput(config, mixed, 1, MIXED_SIZE / 2);
+    }
+
+    /**
+     * Runs a multiple input test
+     * 
+     * @param inputs
+     *            Inputs
+     * @param expectedSplits
+     *            Number of splits expected
+     * @param expectedTuples
+     *            Number of tuples expected
+     * @throws IOException
+     * @throws InterruptedException
+     */
+    protected final void testMultipleInputs(File[] inputs, int expectedSplits, int expectedTuples) throws IOException,
+            InterruptedException {
+        // Prepare configuration and inputs
+        Configuration config = this.prepareConfiguration();
+
+        // Set up fake job
+        InputFormat<LongWritable, T> inputFormat = this.getInputFormat();
+        Job job = Job.getInstance(config);
+        job.setInputFormatClass(inputFormat.getClass());
+        for (File input : inputs) {
+            this.addInputPath(input, job.getConfiguration(), job);
+        }
+        JobContext context = new JobContextImpl(job.getConfiguration(), job.getJobID());
+        Assert.assertEquals(inputs.length, FileInputFormat.getInputPaths(context).length);
+        NLineInputFormat.setNumLinesPerSplit(job, expectedTuples);
+
+        // Check splits
+        List<InputSplit> splits = inputFormat.getSplits(context);
+        Assert.assertEquals(expectedSplits, splits.size());
+
+        // Check tuples
+        int count = 0;
+        for (InputSplit split : splits) {
+            TaskAttemptContext taskContext = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
+            RecordReader<LongWritable, T> reader = inputFormat.createRecordReader(split, taskContext);
+            reader.initialize(split, taskContext);
+            count += this.countTuples(reader);
+        }
+        Assert.assertEquals(expectedTuples, count);
+    }
+
+    /**
+     * tuples test with multiple inputs
+     * 
+     * @throws IOException
+     * @throws ClassNotFoundException
+     * @throws InterruptedException
+     */
+    @Test
+    public final void multiple_inputs_01() throws IOException, InterruptedException, ClassNotFoundException {
+        testMultipleInputs(new File[] { empty, small, large }, this.canSplitInputs() ? 2 : 3, EMPTY_SIZE + SMALL_SIZE
+                + LARGE_SIZE);
+    }
+
+    /**
+     * tuples test with multiple inputs
+     * 
+     * @throws IOException
+     * @throws ClassNotFoundException
+     * @throws InterruptedException
+     */
+    @Test
+    public final void multiple_inputs_02() throws IOException, InterruptedException, ClassNotFoundException {
+        testMultipleInputs(new File[] { folder.getRoot() }, this.canSplitInputs() ? 4 : 5, EMPTY_SIZE + SMALL_SIZE + LARGE_SIZE
+                + (MIXED_SIZE / 2));
+    }
+
+    protected final void testSplitInputs(Configuration config, File[] inputs, int expectedSplits, int expectedTuples)
+            throws IOException, InterruptedException {
+        // Set up fake job
+        InputFormat<LongWritable, T> inputFormat = this.getInputFormat();
+        Job job = Job.getInstance(config);
+        job.setInputFormatClass(inputFormat.getClass());
+        for (File input : inputs) {
+            this.addInputPath(input, job.getConfiguration(), job);
+        }
+        JobContext context = new JobContextImpl(job.getConfiguration(), job.getJobID());
+        Assert.assertEquals(inputs.length, FileInputFormat.getInputPaths(context).length);
+
+        // Check splits
+        List<InputSplit> splits = inputFormat.getSplits(context);
+        Assert.assertEquals(expectedSplits, splits.size());
+
+        // Check tuples
+        int count = 0;
+        for (InputSplit split : splits) {
+            // Validate split
+            Assert.assertTrue(this.isValidSplit(split, config));
+
+            // Read split
+            TaskAttemptContext taskContext = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
+            RecordReader<LongWritable, T> reader = inputFormat.createRecordReader(split, taskContext);
+            reader.initialize(split, taskContext);
+            count += this.countTuples(reader);
+        }
+        Assert.assertEquals(expectedTuples, count);
+    }
+
+    /**
+     * Determines whether an input split is valid
+     * 
+     * @param split
+     *            Input split
+     * @return True if a valid split, false otherwise
+     * @throws IOException 
+     */
+    protected boolean isValidSplit(InputSplit split, Configuration config) throws IOException {
+        return split instanceof FileSplit;
+    }
+
+    /**
+     * Indicates whether inputs can be split, defaults to true
+     * 
+     * @return Whether inputs can be split
+     */
+    protected boolean canSplitInputs() {
+        return true;
+    }
+
+    /**
+     * Tests for input splitting
+     * 
+     * @throws IOException
+     * @throws InterruptedException
+     * @throws ClassNotFoundException
+     */
+    @Test
+    public final void split_input_01() throws IOException, InterruptedException, ClassNotFoundException {
+        Assume.assumeTrue(this.canSplitInputs());
+
+        Configuration config = this.prepareConfiguration();
+        config.setBoolean(RdfIOConstants.INPUT_IGNORE_BAD_TUPLES, false);
+        Assert.assertEquals(Integer.MAX_VALUE, config.getInt(HadoopIOConstants.MAX_LINE_LENGTH, Integer.MAX_VALUE));
+        this.testSplitInputs(config, new File[] { small }, 100, SMALL_SIZE);
+    }
+
+    /**
+     * Tests for input splitting
+     * 
+     * @throws IOException
+     * @throws InterruptedException
+     * @throws ClassNotFoundException
+     */
+    @Test
+    public final void split_input_02() throws IOException, InterruptedException, ClassNotFoundException {
+        Assume.assumeTrue(this.canSplitInputs());
+
+        Configuration config = this.prepareConfiguration();
+        config.setBoolean(RdfIOConstants.INPUT_IGNORE_BAD_TUPLES, false);
+        config.setLong(NLineInputFormat.LINES_PER_MAP, 10);
+        Assert.assertEquals(Integer.MAX_VALUE, config.getInt(HadoopIOConstants.MAX_LINE_LENGTH, Integer.MAX_VALUE));
+        this.testSplitInputs(config, new File[] { small }, 10, SMALL_SIZE);
+    }
+
+    /**
+     * Tests for input splitting
+     * 
+     * @throws IOException
+     * @throws InterruptedException
+     * @throws ClassNotFoundException
+     */
+    @Test
+    public final void split_input_03() throws IOException, InterruptedException, ClassNotFoundException {
+        Assume.assumeTrue(this.canSplitInputs());
+
+        Configuration config = this.prepareConfiguration();
+        config.setBoolean(RdfIOConstants.INPUT_IGNORE_BAD_TUPLES, false);
+        config.setLong(NLineInputFormat.LINES_PER_MAP, 100);
+        Assert.assertEquals(Integer.MAX_VALUE, config.getInt(HadoopIOConstants.MAX_LINE_LENGTH, Integer.MAX_VALUE));
+        this.testSplitInputs(config, new File[] { large }, 100, LARGE_SIZE);
+    }
+}
\ No newline at end of file

Added: jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/AbstractQuadsInputFormatTests.java
URL: http://svn.apache.org/viewvc/jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/AbstractQuadsInputFormatTests.java?rev=1583942&view=auto
==============================================================================
--- jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/AbstractQuadsInputFormatTests.java (added)
+++ jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/AbstractQuadsInputFormatTests.java Wed Apr  2 09:20:51 2014
@@ -0,0 +1,52 @@
+/*
+ * Copyright 2013 YarcData LLC All Rights Reserved.
+ */ 
+
+package com.yarcdata.urika.hadoop.rdf.io.input;
+
+import java.io.IOException;
+import java.io.Writer;
+
+import com.hp.hpl.jena.sparql.core.Quad;
+import com.yarcdata.urika.hadoop.rdf.types.QuadWritable;
+
+/**
+ * Abstract tests for Quad input formats
+ * @author rvesse
+ *
+ */
+public abstract class AbstractQuadsInputFormatTests extends AbstractNodeTupleInputFormatTests<Quad, QuadWritable> {
+
+    @Override
+    protected void generateTuples(Writer writer, int num) throws IOException {
+        for (int i = 0; i < num; i++) {
+            writer.write("<http://subjects/" + i + "> <http://predicate> \"" + i + "\" <http://graphs/" + i + "> .\n");
+        }
+        writer.flush();
+        writer.close();
+    }
+    
+    @Override
+    protected void generateBadTuples(Writer writer, int num) throws IOException {
+        for (int i = 0; i < num; i++) {
+            writer.write("<http://broken\n");
+        }
+        writer.flush();
+        writer.close();
+    }
+
+    @Override
+    protected void generateMixedTuples(Writer writer, int num) throws IOException {
+        boolean bad = false;
+        for (int i = 0; i < num; i++, bad = !bad) {
+            if (bad) {
+                writer.write("<http://broken\n");
+            } else {
+                writer.write("<http://subjects/" + i + "> <http://predicate> \"" + i + "\" <http://graphs/" + i + "> .\n");
+            }
+        }
+        writer.flush();
+        writer.close();
+    }
+
+}

Added: jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/AbstractTriplesInputFormatTests.java
URL: http://svn.apache.org/viewvc/jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/AbstractTriplesInputFormatTests.java?rev=1583942&view=auto
==============================================================================
--- jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/AbstractTriplesInputFormatTests.java (added)
+++ jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/AbstractTriplesInputFormatTests.java Wed Apr  2 09:20:51 2014
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2013 YarcData LLC All Rights Reserved.
+ */
+
+package com.yarcdata.urika.hadoop.rdf.io.input;
+
+import java.io.IOException;
+import java.io.Writer;
+
+import com.hp.hpl.jena.graph.Triple;
+import com.yarcdata.urika.hadoop.rdf.types.TripleWritable;
+
+/**
+ * Abstract tests for Triple input formats
+ * 
+ * @author rvesse
+ * 
+ */
+public abstract class AbstractTriplesInputFormatTests extends AbstractNodeTupleInputFormatTests<Triple, TripleWritable> {
+
+    @Override
+    protected void generateTuples(Writer writer, int num) throws IOException {
+        for (int i = 0; i < num; i++) {
+            writer.write("<http://subjects/" + i + "> <http://predicate> \"" + i + "\" .\n");
+        }
+        writer.flush();
+        writer.close();
+    }
+
+    @Override
+    protected void generateBadTuples(Writer writer, int num) throws IOException {
+        for (int i = 0; i < num; i++) {
+            writer.write("<http://broken\n");
+        }
+        writer.flush();
+        writer.close();
+    }
+
+    @Override
+    protected void generateMixedTuples(Writer writer, int num) throws IOException {
+        boolean bad = false;
+        for (int i = 0; i < num; i++, bad = !bad) {
+            if (bad) {
+                writer.write("<http://broken\n");
+            } else {
+                writer.write("<http://subjects/" + i + "> <http://predicate> \"" + i + "\" .\n");
+            }
+        }
+        writer.flush();
+        writer.close();
+    }
+
+}

Added: jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/AbstractWholeFileQuadInputFormatTests.java
URL: http://svn.apache.org/viewvc/jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/AbstractWholeFileQuadInputFormatTests.java?rev=1583942&view=auto
==============================================================================
--- jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/AbstractWholeFileQuadInputFormatTests.java (added)
+++ jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/AbstractWholeFileQuadInputFormatTests.java Wed Apr  2 09:20:51 2014
@@ -0,0 +1,97 @@
/*
 * Copyright 2013 YarcData LLC All Rights Reserved.
 */

package com.yarcdata.urika.hadoop.rdf.io.input;

import java.io.IOException;
import java.io.Writer;

import org.apache.jena.riot.Lang;
import org.apache.jena.riot.RDFDataMgr;
import org.apache.jena.riot.RDFWriterRegistry;

import com.hp.hpl.jena.query.Dataset;
import com.hp.hpl.jena.query.DatasetFactory;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;
import com.hp.hpl.jena.rdf.model.Property;
import com.hp.hpl.jena.rdf.model.Resource;
import com.hp.hpl.jena.sparql.core.Quad;
import com.yarcdata.urika.hadoop.rdf.types.QuadWritable;

/**
 * Abstract tests for Quad input formats
 * 
 * @author rvesse
 * 
 */
public abstract class AbstractWholeFileQuadInputFormatTests extends AbstractNodeTupleInputFormatTests<Quad, QuadWritable> {

    @Override
    protected boolean canSplitInputs() {
        // Whole file formats must be parsed as a single unit and therefore
        // their inputs cannot be split
        return false;
    }

    // Serializes the data set in the default serialization of the derived
    // test's RDF language (deprecated write API retained as-is)
    @SuppressWarnings("deprecation")
    private void writeTuples(Dataset ds, Writer writer) {
        RDFDataMgr.write(writer, ds, RDFWriterRegistry.defaultSerialization(this.getRdfLanguage()));
    }

    /**
     * Gets the RDF language to write out generated tuples in
     * 
     * @return RDF language
     */
    protected abstract Lang getRdfLanguage();

    // Generates num good quads spread over named graphs, rolling over to a
    // new graph every 100 tuples and a new subject every 10 tuples
    private void writeGoodTuples(Writer writer, int num) throws IOException {
        Dataset ds = DatasetFactory.createMem();
        Model m = ModelFactory.createDefaultModel();
        Resource currSubj = m.createResource("http://example.org/subjects/0");
        Property predicate = m.createProperty("http://example.org/predicate");
        for (int i = 0; i < num; i++) {
            if (i % 100 == 0) {
                // NOTE(review): on the first iteration (i == 0) the model
                // added here as graph 0 is still empty, so tuples actually
                // start in graph 1 - confirm this is intended
                ds.addNamedModel("http://example.org/graphs/" + (i / 100), m);
                m = ModelFactory.createDefaultModel();
            }
            if (i % 10 == 0) {
                currSubj = m.createResource("http://example.org/subjects/" + (i / 10));
            }
            m.add(currSubj, predicate, m.createTypedLiteral(i));
        }
        if (!m.isEmpty()) {
            // Any remaining partially filled model goes into an extra graph
            ds.addNamedModel("http://example.org/graphs/extra", m);
        }
        this.writeTuples(ds, writer);
    }

    @Override
    protected final void generateTuples(Writer writer, int num) throws IOException {
        this.writeGoodTuples(writer, num);
        writer.close();
    }

    @Override
    protected final void generateMixedTuples(Writer writer, int num) throws IOException {
        // Write good data
        this.writeGoodTuples(writer, num / 2);

        // Write junk data
        for (int i = 0; i < num / 2; i++) {
            writer.write("junk data\n");
        }

        writer.flush();
        writer.close();
    }

    @Override
    protected final void generateBadTuples(Writer writer, int num) throws IOException {
        for (int i = 0; i < num; i++) {
            writer.write("junk data\n");
        }
        writer.flush();
        writer.close();
    }
}

Added: jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/AbstractWholeFileTripleInputFormatTests.java
URL: http://svn.apache.org/viewvc/jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/AbstractWholeFileTripleInputFormatTests.java?rev=1583942&view=auto
==============================================================================
--- jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/AbstractWholeFileTripleInputFormatTests.java (added)
+++ jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/AbstractWholeFileTripleInputFormatTests.java Wed Apr  2 09:20:51 2014
@@ -0,0 +1,90 @@
+/*
+ * Copyright 2013 YarcData LLC All Rights Reserved.
+ */
+
+package com.yarcdata.urika.hadoop.rdf.io.input;
+
+import java.io.IOException;
+import java.io.Writer;
+
+import org.apache.jena.riot.Lang;
+import org.apache.jena.riot.RDFDataMgr;
+
+import com.hp.hpl.jena.graph.Triple;
+import com.hp.hpl.jena.rdf.model.Model;
+import com.hp.hpl.jena.rdf.model.ModelFactory;
+import com.hp.hpl.jena.rdf.model.Property;
+import com.hp.hpl.jena.rdf.model.Resource;
+import com.yarcdata.urika.hadoop.rdf.types.TripleWritable;
+
+/**
+ * Abstract tests for Triple input formats which consume whole files as single
+ * records
+ * 
+ * @author rvesse
+ * 
+ */
+public abstract class AbstractWholeFileTripleInputFormatTests extends AbstractNodeTupleInputFormatTests<Triple, TripleWritable> {
+
+    @Override
+    protected boolean canSplitInputs() {
+        // Whole file formats are by definition not splittable
+        return false;
+    }
+
+    @SuppressWarnings("deprecation")
+    private void writeTuples(Model m, Writer writer) {
+        RDFDataMgr.write(writer, m, this.getRdfLanguage());
+    }
+
+    /**
+     * Gets the RDF language to write out generated tuples in
+     * 
+     * @return RDF language
+     */
+    protected abstract Lang getRdfLanguage();
+
+    /**
+     * Builds a model containing {@code num} triples, starting a new subject
+     * every 10 triples
+     * 
+     * @param num Number of triples to generate
+     * @return Model containing the generated triples
+     */
+    private Model generateGoodTuples(int num) {
+        Model m = ModelFactory.createDefaultModel();
+        Resource currSubj = m.createResource("http://example.org/subjects/0");
+        Property predicate = m.createProperty("http://example.org/predicate");
+        for (int i = 0; i < num; i++) {
+            if (i % 10 == 0) {
+                currSubj = m.createResource("http://example.org/subjects/" + (i / 10));
+            }
+            m.add(currSubj, predicate, m.createTypedLiteral(i));
+        }
+        return m;
+    }
+
+    @Override
+    protected final void generateTuples(Writer writer, int num) throws IOException {
+        this.writeTuples(this.generateGoodTuples(num), writer);
+        // Flush explicitly for consistency with the other generate methods
+        writer.flush();
+        writer.close();
+    }
+
+    @Override
+    protected final void generateMixedTuples(Writer writer, int num) throws IOException {
+        // Write good data
+        this.writeTuples(this.generateGoodTuples(num / 2), writer);
+
+        // Write junk data
+        for (int i = 0; i < num / 2; i++) {
+            writer.write("junk data\n");
+        }
+
+        writer.flush();
+        writer.close();
+    }
+
+    @Override
+    protected final void generateBadTuples(Writer writer, int num) throws IOException {
+        for (int i = 0; i < num; i++) {
+            writer.write("junk data\n");
+        }
+        writer.flush();
+        writer.close();
+    }
+}

Added: jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/BlockedNQuadsInputTest.java
URL: http://svn.apache.org/viewvc/jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/BlockedNQuadsInputTest.java?rev=1583942&view=auto
==============================================================================
--- jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/BlockedNQuadsInputTest.java (added)
+++ jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/BlockedNQuadsInputTest.java Wed Apr  2 09:20:51 2014
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2013 YarcData LLC All Rights Reserved.
+ */
+
+package com.yarcdata.urika.hadoop.rdf.io.input;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.jena.riot.Lang;
+
+import com.yarcdata.urika.hadoop.rdf.types.QuadWritable;
+
+/**
+ * Tests for blocked NQuads input (i.e. the {@link BlockedNQuadsInputFormat})
+ * 
+ * @author rvesse
+ * 
+ */
+public class BlockedNQuadsInputTest extends AbstractBlockedQuadInputFormatTests {
+
+    /** Writes test data as NQuads */
+    @Override
+    protected Lang getRdfLanguage() {
+        return Lang.NQUADS;
+    }
+
+    /** Standard NQuads file extension */
+    @Override
+    protected String getFileExtension() {
+        return ".nq";
+    }
+
+    @Override
+    protected InputFormat<LongWritable, QuadWritable> getInputFormat() {
+        return new BlockedNQuadsInputFormat();
+    }
+
+}

Added: jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/BlockedNTriplesInputTest.java
URL: http://svn.apache.org/viewvc/jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/BlockedNTriplesInputTest.java?rev=1583942&view=auto
==============================================================================
--- jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/BlockedNTriplesInputTest.java (added)
+++ jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/BlockedNTriplesInputTest.java Wed Apr  2 09:20:51 2014
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2013 YarcData LLC All Rights Reserved.
+ */ 
+
+package com.yarcdata.urika.hadoop.rdf.io.input;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.jena.riot.Lang;
+
+import com.yarcdata.urika.hadoop.rdf.types.TripleWritable;
+
+/**
+ * Tests for the blocked NTriples input format
+ * 
+ * @author rvesse
+ * 
+ */
+public class BlockedNTriplesInputTest extends AbstractBlockedTripleInputFormatTests {
+
+    @Override
+    protected InputFormat<LongWritable, TripleWritable> getInputFormat() {
+        return new BlockedNTriplesInputFormat();
+    }
+
+    @Override
+    protected Lang getRdfLanguage() {
+        return Lang.NTRIPLES;
+    }
+
+    @Override
+    protected String getFileExtension() {
+        return ".nt";
+    }
+}

Added: jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/NQuadsInputTest.java
URL: http://svn.apache.org/viewvc/jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/NQuadsInputTest.java?rev=1583942&view=auto
==============================================================================
--- jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/NQuadsInputTest.java (added)
+++ jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/NQuadsInputTest.java Wed Apr  2 09:20:51 2014
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2013 YarcData LLC All Rights Reserved.
+ */ 
+
+package com.yarcdata.urika.hadoop.rdf.io.input;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.mapreduce.InputFormat;
+
+import com.yarcdata.urika.hadoop.rdf.types.QuadWritable;
+
+/**
+ * Tests for the NQuads input format
+ * 
+ * @author rvesse
+ * 
+ */
+public class NQuadsInputTest extends AbstractQuadsInputFormatTests {
+
+    @Override
+    protected String getFileExtension() {
+        return ".nq";
+    }
+
+    @Override
+    protected InputFormat<LongWritable, QuadWritable> getInputFormat() {
+        return new NQuadsInputFormat();
+    }
+}

Added: jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/NTriplesInputTest.java
URL: http://svn.apache.org/viewvc/jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/NTriplesInputTest.java?rev=1583942&view=auto
==============================================================================
--- jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/NTriplesInputTest.java (added)
+++ jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/NTriplesInputTest.java Wed Apr  2 09:20:51 2014
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2013 YarcData LLC All Rights Reserved.
+ */
+
+package com.yarcdata.urika.hadoop.rdf.io.input;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.mapreduce.InputFormat;
+
+import com.yarcdata.urika.hadoop.rdf.types.TripleWritable;
+
+/**
+ * Tests for the {@link NTriplesInputFormat}
+ * 
+ * @author rvesse
+ * 
+ */
+public class NTriplesInputTest extends AbstractTriplesInputFormatTests {
+
+    @Override
+    protected String getFileExtension() {
+        return ".nt";
+    }
+
+    @Override
+    protected InputFormat<LongWritable, TripleWritable> getInputFormat() {
+        return new NTriplesInputFormat();
+    }
+}

Added: jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/RdfJsonInputTest.java
URL: http://svn.apache.org/viewvc/jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/RdfJsonInputTest.java?rev=1583942&view=auto
==============================================================================
--- jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/RdfJsonInputTest.java (added)
+++ jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/RdfJsonInputTest.java Wed Apr  2 09:20:51 2014
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2013 YarcData LLC All Rights Reserved.
+ */
+
+package com.yarcdata.urika.hadoop.rdf.io.input;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.jena.riot.Lang;
+
+import com.yarcdata.urika.hadoop.rdf.types.TripleWritable;
+
+/**
+ * Tests for the RDF/JSON input format
+ * 
+ * @author rvesse
+ * 
+ */
+public class RdfJsonInputTest extends AbstractWholeFileTripleInputFormatTests {
+
+    @Override
+    protected InputFormat<LongWritable, TripleWritable> getInputFormat() {
+        return new RdfJsonInputFormat();
+    }
+
+    @Override
+    protected Lang getRdfLanguage() {
+        return Lang.RDFJSON;
+    }
+
+    @Override
+    protected String getFileExtension() {
+        return ".rj";
+    }
+}

Added: jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/RdfXmlInputTest.java
URL: http://svn.apache.org/viewvc/jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/RdfXmlInputTest.java?rev=1583942&view=auto
==============================================================================
--- jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/RdfXmlInputTest.java (added)
+++ jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/RdfXmlInputTest.java Wed Apr  2 09:20:51 2014
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2013 YarcData LLC All Rights Reserved.
+ */
+
+package com.yarcdata.urika.hadoop.rdf.io.input;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.jena.riot.Lang;
+
+import com.yarcdata.urika.hadoop.rdf.types.TripleWritable;
+
+/**
+ * Tests for the RDF/XML input format
+ * 
+ * @author rvesse
+ * 
+ */
+public class RdfXmlInputTest extends AbstractWholeFileTripleInputFormatTests {
+
+    @Override
+    protected InputFormat<LongWritable, TripleWritable> getInputFormat() {
+        return new RdfXmlInputFormat();
+    }
+
+    @Override
+    protected Lang getRdfLanguage() {
+        return Lang.RDFXML;
+    }
+
+    @Override
+    protected String getFileExtension() {
+        return ".rdf";
+    }
+}

Added: jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/TriGInputTest.java
URL: http://svn.apache.org/viewvc/jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/TriGInputTest.java?rev=1583942&view=auto
==============================================================================
--- jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/TriGInputTest.java (added)
+++ jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/TriGInputTest.java Wed Apr  2 09:20:51 2014
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2013 YarcData LLC All Rights Reserved.
+ */
+
+package com.yarcdata.urika.hadoop.rdf.io.input;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.jena.riot.Lang;
+
+import com.yarcdata.urika.hadoop.rdf.types.QuadWritable;
+
+/**
+ * Tests for the TriG input format
+ * 
+ * @author rvesse
+ * 
+ */
+public class TriGInputTest extends AbstractWholeFileQuadInputFormatTests {
+
+    @Override
+    protected InputFormat<LongWritable, QuadWritable> getInputFormat() {
+        return new TriGInputFormat();
+    }
+
+    @Override
+    protected Lang getRdfLanguage() {
+        return Lang.TRIG;
+    }
+
+    @Override
+    protected String getFileExtension() {
+        return ".trig";
+    }
+}

Added: jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/TurtleInputTest.java
URL: http://svn.apache.org/viewvc/jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/TurtleInputTest.java?rev=1583942&view=auto
==============================================================================
--- jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/TurtleInputTest.java (added)
+++ jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/TurtleInputTest.java Wed Apr  2 09:20:51 2014
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2013 YarcData LLC All Rights Reserved.
+ */
+
+package com.yarcdata.urika.hadoop.rdf.io.input;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.jena.riot.Lang;
+
+import com.yarcdata.urika.hadoop.rdf.types.TripleWritable;
+
+/**
+ * Tests for the Turtle input format
+ * 
+ * @author rvesse
+ * 
+ */
+public class TurtleInputTest extends AbstractWholeFileTripleInputFormatTests {
+
+    @Override
+    protected String getFileExtension() {
+        return ".ttl";
+    }
+
+    @Override
+    protected Lang getRdfLanguage() {
+        return Lang.TURTLE;
+    }
+
+    @Override
+    protected InputFormat<LongWritable, TripleWritable> getInputFormat() {
+        return new TurtleInputFormat();
+    }
+}

Added: jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/WholeFileNQuadsInputTest.java
URL: http://svn.apache.org/viewvc/jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/WholeFileNQuadsInputTest.java?rev=1583942&view=auto
==============================================================================
--- jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/WholeFileNQuadsInputTest.java (added)
+++ jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/WholeFileNQuadsInputTest.java Wed Apr  2 09:20:51 2014
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2013 YarcData LLC All Rights Reserved.
+ */
+
+package com.yarcdata.urika.hadoop.rdf.io.input;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.jena.riot.Lang;
+
+import com.yarcdata.urika.hadoop.rdf.types.QuadWritable;
+
+/**
+ * Tests for whole file NQuads input
+ * 
+ * @author rvesse
+ * 
+ */
+public class WholeFileNQuadsInputTest extends AbstractWholeFileQuadInputFormatTests {
+
+    @Override
+    protected InputFormat<LongWritable, QuadWritable> getInputFormat() {
+        return new WholeFileNQuadsInputFormat();
+    }
+
+    @Override
+    protected Lang getRdfLanguage() {
+        return Lang.NQUADS;
+    }
+
+    @Override
+    protected String getFileExtension() {
+        return ".nq";
+    }
+}

Added: jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/WholeFileNTriplesInputTest.java
URL: http://svn.apache.org/viewvc/jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/WholeFileNTriplesInputTest.java?rev=1583942&view=auto
==============================================================================
--- jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/WholeFileNTriplesInputTest.java (added)
+++ jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/WholeFileNTriplesInputTest.java Wed Apr  2 09:20:51 2014
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2013 YarcData LLC All Rights Reserved.
+ */
+
+package com.yarcdata.urika.hadoop.rdf.io.input;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.jena.riot.Lang;
+
+import com.yarcdata.urika.hadoop.rdf.types.TripleWritable;
+
+/**
+ * Tests for the {@link WholeFileNTriplesInputFormat}
+ * 
+ * @author rvesse
+ * 
+ */
+public class WholeFileNTriplesInputTest extends AbstractWholeFileTripleInputFormatTests {
+
+    @Override
+    protected InputFormat<LongWritable, TripleWritable> getInputFormat() {
+        return new WholeFileNTriplesInputFormat();
+    }
+
+    /** Standard NTriples file extension */
+    @Override
+    protected String getFileExtension() {
+        return ".nt";
+    }
+
+    /** Writes test data as NTriples */
+    @Override
+    protected Lang getRdfLanguage() {
+        return Lang.NTRIPLES;
+    }
+
+    
+}

Added: jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/compressed/AbstractCompressedNodeTupleInputFormatTests.java
URL: http://svn.apache.org/viewvc/jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/compressed/AbstractCompressedNodeTupleInputFormatTests.java?rev=1583942&view=auto
==============================================================================
--- jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/compressed/AbstractCompressedNodeTupleInputFormatTests.java (added)
+++ jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/compressed/AbstractCompressedNodeTupleInputFormatTests.java Wed Apr  2 09:20:51 2014
@@ -0,0 +1,63 @@
+/*
+ * Copyright 2013 YarcData LLC All Rights Reserved.
+ */
+
+package com.yarcdata.urika.hadoop.rdf.io.input.compressed;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
+
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.compress.CompressionCodec;
+import com.yarcdata.urika.hadoop.rdf.io.HadoopIOConstants;
+import com.yarcdata.urika.hadoop.rdf.io.input.AbstractNodeTupleInputFormatTests;
+import com.yarcdata.urika.hadoop.rdf.types.AbstractNodeTupleWritable;
+
+/**
+ * Abstract tests for node tuple input formats that consume compressed input
+ * 
+ * @author rvesse
+ * 
+ * @param <TValue> Tuple type
+ * @param <T> Writable tuple type
+ */
+public abstract class AbstractCompressedNodeTupleInputFormatTests<TValue, T extends AbstractNodeTupleWritable<TValue>> extends
+        AbstractNodeTupleInputFormatTests<TValue, T> {
+
+    @Override
+    protected Configuration prepareConfiguration() {
+        Configuration config = super.prepareConfiguration();
+        // Register the codec under test so the input format is able to
+        // decompress the test inputs
+        config.set(HadoopIOConstants.IO_COMPRESSION_CODECS, this.getCompressionCodec().getClass().getCanonicalName());
+        return config;
+    }
+
+    @Override
+    protected Writer getWriter(File f) throws IOException {
+        CompressionCodec codec = this.getCompressionCodec();
+        if (codec instanceof Configurable) {
+            // Configurable codecs need a configuration before they can be used
+            ((Configurable) codec).setConf(this.prepareConfiguration());
+        }
+        FileOutputStream fileOutput = new FileOutputStream(f, false);
+        OutputStream output = codec.createOutputStream(fileOutput);
+        // Use an explicit character encoding rather than the platform default
+        // so test data is written identically regardless of platform
+        return new OutputStreamWriter(output, "utf-8");
+    }
+
+    /**
+     * Gets the compression codec to use
+     * 
+     * @return Compression codec
+     */
+    protected abstract CompressionCodec getCompressionCodec();
+
+    /**
+     * Indicates whether inputs can be split, defaults to false for compressed
+     * input tests
+     */
+    @Override
+    protected boolean canSplitInputs() {
+        return false;
+    }
+}

Added: jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/compressed/AbstractCompressedQuadsInputFormatTests.java
URL: http://svn.apache.org/viewvc/jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/compressed/AbstractCompressedQuadsInputFormatTests.java?rev=1583942&view=auto
==============================================================================
--- jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/compressed/AbstractCompressedQuadsInputFormatTests.java (added)
+++ jena/Experimental/hadoop-rdf/hadoop-rdf-io/src/test/java/com/yarcdata/urika/hadoop/rdf/io/input/compressed/AbstractCompressedQuadsInputFormatTests.java Wed Apr  2 09:20:51 2014
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2013 YarcData LLC All Rights Reserved.
+ */
+
+package com.yarcdata.urika.hadoop.rdf.io.input.compressed;
+
+import java.io.IOException;
+import java.io.Writer;
+
+import com.hp.hpl.jena.sparql.core.Quad;
+import com.yarcdata.urika.hadoop.rdf.types.QuadWritable;
+
+/**
+ * Abstract tests for Quad input formats
+ * 
+ * @author rvesse
+ * 
+ */
+public abstract class AbstractCompressedQuadsInputFormatTests extends
+        AbstractCompressedNodeTupleInputFormatTests<Quad, QuadWritable> {
+
+    /** Writes a single well-formed NQuads line for the given index */
+    private void writeGoodLine(Writer writer, int index) throws IOException {
+        writer.write("<http://subjects/" + index + "> <http://predicate> \"" + index + "\" <http://graphs/" + index + "> .\n");
+    }
+
+    /** Writes a single malformed line */
+    private void writeBadLine(Writer writer) throws IOException {
+        writer.write("<http://broken\n");
+    }
+
+    @Override
+    protected void generateTuples(Writer writer, int num) throws IOException {
+        for (int index = 0; index < num; index++) {
+            this.writeGoodLine(writer, index);
+        }
+        writer.flush();
+        writer.close();
+    }
+
+    @Override
+    protected void generateBadTuples(Writer writer, int num) throws IOException {
+        for (int index = 0; index < num; index++) {
+            this.writeBadLine(writer);
+        }
+        writer.flush();
+        writer.close();
+    }
+
+    @Override
+    protected void generateMixedTuples(Writer writer, int num) throws IOException {
+        // Even numbered lines are good data, odd numbered lines are junk
+        for (int index = 0; index < num; index++) {
+            if (index % 2 == 1) {
+                this.writeBadLine(writer);
+            } else {
+                this.writeGoodLine(writer, index);
+            }
+        }
+        writer.flush();
+        writer.close();
+    }
+}



Mime
View raw message