jena-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From rve...@apache.org
Subject [31/50] [abbrv] Get Jena Hadoop RDF Tools building
Date Mon, 20 Oct 2014 14:47:53 GMT
http://git-wip-us.apache.org/repos/asf/jena/blob/92fb810a/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/AbstractCharacteristicSetGeneratingReducer.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/AbstractCharacteristicSetGeneratingReducer.java b/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/AbstractCharacteristicSetGeneratingReducer.java
new file mode 100644
index 0000000..daf61d4
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/AbstractCharacteristicSetGeneratingReducer.java
@@ -0,0 +1,179 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce.characteristics;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
+
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.jena.hadoop.rdf.types.AbstractNodeTupleWritable;
+import org.apache.jena.hadoop.rdf.types.CharacteristicSetWritable;
+import org.apache.jena.hadoop.rdf.types.CharacteristicWritable;
+import org.apache.jena.hadoop.rdf.types.NodeWritable;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Base class for reducers that receive tuples grouped by some node and emit
+ * the initial characteristic sets for each group.
+ * <p>
+ * For every group, one characteristic is tracked per distinct predicate and
+ * every non-empty combination of those characteristics is written out as a
+ * characteristic set key paired with a {@link NullWritable} value, so that
+ * in a subsequent job the characteristic sets may be further combined
+ * together to total up the usage counts appropriately.
+ * </p>
+ * <p>
+ * It is important to note that the output from this reducer can be very
+ * large, and since it typically needs to be written to HDFS before being
+ * processed by further jobs it is strongly recommended that you use
+ * appropriate output compression.
+ * </p>
+ *
+ * @param <TValue>
+ *            Tuple type
+ * @param <T>
+ *            Writable tuple type
+ */
+public abstract class AbstractCharacteristicSetGeneratingReducer<TValue, T extends AbstractNodeTupleWritable<TValue>> extends
+        Reducer<NodeWritable, T, CharacteristicSetWritable, NullWritable> {
+
+    private static final Logger LOG = LoggerFactory.getLogger(AbstractCharacteristicSetGeneratingReducer.class);
+    // Whether trace logging is enabled, as captured in setup
+    private boolean tracing = false;
+
+    @Override
+    protected void setup(Context context) throws IOException, InterruptedException {
+        super.setup(context);
+        this.tracing = LOG.isTraceEnabled();
+    }
+
+    @Override
+    protected void reduce(NodeWritable key, Iterable<T> values, Context context) throws IOException, InterruptedException {
+        Map<NodeWritable, CharacteristicWritable> byPredicate = new TreeMap<NodeWritable, CharacteristicWritable>();
+
+        // Pass 1: track one characteristic per distinct predicate in the group
+        for (Iterator<T> tuples = values.iterator(); tuples.hasNext();) {
+            NodeWritable predicate = this.getPredicate(tuples.next());
+            CharacteristicWritable known = byPredicate.get(predicate);
+            if (known == null) {
+                byPredicate.put(predicate, new CharacteristicWritable(predicate.get()));
+            } else {
+                known.increment();
+            }
+        }
+
+        // A group that yielded no characteristics produces no output
+        if (byPredicate.isEmpty()) {
+            return;
+        }
+
+        // Pass 2: emit the sets of every size, from singletons up to the full set
+        List<CharacteristicWritable> found = new ArrayList<CharacteristicWritable>(byPredicate.values());
+        for (int size = 1; size <= found.size(); size++) {
+            this.outputSets(found, size, context);
+        }
+    }
+
+    /**
+     * Writes out every characteristic set of the requested size
+     *
+     * @param cs
+     *            Characteristics to draw the set members from
+     * @param perSet
+     *            Number of characteristics in each set
+     * @param context
+     *            Context to output sets to
+     * @throws IOException
+     * @throws InterruptedException
+     */
+    protected void outputSets(List<CharacteristicWritable> cs, int perSet, Context context) throws IOException,
+            InterruptedException {
+        if (perSet == 1) {
+            for (CharacteristicWritable single : cs) {
+                CharacteristicSetWritable singleton = new CharacteristicSetWritable(single);
+                context.write(singleton, NullWritable.get());
+                if (this.tracing) {
+                    LOG.trace("Key = {}", singleton);
+                }
+            }
+        } else if (perSet != cs.size()) {
+            // Neither singletons nor the full set: enumerate the combinations
+            this.combinations(cs, perSet, 0, new CharacteristicWritable[perSet], context);
+        } else {
+            CharacteristicSetWritable everything = new CharacteristicSetWritable();
+            for (CharacteristicWritable member : cs) {
+                everything.add(member);
+            }
+            context.write(everything, NullWritable.get());
+            if (this.tracing) {
+                LOG.trace("Key = {}", everything);
+            }
+        }
+    }
+
+    /**
+     * Recursively enumerates every combination of the given number of
+     * characteristics and writes each completed combination as a set
+     *
+     * @param cs
+     *            Characteristics
+     * @param len
+     *            Number of elements still to be chosen
+     * @param startPosition
+     *            Index of the first characteristic eligible to be chosen
+     * @param result
+     *            Array that accumulates the members of the combination
+     * @param context
+     *            Context to write completed combinations to
+     * @throws IOException
+     * @throws InterruptedException
+     */
+    protected final void combinations(List<CharacteristicWritable> cs, int len, int startPosition,
+            CharacteristicWritable[] result, Context context) throws IOException, InterruptedException {
+        if (len != 0) {
+            for (int pos = startPosition; pos <= cs.size() - len; pos++) {
+                result[result.length - len] = cs.get(pos);
+                this.combinations(cs, len - 1, pos + 1, result, context);
+            }
+            return;
+        }
+        CharacteristicSetWritable complete = new CharacteristicSetWritable(result);
+        context.write(complete, NullWritable.get());
+        if (this.tracing) {
+            LOG.trace("Key = {}", complete);
+        }
+    }
+
+    /**
+     * Extracts the predicate of the given tuple
+     *
+     * @param tuple
+     *            Tuple
+     * @return Predicate of the tuple
+     */
+    protected abstract NodeWritable getPredicate(T tuple);
+
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/92fb810a/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/CharacteristicSetReducer.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/CharacteristicSetReducer.java b/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/CharacteristicSetReducer.java
new file mode 100644
index 0000000..e70698a
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/CharacteristicSetReducer.java
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce.characteristics;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.jena.hadoop.rdf.types.CharacteristicSetWritable;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Reducer which receives characteristic sets grouped by key and merges all the
+ * sets of a group into a single set, thereby summing up their usage counts
+ *
+ */
+public class CharacteristicSetReducer extends
+        Reducer<CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, NullWritable> {
+
+    private static final Logger LOG = LoggerFactory.getLogger(CharacteristicSetReducer.class);
+    // Whether trace logging is enabled, as captured in setup
+    private boolean tracing = false;
+
+    @Override
+    protected void setup(Context context) throws IOException, InterruptedException {
+        super.setup(context);
+        this.tracing = LOG.isTraceEnabled();
+    }
+
+    @Override
+    protected void reduce(CharacteristicSetWritable key, Iterable<CharacteristicSetWritable> values, Context context)
+            throws IOException, InterruptedException {
+        Iterator<CharacteristicSetWritable> remaining = values.iterator();
+        // Accumulator into which every incoming set is merged
+        CharacteristicSetWritable total = new CharacteristicSetWritable(0);
+        if (this.tracing) {
+            LOG.trace("Key = {}", key);
+        }
+
+        while (remaining.hasNext()) {
+            CharacteristicSetWritable current = remaining.next();
+            if (this.tracing) {
+                LOG.trace("Value = {}", current);
+            }
+            total.add(current);
+        }
+        context.write(total, NullWritable.get());
+    }
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/92fb810a/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/QuadCharacteristicSetGeneratingReducer.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/QuadCharacteristicSetGeneratingReducer.java b/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/QuadCharacteristicSetGeneratingReducer.java
new file mode 100644
index 0000000..d11cd56
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/QuadCharacteristicSetGeneratingReducer.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce.characteristics;
+
+import org.apache.jena.hadoop.rdf.types.NodeWritable;
+import org.apache.jena.hadoop.rdf.types.QuadWritable;
+
+import com.hp.hpl.jena.sparql.core.Quad;
+
+/**
+ * A reducer which converts quads grouped by some node into characteristic sets
+ * 
+ * 
+ * 
+ */
+public class QuadCharacteristicSetGeneratingReducer extends AbstractCharacteristicSetGeneratingReducer<Quad, QuadWritable> {
+
+    @Override
+    protected NodeWritable getPredicate(QuadWritable tuple) {
+        return new NodeWritable(tuple.get().getPredicate());
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/92fb810a/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/TripleCharacteristicSetGeneratingReducer.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/TripleCharacteristicSetGeneratingReducer.java b/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/TripleCharacteristicSetGeneratingReducer.java
new file mode 100644
index 0000000..6515c91
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/TripleCharacteristicSetGeneratingReducer.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce.characteristics;
+
+import org.apache.jena.hadoop.rdf.types.NodeWritable;
+import org.apache.jena.hadoop.rdf.types.TripleWritable;
+
+import com.hp.hpl.jena.graph.Triple;
+
+/**
+ * A reducer which converts triples grouped by some node into characteristic
+ * sets
+ * 
+ * 
+ * 
+ */
+public class TripleCharacteristicSetGeneratingReducer extends AbstractCharacteristicSetGeneratingReducer<Triple, TripleWritable> {
+
+    @Override
+    protected NodeWritable getPredicate(TripleWritable tuple) {
+        return new NodeWritable(tuple.get().getPredicate());
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/92fb810a/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/count/AbstractNodeTupleNodeCountMapper.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/count/AbstractNodeTupleNodeCountMapper.java b/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/count/AbstractNodeTupleNodeCountMapper.java
new file mode 100644
index 0000000..7c56d1c
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/count/AbstractNodeTupleNodeCountMapper.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce.count;
+
+import java.io.IOException;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.jena.hadoop.rdf.types.AbstractNodeTupleWritable;
+import org.apache.jena.hadoop.rdf.types.NodeWritable;
+
+
+/**
+ * Base class for mappers that break each node tuple value down into its nodes
+ * and emit every node as a key paired with a long value of 1. Pairing it with
+ * a {@link NodeCountReducer} yields the usage count of each unique node.
+ * <p>
+ * Subclasses decide which nodes of a tuple are counted.
+ * </p>
+ * @param <TKey>
+ *            Key type
+ * @param <TValue>
+ *            Tuple type
+ * @param <T>
+ *            Writable tuple type
+ */
+public abstract class AbstractNodeTupleNodeCountMapper<TKey, TValue, T extends AbstractNodeTupleWritable<TValue>> extends
+        Mapper<TKey, T, NodeWritable, LongWritable> {
+
+    // Single value of 1 that is written alongside every emitted node
+    private LongWritable initialCount = new LongWritable(1);
+
+    @Override
+    protected void map(TKey key, T value, Context context) throws IOException,
+            InterruptedException {
+        NodeWritable[] nodes = this.getNodes(value);
+        for (int i = 0; i < nodes.length; i++) {
+            context.write(nodes[i], this.initialCount);
+        }
+    }
+
+    /**
+     * Selects the nodes of the tuple that should be counted
+     *
+     * @param tuple
+     *            Tuple
+     * @return Nodes to emit, each of which is written with a count of 1
+     */
+    protected abstract NodeWritable[] getNodes(T tuple);
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/92fb810a/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/count/NodeCountReducer.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/count/NodeCountReducer.java b/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/count/NodeCountReducer.java
new file mode 100644
index 0000000..a5460f6
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/count/NodeCountReducer.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce.count;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.jena.hadoop.rdf.types.NodeWritable;
+
+
+/**
+ * A reducer which takes node keys with a sequence of longs representing counts
+ * as the values and sums the counts together into pairs consisting of a node
+ * key and a count value.
+ * 
+ * 
+ * 
+ */
+public class NodeCountReducer extends Reducer<NodeWritable, LongWritable, NodeWritable, LongWritable> {
+
+    @Override
+    protected void reduce(NodeWritable key, Iterable<LongWritable> values, Context context) throws IOException,
+            InterruptedException {
+        long count = 0;
+        Iterator<LongWritable> iter = values.iterator();
+        while (iter.hasNext()) {
+            count += iter.next().get();
+        }
+        context.write(key, new LongWritable(count));
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/92fb810a/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/count/QuadNodeCountMapper.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/count/QuadNodeCountMapper.java b/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/count/QuadNodeCountMapper.java
new file mode 100644
index 0000000..5c2485b
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/count/QuadNodeCountMapper.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce.count;
+
+import org.apache.jena.hadoop.rdf.types.NodeWritable;
+import org.apache.jena.hadoop.rdf.types.QuadWritable;
+
+import com.hp.hpl.jena.sparql.core.Quad;
+
+/**
+ * A mapper for counting node usages within quads designed primarily for use in
+ * conjunction with {@link NodeCountReducer}
+ * 
+ * 
+ * 
+ * @param <TKey>
+ *            Key type
+ */
+public class QuadNodeCountMapper<TKey> extends AbstractNodeTupleNodeCountMapper<TKey, Quad, QuadWritable> {
+
+    @Override
+    protected NodeWritable[] getNodes(QuadWritable tuple) {
+        Quad q = tuple.get();
+        return new NodeWritable[] { new NodeWritable(q.getGraph()), new NodeWritable(q.getSubject()),
+                new NodeWritable(q.getPredicate()), new NodeWritable(q.getObject()) };
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/92fb810a/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/count/TripleNodeCountMapper.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/count/TripleNodeCountMapper.java b/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/count/TripleNodeCountMapper.java
new file mode 100644
index 0000000..4108841
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/count/TripleNodeCountMapper.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce.count;
+
+import org.apache.jena.hadoop.rdf.types.NodeWritable;
+import org.apache.jena.hadoop.rdf.types.TripleWritable;
+
+import com.hp.hpl.jena.graph.Triple;
+
+/**
+ * A mapper for counting node usages within triples designed primarily for use
+ * in conjunction with {@link NodeCountReducer}
+ * 
+ * 
+ * 
+ * @param <TKey> Key type
+ */
+public class TripleNodeCountMapper<TKey> extends AbstractNodeTupleNodeCountMapper<TKey, Triple, TripleWritable> {
+
+    @Override
+    protected NodeWritable[] getNodes(TripleWritable tuple) {
+        Triple t = tuple.get();
+        return new NodeWritable[] { new NodeWritable(t.getSubject()), new NodeWritable(t.getPredicate()),
+                new NodeWritable(t.getObject()) };
+    }
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/92fb810a/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/count/datatypes/QuadDataTypeCountMapper.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/count/datatypes/QuadDataTypeCountMapper.java b/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/count/datatypes/QuadDataTypeCountMapper.java
new file mode 100644
index 0000000..7ea7ead
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/count/datatypes/QuadDataTypeCountMapper.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce.count.datatypes;
+
+import org.apache.jena.hadoop.rdf.mapreduce.count.NodeCountReducer;
+import org.apache.jena.hadoop.rdf.mapreduce.count.QuadNodeCountMapper;
+import org.apache.jena.hadoop.rdf.types.NodeWritable;
+import org.apache.jena.hadoop.rdf.types.QuadWritable;
+
+import com.hp.hpl.jena.graph.Node;
+import com.hp.hpl.jena.graph.NodeFactory;
+
+/**
+ * Mapper that counts data type usages within quads rather than the usages of
+ * the quads' own nodes; designed primarily for use in conjunction with
+ * {@link NodeCountReducer}
+ * <p>
+ * When the object of a quad is a literal that has a data type URI, that URI
+ * is turned into a node so that it can be counted. A quad whose object is not
+ * a literal, or is a literal without a data type URI, contributes no nodes.
+ * </p>
+ *
+ * @param <TKey>
+ *            Key type
+ */
+public class QuadDataTypeCountMapper<TKey> extends QuadNodeCountMapper<TKey> {
+
+    private static final NodeWritable[] EMPTY = new NodeWritable[0];
+
+    @Override
+    protected NodeWritable[] getNodes(QuadWritable tuple) {
+        Node object = tuple.get().getObject();
+        String datatypeUri = object.isLiteral() ? object.getLiteralDatatypeURI() : null;
+        if (datatypeUri == null) {
+            return EMPTY;
+        }
+        Node datatypeNode = NodeFactory.createURI(datatypeUri);
+        return new NodeWritable[] { new NodeWritable(datatypeNode) };
+    }
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/92fb810a/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/count/datatypes/TripleDataTypeCountMapper.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/count/datatypes/TripleDataTypeCountMapper.java b/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/count/datatypes/TripleDataTypeCountMapper.java
new file mode 100644
index 0000000..2c294be
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/count/datatypes/TripleDataTypeCountMapper.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce.count.datatypes;
+
+import org.apache.jena.hadoop.rdf.mapreduce.count.NodeCountReducer;
+import org.apache.jena.hadoop.rdf.mapreduce.count.TripleNodeCountMapper;
+import org.apache.jena.hadoop.rdf.types.NodeWritable;
+import org.apache.jena.hadoop.rdf.types.TripleWritable;
+
+import com.hp.hpl.jena.graph.Node;
+import com.hp.hpl.jena.graph.NodeFactory;
+
+/**
+ * Mapper that counts data type usages within triples rather than the usages
+ * of the triples' own nodes; designed primarily for use in conjunction with
+ * {@link NodeCountReducer}
+ * <p>
+ * When the object of a triple is a literal that has a data type URI, that URI
+ * is turned into a node so that it can be counted. A triple whose object is
+ * not a literal, or is a literal without a data type URI, contributes no nodes.
+ * </p>
+ *
+ * @param <TKey>
+ *            Key type
+ */
+public class TripleDataTypeCountMapper<TKey> extends TripleNodeCountMapper<TKey> {
+
+    private static final NodeWritable[] EMPTY = new NodeWritable[0];
+
+    @Override
+    protected NodeWritable[] getNodes(TripleWritable tuple) {
+        Node object = tuple.get().getObject();
+        String datatypeUri = object.isLiteral() ? object.getLiteralDatatypeURI() : null;
+        if (datatypeUri == null) {
+            return EMPTY;
+        }
+        Node datatypeNode = NodeFactory.createURI(datatypeUri);
+        return new NodeWritable[] { new NodeWritable(datatypeNode) };
+    }
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/92fb810a/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/count/namespaces/AbstractNodeTupleNamespaceCountMapper.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/count/namespaces/AbstractNodeTupleNamespaceCountMapper.java b/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/count/namespaces/AbstractNodeTupleNamespaceCountMapper.java
new file mode 100644
index 0000000..ba47765
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/count/namespaces/AbstractNodeTupleNamespaceCountMapper.java
@@ -0,0 +1,135 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce.count.namespaces;
+
+import java.io.IOException;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.jena.hadoop.rdf.mapreduce.TextCountReducer;
+import org.apache.jena.hadoop.rdf.types.AbstractNodeTupleWritable;
+import org.apache.jena.hadoop.rdf.types.NodeWritable;
+
+import com.hp.hpl.jena.graph.Node;
+
+/**
+ * Base class for mappers that split node tuple values into their nodes,
+ * extract the namespace URI each node uses and output every namespace as a
+ * key paired with a long value of 1. Can be used in conjunction with a
+ * {@link TextCountReducer} to count the usages of each unique namespace.
+ * Nodes that have no namespace produce no output.
+ *
+ * @param <TKey> Key type
+ * @param <TValue> Tuple type
+ * @param <T> Writable tuple type
+ */
+public abstract class AbstractNodeTupleNamespaceCountMapper<TKey, TValue, T extends AbstractNodeTupleWritable<TValue>> extends
+        Mapper<TKey, T, Text, LongWritable> {
+
+    protected static final String NO_NAMESPACE = null;
+    private LongWritable initialCount = new LongWritable(1);
+
+    @Override
+    protected void map(TKey key, T value, Context context) throws IOException, InterruptedException {
+        for (NodeWritable node : this.getNodes(value)) {
+            String ns = this.extractNamespace(node);
+            if (ns == null) {
+                // Node has no namespace so there is nothing to count
+                continue;
+            }
+            context.write(new Text(ns), this.initialCount);
+        }
+    }
+
+    /**
+     * Determines the namespace used by a node
+     * <p>
+     * Blank nodes and variables have no namespace, literals use the namespace
+     * of their data type URI (if any) and all other nodes use the namespace of
+     * their URI, in both cases derived via {@link #extractNamespace(String)}.
+     * </p>
+     * <p>
+     * Derived classes may override this to change the logic of how namespaces
+     * are extracted.
+     * </p>
+     *
+     * @param nw
+     *            Node
+     * @return Namespace, {@link #NO_NAMESPACE} if the node has none
+     */
+    protected String extractNamespace(NodeWritable nw) {
+        Node node = nw.get();
+        if (node.isBlank() || node.isVariable()) {
+            return NO_NAMESPACE;
+        }
+        if (!node.isLiteral()) {
+            return extractNamespace(node.getURI());
+        }
+        String datatype = node.getLiteralDatatypeURI();
+        return datatype == null ? NO_NAMESPACE : extractNamespace(datatype);
+    }
+
+    /**
+     * Determines the namespace portion of a URI
+     * <p>
+     * A URI containing a hash yields everything up to and including its last
+     * hash. Failing that a slash based namespace is attempted using the last
+     * slash, and where that is not possible the full URI is returned.
+     * </p>
+     * <p>
+     * Derived classes may override this to change the logic of how namespaces
+     * are extracted.
+     * </p>
+     *
+     * @param uri
+     *            URI
+     * @return Namespace
+     */
+    protected String extractNamespace(String uri) {
+        int hash = uri.lastIndexOf('#');
+        if (hash >= 0) {
+            // Hash namespace, keep everything up to and including the hash
+            return uri.substring(0, hash + 1);
+        }
+
+        int slash = uri.lastIndexOf('/');
+        if (slash < 0) {
+            // Neither a hash nor a slash so use full URI
+            return uri;
+        }
+
+        // A last slash that is immediately after the scheme component, or that
+        // ends the URI, does not delimit a namespace so use full URI
+        int schemeSep = uri.indexOf(':');
+        if (slash - schemeSep <= 2 || slash == uri.length() - 1) {
+            return uri;
+        }
+        return uri.substring(0, slash + 1);
+    }
+
+    /**
+     * Selects the nodes of the tuple whose namespaces should be counted
+     *
+     * @param tuple
+     *            Tuple
+     * @return Nodes
+     */
+    protected abstract NodeWritable[] getNodes(T tuple);
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/92fb810a/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/count/namespaces/QuadNamespaceCountMapper.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/count/namespaces/QuadNamespaceCountMapper.java b/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/count/namespaces/QuadNamespaceCountMapper.java
new file mode 100644
index 0000000..97ee546
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/count/namespaces/QuadNamespaceCountMapper.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce.count.namespaces;
+
+import org.apache.jena.hadoop.rdf.mapreduce.TextCountReducer;
+import org.apache.jena.hadoop.rdf.types.NodeWritable;
+import org.apache.jena.hadoop.rdf.types.QuadWritable;
+
+import com.hp.hpl.jena.sparql.core.Quad;
+
+/**
+ * A mapper for counting namespace usages across the graph, subject, predicate
+ * and object of quads, designed primarily for use with {@link TextCountReducer}
+ *
+ * @param <TKey> Key type
+ */
+public class QuadNamespaceCountMapper<TKey> extends AbstractNodeTupleNamespaceCountMapper<TKey, Quad, QuadWritable> {
+
+    @Override
+    protected NodeWritable[] getNodes(QuadWritable tuple) {
+        Quad quad = tuple.get();
+        NodeWritable g = new NodeWritable(quad.getGraph());
+        NodeWritable s = new NodeWritable(quad.getSubject());
+        NodeWritable p = new NodeWritable(quad.getPredicate());
+        NodeWritable o = new NodeWritable(quad.getObject());
+        return new NodeWritable[] { g, s, p, o };
+    }
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/92fb810a/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/count/namespaces/TripleNamespaceCountMapper.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/count/namespaces/TripleNamespaceCountMapper.java b/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/count/namespaces/TripleNamespaceCountMapper.java
new file mode 100644
index 0000000..eacef7a
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/count/namespaces/TripleNamespaceCountMapper.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce.count.namespaces;
+
+import org.apache.jena.hadoop.rdf.mapreduce.TextCountReducer;
+import org.apache.jena.hadoop.rdf.types.NodeWritable;
+import org.apache.jena.hadoop.rdf.types.TripleWritable;
+
+import com.hp.hpl.jena.graph.Triple;
+
+/**
+ * A mapper for counting namespace usages across the subject, predicate and
+ * object of triples, designed primarily for use with {@link TextCountReducer}
+ *
+ * @param <TKey> Key type
+ */
+public class TripleNamespaceCountMapper<TKey> extends AbstractNodeTupleNamespaceCountMapper<TKey, Triple, TripleWritable> {
+
+    @Override
+    protected NodeWritable[] getNodes(TripleWritable tuple) {
+        Triple triple = tuple.get();
+        NodeWritable s = new NodeWritable(triple.getSubject());
+        NodeWritable p = new NodeWritable(triple.getPredicate());
+        NodeWritable o = new NodeWritable(triple.getObject());
+        return new NodeWritable[] { s, p, o };
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/92fb810a/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/count/positional/QuadObjectCountMapper.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/count/positional/QuadObjectCountMapper.java b/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/count/positional/QuadObjectCountMapper.java
new file mode 100644
index 0000000..ef19623
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/count/positional/QuadObjectCountMapper.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce.count.positional;
+
+import org.apache.jena.hadoop.rdf.mapreduce.count.NodeCountReducer;
+import org.apache.jena.hadoop.rdf.mapreduce.count.QuadNodeCountMapper;
+import org.apache.jena.hadoop.rdf.types.NodeWritable;
+import org.apache.jena.hadoop.rdf.types.QuadWritable;
+
+
+/**
+ * A mapper for counting object node usages within quads designed primarily for
+ * use in conjunction with {@link NodeCountReducer}
+ * 
+ * 
+ * 
+ * @param <TKey>
+ *            Key type
+ */
+public class QuadObjectCountMapper<TKey> extends QuadNodeCountMapper<TKey> {
+
+    @Override
+    protected NodeWritable[] getNodes(QuadWritable tuple) {
+        return new NodeWritable[] { new NodeWritable(tuple.get().getObject()) };
+    }
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/92fb810a/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/count/positional/QuadPredicateCountMapper.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/count/positional/QuadPredicateCountMapper.java b/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/count/positional/QuadPredicateCountMapper.java
new file mode 100644
index 0000000..0ef0731
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/count/positional/QuadPredicateCountMapper.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce.count.positional;
+
+import org.apache.jena.hadoop.rdf.mapreduce.count.NodeCountReducer;
+import org.apache.jena.hadoop.rdf.mapreduce.count.QuadNodeCountMapper;
+import org.apache.jena.hadoop.rdf.types.NodeWritable;
+import org.apache.jena.hadoop.rdf.types.QuadWritable;
+
+
+/**
+ * A mapper for counting predicate node usages within quads designed primarily
+ * for use in conjunction with {@link NodeCountReducer}
+ * 
+ * 
+ * 
+ * @param <TKey>
+ *            Key type
+ */
+public class QuadPredicateCountMapper<TKey> extends QuadNodeCountMapper<TKey> {
+
+    @Override
+    protected NodeWritable[] getNodes(QuadWritable tuple) {
+        return new NodeWritable[] { new NodeWritable(tuple.get().getPredicate()) };
+    }
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/92fb810a/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/count/positional/QuadSubjectCountMapper.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/count/positional/QuadSubjectCountMapper.java b/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/count/positional/QuadSubjectCountMapper.java
new file mode 100644
index 0000000..2b48e79
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/count/positional/QuadSubjectCountMapper.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce.count.positional;
+
+import org.apache.jena.hadoop.rdf.mapreduce.count.NodeCountReducer;
+import org.apache.jena.hadoop.rdf.mapreduce.count.QuadNodeCountMapper;
+import org.apache.jena.hadoop.rdf.types.NodeWritable;
+import org.apache.jena.hadoop.rdf.types.QuadWritable;
+
+
+/**
+ * A mapper for counting subject node usages within quads designed primarily for use
+ * in conjunction with {@link NodeCountReducer}
+ * 
+ * 
+ * 
+ * @param <TKey> Key type
+ */
+public class QuadSubjectCountMapper<TKey> extends QuadNodeCountMapper<TKey> {
+
+    @Override
+    protected NodeWritable[] getNodes(QuadWritable tuple) {
+        return new NodeWritable[] { new NodeWritable(tuple.get().getSubject()) };
+    }
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/92fb810a/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/count/positional/TripleObjectCountMapper.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/count/positional/TripleObjectCountMapper.java b/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/count/positional/TripleObjectCountMapper.java
new file mode 100644
index 0000000..16250eb
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/count/positional/TripleObjectCountMapper.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce.count.positional;
+
+import org.apache.jena.hadoop.rdf.mapreduce.count.NodeCountReducer;
+import org.apache.jena.hadoop.rdf.mapreduce.count.TripleNodeCountMapper;
+import org.apache.jena.hadoop.rdf.types.NodeWritable;
+import org.apache.jena.hadoop.rdf.types.TripleWritable;
+
+
+/**
+ * A mapper for counting object node usages within triples designed primarily for use
+ * in conjunction with {@link NodeCountReducer}
+ * 
+ * 
+ * 
+ * @param <TKey> Key type
+ */
+public class TripleObjectCountMapper<TKey> extends TripleNodeCountMapper<TKey> {
+
+    @Override
+    protected NodeWritable[] getNodes(TripleWritable tuple) {
+        return new NodeWritable[] { new NodeWritable(tuple.get().getObject()) };
+    }
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/92fb810a/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/count/positional/TriplePredicateCountMapper.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/count/positional/TriplePredicateCountMapper.java b/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/count/positional/TriplePredicateCountMapper.java
new file mode 100644
index 0000000..de97a9b
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/count/positional/TriplePredicateCountMapper.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce.count.positional;
+
+import org.apache.jena.hadoop.rdf.mapreduce.count.NodeCountReducer;
+import org.apache.jena.hadoop.rdf.mapreduce.count.TripleNodeCountMapper;
+import org.apache.jena.hadoop.rdf.types.NodeWritable;
+import org.apache.jena.hadoop.rdf.types.TripleWritable;
+
+
+/**
+ * A mapper for counting predicate node usages within triples designed primarily
+ * for use in conjunction with {@link NodeCountReducer}
+ * 
+ * 
+ * 
+ * @param <TKey>
+ *            Key type
+ */
+public class TriplePredicateCountMapper<TKey> extends TripleNodeCountMapper<TKey> {
+
+    @Override
+    protected NodeWritable[] getNodes(TripleWritable tuple) {
+        return new NodeWritable[] { new NodeWritable(tuple.get().getPredicate()) };
+    }
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/92fb810a/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/count/positional/TripleSubjectCountMapper.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/count/positional/TripleSubjectCountMapper.java b/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/count/positional/TripleSubjectCountMapper.java
new file mode 100644
index 0000000..5dc3838
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/count/positional/TripleSubjectCountMapper.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce.count.positional;
+
+import org.apache.jena.hadoop.rdf.mapreduce.count.NodeCountReducer;
+import org.apache.jena.hadoop.rdf.mapreduce.count.TripleNodeCountMapper;
+import org.apache.jena.hadoop.rdf.types.NodeWritable;
+import org.apache.jena.hadoop.rdf.types.TripleWritable;
+
+
+/**
+ * A mapper for counting subject node usages within triples designed primarily for use
+ * in conjunction with {@link NodeCountReducer}
+ * 
+ * 
+ * 
+ * @param <TKey> Key type
+ */
+public class TripleSubjectCountMapper<TKey> extends TripleNodeCountMapper<TKey> {
+
+    @Override
+    protected NodeWritable[] getNodes(TripleWritable tuple) {
+        return new NodeWritable[] { new NodeWritable(tuple.get().getSubject()) };
+    }
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/92fb810a/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/filter/AbstractNodeTupleFilterMapper.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/filter/AbstractNodeTupleFilterMapper.java b/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/filter/AbstractNodeTupleFilterMapper.java
new file mode 100644
index 0000000..1ecec75
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/filter/AbstractNodeTupleFilterMapper.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce.filter;
+
+import java.io.IOException;
+
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.jena.hadoop.rdf.mapreduce.RdfMapReduceConstants;
+import org.apache.jena.hadoop.rdf.types.AbstractNodeTupleWritable;
+
+
+/**
+ * Abstract mapper implementation which helps in filtering tuples from the
+ * input, derived implementations provide an implementation of the
+ * {@link #accepts(TKey, T)}
+ * 
+ * 
+ * 
+ * @param <TKey>
+ *            Key type
+ * @param <TValue>
+ *            Tuple type
+ * @param <T>
+ *            Writable tuple type
+ */
+@SuppressWarnings("javadoc")
+public abstract class AbstractNodeTupleFilterMapper<TKey, TValue, T extends AbstractNodeTupleWritable<TValue>> extends
+        Mapper<TKey, T, TKey, T> {
+
+    private boolean invert = false;
+
+    @Override
+    protected void setup(Context context) throws IOException, InterruptedException {
+        super.setup(context);
+        this.invert = context.getConfiguration().getBoolean(RdfMapReduceConstants.FILTER_INVERT, this.invert);
+    }
+
+    @Override
+    protected final void map(TKey key, T value, Context context) throws IOException, InterruptedException {
+        if (this.accepts(key, value)) {
+            if (!this.invert)
+                context.write(key, value);
+        } else if (this.invert) {
+            context.write(key, value);
+        }
+    }
+
+    /**
+     * Gets whether the mapper accepts the key value pair and will pass it as
+     * output
+     * 
+     * @param key
+     *            Key
+     * @param tuple
+     *            Tuple value
+     * @return True if the mapper accepts the given key value pair, false
+     *         otherwise
+     */
+    protected abstract boolean accepts(TKey key, T tuple);
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/92fb810a/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/filter/AbstractQuadFilterMapper.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/filter/AbstractQuadFilterMapper.java b/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/filter/AbstractQuadFilterMapper.java
new file mode 100644
index 0000000..3caf051
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/filter/AbstractQuadFilterMapper.java
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce.filter;
+
+import org.apache.jena.hadoop.rdf.types.QuadWritable;
+
+import com.hp.hpl.jena.sparql.core.Quad;
+
+/**
+ * Abstract mapper implementation for filtering quads, binds the tuple types of
+ * {@link AbstractNodeTupleFilterMapper} to {@link Quad} and
+ * {@link QuadWritable} leaving the acceptance test to derived classes
+ *
+ * @param <TKey>
+ *            Key type
+ */
+public abstract class AbstractQuadFilterMapper<TKey> extends AbstractNodeTupleFilterMapper<TKey, Quad, QuadWritable> {
+
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/92fb810a/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/filter/AbstractTripleFilterMapper.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/filter/AbstractTripleFilterMapper.java b/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/filter/AbstractTripleFilterMapper.java
new file mode 100644
index 0000000..e99e369
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/filter/AbstractTripleFilterMapper.java
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce.filter;
+
+import org.apache.jena.hadoop.rdf.types.TripleWritable;
+
+import com.hp.hpl.jena.graph.Triple;
+
+/**
+ * Abstract mapper implementation for filtering triples, binds the tuple types
+ * of {@link AbstractNodeTupleFilterMapper} to {@link Triple} and
+ * {@link TripleWritable} leaving the acceptance test to derived classes
+ *
+ * @param <TKey>
+ *            Key type
+ */
+public abstract class AbstractTripleFilterMapper<TKey> extends AbstractNodeTupleFilterMapper<TKey, Triple, TripleWritable> {
+
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/92fb810a/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/filter/GroundQuadFilterMapper.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/filter/GroundQuadFilterMapper.java b/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/filter/GroundQuadFilterMapper.java
new file mode 100644
index 0000000..c2a6ab9
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/filter/GroundQuadFilterMapper.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce.filter;
+
+import org.apache.jena.hadoop.rdf.types.QuadWritable;
+
+import com.hp.hpl.jena.sparql.core.Quad;
+
+/**
+ * A quad filter which accepts only ground quads i.e. those with no blank nodes
+ * or variables
+ * 
+ * 
+ * 
+ * @param <TKey>
+ *            Key type
+ */
+public class GroundQuadFilterMapper<TKey> extends AbstractQuadFilterMapper<TKey> {
+
+    @Override
+    protected boolean accepts(Object key, QuadWritable tuple) {
+        Quad q = tuple.get();
+        if (!q.isConcrete())
+            return false;
+        // Ground if all nodes are URI/Literal
+        return (q.getGraph().isURI() || q.getGraph().isLiteral()) && (q.getSubject().isURI() || q.getSubject().isLiteral())
+                && (q.getPredicate().isURI() || q.getPredicate().isLiteral())
+                && (q.getObject().isURI() || q.getObject().isLiteral());
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/92fb810a/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/filter/GroundTripleFilterMapper.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/filter/GroundTripleFilterMapper.java b/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/filter/GroundTripleFilterMapper.java
new file mode 100644
index 0000000..f83a0e5
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/filter/GroundTripleFilterMapper.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce.filter;
+
+import org.apache.jena.hadoop.rdf.types.TripleWritable;
+
+import com.hp.hpl.jena.graph.Triple;
+
+/**
+ * A triple filter which accepts only ground triples i.e. those with no blank
+ * nodes or variables
+ * 
+ * 
+ * 
+ * @param <TKey>
+ *            Key type
+ */
+public class GroundTripleFilterMapper<TKey> extends AbstractTripleFilterMapper<TKey> {
+
+    @Override
+    protected boolean accepts(Object key, TripleWritable tuple) {
+        Triple t = tuple.get();
+        if (!t.isConcrete())
+            return false;
+        // Ground if all nodes are URI/Literal
+        return (t.getSubject().isURI() || t.getSubject().isLiteral())
+                && (t.getPredicate().isURI() || t.getPredicate().isLiteral())
+                && (t.getObject().isURI() || t.getObject().isLiteral());
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/92fb810a/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/filter/ValidQuadFilterMapper.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/filter/ValidQuadFilterMapper.java b/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/filter/ValidQuadFilterMapper.java
new file mode 100644
index 0000000..86771fa
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/filter/ValidQuadFilterMapper.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce.filter;
+
+import org.apache.jena.hadoop.rdf.types.QuadWritable;
+
+import com.hp.hpl.jena.sparql.core.Quad;
+
+/**
+ * A quad filter mapper which accepts only valid quads, by which we mean they
+ * meet the following criteria:
+ * <ul>
+ * <li>Graph is a URI or Blank Node</li>
+ * <li>Subject is a URI or Blank Node</li>
+ * <li>Predicate is a URI</li>
+ * <li>Object is a URI, Blank Node or Literal</li>
+ * </ul>
+ * 
+ * 
+ * 
+ * @param <TKey>
+ */
+public final class ValidQuadFilterMapper<TKey> extends AbstractQuadFilterMapper<TKey> {
+
+    @Override
+    protected final boolean accepts(TKey key, QuadWritable tuple) {
+        Quad q = tuple.get();
+        return (q.getGraph().isURI() || q.getGraph().isBlank()) && (q.getSubject().isURI() || q.getSubject().isBlank())
+                && q.getPredicate().isURI() && (q.getObject().isURI() || q.getObject().isBlank() || q.getObject().isLiteral());
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/92fb810a/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/filter/ValidTripleFilterMapper.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/filter/ValidTripleFilterMapper.java b/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/filter/ValidTripleFilterMapper.java
new file mode 100644
index 0000000..3a13172
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/filter/ValidTripleFilterMapper.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce.filter;
+
+import org.apache.jena.hadoop.rdf.types.TripleWritable;
+
+import com.hp.hpl.jena.graph.Triple;
+
+/**
+ * A triple filter mapper which accepts only valid triples, by which we mean they
+ * meet the following criteria:
+ * <ul>
+ * <li>Subject is a URI or Blank Node</li>
+ * <li>Predicate is a URI</li>
+ * <li>Object is a URI, Blank Node or Literal</li>
+ * </ul>
+ * 
+ * 
+ * 
+ * @param <TKey>
+ */
+public final class ValidTripleFilterMapper<TKey> extends AbstractTripleFilterMapper<TKey> {
+
+    @Override
+    protected final boolean accepts(TKey key, TripleWritable tuple) {
+        Triple t = tuple.get();
+        return (t.getSubject().isURI() || t.getSubject().isBlank()) && t.getPredicate().isURI()
+                && (t.getObject().isURI() || t.getObject().isBlank() || t.getObject().isLiteral());
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/92fb810a/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/filter/positional/AbstractQuadFilterByPositionMapper.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/filter/positional/AbstractQuadFilterByPositionMapper.java b/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/filter/positional/AbstractQuadFilterByPositionMapper.java
new file mode 100644
index 0000000..f5b5876
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/filter/positional/AbstractQuadFilterByPositionMapper.java
@@ -0,0 +1,171 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce.filter.positional;
+
+import org.apache.jena.hadoop.rdf.mapreduce.filter.AbstractQuadFilterMapper;
+import org.apache.jena.hadoop.rdf.types.QuadWritable;
+
+import com.hp.hpl.jena.graph.Node;
+import com.hp.hpl.jena.sparql.core.Quad;
+
+/**
+ * An abstract quad filter that applies separate acceptance criteria to each
+ * position (graph, subject, predicate and object) of a quad.
+ * <p>
+ * Out of the box this implementation rejects every quad it sees. Derived
+ * implementations must override one or more of the position specific accept
+ * methods in order to let some quads through, see
+ * {@link QuadFilterByGraphUriMapper} for an example implementation. The
+ * combined {@code accepts} decision itself is final, only the per position
+ * methods can be overridden.
+ * </p>
+ * 
+ * @param <TKey>
+ *            Key type
+ */
+public abstract class AbstractQuadFilterByPositionMapper<TKey> extends AbstractQuadFilterMapper<TKey> {
+
+    /**
+     * Decides whether a quad passes the filter by checking its graph, subject,
+     * predicate and object positions in that order.
+     * <p>
+     * For each position the specific {@code accepts...(Node)} check is only
+     * consulted when the corresponding {@code acceptsAll...()} method returns
+     * {@code false}. Evaluation stops at the first rejected position, so
+     * later positions are not inspected.
+     * </p>
+     * 
+     * @param key
+     *            Key, ignored by this implementation
+     * @param tuple
+     *            Quad to test
+     * @return True if every position is accepted, false otherwise
+     */
+    @Override
+    protected final boolean accepts(Object key, QuadWritable tuple) {
+        Quad quad = tuple.get();
+        // Short-circuit evaluation preserves the call order: the specific check
+        // for a position runs only when its "accepts all" method says false
+        return (this.acceptsAllGraphs() || this.acceptsGraph(quad.getGraph()))
+                && (this.acceptsAllSubjects() || this.acceptsSubject(quad.getSubject()))
+                && (this.acceptsAllPredicates() || this.acceptsPredicate(quad.getPredicate()))
+                && (this.acceptsAllObjects() || this.acceptsObject(quad.getObject()));
+    }
+
+    /**
+     * Indicates whether every graph is acceptable to this filter. When this
+     * returns {@code false} the graph of each quad is passed to
+     * {@link #acceptsGraph(Node)} to decide whether that graph is acceptable.
+     * <p>
+     * Unless overridden this returns {@code false}.
+     * </p>
+     * 
+     * @return True if all graphs are accepted, false otherwise
+     */
+    protected boolean acceptsAllGraphs() {
+        return false;
+    }
+
+    /**
+     * Tests a single graph node. Unless overridden no graph is accepted.
+     * 
+     * @param graph
+     *            Graph node to test
+     * @return True if the graph is accepted, false otherwise
+     */
+    protected boolean acceptsGraph(Node graph) {
+        return false;
+    }
+
+    /**
+     * Indicates whether every subject is acceptable to this filter. When this
+     * returns {@code false} the subject of each quad is passed to
+     * {@link #acceptsSubject(Node)} to decide whether that subject is acceptable.
+     * <p>
+     * Unless overridden this returns {@code false}.
+     * </p>
+     * 
+     * @return True if all subjects are accepted, false otherwise
+     */
+    protected boolean acceptsAllSubjects() {
+        return false;
+    }
+
+    /**
+     * Tests a single subject node. Unless overridden no subject is accepted.
+     * 
+     * @param subject
+     *            Subject node to test
+     * @return True if the subject is accepted, false otherwise
+     */
+    protected boolean acceptsSubject(Node subject) {
+        return false;
+    }
+
+    /**
+     * Indicates whether every predicate is acceptable to this filter. When this
+     * returns {@code false} the predicate of each quad is passed to
+     * {@link #acceptsPredicate(Node)} to decide whether it is acceptable.
+     * <p>
+     * Unless overridden this returns {@code false}.
+     * </p>
+     * 
+     * @return True if all predicates are accepted, false otherwise
+     */
+    protected boolean acceptsAllPredicates() {
+        return false;
+    }
+
+    /**
+     * Tests a single predicate node. Unless overridden no predicate is accepted.
+     * 
+     * @param predicate
+     *            Predicate node to test
+     * @return True if the predicate is accepted, false otherwise
+     */
+    protected boolean acceptsPredicate(Node predicate) {
+        return false;
+    }
+
+    /**
+     * Indicates whether every object is acceptable to this filter. When this
+     * returns {@code false} the object of each quad is passed to
+     * {@link #acceptsObject(Node)} to decide whether that object is acceptable.
+     * <p>
+     * Unless overridden this returns {@code false}.
+     * </p>
+     * 
+     * @return True if all objects are accepted, false otherwise
+     */
+    protected boolean acceptsAllObjects() {
+        return false;
+    }
+
+    /**
+     * Tests a single object node. Unless overridden no object is accepted.
+     * 
+     * @param object
+     *            Object node to test
+     * @return True if the object is accepted, false otherwise
+     */
+    protected boolean acceptsObject(Node object) {
+        return false;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/92fb810a/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/filter/positional/AbstractTripleFilterByPositionMapper.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/filter/positional/AbstractTripleFilterByPositionMapper.java b/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/filter/positional/AbstractTripleFilterByPositionMapper.java
new file mode 100644
index 0000000..973d651
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/filter/positional/AbstractTripleFilterByPositionMapper.java
@@ -0,0 +1,141 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce.filter.positional;
+
+import org.apache.jena.hadoop.rdf.mapreduce.filter.AbstractTripleFilterMapper;
+import org.apache.jena.hadoop.rdf.types.TripleWritable;
+
+import com.hp.hpl.jena.graph.Node;
+import com.hp.hpl.jena.graph.Triple;
+
+/**
+ * An abstract triple filter that applies separate acceptance criteria to each
+ * position (subject, predicate and object) of a triple.
+ * <p>
+ * Out of the box this implementation rejects every triple it sees. Derived
+ * implementations must override one or more of the position specific accept
+ * methods in order to let some triples through, see
+ * {@link TripleFilterByPredicateUriMapper} for an example implementation. The
+ * combined {@code accepts} decision itself is final, only the per position
+ * methods can be overridden.
+ * </p>
+ * 
+ * @param <TKey>
+ *            Key type
+ */
+public abstract class AbstractTripleFilterByPositionMapper<TKey> extends AbstractTripleFilterMapper<TKey> {
+
+    /**
+     * Decides whether a triple passes the filter by checking its subject,
+     * predicate and object positions in that order. The specific check for a
+     * position is consulted only when its "accepts all" method returns
+     * {@code false}, and evaluation stops at the first rejected position.
+     * 
+     * @param key
+     *            Key, ignored by this implementation
+     * @param tuple
+     *            Triple to test
+     * @return True if every position is accepted, false otherwise
+     */
+    @Override
+    protected final boolean accepts(Object key, TripleWritable tuple) {
+        Triple triple = tuple.get();
+        // Short-circuit evaluation preserves the call order: the specific check
+        // for a position runs only when its "accepts all" method says false
+        return (this.acceptsAllSubjects() || this.acceptsSubject(triple.getSubject()))
+                && (this.acceptsAllPredicates() || this.acceptsPredicate(triple.getPredicate()))
+                && (this.acceptsAllObjects() || this.acceptsObject(triple.getObject()));
+    }
+
+    /**
+     * Indicates whether every subject is acceptable to this filter. When this
+     * returns {@code false} the subject of each triple is passed to
+     * {@link #acceptsSubject(Node)} to decide whether that subject is acceptable.
+     * <p>
+     * Unless overridden this returns {@code false}.
+     * </p>
+     * 
+     * @return True if all subjects are accepted, false otherwise
+     */
+    protected boolean acceptsAllSubjects() {
+        return false;
+    }
+
+    /**
+     * Tests a single subject node. Unless overridden no subject is accepted.
+     * 
+     * @param subject
+     *            Subject node to test
+     * @return True if the subject is accepted, false otherwise
+     */
+    protected boolean acceptsSubject(Node subject) {
+        return false;
+    }
+
+    /**
+     * Indicates whether every predicate is acceptable to this filter. When this
+     * returns {@code false} the predicate of each triple is passed to
+     * {@link #acceptsPredicate(Node)} to decide whether it is acceptable.
+     * <p>
+     * Unless overridden this returns {@code false}.
+     * </p>
+     * 
+     * @return True if all predicates are accepted, false otherwise
+     */
+    protected boolean acceptsAllPredicates() {
+        return false;
+    }
+
+    /**
+     * Tests a single predicate node. Unless overridden no predicate is accepted.
+     * 
+     * @param predicate
+     *            Predicate node to test
+     * @return True if the predicate is accepted, false otherwise
+     */
+    protected boolean acceptsPredicate(Node predicate) {
+        return false;
+    }
+
+    /**
+     * Indicates whether every object is acceptable to this filter. When this
+     * returns {@code false} the object of each triple is passed to
+     * {@link #acceptsObject(Node)} to decide whether that object is acceptable.
+     * <p>
+     * Unless overridden this returns {@code false}.
+     * </p>
+     * 
+     * @return True if all objects are accepted, false otherwise
+     */
+    protected boolean acceptsAllObjects() {
+        return false;
+    }
+
+    /**
+     * Tests a single object node. Unless overridden no object is accepted.
+     * 
+     * @param object
+     *            Object node to test
+     * @return True if the object is accepted, false otherwise
+     */
+    protected boolean acceptsObject(Node object) {
+        return false;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/jena/blob/92fb810a/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/filter/positional/QuadFilterByGraphUriMapper.java
----------------------------------------------------------------------
diff --git a/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/filter/positional/QuadFilterByGraphUriMapper.java b/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/filter/positional/QuadFilterByGraphUriMapper.java
new file mode 100644
index 0000000..1c2b29e
--- /dev/null
+++ b/jena-hadoop-rdf/hadoop-rdf-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/filter/positional/QuadFilterByGraphUriMapper.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *     
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.hadoop.rdf.mapreduce.filter.positional;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.jena.hadoop.rdf.mapreduce.RdfMapReduceConstants;
+
+import com.hp.hpl.jena.graph.Node;
+import com.hp.hpl.jena.graph.NodeFactory;
+
+/**
+ * A quad filter which selects quads whose graph is one of the graph URIs
+ * configured via {@link RdfMapReduceConstants#FILTER_GRAPH_URIS}. Subjects,
+ * predicates and objects are not restricted.
+ * 
+ * @param <TKey>
+ *            Key type
+ */
+public class QuadFilterByGraphUriMapper<TKey> extends AbstractQuadFilterByPositionMapper<TKey> {
+
+    private final List<Node> graphs = new ArrayList<Node>();
+
+    @Override
+    protected void setup(Context context) throws IOException, InterruptedException {
+        super.setup(context);
+
+        // Read the graph URIs we are filtering on from the configuration
+        String[] configured = context.getConfiguration().getStrings(RdfMapReduceConstants.FILTER_GRAPH_URIS);
+        if (configured == null) {
+            return;
+        }
+        for (String uri : configured) {
+            this.graphs.add(NodeFactory.createURI(uri));
+        }
+    }
+
+    @Override
+    protected boolean acceptsAllSubjects() {
+        return true;
+    }
+
+    @Override
+    protected boolean acceptsGraph(Node graph) {
+        // An empty list contains nothing, so no graph passes when none are set
+        return this.graphs.contains(graph);
+    }
+
+    @Override
+    protected boolean acceptsAllPredicates() {
+        return true;
+    }
+
+    @Override
+    protected boolean acceptsAllObjects() {
+        return true;
+    }
+}


Mime
View raw message