Return-Path: X-Original-To: apmail-jena-commits-archive@www.apache.org Delivered-To: apmail-jena-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 84611CA0E for ; Mon, 5 Jan 2015 15:07:10 +0000 (UTC) Received: (qmail 71651 invoked by uid 500); 5 Jan 2015 15:07:11 -0000 Delivered-To: apmail-jena-commits-archive@jena.apache.org Received: (qmail 71602 invoked by uid 500); 5 Jan 2015 15:07:11 -0000 Mailing-List: contact commits-help@jena.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@jena.apache.org Delivered-To: mailing list commits@jena.apache.org Received: (qmail 70911 invoked by uid 99); 5 Jan 2015 15:07:10 -0000 Received: from tyr.zones.apache.org (HELO tyr.zones.apache.org) (140.211.11.114) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 05 Jan 2015 15:07:10 +0000 Received: by tyr.zones.apache.org (Postfix, from userid 65534) id B4F32A3FAD1; Mon, 5 Jan 2015 15:07:10 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: rvesse@apache.org To: commits@jena.apache.org Date: Mon, 05 Jan 2015 15:07:48 -0000 Message-Id: <79b97d5023f74a28b6e62bdc6cba1567@git.apache.org> In-Reply-To: <2076f9a3be3443209fa50b5f6042c11d@git.apache.org> References: <2076f9a3be3443209fa50b5f6042c11d@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [40/52] [abbrv] jena git commit: Further rebranding to Elephas http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/filter/positional/TripleFilterBySubjectUriMapper.java ---------------------------------------------------------------------- diff --git a/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/filter/positional/TripleFilterBySubjectUriMapper.java 
b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/filter/positional/TripleFilterBySubjectUriMapper.java new file mode 100644 index 0000000..d6ac375 --- /dev/null +++ b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/filter/positional/TripleFilterBySubjectUriMapper.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.jena.hadoop.rdf.mapreduce.filter.positional; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.jena.hadoop.rdf.mapreduce.RdfMapReduceConstants; + +import com.hp.hpl.jena.graph.Node; +import com.hp.hpl.jena.graph.NodeFactory; + +/** + * A triple filter which selects triples which have matching subjects + * + * + * + * @param <TKey> + * Key type + */ +public class TripleFilterBySubjectUriMapper<TKey> extends AbstractTripleFilterByPositionMapper<TKey> { + + private List<Node> subjects = new ArrayList<Node>(); + + @Override + protected void setup(Context context) throws IOException, InterruptedException { + super.setup(context); + + // Get the subject URIs we are filtering on + String[] subjectUris = context.getConfiguration().getStrings(RdfMapReduceConstants.FILTER_SUBJECT_URIS); + if (subjectUris != null) { + for (String subjectUri : subjectUris) { + this.subjects.add(NodeFactory.createURI(subjectUri)); + } + } + } + + @Override + protected boolean acceptsSubject(Node subject) { + if (this.subjects.size() == 0) + return false; + return this.subjects.contains(subject); + } + + @Override + protected boolean acceptsAllPredicates() { + return true; + } + + @Override + protected boolean acceptsAllObjects() { + return true; + } +} http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/AbstractNodeTupleGroupingMapper.java ---------------------------------------------------------------------- diff --git a/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/AbstractNodeTupleGroupingMapper.java b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/AbstractNodeTupleGroupingMapper.java new file mode 100644 index 0000000..e3d51e4 --- /dev/null +++ 
b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/AbstractNodeTupleGroupingMapper.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.jena.hadoop.rdf.mapreduce.group; + +import java.io.IOException; + +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.jena.hadoop.rdf.types.AbstractNodeTupleWritable; +import org.apache.jena.hadoop.rdf.types.NodeWritable; + + +/** + * Abstract mapper implementation which helps in grouping tuples by assigning + * them a {@link NodeWritable} key in place of their existing key. Derived + * implementations of this may select the key based on some component of the + * tuple or by other custom logic. 
+ * + * + * + * @param <TKey> + * Key type + * @param <TValue> + * Tuple type + * @param <T> + * Writable tuple type + */ +public abstract class AbstractNodeTupleGroupingMapper<TKey, TValue, T extends AbstractNodeTupleWritable<TValue>> extends + Mapper<TKey, T, NodeWritable, T> { + + @Override + protected final void map(TKey key, T value, Context context) throws IOException, InterruptedException { + NodeWritable newKey = this.selectKey(value); + context.write(newKey, value); + } + + /** + * Gets the key to associate with the tuple + * + * @param tuple + * Tuple + * @return Node to use as key + */ + protected abstract NodeWritable selectKey(T tuple); +} http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/AbstractQuadGroupingMapper.java ---------------------------------------------------------------------- diff --git a/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/AbstractQuadGroupingMapper.java b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/AbstractQuadGroupingMapper.java new file mode 100644 index 0000000..2b96110 --- /dev/null +++ b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/AbstractQuadGroupingMapper.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.jena.hadoop.rdf.mapreduce.group; + +import org.apache.jena.hadoop.rdf.types.NodeWritable; +import org.apache.jena.hadoop.rdf.types.QuadWritable; + +import com.hp.hpl.jena.sparql.core.Quad; + +/** + * Abstract mapper implementation which helps in grouping quads by assigning + * them a {@link NodeWritable} key in place of their existing key. Derived + * implementations of this may select the key based on some component of the + * quad or by other custom logic. + * + * + * + * @param + */ +public abstract class AbstractQuadGroupingMapper extends AbstractNodeTupleGroupingMapper { + + protected final NodeWritable selectKey(QuadWritable tuple) { + return this.selectKey(tuple.get()); + } + + /** + * Selects the key to use + * + * @param quad + * Quad + * @return Key to use + */ + protected abstract NodeWritable selectKey(Quad quad); +} http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/AbstractTripleGroupingMapper.java ---------------------------------------------------------------------- diff --git a/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/AbstractTripleGroupingMapper.java b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/AbstractTripleGroupingMapper.java new file mode 100644 index 0000000..3f44eb0 --- /dev/null +++ b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/AbstractTripleGroupingMapper.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.jena.hadoop.rdf.mapreduce.group; + +import org.apache.jena.hadoop.rdf.types.NodeWritable; +import org.apache.jena.hadoop.rdf.types.TripleWritable; + +import com.hp.hpl.jena.graph.Triple; + +/** + * Abstract mapper implementation which helps in grouping triples by assigning + * them a {@link NodeWritable} key in place of their existing key. Derived + * implementations of this may select the key based on some component of the + * triple or by other custom logic. 
+ * + * + * + * @param + */ +public abstract class AbstractTripleGroupingMapper extends AbstractNodeTupleGroupingMapper { + + @Override + protected final NodeWritable selectKey(TripleWritable tuple) { + return this.selectKey(tuple.get()); + } + + protected abstract NodeWritable selectKey(Triple triple); +} http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/QuadGroupByGraphMapper.java ---------------------------------------------------------------------- diff --git a/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/QuadGroupByGraphMapper.java b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/QuadGroupByGraphMapper.java new file mode 100644 index 0000000..3b9fd8d --- /dev/null +++ b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/QuadGroupByGraphMapper.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.jena.hadoop.rdf.mapreduce.group; + +import org.apache.jena.hadoop.rdf.types.NodeWritable; + +import com.hp.hpl.jena.sparql.core.Quad; + +/** + * A mapper which assists in grouping quads by graph by reassigning their keys + * to be their graphs + * + * + * + * @param + */ +public class QuadGroupByGraphMapper extends AbstractQuadGroupingMapper { + + @Override + protected NodeWritable selectKey(Quad quad) { + return new NodeWritable(quad.getGraph()); + } +} http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/QuadGroupByObjectMapper.java ---------------------------------------------------------------------- diff --git a/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/QuadGroupByObjectMapper.java b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/QuadGroupByObjectMapper.java new file mode 100644 index 0000000..eb26e0b --- /dev/null +++ b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/QuadGroupByObjectMapper.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.jena.hadoop.rdf.mapreduce.group; + +import org.apache.jena.hadoop.rdf.types.NodeWritable; + +import com.hp.hpl.jena.sparql.core.Quad; + +/** + * A mapper which assists in grouping quads by object by reassigning their keys + * to be their objects + * + * + * + * @param + */ +public class QuadGroupByObjectMapper extends AbstractQuadGroupingMapper { + + @Override + protected NodeWritable selectKey(Quad quad) { + return new NodeWritable(quad.getObject()); + } +} http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/QuadGroupByPredicateMapper.java ---------------------------------------------------------------------- diff --git a/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/QuadGroupByPredicateMapper.java b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/QuadGroupByPredicateMapper.java new file mode 100644 index 0000000..2670cf4 --- /dev/null +++ b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/QuadGroupByPredicateMapper.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.jena.hadoop.rdf.mapreduce.group; + +import org.apache.jena.hadoop.rdf.types.NodeWritable; + +import com.hp.hpl.jena.sparql.core.Quad; + +/** + * A mapper which assists in grouping quads by predicate by reassigning their keys + * to be their predicates + * + * + * + * @param + */ +public class QuadGroupByPredicateMapper extends AbstractQuadGroupingMapper { + + @Override + protected NodeWritable selectKey(Quad quad) { + return new NodeWritable(quad.getPredicate()); + } +} http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/QuadGroupBySubjectMapper.java ---------------------------------------------------------------------- diff --git a/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/QuadGroupBySubjectMapper.java b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/QuadGroupBySubjectMapper.java new file mode 100644 index 0000000..73809e8 --- /dev/null +++ b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/QuadGroupBySubjectMapper.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.jena.hadoop.rdf.mapreduce.group; + +import org.apache.jena.hadoop.rdf.types.NodeWritable; + +import com.hp.hpl.jena.sparql.core.Quad; + +/** + * A mapper which assists in grouping quads by subject by reassigning their keys + * to be their subjects + * + * + * + * @param + */ +public class QuadGroupBySubjectMapper extends AbstractQuadGroupingMapper { + + @Override + protected NodeWritable selectKey(Quad quad) { + return new NodeWritable(quad.getSubject()); + } +} http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/TripleGroupByObjectMapper.java ---------------------------------------------------------------------- diff --git a/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/TripleGroupByObjectMapper.java b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/TripleGroupByObjectMapper.java new file mode 100644 index 0000000..9fde939 --- /dev/null +++ b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/TripleGroupByObjectMapper.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.jena.hadoop.rdf.mapreduce.group; + +import org.apache.jena.hadoop.rdf.types.NodeWritable; + +import com.hp.hpl.jena.graph.Triple; + +/** + * A mapper which assists in grouping triples by object by reassigning their + * keys to be their objects + * + * + * + * @param + * Key type + */ +public class TripleGroupByObjectMapper extends AbstractTripleGroupingMapper { + + @Override + protected NodeWritable selectKey(Triple triple) { + return new NodeWritable(triple.getObject()); + } + +} http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/TripleGroupByPredicateMapper.java ---------------------------------------------------------------------- diff --git a/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/TripleGroupByPredicateMapper.java b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/TripleGroupByPredicateMapper.java new file mode 100644 index 0000000..dd15ef5 --- /dev/null +++ b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/TripleGroupByPredicateMapper.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation 
(ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.jena.hadoop.rdf.mapreduce.group; + +import org.apache.jena.hadoop.rdf.types.NodeWritable; + +import com.hp.hpl.jena.graph.Triple; + +/** + * A mapper which assists in grouping triples by predicate by reassigning their + * keys to be their predicates + * + * + * + * @param + * Key type + */ +public class TripleGroupByPredicateMapper extends AbstractTripleGroupingMapper { + + @Override + protected NodeWritable selectKey(Triple triple) { + return new NodeWritable(triple.getPredicate()); + } + +} http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/TripleGroupBySubjectMapper.java ---------------------------------------------------------------------- diff --git a/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/TripleGroupBySubjectMapper.java b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/TripleGroupBySubjectMapper.java new file mode 100644 index 0000000..f1116c1 --- /dev/null +++ b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/group/TripleGroupBySubjectMapper.java @@ 
-0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.jena.hadoop.rdf.mapreduce.group; + +import org.apache.jena.hadoop.rdf.types.NodeWritable; + +import com.hp.hpl.jena.graph.Triple; + +/** + * A mapper which assists in grouping triples by subject by reassigning their + * keys to be their subjects + * + * + * + * @param + * Key type + */ +public class TripleGroupBySubjectMapper extends AbstractTripleGroupingMapper { + + @Override + protected NodeWritable selectKey(Triple triple) { + return new NodeWritable(triple.getSubject()); + } + +} http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/AbstractNodeTupleSplitToNodesMapper.java ---------------------------------------------------------------------- diff --git a/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/AbstractNodeTupleSplitToNodesMapper.java b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/AbstractNodeTupleSplitToNodesMapper.java new file mode 100644 index 0000000..840d78c --- /dev/null +++ 
b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/AbstractNodeTupleSplitToNodesMapper.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.jena.hadoop.rdf.mapreduce.split; + +import java.io.IOException; + +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.jena.hadoop.rdf.types.AbstractNodeTupleWritable; +import org.apache.jena.hadoop.rdf.types.NodeWritable; + + +/** + * Abstract mapper implementation which splits the tuples into their constituent + * nodes preserving the keys as-is + * + * + * + * @param <TKey> + * Key type + * @param <TValue> + * Tuple type + * @param <T> + * Writable tuple type + */ +public abstract class AbstractNodeTupleSplitToNodesMapper<TKey, TValue, T extends AbstractNodeTupleWritable<TValue>> extends + Mapper<TKey, T, TKey, NodeWritable> { + + @Override + protected final void map(TKey key, T value, Context context) throws IOException, InterruptedException { + NodeWritable[] ns = this.split(value); + for (NodeWritable n : ns) { + context.write(key, n); + } + } + + /** + * Splits the node tuple type into the individual nodes + * + * @param tuple + * Tuple + * @return Nodes + */ + protected abstract NodeWritable[] split(T tuple); +} 
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/AbstractNodeTupleSplitWithNodesMapper.java ---------------------------------------------------------------------- diff --git a/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/AbstractNodeTupleSplitWithNodesMapper.java b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/AbstractNodeTupleSplitWithNodesMapper.java new file mode 100644 index 0000000..7dc85fd --- /dev/null +++ b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/AbstractNodeTupleSplitWithNodesMapper.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.jena.hadoop.rdf.mapreduce.split; + +import java.io.IOException; + +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.jena.hadoop.rdf.types.AbstractNodeTupleWritable; +import org.apache.jena.hadoop.rdf.types.NodeWritable; + + +/** + * Abstract mapper implementation which splits the tuples into their constituent + * nodes using the tuples as the keys and the nodes as the values + * + * + * + * @param <TKey> + * Key type + * @param <TValue> + * Tuple type + * @param <T> + * Writable tuple type + */ +public abstract class AbstractNodeTupleSplitWithNodesMapper<TKey, TValue, T extends AbstractNodeTupleWritable<TValue>> extends + Mapper<TKey, T, T, NodeWritable> { + + @Override + protected void map(TKey key, T value, Context context) throws IOException, InterruptedException { + NodeWritable[] ns = this.split(value); + for (NodeWritable n : ns) { + context.write(value, n); + } + } + + /** + * Splits the node tuple type into the individual nodes + * + * @param tuple + * Tuple + * @return Nodes + */ + protected abstract NodeWritable[] split(T tuple); +} http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/QuadSplitToNodesMapper.java ---------------------------------------------------------------------- diff --git a/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/QuadSplitToNodesMapper.java b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/QuadSplitToNodesMapper.java new file mode 100644 index 0000000..c993810 --- /dev/null +++ b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/QuadSplitToNodesMapper.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.jena.hadoop.rdf.mapreduce.split; + +import org.apache.jena.hadoop.rdf.types.NodeWritable; +import org.apache.jena.hadoop.rdf.types.QuadWritable; + +import com.hp.hpl.jena.sparql.core.Quad; + +/** + * A mapper which splits quads into their constituent nodes preserving the + * existing keys as-is + * + * + * + * @param + * Key type + */ +public class QuadSplitToNodesMapper extends AbstractNodeTupleSplitToNodesMapper { + + @Override + protected NodeWritable[] split(QuadWritable tuple) { + Quad q = tuple.get(); + return new NodeWritable[] { new NodeWritable(q.getGraph()), new NodeWritable(q.getSubject()), + new NodeWritable(q.getPredicate()), new NodeWritable(q.getObject()) }; + } +} http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/QuadSplitWithNodesMapper.java ---------------------------------------------------------------------- diff --git a/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/QuadSplitWithNodesMapper.java b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/QuadSplitWithNodesMapper.java new file mode 100644 index 0000000..09caef6 --- /dev/null +++ 
b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/QuadSplitWithNodesMapper.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.jena.hadoop.rdf.mapreduce.split; + +import org.apache.jena.hadoop.rdf.types.NodeWritable; +import org.apache.jena.hadoop.rdf.types.QuadWritable; + +import com.hp.hpl.jena.sparql.core.Quad; + +/** + * A mapper which splits quads into their constituent nodes using the quad as + * the key and the nodes as the values + * + * + * + * @param + * Key type + */ +public class QuadSplitWithNodesMapper extends AbstractNodeTupleSplitWithNodesMapper { + + @Override + protected NodeWritable[] split(QuadWritable tuple) { + Quad q = tuple.get(); + return new NodeWritable[] { new NodeWritable(q.getGraph()), new NodeWritable(q.getSubject()), + new NodeWritable(q.getPredicate()), new NodeWritable(q.getObject()) }; + } +} http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/TripleSplitToNodesMapper.java ---------------------------------------------------------------------- diff --git 
a/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/TripleSplitToNodesMapper.java b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/TripleSplitToNodesMapper.java new file mode 100644 index 0000000..0ef02d9 --- /dev/null +++ b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/TripleSplitToNodesMapper.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.jena.hadoop.rdf.mapreduce.split; + +import org.apache.jena.hadoop.rdf.types.NodeWritable; +import org.apache.jena.hadoop.rdf.types.TripleWritable; + +import com.hp.hpl.jena.graph.Triple; + +/** + * A mapper which splits triples into their constituent nodes + * + * + * + * @param Key type + */ +public class TripleSplitToNodesMapper extends AbstractNodeTupleSplitToNodesMapper { + + @Override + protected NodeWritable[] split(TripleWritable tuple) { + Triple t = tuple.get(); + return new NodeWritable[] { new NodeWritable(t.getSubject()), new NodeWritable(t.getPredicate()), + new NodeWritable(t.getObject()) }; + } +} http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/TripleSplitWithNodesMapper.java ---------------------------------------------------------------------- diff --git a/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/TripleSplitWithNodesMapper.java b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/TripleSplitWithNodesMapper.java new file mode 100644 index 0000000..7b18f55 --- /dev/null +++ b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/split/TripleSplitWithNodesMapper.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.jena.hadoop.rdf.mapreduce.split; + +import org.apache.jena.hadoop.rdf.types.NodeWritable; +import org.apache.jena.hadoop.rdf.types.TripleWritable; + +import com.hp.hpl.jena.graph.Triple; + +/** + * A mapper which splits triples into their constituent nodes + * + * + * + * @param Key type + */ +public class TripleSplitWithNodesMapper extends AbstractNodeTupleSplitWithNodesMapper { + + @Override + protected NodeWritable[] split(TripleWritable tuple) { + Triple t = tuple.get(); + return new NodeWritable[] { new NodeWritable(t.getSubject()), new NodeWritable(t.getPredicate()), + new NodeWritable(t.getObject()) }; + } +} http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/transform/AbstractTriplesToQuadsMapper.java ---------------------------------------------------------------------- diff --git a/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/transform/AbstractTriplesToQuadsMapper.java b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/transform/AbstractTriplesToQuadsMapper.java new file mode 100644 index 0000000..76137fe --- /dev/null +++ b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/transform/AbstractTriplesToQuadsMapper.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.jena.hadoop.rdf.mapreduce.transform; + +import java.io.IOException; + +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.jena.hadoop.rdf.types.QuadWritable; +import org.apache.jena.hadoop.rdf.types.TripleWritable; + +import com.hp.hpl.jena.graph.Node; +import com.hp.hpl.jena.graph.Triple; +import com.hp.hpl.jena.sparql.core.Quad; + +/** + * An abstract mapper which transforms triples into quads. Derived + * implementations may choose how the graph to which triples are assigned is + * decided. + *

+ * Keys are left as is by this mapper. + *

+ * + * + * + * @param + */ +public abstract class AbstractTriplesToQuadsMapper extends Mapper { + + @Override + protected final void map(TKey key, TripleWritable value, Context context) throws IOException, InterruptedException { + Triple triple = value.get(); + Node graphNode = this.selectGraph(triple); + context.write(key, new QuadWritable(new Quad(graphNode, triple))); + } + + /** + * Selects the graph name to use for converting the given triple into a quad + * + * @param triple + * Triple + * @return Tuple + */ + protected abstract Node selectGraph(Triple triple); +} http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/transform/QuadsToTriplesMapper.java ---------------------------------------------------------------------- diff --git a/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/transform/QuadsToTriplesMapper.java b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/transform/QuadsToTriplesMapper.java new file mode 100644 index 0000000..048e669 --- /dev/null +++ b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/transform/QuadsToTriplesMapper.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.jena.hadoop.rdf.mapreduce.transform; + +import java.io.IOException; + +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.jena.hadoop.rdf.types.QuadWritable; +import org.apache.jena.hadoop.rdf.types.TripleWritable; + + +/** + * A mapper which transforms quads into triples + *

+ * Keys are left as is by this mapper. + *

+ * + * + * + * @param + * Key type + */ +public class QuadsToTriplesMapper extends Mapper { + + @Override + protected void map(TKey key, QuadWritable value, Context context) throws IOException, InterruptedException { + context.write(key, new TripleWritable(value.get().asTriple())); + } + +} http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/transform/TriplesToQuadsBySubjectMapper.java ---------------------------------------------------------------------- diff --git a/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/transform/TriplesToQuadsBySubjectMapper.java b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/transform/TriplesToQuadsBySubjectMapper.java new file mode 100644 index 0000000..394d5fd --- /dev/null +++ b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/transform/TriplesToQuadsBySubjectMapper.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.jena.hadoop.rdf.mapreduce.transform; + +import com.hp.hpl.jena.graph.Node; +import com.hp.hpl.jena.graph.Triple; + +/** + * A mapper which converts triples into quads using the subjects of the triples + * as the graph nodes + * + * + * @param + * Key type + * + */ +public class TriplesToQuadsBySubjectMapper extends AbstractTriplesToQuadsMapper { + + @Override + protected final Node selectGraph(Triple triple) { + return triple.getSubject(); + } + +} http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/transform/TriplesToQuadsConstantGraphMapper.java ---------------------------------------------------------------------- diff --git a/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/transform/TriplesToQuadsConstantGraphMapper.java b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/transform/TriplesToQuadsConstantGraphMapper.java new file mode 100644 index 0000000..ef19edf --- /dev/null +++ b/jena-elephas/jena-elephas-mapreduce/src/main/java/org/apache/jena/hadoop/rdf/mapreduce/transform/TriplesToQuadsConstantGraphMapper.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.jena.hadoop.rdf.mapreduce.transform; + +import java.io.IOException; + +import com.hp.hpl.jena.graph.Node; +import com.hp.hpl.jena.graph.Triple; +import com.hp.hpl.jena.sparql.core.Quad; + +/** + * A mapper which converts triples to quads where all triples are placed in the + * same graph + * + * + * + * @param + * Key type + */ +public class TriplesToQuadsConstantGraphMapper extends AbstractTriplesToQuadsMapper { + + private Node graphNode; + + @Override + protected void setup(Context context) throws IOException, InterruptedException { + super.setup(context); + this.graphNode = this.getGraphNode(); + } + + /** + * Gets the graph node that will be used for all quads, this will be called + * once and only once during the + * {@link #setup(org.apache.hadoop.mapreduce.Mapper.Context)} method and the + * value returned cached for use throughout the lifetime of this mapper. + *

+ * This implementation always uses the default graph as the graph for + * generated quads. You can override this method in your own derived + * implementation to put triples into a different graph than the default + * graph. + *

+ *

+ * If you want to select a different graph for each triple you + * should instead extend {@link AbstractTriplesToQuadsMapper} and override + * the {@link #selectGraph(Triple)} method, which is sealed in this + * implementation. + *

+ * + * @return + */ + protected Node getGraphNode() { + return Quad.defaultGraphNodeGenerated; + } + + @Override + protected final Node selectGraph(Triple triple) { + return this.graphNode; + } + +} http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/AbstractMapReduceTests.java ---------------------------------------------------------------------- diff --git a/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/AbstractMapReduceTests.java b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/AbstractMapReduceTests.java new file mode 100644 index 0000000..32c40f7 --- /dev/null +++ b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/AbstractMapReduceTests.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.jena.hadoop.rdf.mapreduce; + +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mapreduce.Reducer; +import org.apache.hadoop.mrunit.mapreduce.MapReduceDriver; + +/** + * Abstract tests for mappers + * + * + * @param + * Mapper input key type + * @param + * Mapper input value type + * @param + * Mapper output/Reducer input key type + * @param + * Mapper output/Reducer input value type + * @param + * Reducer output key type + * @param + * Reducer output value type + * + * + */ +public abstract class AbstractMapReduceTests { + + /** + * Gets the mapper instance to test + * + * @return Mapper instance + */ + protected abstract Mapper getMapperInstance(); + + /** + * Gets the reducer instance to test + * + * @return Reducer instance + */ + protected abstract Reducer getReducerInstance(); + + /** + * Gets a map reduce driver that can be used to create a test case + * + * @return Map reduce driver + */ + protected MapReduceDriver getMapReduceDriver() { + return new MapReduceDriver( + this.getMapperInstance(), this.getReducerInstance()); + } +} http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/AbstractMapperTests.java ---------------------------------------------------------------------- diff --git a/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/AbstractMapperTests.java b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/AbstractMapperTests.java new file mode 100644 index 0000000..ce6ab9d --- /dev/null +++ b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/AbstractMapperTests.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.jena.hadoop.rdf.mapreduce; + +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mrunit.mapreduce.MapDriver; + +/** + * Abstract tests for mappers + * + * + * @param + * Input key type + * @param + * Input value type + * @param + * Output key type + * @param + * Output value type + * + */ +public abstract class AbstractMapperTests { + + /** + * Gets the mapper instance to test + * + * @return Mapper instance + */ + protected abstract Mapper getInstance(); + + /** + * Gets a map driver that can be used to create a test case + * + * @return Map driver + */ + protected MapDriver getMapDriver() { + MapDriver driver = new MapDriver( + this.getInstance()); + this.configureDriver(driver); + return driver; + } + + /** + * Method that may be overridden by test harnesses which need to configure + * the driver in more detail e.g. 
add configuration keys + * + * @param driver + * Driver + */ + protected void configureDriver(MapDriver driver) { + // Does nothing + } +} http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/TestDistinctTriples.java ---------------------------------------------------------------------- diff --git a/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/TestDistinctTriples.java b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/TestDistinctTriples.java new file mode 100644 index 0000000..af32dac --- /dev/null +++ b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/TestDistinctTriples.java @@ -0,0 +1,129 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.jena.hadoop.rdf.mapreduce; + +import java.io.IOException; + +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mapreduce.Reducer; +import org.apache.hadoop.mrunit.mapreduce.MapReduceDriver; +import org.apache.jena.hadoop.rdf.types.TripleWritable; +import org.junit.Assert; +import org.junit.Test; + +import com.hp.hpl.jena.graph.Node; +import com.hp.hpl.jena.graph.NodeFactory; +import com.hp.hpl.jena.graph.Triple; + +public class TestDistinctTriples + extends + AbstractMapReduceTests { + + @Override + protected Mapper getMapperInstance() { + return new ValuePlusNullMapper(); + } + + @Override + protected Reducer getReducerInstance() { + return new NullPlusKeyReducer(); + } + + @Test + public void distinct_triples_01() throws IOException { + MapReduceDriver driver = this + .getMapReduceDriver(); + + Triple t = new Triple(NodeFactory.createURI("urn:s"), NodeFactory.createURI("urn:p"), + NodeFactory.createLiteral("1")); + TripleWritable tw = new TripleWritable(t); + driver.addInput(new LongWritable(1), tw); + driver.addOutput(NullWritable.get(), tw); + + driver.runTest(); + } + + @Test + public void distinct_triples_02() throws IOException { + MapReduceDriver driver = this + .getMapReduceDriver(); + + Triple t = new Triple(NodeFactory.createURI("urn:s"), NodeFactory.createURI("urn:p"), + NodeFactory.createLiteral("1")); + TripleWritable tw = new TripleWritable(t); + for (int i = 0; i < 100; i++) { + driver.addInput(new LongWritable(i), tw); + } + driver.addOutput(NullWritable.get(), tw); + + driver.runTest(); + } + + @Test + public void distinct_triples_03() throws IOException { + MapReduceDriver driver = this + .getMapReduceDriver(); + + Triple t = new Triple(NodeFactory.createURI("urn:s"), NodeFactory.createURI("urn:p"), + NodeFactory.createLiteral("1")); + Triple t2 = new Triple(t.getSubject(), t.getPredicate(), 
NodeFactory.createLiteral("2")); + Assert.assertNotEquals(t, t2); + + TripleWritable tw = new TripleWritable(t); + TripleWritable tw2 = new TripleWritable(t2); + Assert.assertNotEquals(tw, tw2); + + driver.addInput(new LongWritable(1), tw); + driver.addInput(new LongWritable(2), tw2); + driver.addOutput(NullWritable.get(), tw); + driver.addOutput(NullWritable.get(), tw2); + + driver.runTest(false); + } + + @Test + public void distinct_triples_04() throws IOException { + MapReduceDriver driver = this + .getMapReduceDriver(); + + Node s1 = NodeFactory.createURI("urn:nf#cbf2b2c7-109e-4097-bbea-f67f272c7fcc"); + Node s2 = NodeFactory.createURI("urn:nf#bb08b75c-1ad2-47ef-acd2-eb2d92b94b89"); + Node p = NodeFactory.createURI("urn:p"); + Node o = NodeFactory.createURI("urn:66.230.159.118"); + Assert.assertNotEquals(s1, s2); + + Triple t1 = new Triple(s1, p, o); + Triple t2 = new Triple(s2, p, o); + Assert.assertNotEquals(t1, t2); + + TripleWritable tw1 = new TripleWritable(t1); + TripleWritable tw2 = new TripleWritable(t2); + Assert.assertNotEquals(tw1, tw2); + Assert.assertNotEquals(0, tw1.compareTo(tw2)); + + driver.addInput(new LongWritable(1), tw1); + driver.addInput(new LongWritable(2), tw2); + driver.addOutput(NullWritable.get(), tw1); + driver.addOutput(NullWritable.get(), tw2); + + driver.runTest(false); + } +} http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/AbstractCharacteristicSetGeneratingReducerTests.java ---------------------------------------------------------------------- diff --git a/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/AbstractCharacteristicSetGeneratingReducerTests.java b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/AbstractCharacteristicSetGeneratingReducerTests.java new file mode 100644 index 0000000..b2d0b92 --- 
/dev/null +++ b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/AbstractCharacteristicSetGeneratingReducerTests.java @@ -0,0 +1,185 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.jena.hadoop.rdf.mapreduce.characteristics; + +import java.io.IOException; + +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.mrunit.mapreduce.MapReduceDriver; +import org.apache.jena.hadoop.rdf.mapreduce.AbstractMapReduceTests; +import org.apache.jena.hadoop.rdf.mapreduce.characteristics.AbstractCharacteristicSetGeneratingReducer; +import org.apache.jena.hadoop.rdf.types.AbstractNodeTupleWritable; +import org.apache.jena.hadoop.rdf.types.CharacteristicSetWritable; +import org.apache.jena.hadoop.rdf.types.CharacteristicWritable; +import org.apache.jena.hadoop.rdf.types.NodeWritable; +import org.junit.Test; + +import com.hp.hpl.jena.graph.NodeFactory; + +/** + * Abstract tests for the {@link AbstractCharacteristicSetGeneratingReducer} + * + * + * + * @param + * @param + */ +public abstract class AbstractCharacteristicSetGeneratingReducerTests> + extends AbstractMapReduceTests { + + /** + * Create 
a tuple + * + * @param i + * Key to use in creating the subject + * @param predicateUri + * Predicate URI string + * @return Tuple + */ + protected abstract T createTuple(int i, String predicateUri); + + /** + * Creates a set consisting of the given predicates + * + * @param predicates + * Predicates + * @return Set + */ + protected CharacteristicSetWritable createSet(MapReduceDriver driver, int occurrences, String... predicates) { + CharacteristicSetWritable set = new CharacteristicSetWritable(); + for (String predicateUri : predicates) { + set.add(new CharacteristicWritable(NodeFactory.createURI(predicateUri))); + } + for (int i = 1; i <= occurrences; i++) { + driver.addOutput(set, NullWritable.get()); + } + return set; + } + + /** + * Test basic characteristic set computation + * + * @throws IOException + */ + @Test + public void characteristic_set_generating_reducer_01() throws IOException { + MapReduceDriver driver = this + .getMapReduceDriver(); + T tuple = this.createTuple(1, "http://predicate"); + driver.addInput(new LongWritable(1), tuple); + + this.createSet(driver, 1, "http://predicate"); + + driver.runTest(false); + } + + /** + * Test basic characteristic set computation + * + * @throws IOException + */ + @Test + public void characteristic_set_generating_reducer_02() throws IOException { + MapReduceDriver driver = this + .getMapReduceDriver(); + T tuple = this.createTuple(1, "http://predicate"); + driver.addInput(new LongWritable(1), tuple); + driver.addInput(new LongWritable(1), tuple); + + this.createSet(driver, 1, "http://predicate"); + + driver.runTest(false); + } + + /** + * Test basic characteristic set computation + * + * @throws IOException + */ + @Test + public void characteristic_set_generating_reducer_03() throws IOException { + MapReduceDriver driver = this + .getMapReduceDriver(); + T tuple = this.createTuple(1, "http://predicate"); + driver.addInput(new LongWritable(1), tuple); + tuple = this.createTuple(2, "http://predicate"); + 
driver.addInput(new LongWritable(2), tuple); + + this.createSet(driver, 2, "http://predicate"); + + driver.runTest(false); + } + + /** + * Test basic characteristic set computation + * + * @throws IOException + */ + @Test + public void characteristic_set_generating_reducer_04() throws IOException { + MapReduceDriver driver = this + .getMapReduceDriver(); + T tuple = this.createTuple(1, "http://predicate"); + driver.addInput(new LongWritable(1), tuple); + tuple = this.createTuple(1, "http://other"); + driver.addInput(new LongWritable(1), tuple); + + // Single entry sets + this.createSet(driver, 1, "http://predicate"); + this.createSet(driver, 1, "http://other"); + + // Two entry sets + this.createSet(driver, 1, "http://predicate", "http://other"); + + driver.runTest(false); + } + + /** + * Test basic characteristic set computation + * + * @throws IOException + */ + @Test + public void characteristic_set_generating_reducer_05() throws IOException { + MapReduceDriver driver = this + .getMapReduceDriver(); + T tuple = this.createTuple(1, "http://predicate"); + driver.addInput(new LongWritable(1), tuple); + tuple = this.createTuple(1, "http://other"); + driver.addInput(new LongWritable(2), tuple); + tuple = this.createTuple(1, "http://third"); + driver.addInput(new LongWritable(3), tuple); + + // Single entry sets + this.createSet(driver, 1, "http://predicate"); + this.createSet(driver, 1, "http://other"); + this.createSet(driver, 1, "http://third"); + + // Two entry sets + this.createSet(driver, 1, "http://predicate", "http://other"); + this.createSet(driver, 1, "http://predicate", "http://third"); + this.createSet(driver, 1, "http://other", "http://third"); + + // Three entry sets + this.createSet(driver, 1, "http://predicate", "http://other", "http://third"); + + driver.runTest(false); + } +} 
http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/CharacteristicSetReducerTest.java ---------------------------------------------------------------------- diff --git a/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/CharacteristicSetReducerTest.java b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/CharacteristicSetReducerTest.java new file mode 100644 index 0000000..30da730 --- /dev/null +++ b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/CharacteristicSetReducerTest.java @@ -0,0 +1,192 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.jena.hadoop.rdf.mapreduce.characteristics; + +import java.io.IOException; +import java.util.List; + +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mapreduce.Reducer; +import org.apache.hadoop.mrunit.mapreduce.MapReduceDriver; +import org.apache.hadoop.mrunit.types.Pair; +import org.apache.jena.hadoop.rdf.mapreduce.AbstractMapReduceTests; +import org.apache.jena.hadoop.rdf.mapreduce.characteristics.CharacteristicSetReducer; +import org.apache.jena.hadoop.rdf.types.CharacteristicSetWritable; +import org.apache.jena.hadoop.rdf.types.CharacteristicWritable; +import org.junit.Assert; +import org.junit.Test; + +import com.hp.hpl.jena.graph.NodeFactory; + +/** + * Abstract tests for the {@link CharacteristicSetReducer} + * + * + */ +public class CharacteristicSetReducerTest + extends + AbstractMapReduceTests { + + @Override + protected final Mapper getMapperInstance() { + // Identity mapper + return new Mapper(); + } + + @Override + protected final Reducer getReducerInstance() { + return new CharacteristicSetReducer(); + } + + /** + * Creates a set consisting of the given predicates + * + * @param predicates + * Predicates + * @return Set + */ + protected CharacteristicSetWritable createSet( + MapReduceDriver driver, + int inputOccurrences, int outputOccurrences, String... 
predicates) { + CharacteristicSetWritable set = new CharacteristicSetWritable(); + for (String predicateUri : predicates) { + set.add(new CharacteristicWritable(NodeFactory.createURI(predicateUri))); + } + for (int i = 1; i <= inputOccurrences; i++) { + driver.addInput(set, set); + } + for (int i = 1; i <= outputOccurrences; i++) { + driver.addOutput(set, NullWritable.get()); + } + return set; + } + + /** + * Test characteristic set reduction + * + * @throws IOException + */ + @Test + public void characteristic_set_reducer_01() throws IOException { + MapReduceDriver driver = this + .getMapReduceDriver(); + + this.createSet(driver, 1, 1, "http://predicate"); + + driver.runTest(false); + } + + /** + * Test characteristic set reduction + * + * @throws IOException + */ + @Test + public void characteristic_set_reducer_02() throws IOException { + MapReduceDriver driver = this + .getMapReduceDriver(); + + this.createSet(driver, 2, 1, "http://predicate"); + + driver.runTest(false); + + List> results = driver.run(); + CharacteristicSetWritable cw = results.get(0).getFirst(); + Assert.assertEquals(2, cw.getCount().get()); + } + + /** + * Test characteristic set reduction + * + * @throws IOException + */ + @Test + public void characteristic_set_reducer_03() throws IOException { + MapReduceDriver driver = this + .getMapReduceDriver(); + + this.createSet(driver, 1, 1, "http://predicate"); + this.createSet(driver, 1, 1, "http://other"); + + driver.runTest(false); + } + + /** + * Test characteristic set reduction + * + * @throws IOException + */ + @Test + public void characteristic_set_reducer_04() throws IOException { + MapReduceDriver driver = this + .getMapReduceDriver(); + + this.createSet(driver, 2, 1, "http://predicate"); + this.createSet(driver, 1, 1, "http://other"); + + driver.runTest(false); + + List> results = driver.run(); + for (Pair pair : results) { + CharacteristicSetWritable cw = pair.getFirst(); + boolean expectTwo = 
cw.getCharacteristics().next().getNode().get().hasURI("http://predicate"); + Assert.assertEquals(expectTwo ? 2 : 1, cw.getCount().get()); + } + } + + /** + * Test characteristic set reduction + * + * @throws IOException + */ + @Test + public void characteristic_set_reducer_05() throws IOException { + MapReduceDriver driver = this + .getMapReduceDriver(); + + this.createSet(driver, 1, 1, "http://predicate", "http://other"); + this.createSet(driver, 1, 1, "http://other"); + + driver.runTest(false); + } + + /** + * Test characteristic set reduction + * + * @throws IOException + */ + @Test + public void characteristic_set_reducer_06() throws IOException { + MapReduceDriver driver = this + .getMapReduceDriver(); + + this.createSet(driver, 2, 1, "http://predicate", "http://other"); + this.createSet(driver, 1, 1, "http://other"); + + driver.runTest(false); + + List> results = driver.run(); + for (Pair pair : results) { + CharacteristicSetWritable cw = pair.getFirst(); + boolean expectTwo = cw.hasCharacteristic("http://predicate"); + Assert.assertEquals(expectTwo ? 
2 : 1, cw.getCount().get()); + } + } +} http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/TripleCharacteristicSetGeneratingReducerTest.java ---------------------------------------------------------------------- diff --git a/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/TripleCharacteristicSetGeneratingReducerTest.java b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/TripleCharacteristicSetGeneratingReducerTest.java new file mode 100644 index 0000000..e647b68 --- /dev/null +++ b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/characteristics/TripleCharacteristicSetGeneratingReducerTest.java @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.jena.hadoop.rdf.mapreduce.characteristics; + +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mapreduce.Reducer; +import org.apache.jena.hadoop.rdf.mapreduce.characteristics.TripleCharacteristicSetGeneratingReducer; +import org.apache.jena.hadoop.rdf.mapreduce.group.TripleGroupBySubjectMapper; +import org.apache.jena.hadoop.rdf.types.CharacteristicSetWritable; +import org.apache.jena.hadoop.rdf.types.NodeWritable; +import org.apache.jena.hadoop.rdf.types.TripleWritable; + +import com.hp.hpl.jena.datatypes.xsd.XSDDatatype; +import com.hp.hpl.jena.graph.NodeFactory; +import com.hp.hpl.jena.graph.Triple; + +/** + * Tests for the {@link TripleCharacteristicSetGeneratingReducer} + * + * + * + */ +public class TripleCharacteristicSetGeneratingReducerTest extends AbstractCharacteristicSetGeneratingReducerTests { + + @Override + protected Mapper getMapperInstance() { + return new TripleGroupBySubjectMapper(); + } + + @Override + protected Reducer getReducerInstance() { + return new TripleCharacteristicSetGeneratingReducer(); + } + + @Override + protected TripleWritable createTuple(int i, String predicateUri) { + return new TripleWritable(new Triple(NodeFactory.createURI("http://subjects/" + i), NodeFactory.createURI(predicateUri), + NodeFactory.createLiteral(Integer.toString(i), XSDDatatype.XSDinteger))); + } + +} http://git-wip-us.apache.org/repos/asf/jena/blob/49c4cffe/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/count/AbstractNodeTupleNodeCountReducedTests.java ---------------------------------------------------------------------- diff --git a/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/count/AbstractNodeTupleNodeCountReducedTests.java 
b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/count/AbstractNodeTupleNodeCountReducedTests.java new file mode 100644 index 0000000..ebdbcde --- /dev/null +++ b/jena-elephas/jena-elephas-mapreduce/src/test/java/org/apache/jena/hadoop/rdf/mapreduce/count/AbstractNodeTupleNodeCountReducedTests.java @@ -0,0 +1,149 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.jena.hadoop.rdf.mapreduce.count; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; +import java.util.Map.Entry; + +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.mrunit.mapreduce.MapReduceDriver; +import org.apache.jena.hadoop.rdf.mapreduce.AbstractMapReduceTests; +import org.apache.jena.hadoop.rdf.mapreduce.count.AbstractNodeTupleNodeCountMapper; +import org.apache.jena.hadoop.rdf.types.AbstractNodeTupleWritable; +import org.apache.jena.hadoop.rdf.types.NodeWritable; +import org.junit.Test; + + +/** + * Abstract tests for mappers derived from + * {@link AbstractNodeTupleNodeCountMapper} + * + * + * + * @param + * Tuple type + * @param + * Writable tuple type + */ +public abstract class AbstractNodeTupleNodeCountReducedTests> extends + AbstractMapReduceTests { + + /** + * Generates tuples for the tests + * + * @param driver + * Driver + * @param num + * Number of tuples to generate + */ + protected void generateData(MapReduceDriver driver, int num) { + Map counts = new HashMap(); + for (int i = 0; i < num; i++) { + LongWritable key = new LongWritable(i); + T value = this.createValue(i); + NodeWritable[] nodes = this.getNodes(value); + + driver.addInput(key, value); + for (NodeWritable n : nodes) { + if (counts.containsKey(n)) { + counts.put(n, counts.get(n) + 1); + } else { + counts.put(n, 1l); + } + } + } + + for (Entry kvp : counts.entrySet()) { + driver.addOutput(kvp.getKey(), new LongWritable(kvp.getValue())); + } + } + + /** + * Creates a tuple value + * + * @param i + * Index + * @return Tuple value + */ + protected abstract T createValue(int i); + + /** + * Splits the tuple value into its constituent nodes + * + * @param tuple + * Tuple value + * @return Nodes + */ + protected abstract NodeWritable[] getNodes(T tuple); + + /** + * Runs a node count test + * + * @param num + * Number of tuples to generate + * @throws IOException + */ + protected void testNodeCount(int num) throws 
IOException { + MapReduceDriver driver = this.getMapReduceDriver(); + this.generateData(driver, num); + driver.runTest(false); + } + + /** + * Tests node counting + * + * @throws IOException + */ + @Test + public void node_count_01() throws IOException { + this.testNodeCount(1); + } + + /** + * Tests node counting + * + * @throws IOException + */ + @Test + public void node_count_02() throws IOException { + this.testNodeCount(100); + } + + /** + * Tests node counting + * + * @throws IOException + */ + @Test + public void node_count_03() throws IOException { + this.testNodeCount(1000); + } + + /** + * Tests node counting + * + * @throws IOException + */ + @Test + public void node_count_04() throws IOException { + this.testNodeCount(2500); + } +}