clerezza-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From r...@apache.org
Subject svn commit: r919022 - in /incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.rdf.utils/src: main/java/org/apache/clerezza/rdf/utils/Smusher.java test/java/org/apache/clerezza/rdf/utils/IfpSmushTest.java
Date Thu, 04 Mar 2010 15:30:46 GMT
Author: reto
Date: Thu Mar  4 15:30:46 2010
New Revision: 919022

URL: http://svn.apache.org/viewvc?rev=919022&view=rev
Log:
CLEREZZA-145: added Smusher doing IFP smushing

Added:
    incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/Smusher.java
    incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.rdf.utils/src/test/java/org/apache/clerezza/rdf/utils/IfpSmushTest.java

Added: incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/Smusher.java
URL: http://svn.apache.org/viewvc/incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/Smusher.java?rev=919022&view=auto
==============================================================================
--- incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/Smusher.java
(added)
+++ incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.rdf.utils/src/main/java/org/apache/clerezza/rdf/utils/Smusher.java
Thu Mar  4 15:30:46 2010
@@ -0,0 +1,210 @@
+/*
+ *  Copyright 2010 reto.
+ * 
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ * 
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+package org.apache.clerezza.rdf.utils;
+
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.clerezza.rdf.core.BNode;
+import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.NonLiteral;
+import org.apache.clerezza.rdf.core.Resource;
+import org.apache.clerezza.rdf.core.Triple;
+import org.apache.clerezza.rdf.core.TripleCollection;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.impl.SimpleMGraph;
+import org.apache.clerezza.rdf.core.impl.TripleImpl;
+import org.apache.clerezza.rdf.ontologies.OWL;
+import org.apache.clerezza.rdf.ontologies.RDF;
+
+/**
+ * A utility to equate duplicate nodes in an MGraph. Currently only nodes that
+ * share a value for an inverse functional property (IFP) are equated.
+ *
+ * @author reto
+ */
+public class Smusher {
+
+	/**
+	 * Smushes {@code mGraph} in place, given the ontological facts in
+	 * {@code tBox}. Currently only one step of IFP smushing is done, i.e.
+	 * only IFPs are taken into account and only nodes that have the same
+	 * node as IFP object in the original graph are equated. (Calling the
+	 * method a second time might lead to additional smushings.)
+	 * <p>
+	 * Every triple mentioning an equated node as subject or object is
+	 * replaced by a triple using the replacement node chosen for its
+	 * equivalence set. When an equivalence set contains more than one
+	 * UriRef, owl:sameAs statements linking the other UriRefs to the chosen
+	 * one are added to {@code mGraph}.
+	 *
+	 * @param mGraph the graph to smush; it is modified by this method
+	 * @param tBox the triples in which the inverse functional properties
+	 *		are declared (as rdf:type owl:InverseFunctionalProperty)
+	 */
+	public static void smush(MGraph mGraph, TripleCollection tBox) {
+		final Set<UriRef> ifps = getIfps(tBox);
+
+		// Group the subjects by the (ifp, value) pair they are described with.
+		final Map<PredicateObject, Set<NonLiteral>> ifp2nodesMap =
+				new HashMap<PredicateObject, Set<NonLiteral>>();
+		for (Triple triple : mGraph) {
+			final UriRef predicate = triple.getPredicate();
+			if (!ifps.contains(predicate)) {
+				continue;
+			}
+			final PredicateObject po = new PredicateObject(predicate, triple.getObject());
+			Set<NonLiteral> equivalentNodes = ifp2nodesMap.get(po);
+			if (equivalentNodes == null) {
+				equivalentNodes = new HashSet<NonLiteral>();
+				ifp2nodesMap.put(po, equivalentNodes);
+			}
+			equivalentNodes.add(triple.getSubject());
+		}
+
+		// Decide for every equated node by which node it is to be replaced.
+		final Map<NonLiteral, NonLiteral> current2ReplacementMap =
+				new HashMap<NonLiteral, NonLiteral>();
+		final MGraph owlSameAsGraph = new SimpleMGraph();
+		for (Set<NonLiteral> equivalenceSet : uniteEquivalenceSets(ifp2nodesMap.values())) {
+			if (equivalenceSet.size() < 2) {
+				// a node that shares its ifp values with no other node has
+				// no duplicate, so there is nothing to replace
+				continue;
+			}
+			final NonLiteral replacement = getReplacementFor(equivalenceSet, owlSameAsGraph);
+			for (NonLiteral current : equivalenceSet) {
+				if (!current.equals(replacement)) {
+					current2ReplacementMap.put(current, replacement);
+				}
+			}
+		}
+
+		// Remove the triples mentioning a replaced node and collect their
+		// rewritten counterparts; these are added only after the iteration
+		// so that the graph is not added to while it is being iterated.
+		final Set<Triple> newTriples = new HashSet<Triple>();
+		for (Iterator<Triple> it = mGraph.iterator(); it.hasNext();) {
+			final Triple triple = it.next();
+			final NonLiteral subject = triple.getSubject();
+			final Resource object = triple.getObject();
+			final NonLiteral subjectReplacement = current2ReplacementMap.get(subject);
+			@SuppressWarnings("element-type-mismatch")
+			final Resource objectReplacement = current2ReplacementMap.get(object);
+			if ((subjectReplacement == null) && (objectReplacement == null)) {
+				continue;
+			}
+			it.remove();
+			newTriples.add(new TripleImpl(
+					(subjectReplacement != null) ? subjectReplacement : subject,
+					triple.getPredicate(),
+					(objectReplacement != null) ? objectReplacement : object));
+		}
+		for (Triple triple : newTriples) {
+			mGraph.add(triple);
+		}
+		mGraph.addAll(owlSameAsGraph);
+	}
+
+	/**
+	 * Collects the properties declared in {@code tBox} to be of rdf:type
+	 * owl:InverseFunctionalProperty.
+	 *
+	 * @param tBox the triples containing the property declarations
+	 * @return the declared inverse functional properties
+	 */
+	private static Set<UriRef> getIfps(TripleCollection tBox) {
+		final Iterator<Triple> ifpDefinitions = tBox.filter(null, RDF.type,
+				OWL.InverseFunctionalProperty);
+		final Set<UriRef> ifps = new HashSet<UriRef>();
+		while (ifpDefinitions.hasNext()) {
+			final NonLiteral property = ifpDefinitions.next().getSubject();
+			// The predicate of a triple is always a UriRef, so a property
+			// declared on another kind of node can never match a triple;
+			// skip it instead of failing with a ClassCastException.
+			if (property instanceof UriRef) {
+				ifps.add((UriRef) property);
+			}
+		}
+		return ifps;
+	}
+
+	/**
+	 * Chooses the node that is to replace all the nodes of an equivalence
+	 * set. If the set contains no UriRef a new BNode is returned; otherwise
+	 * one of the contained UriRefs is returned and, for every other UriRef
+	 * of the set, an owl:sameAs statement pointing to the returned one is
+	 * added to {@code owlSameAsGraph}.
+	 *
+	 * @param equivalenceSet the nodes found to be equivalent
+	 * @param owlSameAsGraph the graph to which owl:sameAs statements are added
+	 * @return the node by which the nodes of the set are to be replaced
+	 */
+	private static NonLiteral getReplacementFor(Set<NonLiteral> equivalenceSet,
+			MGraph owlSameAsGraph) {
+		final Set<UriRef> uriRefs = new HashSet<UriRef>();
+		for (NonLiteral nonLiteral : equivalenceSet) {
+			if (nonLiteral instanceof UriRef) {
+				uriRefs.add((UriRef) nonLiteral);
+			}
+		}
+		if (uriRefs.isEmpty()) {
+			return new BNode();
+		}
+		final Iterator<UriRef> uriRefIter = uriRefs.iterator();
+		//instead of an arbitrary one we might either decide lexicographically
+		//or look at their frequency in mGraph
+		final UriRef first = uriRefIter.next();
+		while (uriRefIter.hasNext()) {
+			owlSameAsGraph.add(new TripleImpl(uriRefIter.next(), OWL.sameAs, first));
+		}
+		return first;
+	}
+
+	/**
+	 * Unites the sets that directly or transitively share a node, so that
+	 * the returned sets are pairwise disjoint: given {a, b} and {b, c} the
+	 * result contains the single set {a, b, c}.
+	 *
+	 * @param originalSets the possibly overlapping sets of equivalent nodes
+	 * @return the disjoint sets resulting from uniting the overlapping ones
+	 */
+	private static Set<Set<NonLiteral>> uniteEquivalenceSets(
+			Collection<Set<NonLiteral>> originalSets) {
+		// maps every node seen so far to the united set it currently belongs to
+		final Map<NonLiteral, Set<NonLiteral>> node2UnitedSet =
+				new HashMap<NonLiteral, Set<NonLiteral>>();
+		for (Set<NonLiteral> originalSet : originalSets) {
+			final Set<NonLiteral> united = new HashSet<NonLiteral>(originalSet);
+			for (NonLiteral node : originalSet) {
+				final Set<NonLiteral> overlapping = node2UnitedSet.get(node);
+				if (overlapping != null) {
+					united.addAll(overlapping);
+				}
+			}
+			// re-point all members, including those of the absorbed sets,
+			// so that no absorbed set remains referenced
+			for (NonLiteral node : united) {
+				node2UnitedSet.put(node, united);
+			}
+		}
+		return new HashSet<Set<NonLiteral>>(node2UnitedSet.values());
+	}
+
+	/**
+	 * A predicate-object pair, used as key to group the subjects having the
+	 * same value for the same property. Neither field may be null.
+	 */
+	static class PredicateObject {
+
+		final UriRef predicate;
+		final Resource object;
+
+		public PredicateObject(UriRef predicate, Resource object) {
+			this.predicate = predicate;
+			this.object = object;
+		}
+
+		/**
+		 * Two instances are equal if both their predicates and their
+		 * objects are equal; instance identity is not required.
+		 */
+		@Override
+		public boolean equals(Object obj) {
+			if (this == obj) {
+				return true;
+			}
+			if ((obj == null) || (getClass() != obj.getClass())) {
+				return false;
+			}
+			final PredicateObject other = (PredicateObject) obj;
+			return this.predicate.equals(other.predicate)
+					&& this.object.equals(other.object);
+		}
+
+		@Override
+		public int hashCode() {
+			int hash = 3;
+			hash = 29 * hash + this.predicate.hashCode();
+			hash = 13 * hash + this.object.hashCode();
+			return hash;
+		}
+	}
+}

Added: incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.rdf.utils/src/test/java/org/apache/clerezza/rdf/utils/IfpSmushTest.java
URL: http://svn.apache.org/viewvc/incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.rdf.utils/src/test/java/org/apache/clerezza/rdf/utils/IfpSmushTest.java?rev=919022&view=auto
==============================================================================
--- incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.rdf.utils/src/test/java/org/apache/clerezza/rdf/utils/IfpSmushTest.java
(added)
+++ incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.rdf.utils/src/test/java/org/apache/clerezza/rdf/utils/IfpSmushTest.java
Thu Mar  4 15:30:46 2010
@@ -0,0 +1,118 @@
+/*
+ *  Copyright 2010 reto.
+ * 
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ * 
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.clerezza.rdf.utils;
+
+import org.apache.clerezza.rdf.core.BNode;
+import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
+import org.apache.clerezza.rdf.core.impl.SimpleMGraph;
+import org.apache.clerezza.rdf.core.impl.TripleImpl;
+import org.apache.clerezza.rdf.core.serializedform.Serializer;
+import org.apache.clerezza.rdf.ontologies.FOAF;
+import org.apache.clerezza.rdf.ontologies.OWL;
+import org.apache.clerezza.rdf.ontologies.RDF;
+import org.apache.clerezza.rdf.ontologies.RDFS;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Tests the IFP smushing done by {@link Smusher}, using an ontology that
+ * declares foaf:mbox to be an inverse functional property.
+ *
+ * @author reto
+ */
+public class IfpSmushTest {
+
+	private final MGraph ontology = new SimpleMGraph();
+	{
+		ontology.add(new TripleImpl(FOAF.mbox, RDF.type, OWL.InverseFunctionalProperty));
+	}
+
+	/**
+	 * Two bnodes sharing an mbox: the two mbox triples collapse into one,
+	 * leaving one mbox triple and the two comments.
+	 */
+	@Test
+	public void simpleBNode()  {
+		MGraph mGraph = new SimpleMGraph();
+		UriRef mbox1 = new UriRef("mailto:foo@example.org");
+		final BNode bNode1 = new BNode();
+		mGraph.add(new TripleImpl(bNode1, FOAF.mbox, mbox1));
+		mGraph.add(new TripleImpl(bNode1, RDFS.comment, 
+				new PlainLiteralImpl("a comment")));
+		final BNode bNode2 = new BNode();
+		mGraph.add(new TripleImpl(bNode2, FOAF.mbox, mbox1));
+		mGraph.add(new TripleImpl(bNode2, RDFS.comment, 
+				new PlainLiteralImpl("another comment")));
+		Smusher.smush(mGraph, ontology);
+		Assert.assertEquals(3, mGraph.size());
+	}
+
+	/**
+	 * Three bnodes chained by two shared mboxes (1-2 share mbox1, 2-3 share
+	 * mbox2): the expected result is two mbox triples and three comments.
+	 */
+	@Test
+	public void overlappingEquivalenceClasses()  {
+		MGraph mGraph = new SimpleMGraph();
+		UriRef mbox1 = new UriRef("mailto:foo@example.org");
+		final BNode bNode1 = new BNode();
+		mGraph.add(new TripleImpl(bNode1, FOAF.mbox, mbox1));
+		mGraph.add(new TripleImpl(bNode1, RDFS.comment,
+				new PlainLiteralImpl("a comment")));
+		final BNode bNode2 = new BNode();
+		UriRef mbox2 = new UriRef("mailto:bar@example.org");
+		mGraph.add(new TripleImpl(bNode2, FOAF.mbox, mbox1));
+		mGraph.add(new TripleImpl(bNode2, FOAF.mbox, mbox2));
+		mGraph.add(new TripleImpl(bNode2, RDFS.comment,
+				new PlainLiteralImpl("another comment")));
+		final BNode bNode3 = new BNode();
+		mGraph.add(new TripleImpl(bNode3, FOAF.mbox, mbox2));
+		mGraph.add(new TripleImpl(bNode3, RDFS.comment,
+				new PlainLiteralImpl("yet another comment")));
+		Smusher.smush(mGraph, ontology);
+		Assert.assertEquals(5, mGraph.size());
+	}
+
+	/**
+	 * A UriRef and a bnode sharing an mbox: the expected result is one mbox
+	 * triple and the two comments.
+	 */
+	@Test
+	public void oneUriRef()  {
+		MGraph mGraph = new SimpleMGraph();
+		UriRef mbox1 = new UriRef("mailto:foo@example.org");
+		final UriRef resource = new UriRef("http://example.org/");
+		mGraph.add(new TripleImpl(resource, FOAF.mbox, mbox1));
+		mGraph.add(new TripleImpl(resource, RDFS.comment,
+				new PlainLiteralImpl("a comment")));
+		final BNode bNode2 = new BNode();
+		mGraph.add(new TripleImpl(bNode2, FOAF.mbox, mbox1));
+		mGraph.add(new TripleImpl(bNode2, RDFS.comment,
+				new PlainLiteralImpl("another comment")));
+		Smusher.smush(mGraph, ontology);
+		Assert.assertEquals(3, mGraph.size());
+	}
+
+	/**
+	 * Two UriRefs sharing an mbox: besides the mbox triple and the two
+	 * comments an owl:sameAs statement linking the two UriRefs is expected.
+	 */
+	@Test
+	public void twoUriRefs()  {
+		MGraph mGraph = new SimpleMGraph();
+		UriRef mbox1 = new UriRef("mailto:foo@example.org");
+		final UriRef resource1 = new UriRef("http://example.org/");
+		mGraph.add(new TripleImpl(resource1, FOAF.mbox, mbox1));
+		mGraph.add(new TripleImpl(resource1, RDFS.comment,
+				new PlainLiteralImpl("a comment")));
+		final UriRef resource2 = new UriRef("http://2.example.org/");
+		mGraph.add(new TripleImpl(resource2, FOAF.mbox, mbox1));
+		mGraph.add(new TripleImpl(resource2, RDFS.comment,
+				new PlainLiteralImpl("another comment")));
+		Smusher.smush(mGraph, ontology);
+		Assert.assertEquals(4, mGraph.size());
+	}
+
+}



Mime
View raw message