commonsrdf-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From st...@apache.org
Subject [1/6] incubator-commonsrdf git commit: Always use RDFLoader
Date Mon, 05 Sep 2016 15:36:07 GMT
Repository: incubator-commonsrdf
Updated Branches:
  refs/heads/rdf4j-jena-compat [created] 84c703204


Always use RDFLoader

but for Path support we'll always open the inputstream
ourselves - both to support multiple providers
and also to have more consistent handling of
basePath for symlinked files.


Project: http://git-wip-us.apache.org/repos/asf/incubator-commonsrdf/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-commonsrdf/commit/81b1be80
Tree: http://git-wip-us.apache.org/repos/asf/incubator-commonsrdf/tree/81b1be80
Diff: http://git-wip-us.apache.org/repos/asf/incubator-commonsrdf/diff/81b1be80

Branch: refs/heads/rdf4j-jena-compat
Commit: 81b1be803e1a7d60fb272036e4bf596e7d8a8901
Parents: 9c66ae0
Author: Stian Soiland-Reyes <stain@apache.org>
Authored: Mon Jun 20 14:38:07 2016 +0100
Committer: Stian Soiland-Reyes <stain@apache.org>
Committed: Mon Jun 20 14:38:07 2016 +0100

----------------------------------------------------------------------
 .../commons/rdf/rdf4j/RDF4JParserBuilder.java   | 74 +++++++++++++++-----
 1 file changed, 56 insertions(+), 18 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-commonsrdf/blob/81b1be80/rdf4j/src/main/java/org/apache/commons/rdf/rdf4j/RDF4JParserBuilder.java
----------------------------------------------------------------------
diff --git a/rdf4j/src/main/java/org/apache/commons/rdf/rdf4j/RDF4JParserBuilder.java b/rdf4j/src/main/java/org/apache/commons/rdf/rdf4j/RDF4JParserBuilder.java
index 0c1aa12..d0b2c6c 100644
--- a/rdf4j/src/main/java/org/apache/commons/rdf/rdf4j/RDF4JParserBuilder.java
+++ b/rdf4j/src/main/java/org/apache/commons/rdf/rdf4j/RDF4JParserBuilder.java
@@ -18,25 +18,39 @@
 package org.apache.commons.rdf.rdf4j;
 
 import java.io.IOException;
+import java.io.InputStream;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.nio.file.Files;
 import java.nio.file.Path;
 import java.util.Optional;
 import java.util.function.Consumer;
 
+import org.apache.commons.rdf.api.IRI;
 import org.apache.commons.rdf.api.Quad;
 import org.apache.commons.rdf.api.RDFParserBuilder;
-import org.apache.commons.rdf.rdf4j.RDF4JDataset;
-import org.apache.commons.rdf.rdf4j.RDF4JGraph;
-import org.apache.commons.rdf.rdf4j.RDF4JTermFactory;
+import org.apache.commons.rdf.api.RDFSyntax;
 import org.apache.commons.rdf.simple.AbstractRDFParserBuilder;
 import org.eclipse.rdf4j.model.Model;
 import org.eclipse.rdf4j.repository.util.RDFInserter;
+import org.eclipse.rdf4j.repository.util.RDFLoader;
+import org.eclipse.rdf4j.rio.ParserConfig;
 import org.eclipse.rdf4j.rio.RDFFormat;
 import org.eclipse.rdf4j.rio.RDFHandler;
 import org.eclipse.rdf4j.rio.RDFHandlerException;
-import org.eclipse.rdf4j.rio.RDFParser;
 import org.eclipse.rdf4j.rio.Rio;
 import org.eclipse.rdf4j.rio.helpers.AbstractRDFHandler;
 
+/**
+ * RDF4J-based parser.
+ * <p>
+ * This can handle the RDF syntaxes {@link RDFSyntax#JSONLD},
+ * {@link RDFSyntax#NQUADS}, {@link RDFSyntax#NTRIPLES},
+ * {@link RDFSyntax#RDFXML}, {@link RDFSyntax#TRIG} and {@link RDFSyntax#TURTLE}
+ * - additional syntaxes can be supported by including the corresponding
+ * <em>rdf4j-rio-*</em> module on the classpath.
+ *
+ */
 public class RDF4JParserBuilder extends AbstractRDFParserBuilder implements RDFParserBuilder {
 
 	private final class AddToQuadConsumer extends AbstractRDFHandler {
@@ -74,7 +88,7 @@ public class RDF4JParserBuilder extends AbstractRDFParserBuilder implements RDFP
 				throws org.eclipse.rdf4j.rio.RDFHandlerException {
 			model.add(st);
 		}
-		
+
 		@Override
 		public void handleNamespace(String prefix, String uri) throws RDFHandlerException {
 			model.setNamespace(prefix, uri);
@@ -100,19 +114,43 @@ public class RDF4JParserBuilder extends AbstractRDFParserBuilder implements RDFP
 
 	@Override
 	protected void parseSynchronusly() throws IOException, RDFParseException {
-		if (getContentType().isPresent()) {
-			Rio.getParserFormatForMIMEType(getContentType().get());
-		}
-
 		Optional<RDFFormat> formatByMimeType = getContentType().flatMap(Rio::getParserFormatForMIMEType);
-		Optional<RDFFormat> formatByFilename = getSourceFile().map(Path::getFileName).map(Path::toString)
-				.flatMap(Rio::getParserFormatForFileName);
-		RDFFormat format = formatByMimeType.orElse(
-				formatByFilename.orElseThrow(() -> new RDFParseException("Unrecognized or missing content type")));
-
-		RDFParser parser = Rio.createParser(format);
-
-		parser.setRDFHandler(makeRDFHandler());
+		String base = getBase().map(IRI::getIRIString).orElse(null);
+		
+		ParserConfig parserConfig = new ParserConfig();
+		// TODO: Should we need to set anything?
+		RDFLoader loader = new RDFLoader(parserConfig, rdf4jTermFactory.getValueFactory());
+		RDFHandler rdfHandler = makeRDFHandler();		
+		if (getSourceFile().isPresent()) {			
+			// NOTE: While we could have used  
+			// loader.load(sourcePath.toFile()
+			// if the path fs provider == FileSystems.getDefault(), 			
+			// that RDFLoader method does not use absolute path
+			// as the base URI, so to be consistent 
+			// we'll always do it with our own input stream
+			//
+			// That means we may have to guess format by extensions:			
+			Optional<RDFFormat> formatByFilename = getSourceFile().map(Path::getFileName).map(Path::toString)
+					.flatMap(Rio::getParserFormatForFileName);
+			// TODO: for the excited.. what about the extension after following symlinks? 
+			
+			RDFFormat format = formatByMimeType.orElse(formatByFilename.orElse(null));
+			try (InputStream in = Files.newInputStream(getSourceFile().get())) {
+				loader.load(in, base, format, rdfHandler);
+			}
+		} else if (getSourceIri().isPresent()) {
+			try {
+				// TODO: Handle international IRIs properly
+				// (Unicode support for hostname, path and query)
+				URL url = new URL(getSourceIri().get().getIRIString());
+				// TODO: This probably does not support https:// -> http:// redirections
+				loader.load(url, base, formatByMimeType.orElse(null), makeRDFHandler());
+			} catch (MalformedURLException ex) {
+				throw new IOException("Can't handle source URL: " + getSourceIri().get(), ex);
+			}			
+		}
+		// must be getSourceInputStream then, this is guaranteed by super.checkSource(); 		
+		loader.load(getSourceInputStream().get(), base, formatByMimeType.orElse(null), rdfHandler);
 	}
 
 	protected RDFHandler makeRDFHandler() {
@@ -124,7 +162,7 @@ public class RDF4JParserBuilder extends AbstractRDFParserBuilder implements RDFP
 		if (getTargetDataset().filter(RDF4JDataset.class::isInstance).isPresent()) {
 			// One of us, we can add them as Statements directly
 			RDF4JDataset dataset = (RDF4JDataset) getTargetDataset().get();
-			if (dataset.asRepository().isPresent()) {				
+			if (dataset.asRepository().isPresent()) {
 				return new RDFInserter(dataset.asRepository().get().getConnection());
 			}
 			if (dataset.asModel().isPresent()) {


Mime
View raw message