jena-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From a...@apache.org
Subject svn commit: r1356854 - in /jena/trunk/jena-arq/src: main/java/com/hp/hpl/jena/sparql/resultset/TSVInputIterator.java test/java/com/hp/hpl/jena/sparql/resultset/TestResultSetFormat2.java
Date Tue, 03 Jul 2012 18:12:03 GMT
Author: andy
Date: Tue Jul  3 18:12:02 2012
New Revision: 1356854

URL: http://svn.apache.org/viewvc?rev=1356854&view=rev
Log:
Do some RDF term parsing but keep the efficiency of not invoking the SSE parser on each string.

Modified:
    jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/resultset/TSVInputIterator.java
    jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/resultset/TestResultSetFormat2.java

Modified: jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/resultset/TSVInputIterator.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/resultset/TSVInputIterator.java?rev=1356854&r1=1356853&r2=1356854&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/resultset/TSVInputIterator.java (original)
+++ jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/resultset/TSVInputIterator.java Tue Jul  3 18:12:02 2012
@@ -18,6 +18,8 @@
 
 package com.hp.hpl.jena.sparql.resultset;
 
+import static java.lang.String.format ;
+
 import java.io.BufferedReader ;
 import java.io.IOException ;
 import java.util.List ;
@@ -25,17 +27,16 @@ import java.util.NoSuchElementException 
 
 import org.openjena.atlas.io.IO ;
 import org.openjena.atlas.io.IndentedWriter ;
+import org.openjena.riot.tokens.Tokenizer ;
+import org.openjena.riot.tokens.TokenizerFactory ;
 
 import com.hp.hpl.jena.graph.Node ;
-import com.hp.hpl.jena.query.QueryException ;
-import com.hp.hpl.jena.rdf.model.AnonId;
 import com.hp.hpl.jena.sparql.core.Var ;
 import com.hp.hpl.jena.sparql.engine.binding.Binding ;
 import com.hp.hpl.jena.sparql.engine.binding.BindingFactory ;
 import com.hp.hpl.jena.sparql.engine.binding.BindingMap ;
 import com.hp.hpl.jena.sparql.engine.iterator.QueryIteratorBase ;
 import com.hp.hpl.jena.sparql.serializer.SerializationContext ;
-import com.hp.hpl.jena.sparql.util.NodeFactory ;
 
 /**
  * Class used to do streaming parsing of actual result rows from the TSV
@@ -93,14 +94,14 @@ public class TSVInputIterator extends Qu
 	        this.lineNum++;
 	    } 
 	    catch (IOException e) 
-	    { throw new QueryException("Error parsing TSV results - " + e.getMessage()); }
+	    { throw new ResultSetException("Error parsing TSV results - " + e.getMessage()); }
 
 	    if ( line.isEmpty() )
 	    {
 	        // Empty input line - no bindings.
 	    	// Only valid when we expect zero/one values as otherwise we should get a sequence of tab characters
 	    	// which means a non-empty string which we handle normally
-	    	if (expectedItems > 1) throw new QueryException(String.format("Error Parsing TSV results at Line %d - The result row had 0/1 values when %d were expected", this.lineNum, expectedItems));
+	    	if (expectedItems > 1) throw new ResultSetException(format("Error Parsing TSV results at Line %d - The result row had 0/1 values when %d were expected", this.lineNum, expectedItems));
 	        this.binding = BindingFactory.create() ;
 	        return true ;
 	    }
@@ -108,11 +109,9 @@ public class TSVInputIterator extends Qu
         String[] tokens = TSVInput.pattern.split(line, -1);
 	    
         if (tokens.length != expectedItems)
-        	 throw new QueryException(String.format("Error Parsing TSV results at Line %d - The result row '%s' has %d values instead of the expected %d.", this.lineNum, line, tokens.length, expectedItems));
-
+        	 throw new ResultSetException(format("Error Parsing TSV results at Line %d - The result row '%s' has %d values instead of the expected %d.", this.lineNum, line, tokens.length, expectedItems));
         this.binding = BindingFactory.create();
 
-
         try
         {
 	        for ( int i = 0; i < tokens.length; i++ ) 
@@ -123,24 +122,33 @@ public class TSVInputIterator extends Qu
 	        	if (token.equals("")) continue; 
 	
         		//Bound value so parse it and add to the binding
-        		Node node = parseNode(token);
+        		Node node = parseNode(token, lineNum);
         		this.binding.add(this.vars.get(i), node);
 	        }
     	} catch (Exception e) {
-    		throw new QueryException(String.format("Error Parsing TSV results at Line %d - The result row '%s' contains an invalid encoding of a Node", this.lineNum, line));
+    		throw new ResultSetException(format("Error Parsing TSV results at Line %d - The result row '%s' contains an invalid encoding of a Node", this.lineNum, line));
     	}
 
         return true;
 	}
 	
-	private Node parseNode(String token) {
-		if (token.startsWith("_:")) {
-			return Node.createAnon(new AnonId(token.substring(2)));
-		} else if (token.startsWith("<")) {
-			return Node.createURI(token.substring(1, token.length()-1));
-		} else {
-			return NodeFactory.parseNode(token, null);
-		}
+	private static Node parseNode(String token, long lineNum) {
+	    Tokenizer tokenizer = TokenizerFactory.makeTokenizerString(token) ;
+	    if ( ! tokenizer.hasNext() )
+	        throw new ResultSetException(format("Error Parsing TSV results at Line %d, item '%s' - The result row contains an empty term", lineNum, token)) ; 
+	    Node node = tokenizer.next().asNode() ;
+	    if ( ! node.isConcrete() )
+	        throw new ResultSetException(format("Error Parsing TSV results at Line %d, item '%s' - Bad RDF term", lineNum, token)) ;
+	    if ( tokenizer.hasNext() )
+	        throw new ResultSetException(format("Error Parsing TSV results at Line %d, item '%s' - Trailing characters", lineNum, token)) ;
+	    if ( node.isURI() )
+	    {
+	        // Lightly test for bad URIs.
+	        String x = node.getURI() ;
+	        if ( x.indexOf(' ') >= 0 )
+	            throw new ResultSetException(format("Error Parsing TSV results at Line %d, item '%s' - Space(s) in  IRI", lineNum, token)) ;
+	    }
+	    return node ;
 	}
 
 	@Override

Modified: jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/resultset/TestResultSetFormat2.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/resultset/TestResultSetFormat2.java?rev=1356854&r1=1356853&r2=1356854&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/resultset/TestResultSetFormat2.java (original)
+++ jena/trunk/jena-arq/src/test/java/com/hp/hpl/jena/sparql/resultset/TestResultSetFormat2.java Tue Jul  3 18:12:02 2012
@@ -23,7 +23,6 @@ import java.io.ByteArrayInputStream ;
 import org.junit.Test ;
 import org.openjena.atlas.lib.StrUtils ;
 
-import com.hp.hpl.jena.query.QueryException ;
 import com.hp.hpl.jena.query.ResultSet ;
 import com.hp.hpl.jena.query.ResultSetFactory ;
 import com.hp.hpl.jena.sparql.ARQException;
@@ -86,7 +85,54 @@ public class TestResultSetFormat2
     	String x = "?x\t?y\t?z\n\t\t";
     }
     
-    @Test (expected=QueryException.class) 
+    // various values
+    
+    @Test
+    public void resultset_tsv_08()
+    {
+        String x = "?x\n<http://example/foo>\n";
+        parseTSV(x);
+    }
+    
+    @Test
+    public void resultset_tsv_09()
+    {
+        String x = "?x\n_:abc\n";
+        parseTSV(x);
+    }
+    
+    @Test
+    public void resultset_tsv_11()
+    {
+        String x = "?x\n123\n";
+        parseTSV(x);
+    }
+    
+    @Test
+    public void resultset_tsv_12()
+    {
+        // We allow leading white space.
+        String x = "?x\n  123\n";
+        parseTSV(x);
+    }
+    
+    @Test
+    public void resultset_tsv_13()
+    {
+        // We allow trailing white space.
+        String x = "?x\n123   \n";
+        parseTSV(x);
+    }
+        
+    @Test
+    public void resultset_tsv_14()
+    {
+        // We allow trailing white space.
+        String x = "?x\n<http://example/>    \n";
+        parseTSV(x);
+    }
+
+    @Test (expected=ResultSetException.class) 
     public void resultset_bad_tsv_01()
     {
         // Two vars, row of 3 values.
@@ -94,7 +140,7 @@ public class TestResultSetFormat2
         parseTSV(x);
     }
 
-    @Test (expected=QueryException.class) 
+    @Test (expected=ResultSetException.class) 
     public void resultset_bad_tsv_02()
     {
         // Two vars, row of 1 value only.
@@ -109,14 +155,45 @@ public class TestResultSetFormat2
     	parseTSV("");
     }
     
-    @Test (expected=QueryException.class)
+    @Test (expected=ResultSetException.class)
     public void resultset_bad_tsv_04()
     {
     	//Two vars but a completely empty row (should contain a tab)
     	String x = "?x\t?y\n\n";
     	parseTSV(x);
     }
-        
+    
+    // various values - broken
+    
+    @Test(expected=ResultSetException.class)
+    public void resultset_bad_tsv_05()
+    {
+        String x = "?x\n<http://example/\n";
+        parseTSV(x);
+    }
+    
+    @Test(expected=ResultSetException.class)
+    public void resultset_bad_tsv_06()
+    {
+        String x = "?x\n<http://example/ white space >\n";
+        parseTSV(x);
+    }
+
+    @Test(expected=ResultSetException.class)
+    public void resultset_bad_tsv_07()
+    {
+        String x = "?x\n<<<<http://example/>>>>\n";
+        parseTSV(x);
+    }
+
+
+    @Test (expected=ResultSetException.class)
+    public void resultset_bad_tsv_08()
+    {
+        String x = "?x\n_:abc def\n";
+        parseTSV(x);
+    }
+
     public void parseTSV(String x)
     {
         byte[] b = StrUtils.asUTF8bytes(x) ;



Mime
View raw message