incubator-any23-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mosta...@apache.org
Subject svn commit: r1229627 [4/5] - in /incubator/any23/trunk: ./ any23-core/ any23-core/bin/ any23-core/src/main/java/org/deri/any23/ any23-core/src/main/java/org/deri/any23/cli/ any23-core/src/main/java/org/deri/any23/eval/ any23-core/src/main/java/org/deri...
Date Tue, 10 Jan 2012 16:32:33 GMT
Added: incubator/any23/trunk/any23-core/src/test/java/org/deri/any23/io/nquads/NQuadsParserTest.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/any23-core/src/test/java/org/deri/any23/io/nquads/NQuadsParserTest.java?rev=1229627&view=auto
==============================================================================
--- incubator/any23/trunk/any23-core/src/test/java/org/deri/any23/io/nquads/NQuadsParserTest.java (added)
+++ incubator/any23/trunk/any23-core/src/test/java/org/deri/any23/io/nquads/NQuadsParserTest.java Tue Jan 10 16:32:28 2012
@@ -0,0 +1,579 @@
+/**
+ * Copyright 2008-2010 Digital Enterprise Research Institute (DERI)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *          http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package org.deri.any23.io.nquads;
+
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+import org.openrdf.model.BNode;
+import org.openrdf.model.Literal;
+import org.openrdf.model.Resource;
+import org.openrdf.model.Statement;
+import org.openrdf.model.URI;
+import org.openrdf.model.Value;
+import org.openrdf.model.impl.URIImpl;
+import org.openrdf.rio.ParseLocationListener;
+import org.openrdf.rio.RDFHandler;
+import org.openrdf.rio.RDFHandlerException;
+import org.openrdf.rio.RDFParseException;
+import org.openrdf.rio.RDFParser;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.BufferedReader;
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.List;
+
+import static org.hamcrest.core.Is.is;
+
+/**
+ * Test case for {@link org.deri.any23.io.nquads.NQuadsParser}.
+ *
+ * @author Michele Mostarda (mostarda@fbk.eu)
+ */
+public class NQuadsParserTest {
+
+    /** Test logger; named after this test class rather than the class under test. */
+    private static final Logger logger = LoggerFactory.getLogger(NQuadsParserTest.class);
+
+    /** Parser under test; re-created before every test by {@link #setUp()}. */
+    private NQuadsParser parser;
+
+    /**
+     * Creates a fresh parser with data verification enabled, datatype
+     * handling set to VERIFY and stop-at-first-error enabled.
+     */
+    @Before
+    public void setUp() {
+        final NQuadsParser strictParser = new NQuadsParser();
+        strictParser.setVerifyData(true);
+        strictParser.setDatatypeHandling(RDFParser.DatatypeHandling.VERIFY);
+        strictParser.setStopAtFirstError(true);
+        parser = strictParser;
+    }
+
+    /** Drops the reference to the parser used by the test that just ran. */
+    @After
+    public void tearDown() {
+        parser = null;
+    }
+
+    /**
+     * Verifies that an incomplete quad (the closing dot is missing) is rejected.
+     *
+     * @throws RDFHandlerException
+     * @throws IOException
+     * @throws RDFParseException expected outcome of this test.
+     */
+    @Test(expected = RDFParseException.class)
+    public void testIncompleteParsing() throws RDFHandlerException, IOException, RDFParseException {
+        final String unterminatedQuad = "<http://s> <http://p> <http://o> <http://g>";
+        parser.parse(new ByteArrayInputStream(unterminatedQuad.getBytes()), "http://base-uri");
+    }
+
+    /**
+     * Tests parsing of empty lines and comments: an input made only of blank
+     * lines and <code>#</code> comments must produce no statements.
+     *
+     * @throws RDFHandlerException
+     * @throws IOException
+     * @throws RDFParseException
+     */
+    @Test
+    public void testParseEmptyLinesAndComments() throws RDFHandlerException, IOException, RDFParseException {
+        final ByteArrayInputStream bais = new ByteArrayInputStream(
+            "  \n\n\n# This is a comment\n\n#this is another comment."
+            .getBytes()
+        );
+        final TestRDFHandler rdfHandler = new TestRDFHandler();
+        parser.setRDFHandler(rdfHandler);
+        parser.parse(bais, "http://test.base.uri");
+        // JUnit's contract is assertEquals(expected, actual): keep the constant first
+        // so that a failure message reports the two values the right way round.
+        Assert.assertEquals(0, rdfHandler.getStatements().size());
+    }
+
+    /**
+     * Parses one quad made only of URIs, written with no whitespace between
+     * its terms, and checks the four components of the resulting statement.
+     *
+     * @throws RDFHandlerException
+     * @throws IOException
+     * @throws RDFParseException
+     */
+    @Test
+    public void testParseBasic() throws RDFHandlerException, IOException, RDFParseException {
+        final String quad =
+            "<http://www.v/dat/4b><http://www.w3.org/20/ica#dtend><http://sin/value/2><http://sin.siteserv.org/def/>.";
+        final TestRDFHandler collector = new TestRDFHandler();
+        parser.setRDFHandler(collector);
+        parser.parse(new ByteArrayInputStream(quad.getBytes()), "http://test.base.uri");
+
+        final List<Statement> parsed = collector.getStatements();
+        Assert.assertThat(parsed.size(), is(1));
+        final Statement only = parsed.get(0);
+        Assert.assertEquals("http://www.v/dat/4b", only.getSubject().stringValue());
+        Assert.assertEquals("http://www.w3.org/20/ica#dtend", only.getPredicate().stringValue());
+        final Value object = only.getObject();
+        Assert.assertTrue(object instanceof URI);
+        Assert.assertEquals("http://sin/value/2", object.stringValue());
+        Assert.assertEquals("http://sin.siteserv.org/def/", only.getContext().stringValue());
+    }
+
+    /**
+     * Parses one quad whose subject is a blank node and checks that the
+     * subject is exposed as a {@link BNode} while the other terms keep their values.
+     *
+     * @throws RDFHandlerException
+     * @throws IOException
+     * @throws RDFParseException
+     */
+    @Test
+    public void testParseBasicBNode() throws RDFHandlerException, IOException, RDFParseException {
+        final String quad =
+            "_:123456768<http://www.w3.org/20/ica#dtend><http://sin/value/2><http://sin.siteserv.org/def/>.";
+        final TestRDFHandler collector = new TestRDFHandler();
+        parser.setRDFHandler(collector);
+        parser.parse(new ByteArrayInputStream(quad.getBytes()), "http://test.base.uri");
+
+        final List<Statement> parsed = collector.getStatements();
+        Assert.assertThat(parsed.size(), is(1));
+        final Statement only = parsed.get(0);
+        Assert.assertTrue(only.getSubject() instanceof BNode);
+        Assert.assertEquals("http://www.w3.org/20/ica#dtend", only.getPredicate().stringValue());
+        final Value object = only.getObject();
+        Assert.assertTrue(object instanceof URI);
+        Assert.assertEquals("http://sin/value/2", object.stringValue());
+        Assert.assertEquals("http://sin.siteserv.org/def/", only.getContext().stringValue());
+    }
+
+    /**
+     * Parses one quad whose object is a plain literal and checks that the
+     * object is exposed as a {@link Literal} holding the quoted text.
+     *
+     * @throws RDFHandlerException
+     * @throws IOException
+     * @throws RDFParseException
+     */
+    @Test
+    public void testParseBasicLiteral() throws RDFHandlerException, IOException, RDFParseException {
+        final String quad =
+            "_:123456768<http://www.w3.org/20/ica#dtend>\"2010-05-02\"<http://sin.siteserv.org/def/>.";
+        final TestRDFHandler collector = new TestRDFHandler();
+        parser.setRDFHandler(collector);
+        parser.parse(new ByteArrayInputStream(quad.getBytes()), "http://test.base.uri");
+
+        final List<Statement> parsed = collector.getStatements();
+        Assert.assertThat(parsed.size(), is(1));
+        final Statement only = parsed.get(0);
+        Assert.assertTrue(only.getSubject() instanceof BNode);
+        Assert.assertEquals("http://www.w3.org/20/ica#dtend", only.getPredicate().stringValue());
+        final Value object = only.getObject();
+        Assert.assertTrue(object instanceof Literal);
+        Assert.assertEquals("2010-05-02", object.stringValue());
+        Assert.assertEquals("http://sin.siteserv.org/def/", only.getContext().stringValue());
+    }
+
+    /**
+     * Tests N-Quads parsing with literal and language: the object must be a
+     * literal carrying the language tag and no datatype.
+     *
+     * @throws RDFHandlerException
+     * @throws IOException
+     * @throws RDFParseException
+     */
+    @Test
+    public void testParseBasicLiteralLang() throws RDFHandlerException, IOException, RDFParseException {
+        final ByteArrayInputStream bais = new ByteArrayInputStream(
+            "<http://www.v/dat/4b2-21><http://www.w3.org/20/ica#dtend>\"2010-05-02\"@en<http://sin.siteserv.org/def/>."
+            .getBytes()
+        );
+        final TestRDFHandler rdfHandler = new TestRDFHandler();
+        parser.setRDFHandler(rdfHandler);
+        parser.parse(bais, "http://test.base.uri");
+        // Fail with an assertion, not an IndexOutOfBoundsException, if nothing was parsed.
+        Assert.assertThat(rdfHandler.getStatements().size(), is(1));
+        final Statement statement = rdfHandler.getStatements().get(0);
+        Assert.assertEquals("http://www.v/dat/4b2-21", statement.getSubject().stringValue());
+        Assert.assertEquals("http://www.w3.org/20/ica#dtend", statement.getPredicate().stringValue());
+        Assert.assertTrue(statement.getObject() instanceof Literal);
+        final Literal object = (Literal) statement.getObject();
+        Assert.assertEquals("2010-05-02", object.stringValue());
+        Assert.assertEquals("en", object.getLanguage());
+        // The first argument of the two-argument assertNull is the failure message.
+        Assert.assertNull("A language-tagged literal must not have a datatype.", object.getDatatype());
+        Assert.assertEquals("http://sin.siteserv.org/def/", statement.getContext().stringValue());
+    }
+
+    /**
+     * Tests N-Quads parsing with literal and datatype: the object must be a
+     * literal carrying the declared datatype and no language tag.
+     *
+     * @throws RDFHandlerException
+     * @throws IOException
+     * @throws RDFParseException
+     */
+    @Test
+    public void testParseBasicLiteraDatatype() throws RDFHandlerException, IOException, RDFParseException {
+        final ByteArrayInputStream bais = new ByteArrayInputStream(
+            ("<http://www.v/dat/4b2-21>" +
+             "<http://www.w3.org/20/ica#dtend>" +
+             "\"2010\"^^<http://www.w3.org/2001/XMLSchema#integer>" +
+             "<http://sin.siteserv.org/def/>."
+            ).getBytes()
+        );
+        final TestRDFHandler rdfHandler = new TestRDFHandler();
+        parser.setRDFHandler(rdfHandler);
+        parser.parse(bais, "http://test.base.uri");
+        // Fail with an assertion, not an IndexOutOfBoundsException, if nothing was parsed.
+        Assert.assertThat(rdfHandler.getStatements().size(), is(1));
+        final Statement statement = rdfHandler.getStatements().get(0);
+        Assert.assertEquals("http://www.v/dat/4b2-21", statement.getSubject().stringValue());
+        Assert.assertEquals("http://www.w3.org/20/ica#dtend", statement.getPredicate().stringValue());
+        Assert.assertTrue(statement.getObject() instanceof Literal);
+        final Literal object = (Literal) statement.getObject();
+        Assert.assertEquals("2010", object.stringValue());
+        Assert.assertNull(object.getLanguage());
+        Assert.assertEquals("http://www.w3.org/2001/XMLSchema#integer", object.getDatatype().toString());
+        Assert.assertEquals("http://sin.siteserv.org/def/", statement.getContext().stringValue());
+    }
+
+    /**
+     * Checks that a literal holding a single escaped backslash is parsed as
+     * one statement and that the last reported location is row 1, column 40.
+     *
+     * @throws RDFHandlerException
+     * @throws IOException
+     * @throws RDFParseException
+     */
+    @Test
+    public void testLiteralEscapeManagement1()
+    throws RDFHandlerException, IOException, RDFParseException {
+        final TestParseLocationListener locations = new TestParseLocationListener();
+        final TestRDFHandler collector = new TestRDFHandler();
+        parser.setParseLocationListener(locations);
+        parser.setRDFHandler(collector);
+
+        final String quad = "<http://a> <http://b> \"\\\\\" <http://c> .";
+        parser.parse(new ByteArrayInputStream(quad.getBytes()), "http://base-uri");
+
+        collector.assertHandler(1);
+        locations.assertListener(1, 40);
+    }
+
+    /**
+     * Checks that an escaped line feed inside a literal is decoded into a
+     * real newline character in the literal label.
+     *
+     * @throws RDFHandlerException
+     * @throws IOException
+     * @throws RDFParseException
+     */
+    @Test
+    public void testLiteralEscapeManagement2()
+    throws RDFHandlerException, IOException, RDFParseException {
+        final TestParseLocationListener locations = new TestParseLocationListener();
+        final TestRDFHandler collector = new TestRDFHandler();
+        parser.setParseLocationListener(locations);
+        parser.setRDFHandler(collector);
+
+        final String quad = "<http://a> <http://b> \"Line text 1\\nLine text 2\" <http://c> .";
+        parser.parse(new ByteArrayInputStream(quad.getBytes()), "http://base-uri");
+
+        collector.assertHandler(1);
+        final Value parsedObject = collector.getStatements().get(0).getObject();
+        Assert.assertTrue( parsedObject instanceof Literal);
+        final Literal parsedLiteral = (Literal) parsedObject;
+        Assert.assertEquals("Line text 1\nLine text 2", parsedLiteral.getLabel());
+    }
+
+    /**
+     * Tests the decoding of backslash-u escapes inside URIs: subject,
+     * predicate, object and graph must all be URIs exposing the decoded characters.
+     *
+     * @throws RDFHandlerException
+     * @throws IOException
+     * @throws RDFParseException
+     */
+    @Test
+    public void testURIDecodingManagement() throws RDFHandlerException, IOException, RDFParseException {
+        final TestParseLocationListener locations = new TestParseLocationListener();
+        final TestRDFHandler collector = new TestRDFHandler();
+        parser.setParseLocationListener(locations);
+        parser.setRDFHandler(collector);
+
+        final String quad =
+            "<http://s/\\u306F\\u3080> <http://p/\\u306F\\u3080> <http://o/\\u306F\\u3080> <http://g/\\u306F\\u3080> .";
+        parser.parse(new ByteArrayInputStream(quad.getBytes()), "http://base-uri");
+
+        collector.assertHandler(1);
+        final Statement parsed = collector.getStatements().get(0);
+
+        // Terms and expectations are kept in subject, predicate, object, graph order.
+        final Value[] terms = {
+            parsed.getSubject(), parsed.getPredicate(), parsed.getObject(), parsed.getContext()
+        };
+        final String[] expectedURIs = {
+            "http://s/はむ", "http://p/はむ", "http://o/はむ", "http://g/はむ"
+        };
+        for (int i = 0; i < terms.length; i++) {
+            Assert.assertTrue( terms[i] instanceof URI);
+            Assert.assertEquals(expectedURIs[i], terms[i].toString());
+        }
+    }
+
+    /**
+     * Tests that a literal containing raw (not escaped) non-ASCII characters
+     * is returned with an unchanged label.
+     *
+     * @throws RDFHandlerException
+     * @throws IOException
+     * @throws RDFParseException
+     */
+    @Test
+    public void testUnicodeLiteralManagement() throws RDFHandlerException, IOException, RDFParseException {
+        TestRDFHandler rdfHandler = new TestRDFHandler();
+        parser.setRDFHandler(rdfHandler);
+        final String INPUT_LITERAL = "[は、イギリスおよびイングランドの首都である] [是大不列顛及北愛爾蘭聯合王國和英格蘭的首都]";
+        final String INPUT_STRING = String.format(
+                "<http://a> <http://b> \"%s\" <http://c> .",
+                INPUT_LITERAL
+        );
+        // Encode explicitly: the no-argument getBytes() uses the platform default
+        // charset, which corrupts the characters above wherever that charset cannot
+        // represent them. N-Quads documents are UTF-8 encoded.
+        // NOTE(review): assumes NQuadsParser decodes input streams as UTF-8 - confirm.
+        final ByteArrayInputStream bais = new ByteArrayInputStream(
+            INPUT_STRING.getBytes("UTF-8")
+        );
+        parser.parse(bais, "http://base-uri");
+
+        rdfHandler.assertHandler(1);
+        final Literal obj = (Literal) rdfHandler.getStatements().get(0).getObject();
+        Assert.assertEquals(INPUT_LITERAL, obj.getLabel());
+    }
+
+    /**
+     * Checks that a backslash-u escape inside a literal is decoded into the
+     * corresponding character in the literal label.
+     *
+     * @throws RDFHandlerException
+     * @throws IOException
+     * @throws RDFParseException
+     */
+    @Test
+    public void testUnicodeLiteralDecoding() throws RDFHandlerException, IOException, RDFParseException {
+        final String expectedLabel  = "[は]";
+        final String escapedLiteral = "[\\u306F]";
+        final String quad = String.format("<http://a> <http://b> \"%s\" <http://c> .", escapedLiteral);
+
+        final TestRDFHandler collector = new TestRDFHandler();
+        parser.setRDFHandler(collector);
+        parser.parse(new ByteArrayInputStream(quad.getBytes()), "http://base-uri");
+
+        collector.assertHandler(1);
+        final Literal parsedLiteral = (Literal) collector.getStatements().get(0).getObject();
+        Assert.assertEquals(expectedLabel, parsedLiteral.getLabel());
+    }
+
+    /**
+     * Verifies that a unicode escape containing a non hexadecimal digit is rejected.
+     *
+     * @throws RDFHandlerException
+     * @throws IOException
+     * @throws RDFParseException expected outcome of this test.
+     */
+    @Test(expected = RDFParseException.class)
+    public void testWrongUnicodeEncodedCharFail() throws RDFHandlerException, IOException, RDFParseException {
+        final String malformedQuad = "<http://s> <http://p> \"\\u123X\" <http://g> .";
+        parser.parse(new ByteArrayInputStream(malformedQuad.getBytes()), "http://base-uri");
+    }
+
+    /**
+     * Verifies that reaching the end of the stream inside a literal is
+     * reported as a parse error: the only closing quote of the input is escaped.
+     *
+     * @throws RDFHandlerException
+     * @throws IOException
+     * @throws RDFParseException expected outcome of this test.
+     */
+    @Test(expected = RDFParseException.class)
+    public void testEndOfStreamReached()
+    throws RDFHandlerException, IOException, RDFParseException {
+        final String unclosedLiteralQuad = "<http://a> <http://b> \"\\\" <http://c> .";
+        parser.parse(new ByteArrayInputStream(unclosedLiteralQuad.getBytes()), "http://base-uri");
+    }
+
+    /**
+     * Tests the parser with all cases defined by the NQuads grammar, reading
+     * them from the <code>application/nquads/test1.nq</code> classpath resource.
+     *
+     * @throws IOException
+     * @throws RDFParseException
+     * @throws RDFHandlerException
+     */
+    @Test
+    public void testParserWithAllCases()
+    throws IOException, RDFParseException, RDFHandlerException {
+        final TestParseLocationListener parseLocationListener = new TestParseLocationListener();
+        final SpecificTestRDFHandler rdfHandler = new SpecificTestRDFHandler();
+        parser.setParseLocationListener(parseLocationListener);
+        parser.setRDFHandler(rdfHandler);
+
+        final String resource = "application/nquads/test1.nq";
+        // Report a missing fixture explicitly instead of a bare NullPointerException.
+        Assert.assertNotNull(
+                "Missing test resource: " + resource,
+                this.getClass().getClassLoader().getResource(resource)
+        );
+        // Decode with an explicit charset so that the asserted column does not
+        // depend on the platform default encoding.
+        final BufferedReader br = new BufferedReader(
+                new InputStreamReader(
+                        this.getClass().getClassLoader().getResourceAsStream(resource),
+                        "UTF-8"
+                )
+        );
+        try {
+            parser.parse(
+                br,
+                "http://test.base.uri"
+            );
+        } finally {
+            // Release the underlying stream even when parsing fails.
+            br.close();
+        }
+
+        rdfHandler.assertHandler(5);
+        parseLocationListener.assertListener(7, 107);
+    }
+
+    /**
+     * Parses the <code>application/nquads/test2.nq</code> classpath resource
+     * and expects 400 statements, with the last reported location at row 400, column 348.
+     *
+     * @throws IOException
+     * @throws RDFParseException
+     * @throws RDFHandlerException
+     */
+    @Test
+    public void testParserWithRealData()
+    throws IOException, RDFParseException, RDFHandlerException {
+        final TestParseLocationListener locations = new TestParseLocationListener();
+        final TestRDFHandler collector = new TestRDFHandler();
+        parser.setParseLocationListener(locations);
+        parser.setRDFHandler(collector);
+
+        final ClassLoader loader = this.getClass().getClassLoader();
+        parser.parse(loader.getResourceAsStream("application/nquads/test2.nq"), "http://test.base.uri");
+
+        collector.assertHandler(400);
+        locations.assertListener(400, 348);
+    }
+
+    /**
+     * With datatype handling set to IGNORE, a literal whose content does not
+     * match its declared datatype must be parsed without raising an exception.
+     */
+    @Test
+    public void testStatementWithInvalidLiteralContentAndIgnoreValidation()
+    throws RDFHandlerException, IOException, RDFParseException {
+        verifyStatementWithInvalidLiteralContent(RDFParser.DatatypeHandling.IGNORE);
+    }
+
+    /**
+     * With datatype handling set to VERIFY, a literal whose content does not
+     * match its declared datatype must be rejected with a {@link RDFParseException}.
+     */
+    @Test(expected = RDFParseException.class)
+    public void testStatementWithInvalidLiteralContentAndStrictValidation()
+    throws RDFHandlerException, IOException, RDFParseException {
+        verifyStatementWithInvalidLiteralContent(RDFParser.DatatypeHandling.VERIFY);
+    }
+
+    /**
+     * With datatype handling set to IGNORE, the statement built by the helper
+     * (a literal typed with a dbpedia.org datatype URI) must yield one statement.
+     */
+    @Test
+    public void testStatementWithInvalidDatatypeAndIgnoreValidation()
+    throws RDFHandlerException, IOException, RDFParseException {
+        verifyStatementWithInvalidDatatype(RDFParser.DatatypeHandling.IGNORE);
+    }
+
+    /**
+     * With datatype handling set to VERIFY, the statement built by the helper
+     * (a literal typed with a dbpedia.org datatype URI) must still yield one statement.
+     */
+    @Test
+    public void testStatementWithInvalidDatatypeAndVerifyValidation()
+    throws RDFHandlerException, IOException, RDFParseException {
+        verifyStatementWithInvalidDatatype(RDFParser.DatatypeHandling.VERIFY);
+    }
+
+    /**
+     * Parses, with the given datatype handling, a statement whose literal
+     * declares the XML Schema int datatype but holds a decimal value.
+     *
+     * @param datatypeHandling the datatype handling to set on the parser before parsing.
+     * @throws RDFHandlerException
+     * @throws IOException
+     * @throws RDFParseException
+     */
+    private void verifyStatementWithInvalidLiteralContent(RDFParser.DatatypeHandling datatypeHandling)
+    throws RDFHandlerException, IOException, RDFParseException {
+        final StringBuilder quad = new StringBuilder();
+        quad.append("<http://dbpedia.org/resource/Camillo_Benso,_conte_di_Cavour> ");
+        quad.append("<http://dbpedia.org/property/mandatofine> ");
+        quad.append("\"1380.0\"^^<http://www.w3.org/2001/XMLSchema#int> "); // Float declared as int.
+        quad.append("<http://it.wikipedia.org/wiki/Camillo_Benso,_conte_di_Cavour#absolute-line=20> .");
+        final ByteArrayInputStream input = new ByteArrayInputStream(quad.toString().getBytes());
+
+        parser.setDatatypeHandling(datatypeHandling);
+        parser.parse(input, "http://base-uri");
+    }
+
+    /**
+     * Parses, with the given datatype handling, a statement whose literal is
+     * typed with a dbpedia.org datatype URI and asserts that exactly one
+     * statement is produced.
+     *
+     * @param datatypeHandling the datatype handling to set on the parser before parsing.
+     * @throws RDFHandlerException
+     * @throws IOException
+     * @throws RDFParseException
+     */
+    private void verifyStatementWithInvalidDatatype(RDFParser.DatatypeHandling datatypeHandling)
+    throws RDFHandlerException, IOException, RDFParseException {
+        final TestRDFHandler collector = new TestRDFHandler();
+        parser.setRDFHandler(collector);
+        parser.setDatatypeHandling(datatypeHandling);
+
+        final StringBuilder quad = new StringBuilder();
+        quad.append("<http://dbpedia.org/resource/Camillo_Benso,_conte_di_Cavour> ");
+        quad.append("<http://dbpedia.org/property/mandatofine> ");
+        quad.append("\"1380.0\"^^<http://dbpedia.org/datatype/second> ");
+        quad.append("<http://it.wikipedia.org/wiki/Camillo_Benso,_conte_di_Cavour#absolute-line=20> .");
+
+        parser.parse(new ByteArrayInputStream(quad.toString().getBytes()), "http://base-uri");
+        collector.assertHandler(1);
+    }
+
+    /**
+     * Location listener remembering only the most recent row and column it was notified of.
+     */
+    private class TestParseLocationListener implements ParseLocationListener {
+
+        private int lastRow;
+        private int lastCol;
+
+        public void parseLocationUpdate(int row, int col) {
+            this.lastRow = row;
+            this.lastCol = col;
+        }
+
+        /**
+         * Asserts that the last notified location matches the given one.
+         */
+        private void assertListener(int expectedRow, int expectedCol) {
+            Assert.assertEquals("Unexpected last row", expectedRow, this.lastRow);
+            Assert.assertEquals("Unexpected last col", expectedCol, this.lastCol);
+        }
+
+    }
+
+    /**
+     * {@link RDFHandler} collecting every statement it receives and recording
+     * whether the start and end events were notified. Namespace and comment
+     * events are not supported and raise {@link UnsupportedOperationException}.
+     */
+    private class TestRDFHandler implements RDFHandler {
+
+        private boolean started = false;
+        private boolean ended   = false;
+
+        private final List<Statement> statements = new ArrayList<Statement>();
+
+        /**
+         * @return the statements received so far, in arrival order.
+         */
+        protected List<Statement> getStatements() {
+            return statements;
+        }
+
+        public void startRDF() throws RDFHandlerException {
+            started = true;
+        }
+
+        public void endRDF() throws RDFHandlerException {
+            ended = true;
+        }
+
+        public void handleNamespace(String s, String s1) throws RDFHandlerException {
+            throw new UnsupportedOperationException();
+        }
+
+        public void handleStatement(Statement statement) throws RDFHandlerException {
+            logger.debug(statement.toString());
+            statements.add(statement);
+        }
+
+        public void handleComment(String s) throws RDFHandlerException {
+            throw new UnsupportedOperationException();
+        }
+
+        /**
+         * Asserts that both the start and end events were received and that
+         * the number of collected statements matches the expected one.
+         *
+         * @param expected the expected number of collected statements.
+         */
+        public void assertHandler(int expected) {
+            Assert.assertTrue("Never started.", started);
+            Assert.assertTrue("Never ended." , ended  );
+            Assert.assertEquals("Unexpected number of statements.", expected, statements.size());
+        }
+    }
+
+    /**
+     * Handler checking each incoming statement against its arrival position
+     * before storing it: the first statement must have a fixed URI subject,
+     * every later one a blank node subject, and the context of each statement
+     * must be <code>http://example.org/alice/foaf{position + 1}.rdf</code>.
+     */
+    private class SpecificTestRDFHandler extends TestRDFHandler {
+
+        public void handleStatement(Statement statement) throws RDFHandlerException {
+            int statements = getStatements().size(); // zero-based position of the incoming statement
+            if(statements == 0){
+                Assert.assertEquals(new URIImpl("http://example.org/alice/foaf.rdf#me"), statement.getSubject() );
+
+            } else {
+                Assert.assertTrue(statement.getSubject() instanceof BNode);
+            }
+            // NOTE(review): the only visible user of this handler expects exactly 5
+            // statements (positions 0 to 4), so position 5 is never reached in a
+            // passing run and this predicate check never executes - confirm whether
+            // position 4 was intended.
+            if( statements == 5 ) {
+                Assert.assertEquals(new URIImpl("http://test.base.uri#like"), statement.getPredicate() );
+            }
+            Assert.assertEquals(
+                    new URIImpl( String.format("http://example.org/alice/foaf%s.rdf", statements + 1) ),
+                    statement.getContext()
+            );
+
+            super.handleStatement(statement);
+        }
+    }
+
+}
\ No newline at end of file

Added: incubator/any23/trunk/any23-core/src/test/java/org/deri/any23/io/nquads/NQuadsWriterTest.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/any23-core/src/test/java/org/deri/any23/io/nquads/NQuadsWriterTest.java?rev=1229627&view=auto
==============================================================================
--- incubator/any23/trunk/any23-core/src/test/java/org/deri/any23/io/nquads/NQuadsWriterTest.java (added)
+++ incubator/any23/trunk/any23-core/src/test/java/org/deri/any23/io/nquads/NQuadsWriterTest.java Tue Jan 10 16:32:28 2012
@@ -0,0 +1,145 @@
+/**
+ * Copyright 2008-2010 Digital Enterprise Research Institute (DERI)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *          http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package org.deri.any23.io.nquads;
+
+import org.deri.any23.rdf.RDFUtils;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+import org.openrdf.model.Statement;
+import org.openrdf.model.impl.URIImpl;
+import org.openrdf.rio.RDFHandlerException;
+import org.openrdf.rio.RDFParseException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+
+/**
+ * Test case for {@link org.deri.any23.io.nquads.NQuadsWriter}.
+ *
+ * @author Michele Mostarda (mostarda@fbk.eu)
+ */
+public class NQuadsWriterTest {
+
+    /** Logger used to dump the produced output after each test. */
+    private static final Logger logger  = LoggerFactory.getLogger(NQuadsWriterTest.class);
+
+    /** Buffer receiving what the writer under test emits; reset after each test. */
+    private final ByteArrayOutputStream baos = new ByteArrayOutputStream();
+
+    /** Writer under test, re-created on top of {@link #baos} before each test. */
+    private NQuadsWriter writer;
+
+    /** Creates a new writer sending its output to the shared in-memory buffer. */
+    @Before
+    public void setUp() {
+        writer = new NQuadsWriter(baos);
+    }
+
+    /** Logs the content written by the last test, then clears the buffer and the writer. */
+    @After
+    public void tearDown() {
+        logger.debug( "\n" + baos.toString() );
+        baos.reset(); // the buffer instance is reused, so it must be emptied
+        writer = null;
+    }
+
+    /**
+     * Sends a namespace declaration and seven statements to the writer
+     * (URIs, blank nodes, plain, language-tagged and typed literals, URIs
+     * using the declared prefix, and a multi-line literal) and checks the
+     * shape of each of the seven produced lines.
+     *
+     * @throws RDFHandlerException
+     */
+    @Test
+    public void testWrite() throws RDFHandlerException {
+        final Statement[] quads = {
+                RDFUtils.quad(
+                        RDFUtils.uri("http://sub"),
+                        RDFUtils.uri("http://pre"),
+                        RDFUtils.uri("http://obj"),
+                        RDFUtils.uri("http://gra1")
+                ),
+                RDFUtils.quad(
+                        RDFUtils.getBNode("1"),
+                        RDFUtils.uri("http://pre"),
+                        RDFUtils.getBNode("2"),
+                        RDFUtils.uri("http://gra2")
+                ),
+                RDFUtils.quad(
+                        RDFUtils.getBNode("3"),
+                        RDFUtils.uri("http://pre"),
+                        RDFUtils.literal("Sample text 1"),
+                        RDFUtils.uri("http://gra2")
+                ),
+                RDFUtils.quad(
+                        RDFUtils.getBNode("4"),
+                        RDFUtils.uri("http://pre"),
+                        RDFUtils.literal("Sample text 2", "en"),
+                        RDFUtils.uri("http://gra2")
+                ),
+                RDFUtils.quad(
+                        RDFUtils.getBNode("5"),
+                        RDFUtils.uri("http://pre"),
+                        RDFUtils.literal("12345", new URIImpl("http://www.w3.org/2001/XMLSchema#integer")),
+                        RDFUtils.uri("http://gra2")
+                ),
+                RDFUtils.quad(
+                        RDFUtils.uri("p1:sub"),
+                        RDFUtils.uri("p1:pre"),
+                        RDFUtils.uri("p1:obj"),
+                        RDFUtils.uri("p1:gra2")
+                ),
+                RDFUtils.quad(
+                        RDFUtils.uri("http://sub"),
+                        RDFUtils.uri("http://pre"),
+                        RDFUtils.literal("This is line 1.\nThis is line 2.\n"),
+                        RDFUtils.uri("http://gra3")
+                )
+        };
+
+        // Sending events.
+        writer.startRDF();
+        writer.handleNamespace("p1", "http://test.com/");
+        for (Statement quad : quads) {
+            writer.handleStatement(quad);
+        }
+        writer.endRDF();
+
+        // Checking content: the first six lines are matched by shape only.
+        final String[] lines = baos.toString().split("\n");
+        Assert.assertEquals("Unexpected number of lines.", 7, lines.length);
+        final String[] expectedShapes = {
+                "<.*> <.*> <.*> <.*> \\.",
+                "_:.* <.*> _:.* <.*> \\.",
+                "_:.* <.*> \".*\" <.*> \\.",
+                "_:.* <.*> \".*\"@en <.*> \\.",
+                "_:.* <.*> \".*\"\\^\\^<.*> <.*> \\.",
+                "<http://.*> <http://.*> <http://.*> <http://.*> \\."
+        };
+        for (int i = 0; i < expectedShapes.length; i++) {
+            Assert.assertTrue( lines[i].matches(expectedShapes[i]) );
+        }
+        // The last line is compared verbatim to check the escaping of line feeds.
+        Assert.assertEquals(
+                "<http://sub> <http://pre> \"This is line 1.\\nThis is line 2.\\n\" <http://gra3> .",
+                lines[6]
+        );
+    }
+
+    /**
+     * Feeds the writer with the statements parsed from the
+     * <code>application/nquads/test2.nq</code> classpath resource and checks
+     * that 400 lines are written.
+     *
+     * @throws RDFHandlerException
+     * @throws IOException
+     * @throws RDFParseException
+     */
+    @Test
+    public void testReadWrite() throws RDFHandlerException, IOException, RDFParseException {
+        final NQuadsParser sourceParser = new NQuadsParser();
+        sourceParser.setRDFHandler(writer);
+
+        final ClassLoader loader = this.getClass().getClassLoader();
+        sourceParser.parse(loader.getResourceAsStream("application/nquads/test2.nq"), "http://test.base.uri");
+
+        final String[] writtenLines = baos.toString().split("\n");
+        Assert.assertEquals("Unexpected number of lines.", 400, writtenLines.length);
+    }
+}

Modified: incubator/any23/trunk/any23-core/src/test/java/org/deri/any23/mime/TikaMIMETypeDetectorTest.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/any23-core/src/test/java/org/deri/any23/mime/TikaMIMETypeDetectorTest.java?rev=1229627&r1=1229626&r2=1229627&view=diff
==============================================================================
--- incubator/any23/trunk/any23-core/src/test/java/org/deri/any23/mime/TikaMIMETypeDetectorTest.java (original)
+++ incubator/any23/trunk/any23-core/src/test/java/org/deri/any23/mime/TikaMIMETypeDetectorTest.java Tue Jan 10 16:32:28 2012
@@ -40,6 +40,7 @@ public class TikaMIMETypeDetectorTest {
     private static final String PLAIN  = "text/plain";
     private static final String HTML   = "text/html";
     private static final String XML    = "application/xml";
+    private static final String TRIX   = "application/trix";
     private final static String XHTML  = "application/xhtml+xml";
     private final static String RDFXML = "application/rdf+xml";
     private final static String TURTLE = "application/x-turtle";
@@ -137,6 +138,11 @@ public class TikaMIMETypeDetectorTest {
     }
 
     @Test
+    public void testDetectTriXByContent() throws Exception {
+        detectMIMEtypeByContent("application/trix", "src/test/resources/application/trix");
+    }
+
+    @Test
     public void testDetectAtomByContent() throws Exception {
         detectMIMEtypeByContent("application/atom+xml", "src/test/resources/application/atom");
     }
@@ -211,8 +217,8 @@ public class TikaMIMETypeDetectorTest {
     }
 
     @Test
-    public void testDetectXMLByMeta2() throws IOException {
-        detectMIMETypeByMimeTypeHint(XML, "application/xml");
+    public void testDetectTriXByMeta() throws IOException {
+        detectMIMETypeByMimeTypeHint(TRIX, "application/trix");
     }
 
     @Test
@@ -255,6 +261,11 @@ public class TikaMIMETypeDetectorTest {
     }
 
     @Test
+    public void testTriXByContentAndName() throws Exception {
+        detectMIMETypeByContentAndName("application/trix", "src/test/resources/application/trix");
+    }
+
+    @Test
     public void testRSS1ByContentAndName() throws Exception {
         detectMIMETypeByContentAndName("application/rdf+xml", "src/test/resources/application/rss1");
     }

Modified: incubator/any23/trunk/any23-core/src/test/java/org/deri/any23/plugin/Any23PluginManagerTest.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/any23-core/src/test/java/org/deri/any23/plugin/Any23PluginManagerTest.java?rev=1229627&r1=1229626&r2=1229627&view=diff
==============================================================================
--- incubator/any23/trunk/any23-core/src/test/java/org/deri/any23/plugin/Any23PluginManagerTest.java (original)
+++ incubator/any23/trunk/any23-core/src/test/java/org/deri/any23/plugin/Any23PluginManagerTest.java Tue Jan 10 16:32:28 2012
@@ -57,7 +57,7 @@ public class Any23PluginManagerTest {
     @Test
     public <T> void testGetClassesInPackageFromJAR() throws IOException {
         Set<Class<T>> classes = new HashSet<Class<T>>();
-                manager.getClassesInPackageFromJAR(
+                manager.loadClassesInPackageFromJAR(
                         TARGET_TEST_JAR,
                         "org.hsqldb.store",
                         null,
@@ -74,7 +74,7 @@ public class Any23PluginManagerTest {
         decompressJar(TARGET_TEST_JAR, tmpDir);
 
         Set<Class<T>> classes = new HashSet<Class<T>>();
-        manager.getClassesInPackageFromDir(
+        manager.loadClassesInPackageFromDir(
                 tmpDir,
                 "org.hsqldb.store",
                 null,
@@ -85,14 +85,14 @@ public class Any23PluginManagerTest {
 
     @Test
     public <T> void testGetClassesFromClasspath() throws IOException {
-        Set<Class<T>> clazzes = manager.getClassesInPackage("org.deri.any23", null);
-        Assert.assertEquals(188, clazzes.size());
+        Set<Class<T>> clazzes = manager.getClassesInPackage("org.deri.any23.plugin", null);
+        Assert.assertTrue(clazzes.size() >= 4);
     }
 
     @Test
     public void testGetTools() throws IOException {
         Class<Tool>[] tools = manager.getTools();
-        Assert.assertEquals(7, tools.length);
+        Assert.assertTrue(tools.length > 0); // NOTE: Punctual tool detection verification done by ToolRunnerTest.java
     }
 
     @Test
@@ -101,6 +101,7 @@ public class Any23PluginManagerTest {
         Assert.assertEquals(0, extractorPlugins.length);
     }
 
+    // TODO: move into FileUtils
     private void decompressJar(File jarFile, File destination) throws IOException {
         final int BUFFER = 1024 * 1024;
         BufferedOutputStream dest = null;

Modified: incubator/any23/trunk/any23-core/src/test/java/org/deri/any23/vocab/RDFSchemaUtilsTest.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/any23-core/src/test/java/org/deri/any23/vocab/RDFSchemaUtilsTest.java?rev=1229627&r1=1229626&r2=1229627&view=diff
==============================================================================
--- incubator/any23/trunk/any23-core/src/test/java/org/deri/any23/vocab/RDFSchemaUtilsTest.java (original)
+++ incubator/any23/trunk/any23-core/src/test/java/org/deri/any23/vocab/RDFSchemaUtilsTest.java Tue Jan 10 16:32:28 2012
@@ -23,6 +23,7 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.io.ByteArrayOutputStream;
+import java.io.PrintStream;
 
 /**
  * Test case for {@link RDFSchemaUtils}.
@@ -36,26 +37,28 @@ public class RDFSchemaUtilsTest {
     /**
      * Test case for
      * {@link org.deri.any23.vocab.RDFSchemaUtils#serializeVocabularies(
-     * org.deri.any23.vocab.RDFSchemaUtils.VocabularyFormat, java.io.OutputStream)} with <i>NTriples</i> format.
+     * org.deri.any23.vocab.RDFSchemaUtils.VocabularyFormat, java.io.PrintStream)} with <i>NTriples</i> format.
      */
     @Test
     public void testSerializeVocabulariesNTriples() {
-        serializeVocabularies(RDFSchemaUtils.VocabularyFormat.NTriples, 802);
+        serializeVocabularies(RDFSchemaUtils.VocabularyFormat.NTriples, 844);
     }
 
     /**
      * Test case for
      * {@link org.deri.any23.vocab.RDFSchemaUtils#serializeVocabularies(
-     * org.deri.any23.vocab.RDFSchemaUtils.VocabularyFormat, java.io.OutputStream)} with <i>RDFXML</i> format.
+     * org.deri.any23.vocab.RDFSchemaUtils.VocabularyFormat, java.io.PrintStream)} with <i>RDFXML</i> format.
      */
     @Test
     public void testSerializeVocabulariesRDFXML() {
-        serializeVocabularies(RDFSchemaUtils.VocabularyFormat.RDFXML, 2065);
+        serializeVocabularies(RDFSchemaUtils.VocabularyFormat.RDFXML, 2175 + 31); // Effective lines + separators.
     }
 
     private void serializeVocabularies(RDFSchemaUtils.VocabularyFormat format, int expectedLines) {
         final ByteArrayOutputStream baos = new ByteArrayOutputStream();
-        RDFSchemaUtils.serializeVocabularies(format, baos);
+        final PrintStream ps = new PrintStream(baos);
+        RDFSchemaUtils.serializeVocabularies(format, ps);
+        ps.close();
         final String output = baos.toString();
         logger.debug(output);
         final int occurrences= StringUtils.countOccurrences(output, "\n");

Added: incubator/any23/trunk/any23-core/src/test/java/org/deri/any23/vocab/VocabularyTest.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/any23-core/src/test/java/org/deri/any23/vocab/VocabularyTest.java?rev=1229627&view=auto
==============================================================================
--- incubator/any23/trunk/any23-core/src/test/java/org/deri/any23/vocab/VocabularyTest.java (added)
+++ incubator/any23/trunk/any23-core/src/test/java/org/deri/any23/vocab/VocabularyTest.java Tue Jan 10 16:32:28 2012
@@ -0,0 +1,109 @@
+/*
+ * Copyright 2008-2010 Digital Enterprise Research Institute (DERI)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.deri.any23.vocab;
+
+
+import org.deri.any23.rdf.RDFUtils;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+import org.openrdf.model.URI;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * Test case for {@link Vocabulary} class.
+ *
+ * @author Michele Mostarda (mostarda@fbk.eu)
+ */
+public class VocabularyTest {
+
+    private static final String namespace = "http://test/vocab#";
+
+    private Vocabulary target;
+
+    @Before
+    public void setUp() {
+        this.target = new TargetVocabulary();
+    }
+
+    @After
+    public void tearDown() {
+        this.target = null;
+    }
+
+    /** All three properties declared by the fixture must be reported, commented or not. */
+    @Test
+    public void testGetProperties() {
+        final List<URI> found = Arrays.asList(target.getProperties());
+        Assert.assertEquals(3, found.size());
+        Assert.assertTrue(found.contains(RDFUtils.uri("http://test/vocab#prop1")));
+        Assert.assertTrue(found.contains(RDFUtils.uri("http://test/vocab#prop2")));
+        Assert.assertTrue(found.contains(RDFUtils.uri("http://test/vocab#prop3")));
+    }
+
+    /** All three classes declared by the fixture must be reported, commented or not. */
+    @Test
+    public void testGetClasses() {
+        final List<URI> found = Arrays.asList(target.getClasses());
+        Assert.assertEquals(3, found.size());
+        Assert.assertTrue(found.contains(RDFUtils.uri("http://test/vocab#Class1")));
+        Assert.assertTrue(found.contains(RDFUtils.uri("http://test/vocab#Class2")));
+        Assert.assertTrue(found.contains(RDFUtils.uri("http://test/vocab#Class3")));
+    }
+
+    @Test
+    public void testGetComments() {
+        Assert.assertEquals("Comment class 1.", target.getCommentFor(RDFUtils.uri("http://test/vocab#Class1")));
+        Assert.assertEquals("Comment class 2.", target.getCommentFor(RDFUtils.uri("http://test/vocab#Class2")));
+        Assert.assertEquals("Comment prop 1.", target.getCommentFor(RDFUtils.uri("http://test/vocab#prop1")));
+        Assert.assertEquals("Comment prop 2.", target.getCommentFor(RDFUtils.uri("http://test/vocab#prop2")));
+        Assert.assertEquals(4, target.getComments().size());
+    }
+
+    /**
+     * Vocabulary fixture: three properties and three classes, two of each annotated with a comment.
+     */
+    class TargetVocabulary extends Vocabulary {
+
+        /**
+         * Passes the test namespace to the {@link Vocabulary} constructor.
+         */
+        public TargetVocabulary() {
+            super(namespace);
+        }
+
+        @Comment("Comment prop 1.")
+        public final URI property1 = createProperty(namespace, "prop1");
+        @Comment("Comment prop 2.")
+        public final URI property2 = createProperty(namespace, "prop2");
+
+        public final URI property3 = createProperty(namespace, "prop3");
+
+        @Comment("Comment class 1.")
+        public final URI class1 = createClass(namespace, "Class1");
+        @Comment("Comment class 2.")
+        public final URI class2 = createClass(namespace, "Class2");
+
+        public final URI class3 = createClass(namespace, "Class3");
+
+    }
+
+}

Added: incubator/any23/trunk/any23-core/src/test/java/org/deri/any23/writer/WriterRegistryTest.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/any23-core/src/test/java/org/deri/any23/writer/WriterRegistryTest.java?rev=1229627&view=auto
==============================================================================
--- incubator/any23/trunk/any23-core/src/test/java/org/deri/any23/writer/WriterRegistryTest.java (added)
+++ incubator/any23/trunk/any23-core/src/test/java/org/deri/any23/writer/WriterRegistryTest.java Tue Jan 10 16:32:28 2012
@@ -0,0 +1,97 @@
+/*
+ * Copyright 2008-2010 Digital Enterprise Research Institute (DERI)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.deri.any23.writer;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.io.ByteArrayOutputStream;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * Test case for {@link WriterRegistry}.
+ *
+ * @author Michele Mostarda (mostarda@fbk.eu)
+ */
+public class WriterRegistryTest {
+
+    private static final int NUM_OF_WRITERS = 7;
+
+    private final WriterRegistry target = WriterRegistry.getInstance();
+
+    @Test
+    public void testGetIdentifiers() {
+        final String[] identifiers = target.getIdentifiers();
+        Assert.assertTrue(NUM_OF_WRITERS <= identifiers.length);
+        assertUnique(identifiers);
+    }
+
+    @Test
+    public void testHasIdentifier() {
+        Assert.assertTrue(target.hasIdentifier(target.getIdentifiers()[0]));
+    }
+
+    @Test
+    public void testGetMimeTypes() {
+        // At least one MIME type must be registered.
+        Assert.assertTrue(target.getMimeTypes().length > 0);
+    }
+
+    @Test
+    public void testGetWriters() {
+        Assert.assertTrue(NUM_OF_WRITERS <= target.getWriters().length);
+    }
+
+    @Test
+    public void testGetWriterByIdentifier() {
+        // Every identifier returned by the registry must resolve to a non-null writer.
+        for (String identifier : target.getIdentifiers()) {
+            Assert.assertNotNull(target.getWriterByIdentifier(identifier));
+        }
+    }
+
+    @Test
+    public void testGetWriterInstanceByIdentifier() {
+        // One sink is reused for every writer instantiated below.
+        final ByteArrayOutputStream sink = new ByteArrayOutputStream();
+        for (String identifier : target.getIdentifiers()) {
+            Assert.assertNotNull(target.getWriterInstanceByIdentifier(identifier, sink));
+        }
+    }
+
+    @Test
+    public void testGetWritersByMimeType() {
+        // Collect the distinct writer classes reachable through any registered MIME type.
+        final Set<Class<? extends FormatWriter>> writerClasses = new HashSet<Class<? extends FormatWriter>>();
+        for (String mimeType : target.getMimeTypes()) {
+            writerClasses.addAll(Arrays.asList(target.getWritersByMimeType(mimeType)));
+        }
+        Assert.assertEquals(NUM_OF_WRITERS, writerClasses.size());
+    }
+
+    /** Fails on the first element that occurs more than once in the given array. */
+    private void assertUnique(String[] list) {
+        final Set<String> seen = new HashSet<String>();
+        for (String candidate : list) {
+            // Set.add returns false when the element is already present.
+            if (!seen.add(candidate)) Assert.fail("Element " + candidate + " already defined.");
+        }
+    }
+
+}

Added: incubator/any23/trunk/any23-core/src/test/resources/application/trix/test1.trx
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/any23-core/src/test/resources/application/trix/test1.trx?rev=1229627&view=auto
==============================================================================
--- incubator/any23/trunk/any23-core/src/test/resources/application/trix/test1.trx (added)
+++ incubator/any23/trunk/any23-core/src/test/resources/application/trix/test1.trx Tue Jan 10 16:32:28 2012
@@ -0,0 +1,21 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<TriX xmlns="http://www.w3.org/2004/03/trix/trix-1/">
+   <graph>
+      <uri>http://example.org/graph1</uri>
+      <triple>
+         <uri>http://example.org/Bob</uri>
+         <uri>http://example.org/wife</uri>
+         <uri>http://example.org/Mary</uri>
+      </triple>
+      <triple>
+         <uri>http://example.org/Bob</uri>
+         <uri>http://example.org/name</uri>
+         <plainLiteral>Bob</plainLiteral>
+      </triple>
+      <triple>
+         <uri>http://example.org/Mary</uri>
+         <uri>http://example.org/age</uri>
+         <typedLiteral datatype="http://www.w3.org/2001/XMLSchema#integer">32</typedLiteral>
+      </triple>
+   </graph>
+</TriX>
\ No newline at end of file

Added: incubator/any23/trunk/any23-core/src/test/resources/html/rdfa/opengraph-structured-properties.html
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/any23-core/src/test/resources/html/rdfa/opengraph-structured-properties.html?rev=1229627&view=auto
==============================================================================
--- incubator/any23/trunk/any23-core/src/test/resources/html/rdfa/opengraph-structured-properties.html (added)
+++ incubator/any23/trunk/any23-core/src/test/resources/html/rdfa/opengraph-structured-properties.html Tue Jan 10 16:32:28 2012
@@ -0,0 +1,14 @@
+<html prefix="og: http://ogp.me/ns#">
+<head>
+    <meta property="og:audio" content="http://example.com/bond/theme.mp3"/>
+    <meta property="og:description"
+          content="Sean Connery found fame and fortune as the suave, sophisticated British agent, James Bond."/>
+    <meta property="og:determiner" content="the"/>
+    <meta property="og:locale" content="en_UK"/>
+    <meta property="og:locale:alternate" content="fr_FR"/>
+    <meta property="og:locale:alternate" content="es_ES"/>
+    <meta property="og:site_name" content="IMDb"/>
+    <meta property="og:video" content="http://example.com/bond/trailer.swf"/>
+</head>
+<body></body>
+</html>
\ No newline at end of file

Added: incubator/any23/trunk/any23-core/src/test/resources/org/deri/any23/extractor/csv/test-type.csv
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/any23-core/src/test/resources/org/deri/any23/extractor/csv/test-type.csv?rev=1229627&view=auto
==============================================================================
--- incubator/any23/trunk/any23-core/src/test/resources/org/deri/any23/extractor/csv/test-type.csv (added)
+++ incubator/any23/trunk/any23-core/src/test/resources/org/deri/any23/extractor/csv/test-type.csv Tue Jan 10 16:32:28 2012
@@ -0,0 +1,4 @@
+fieldname,fieldvalue
+k1,5.2
+k2,7.9
+k3,10
\ No newline at end of file

Modified: incubator/any23/trunk/any23-service/src/main/java/org/deri/any23/servlet/Servlet.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/any23-service/src/main/java/org/deri/any23/servlet/Servlet.java?rev=1229627&r1=1229626&r2=1229627&view=diff
==============================================================================
--- incubator/any23/trunk/any23-service/src/main/java/org/deri/any23/servlet/Servlet.java (original)
+++ incubator/any23/trunk/any23-service/src/main/java/org/deri/any23/servlet/Servlet.java Tue Jan 10 16:32:28 2012
@@ -59,6 +59,7 @@ public class Servlet extends HttpServlet
         final WebResponder responder = new WebResponder(this, resp);
         final String format = getFormatFromRequestOrNegotiation(req);
         final boolean report = isReport(req);
+        final boolean annotate = isAnnotated(req);
         if (format == null) {
             responder.sendError(406, "Client accept header does not include a supported output format", report);
             return;
@@ -69,13 +70,14 @@ public class Servlet extends HttpServlet
             return;
         }
         final ExtractionParameters eps = getExtractionParameters(req);
-        responder.runExtraction(createHTTPDocumentSource(responder, uri, report), eps, format, report);
+        responder.runExtraction(createHTTPDocumentSource(responder, uri, report), eps, format, report, annotate);
     }
 
     @Override
     protected void doPost(HttpServletRequest req, HttpServletResponse resp) throws IOException {
         final WebResponder responder = new WebResponder(this, resp);
         final boolean report = isReport(req);
+        final boolean annotate = isAnnotated(req);
         if (req.getContentType() == null) {
             responder.sendError(400, "Invalid POST request, no Content-Type for the message body specified", report);
             return;
@@ -90,7 +92,7 @@ public class Servlet extends HttpServlet
         if ("application/x-www-form-urlencoded".equals(getContentTypeHeader(req))) {
             if (uri != null) {
                 log("Attempting conversion to '" + format + "' from URI <" + uri + ">");
-                responder.runExtraction(createHTTPDocumentSource(responder, uri, report), eps, format, report);
+                responder.runExtraction(createHTTPDocumentSource(responder, uri, report), eps, format, report, annotate);
                 return;
             }
             if (req.getParameter("body") == null) {
@@ -106,7 +108,7 @@ public class Servlet extends HttpServlet
                     new StringDocumentSource(req.getParameter("body"), Servlet.DEFAULT_BASE_URI, type),
                     eps,
                     format,
-                    report
+                    report, annotate
             );
             return;
         }
@@ -119,7 +121,7 @@ public class Servlet extends HttpServlet
                 ),
                 eps,
                 format,
-                report
+                report, annotate
         );
     }
 
@@ -243,19 +245,29 @@ public class Servlet extends HttpServlet
         return true;
     }
 
-    // TODO: add possibility to specify validation={none|validate|validate+fix}
+    /** Resolves the 'validation-mode' request parameter to a ValidationMode (case-insensitive match). */
+    private ValidationMode getValidationMode(HttpServletRequest request) {
+        final String paramName = "validation-mode";
+        final String requested = request.getParameter(paramName);
+        // An absent parameter is treated exactly like an explicit "none".
+        if (requested == null || "none".equalsIgnoreCase(requested)) return ValidationMode.None;
+        if ("validate".equalsIgnoreCase(requested)) return ValidationMode.Validate;
+        if ("validate-fix".equalsIgnoreCase(requested)) return ValidationMode.ValidateAndFix;
+        // Any other value is rejected instead of being mapped to a default.
+        throw new IllegalArgumentException(String.format("Invalid value '%s' for '%s' parameter.", requested, paramName));
+    }
+    
     private ExtractionParameters getExtractionParameters(HttpServletRequest request) {
-        final ValidationMode mode =
-                request.getParameter("fix") != null
-                        ?
-                ValidationMode.ValidateAndFix
-                        :
-                ValidationMode.None;
+        final ValidationMode mode = getValidationMode(request);
         return new ExtractionParameters(DefaultConfiguration.singleton(), mode);
     }
 
     private boolean isReport(HttpServletRequest request) {
         return request.getParameter("report") != null;
     }
-    
+
+    private boolean isAnnotated(HttpServletRequest request) {
+        return request.getParameter("annotate") != null; // Only the parameter's presence is checked, not its value.
+    }
+
 }
\ No newline at end of file

Modified: incubator/any23/trunk/any23-service/src/main/java/org/deri/any23/servlet/WebResponder.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/any23-service/src/main/java/org/deri/any23/servlet/WebResponder.java?rev=1229627&r1=1229626&r2=1229627&view=diff
==============================================================================
--- incubator/any23/trunk/any23-service/src/main/java/org/deri/any23/servlet/WebResponder.java (original)
+++ incubator/any23/trunk/any23-service/src/main/java/org/deri/any23/servlet/WebResponder.java Tue Jan 10 16:32:28 2012
@@ -43,6 +43,8 @@ import java.util.List;
  */
 class WebResponder {
 
+    private static final WriterRegistry writerRegistry = WriterRegistry.getInstance();
+
     /**
      * Library facade.
      */
@@ -89,10 +91,14 @@ class WebResponder {
         return runner;
     }
 
-    public void runExtraction(DocumentSource in, ExtractionParameters eps, String format, boolean report)
-    throws IOException {
+    public void runExtraction(
+            DocumentSource in,
+            ExtractionParameters eps,
+            String format,
+            boolean report, boolean annotate
+    ) throws IOException {
         if (in == null) return;
-        if (!initRdfWriter(format, report)) return;
+        if (!initRdfWriter(format, report, annotate)) return;
         final ExtractionReport er;
         try {
             er = runner.extract(eps, in, rdfWriter);
@@ -221,7 +227,7 @@ class WebResponder {
         ps.println("<data>");
         ps.println("<![CDATA[");
         try {
-            ps.write(byteOutStream.toByteArray());
+            ps.write(data);
         } catch (IOException ioe) {
             ps.println("An error occurred while serializing data.");
             ioe.printStackTrace(ps);
@@ -252,19 +258,19 @@ class WebResponder {
         }
     }
 
-    private boolean initRdfWriter(String format, boolean report) throws IOException {
-        FormatWriter fw = getFormatWriter(format);
+    private boolean initRdfWriter(String format, boolean report, boolean annotate) throws IOException {
+        final FormatWriter fw = getFormatWriter(format, annotate);
         if (fw == null) {
             sendError(
                     400,
-                    "Invalid format '" + format + "', try one of rdfxml, turtle, ntriples, nquads",
+                    "Invalid format '" + format + "', try one of: [rdfxml, turtle, ntriples, nquads, trix, json]",
                     null,
                     null,
                     report
             );
             return false;
         }
-        outputMediaType = fw.getMIMEType();
+        outputMediaType = WriterRegistry.getMimeType( fw.getClass() );
         List<TripleHandler> tripleHandlers = new ArrayList<TripleHandler>();
         tripleHandlers.add(new IgnoreAccidentalRDFa(fw));
         tripleHandlers.add(new CountingTripleHandler());
@@ -274,26 +280,28 @@ class WebResponder {
         return true;
     }
 
-    private FormatWriter getFormatWriter(String format) throws IOException {
+    private FormatWriter getFormatWriter(String format, boolean annotate) throws IOException {
+        final String finalFormat;
         if ("rdf".equals(format) || "xml".equals(format) || "rdfxml".equals(format)) {
-            return new RDFXMLWriter(byteOutStream);
-        }
-        if ("turtle".equals(format) || "ttl".equals(format)) {
-            return new TurtleWriter(byteOutStream);
-        }
-        if ("n3".equals(format)) {
-            return new TurtleWriter(byteOutStream, true);
-        }
-        if ("n-triples".equals(format) || "ntriples".equals(format) || "nt".equals(format)) {
-            return new NTriplesWriter(byteOutStream);
-        }
-        if("nquads".equals(format) || "n-quads".equals(format) || "nq".equals(format)) {
-            return new NQuadsWriter(byteOutStream);
-        }
-        if("json".equals(format)) {
-            return new JSONWriter(byteOutStream);
+            finalFormat = "rdfxml";
+        } else if ("turtle".equals(format) || "ttl".equals(format)) {
+            finalFormat = "turtle";
+        } else if ("n3".equals(format)) {
+            finalFormat = "turtle";
+        } else if ("n-triples".equals(format) || "ntriples".equals(format) || "nt".equals(format)) {
+            finalFormat = "ntriples";
+        } else if("nquads".equals(format) || "n-quads".equals(format) || "nq".equals(format)) {
+            finalFormat = "nquads";
+        } else if("trix".equals(format)) {
+            finalFormat = "trix";
+        } else if("json".equals(format)) {
+            finalFormat = "json";
+        } else {
+            return null;
         }
-        return null;
+        final FormatWriter writer = writerRegistry.getWriterInstanceByIdentifier(finalFormat, byteOutStream);
+        writer.setAnnotated(annotate);
+        return writer;
     }
 
 }

Modified: incubator/any23/trunk/any23-service/src/main/webapp/resources/form.html
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/any23-service/src/main/webapp/resources/form.html?rev=1229627&r1=1229626&r2=1229627&view=diff
==============================================================================
--- incubator/any23/trunk/any23-service/src/main/webapp/resources/form.html (original)
+++ incubator/any23/trunk/any23-service/src/main/webapp/resources/form.html Tue Jan 10 16:32:28 2012
@@ -73,7 +73,7 @@ function init() {
    </div>
     <h1>Any23 - Anything To Triples - Live Service Demo</h1>
     <p>Parses Microformats, RDFa, Microdata, RDF/XML, Turtle, N-Triples and NQuads.</p>
-    <p>Download and install Any23: visit the <a href="http://code.google.com/p/any23/">Developers Site</a> and the <a href="http://developers.any23.org/">Documentation</a>.
+    <p>Download and install Any23: visit the <a href="http://code.google.com/p/any23/" target="_blank">Developers Site</a> and the <a href="http://developers.any23.org/" target="_blank">Documentation</a>.
     <hr />
     <h2>Convert document at URI</h2>
     <form method="get" action="any23/">
@@ -83,13 +83,23 @@ function init() {
           <option value="best" selected="selected">best</option>
           <option value="turtle">turtle</option>
           <option value="ntriples">ntriples</option>
-          <option value="nquads">nquads</option>
           <option value="rdfxml">rdfxml</option>
+          <option value="nquads">nquads</option>
+          <option value="trix">trix</option>
           <option value="json">json</option>
         </select>/<input type="text" size="50" name="uri" value="http://twitter.com/cygri" />
       </code>
-      <input type="checkbox" name="fix">Fix<a class="sPopup" href="javascript:" rel="sPopup-fix">[?]</a></input>
-      <input type="checkbox" name="report">Report<a class="sPopup" href="javascript:" rel="sPopup-report">[?]</a></input>
+      Validation:
+      <select id="url-validation" name="validation-mode" onclick="if(document.getElementById('url-validation').value == 'validate') { document.getElementById('url-report').checked = true; }">
+          <option value="none">none</option>
+          <option value="validate">validate</option>
+          <option value="validate-fix">validate+fix</option>
+      </select>
+      <a class="sPopup" href="javascript:" rel="sPopup-fix">[?]</a></input>
+      Report:<input id="url-report" type="checkbox" name="report">
+      <a class="sPopup" href="javascript:" rel="sPopup-report">[?]</a></input>
+      Annotate:<input id="url-annotate" type="checkbox" name="annotate">
+      <a class="sPopup" href="javascript:" rel="sPopup-annotate">[?]</a></input>
       <input type="submit" value="Convert" />
     </form>
     <hr />
@@ -104,6 +114,7 @@ function init() {
           <option value="text/turtle">Turtle (text/turtle)</option>
           <option value="text/nt">N-Triples (text/nt)</option>
           <option value="text/nq">N-Quads (text/nq)</option>
+          <option value="application/trix">TriX (application/trix)</option>
           <option value="application/rdf+xml">RDF/XML (application/rdf+xml)</option>
           <option value="text/csv">CSV (text/csv)</option>
         </select>
@@ -112,12 +123,22 @@ function init() {
           <option value="best" selected="selected">best (content-negotiated)</option>
           <option value="turtle">turtle</option>
           <option value="ntriples">ntriples</option>
-          <option value="nquads">nquads</option>
           <option value="rdfxml">rdfxml</option>
+          <option value="nquads">nquads</option>
+          <option value="trix">trix</option>
           <option value="json">json</option>
         </select>
-        <input type="checkbox" name="fix">Fix<a class="sPopup" href="javascript:" rel="sPopup-fix">[?]</a></input>
-        <input type="checkbox" name="report">Report<a class="sPopup" href="javascript:" rel="sPopup-report">[?]</a></input>
+          Validation:
+        <select id="direct-validation" name="validation-mode" onclick="if(document.getElementById('direct-validation').value == 'validate') { document.getElementById('direct-report').checked = true; }">
+            <option value="none">none</option>
+            <option value="validate">validate</option>
+            <option value="validate-fix">validate+fix</option>
+        </select>
+        <a class="sPopup" href="javascript:" rel="sPopup-fix">[?]</a></input>
+        Report:<input id="direct-report" type="checkbox" name="report">
+        <a class="sPopup" href="javascript:" rel="sPopup-report">[?]</a></input>
+        Annotate:<input id="direct-annotate" type="checkbox" name="annotate">
+        <a class="sPopup" href="javascript:" rel="sPopup-annotate">[?]</a></input>
         <input type="submit" value="Convert" /><br />
         <textarea name="body" rows="12" cols="80">@prefix foaf: &lt;http://xmlns.com/foaf/0.1/&gt; .
       
@@ -152,8 +173,9 @@ function init() {
       query parameters:
     </p>
     <table rules="all">
-      <tr><th>uri</th><td>URI of an input document</td></tr>
-      <tr><th>format</th><td>Desired output format, defaults to <code>best</code></td></tr>
+      <tr><th>uri</th><td>URI of an input document.</td></tr>
+      <tr><th>format</th><td>Desired output format, defaults to <code>best</code>.</td></tr>
+      <tr><th>annotate</th><td>If specified, the output RDF will contain extractor-specific scope comments.</td></tr>
     </table>
     <p>The response is the input document converted to the desired output format.</p>
 
@@ -172,8 +194,9 @@ function init() {
       <tr><th>HTML</th><td><code>text/html</code></td></tr>
       <tr><th>RDF/XML</th><td><code>application/rdf+xml</code></td></tr>
       <tr><th>Turtle</th><td><code>text/turtle</code></td></tr>
-      <tr><th>N-Triples</th><td><code>text/plain</code></td></tr>
-      <tr><th>N-Quads</th><td><code>text/plain</code></td></tr>
+      <tr><th>N-Triples</th><td><code>text/nt</code></td></tr>
+      <tr><th>N-Quads</th><td><code>text/nq</code></td></tr>
+      <tr><th>TriX</th><td><code>application/trix</code></td></tr>
     </table>
     <p>Example POST request:</p>
     <pre>POST <span id="app-path">/</span>rdfxml HTTP/1.0
@@ -197,8 +220,9 @@ Content-Length: 174
     </p>
     <table rules="all">
       <tr><th>type</th><td>Media type of the input, see the table above. If not present, auto-detection will be attempted.</td></tr>
-      <tr><th>body</th><td>Document body to be converted</td></tr>
-      <tr><th>format</th><td>Desired output format; defaults to <code>best</code></td></tr>
+      <tr><th>body</th><td>Document body to be converted.</td></tr>
+      <tr><th>format</th><td>Desired output format; defaults to <code>best</code>.</td></tr>
+       <tr><th>annotate</th><td>If specified the output RDF will contain extractor specific scope comments.</td></tr>
     </table>
 
     <h3>Output formats</h3>
@@ -206,14 +230,16 @@ Content-Length: 174
     <ul>
       <li><code>best</code> for content negotiation according to the client's <code>Accept</code> HTTP header</li>
       <li><code>turtle</code>, <code>ttl</code>, <code>n3</code> for
-        <a href="http://www.w3.org/TeamSubmission/turtle/">Turtle</a>/<a href="http://www.w3.org/DesignIssues/Notation3">N3</a></li>
+        <a href="http://www.w3.org/TeamSubmission/turtle/" target="_blank">Turtle</a>/<a href="http://www.w3.org/DesignIssues/Notation3" target="_blank">N3</a></li>
       <li><code>ntriples</code>, <code>nt</code> for
-        <a href="http://www.w3.org/TR/rdf-testcases/#ntriples">N-Triples</a></li>
+        <a href="http://www.w3.org/TR/rdf-testcases/#ntriples" target="_blank">N-Triples</a></li>
       <li><code>nquads</code>, <code>nq</code> for
-        <a href="http://sw.deri.org/2008/07/n-quads/">N-Quads</a></li>
+        <a href="http://sw.deri.org/2008/07/n-quads/" target="_blank">N-Quads</a></li>
+      <li><code>trix</code> for
+        <a href="http://www.w3.org/2004/03/trix/" target="_blank">TriX</a></li>
       <li><code>rdfxml</code>, <code>rdf</code>, <code>xml</code> for
-        <a href="http://www.w3.org/TR/rdf-syntax-grammar/">RDF/XML</a></li>
-      <li><code>json</code> for <a href="http://json.org/">JSON</a></li>
+        <a href="http://www.w3.org/TR/rdf-syntax-grammar/" target="_blank">RDF/XML</a></li>
+      <li><code>json</code> for <a href="http://json.org/" target="_blank">JSON</a></li>
     </ul>
 
     <h3>Error reporting</h3>
@@ -236,20 +262,42 @@ Content-Length: 174
     </table>
     <h3>Report Format</h3>
     <p>The XML report format is subjected to changes. The current content is described in section
-       <a href="http://developers.any23.org/service.html">Any23 Service</a>.
+       <a href="http://developers.any23.org/service.html" target="_blank">Any23 Service</a>.
     </p>
     <hr />
     <p><b>Any23 v.@any23.version@</b></p>
-    <p><a href="http://code.google.com/p/any23/">Any23 project homepage</a> | Hosted at <a href="http://deri.ie/">DERI, NUI Galway</a></p>
+    <p><a href="http://code.google.com/p/any23/" target="_blank">Any23 project homepage</a> | Hosted at <a href="http://deri.ie/" target="_blank">DERI, NUI Galway</a></p>
     <div id="sPopup-fix">
-	    <p>If the <i>Fix</i> checkbox is activated, <b>Any23</b> tries to fix some
-            <a href="http://rdfa.info/wiki/Common-publishing-mistakes">common RDFa mistakes</a> before performing the extraction.
-            Fixing is performed according a set of fully customizable rules. Please refer to the <a href="http://developers.any23.org/dev-validation-fix.html">developer guide</a> for any further detail.
+	    <p> 
+            The <b>Any23</b> service tries to fix some <a href="http://rdfa.info/wiki/Common-publishing-mistakes" target="_blank">common issues</a>
+            before performing a metadata extraction. The fixing is performed according to a set of fully customizable rules.
+	        <br/>
+            The following <i>Validation</i> options are available.
+            <b>none</b>: no validation will be performed. 
+            <b>validate</b>: common errors within the document will be detected and reported. When this option is selected, the
+                <b>Report</b> flag will be activated to visualize the validation outcome.
+            <br/>
+            <b>validate+fix</b>: the common issues will be detected and a fix will be applied when available.
+            <br/>
+            Please refer to the <a href="http://developers.any23.org/dev-validation-fix.html" target="_blank">Developer Guide</a>
+            for any further detail.
         </p>
     </div>
     <div id="sPopup-report">
-	    <p>If the <i>Report</i> and <i>Fix</i> checkboxes are both activated, <b>Any23</b> wraps the RDF extracted triples with an XML response providing a short report
-            on which RDFa errors have been detected and which rules have been triggered to perform the fix. Please refer to the <a href="http://developers.any23.org/dev-validation-fix.html">developer guide</a> for any further detail.
+	    <p>
+        If the <i>Report</i> checkbox is selected, the <b>Any23</b> service returns an <b>XML</b> output containing,
+        other than the <b>extracted RDF statements</b>, other information such as the list of the <b>activated extractors</b>
+        and the <b>detected errors</b>.
+        <br/>
+        If the <b>validation</b> or <b> validation and fix</b> has been activated then the report contains also a list
+        of the applied fixes.
+        </p>
+    </div>
+    <div id="sPopup-annotate">
+	    <p>
+        If the <i>Annotate</i> checkbox is selected, the <b>Any23</b> service returns an output expressed in the
+        selected <b>RDF</b> format containing also format-specific <b>comments</b> describing the activated extractor
+        scopes for every produced statement.
         </p>
     </div>
   </body>

Modified: incubator/any23/trunk/any23-service/src/test/java/org/deri/any23/servlet/ServletTest.java
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/any23-service/src/test/java/org/deri/any23/servlet/ServletTest.java?rev=1229627&r1=1229626&r2=1229627&view=diff
==============================================================================
--- incubator/any23/trunk/any23-service/src/test/java/org/deri/any23/servlet/ServletTest.java (original)
+++ incubator/any23/trunk/any23-service/src/test/java/org/deri/any23/servlet/ServletTest.java Tue Jan 10 16:32:28 2012
@@ -21,8 +21,10 @@ import org.deri.any23.http.HTTPClient;
 import org.deri.any23.source.DocumentSource;
 import org.deri.any23.source.FileDocumentSource;
 import org.deri.any23.source.StringDocumentSource;
+import org.deri.any23.util.StringUtils;
 import org.junit.After;
 import org.junit.Before;
+import org.junit.Ignore;
 import org.junit.Test;
 import org.mortbay.jetty.testing.HttpTester;
 import org.mortbay.jetty.testing.ServletTester;
@@ -35,6 +37,8 @@ import java.net.URLEncoder;
 /**
  * Test case for {@link org.deri.any23.servlet.Servlet} class.
  */
+// TODO: some test verifications are not strict enough.
+//       The assertContainsTag() doesn't verify the entire output content.
 public class ServletTest {
 
     private static String content;
@@ -223,7 +227,8 @@ public class ServletTest {
      * This test has been disabled in order to avoid external resources dependencies
      * @throws Exception
      */
-    // Deactivated online test @Test
+    @Test
+    @Ignore
     public void testGETwithURLEncoding() throws Exception {
         content = null;
         HttpTester response = doGetRequest("/best/http://semanticweb.org/wiki/Knud_M%C3%B6ller");
@@ -234,7 +239,8 @@ public class ServletTest {
      * This test has been disabled in order to avoid external resources dependencies
      * @throws Exception
      */
-    // Deactivated online test @Test
+    @Test
+    @Ignore
     public void testGETwithURLEncodingWithQuery() throws Exception {
         content = null;
         HttpTester response = doGetRequest("/best/http://semanticweb.org/wiki/Knud_M%C3%B6ller?appo=xxx");
@@ -245,7 +251,8 @@ public class ServletTest {
      * This test has been disabled in order to avoid external resources dependencies
      * @throws Exception
      */
-    // Deactivated online test @Test
+    @Test
+    @Ignore
     public void testGETwithURLEncodingWithFragment() throws Exception {
         content = null;
         HttpTester response = doGetRequest("/best/http://semanticweb.org/wiki/Knud_M%C3%B6ller#abcde");
@@ -399,6 +406,17 @@ public class ServletTest {
         assertContains(EXPECTED_JSON, response.getContent());
     }
 
+    @Test
+    public void testTriXResponseFormat() throws Exception {
+        String body = "<http://sub/1> <http://pred/1> \"123\"^^<http://datatype> <http://graph/1>.";
+        HttpTester response = doPostRequest("/trix", body, "text/n-quads");
+        Assert.assertEquals(200, response.getStatus());
+        final String content = response.getContent();
+        assertContainsTag("graph", content, 1);
+        assertContainsTag("uri", content, 3);
+        assertContainsTag("triple", content, 1);
+    }
+
     private HttpTester doGetRequest(String path) throws Exception {
         return doRequest(path, "GET");
     }
@@ -436,14 +454,27 @@ public class ServletTest {
     }
 
     private void assertContains(String expected, String container) {
-        if (container.contains(expected))
-            return;
+        if(expected.length() == 0)
+            throw new IllegalArgumentException("expected string must contains at lease one char.");
+        if (container.contains(expected)) return;
         Assert.fail("expected '" + expected + "' to be contained in '" + container + "'");
     }
 
+    private void assertContainsTag(String tag, String container, int occurrences) {
+        Assert.assertEquals(
+                String.format("Cannot find open tag %s %d times", tag, occurrences),
+                occurrences,
+                StringUtils.countOccurrences(container, "<" + tag + ">")
+        );
+        Assert.assertEquals(
+                String.format("Cannot find close tag %s %d times", tag, occurrences),
+                occurrences,
+                StringUtils.countOccurrences(container, "</" + tag + ">")
+        );
+    }
+
     private void assertContainsTag(String tag, String container) {
-        assertContains("<" + tag + ">", container);
-        assertContains("</" + tag + ">", container);
+        assertContainsTag(tag, container, 1);
     }
 
     /**

Added: incubator/any23/trunk/lib/README.txt
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/lib/README.txt?rev=1229627&view=auto
==============================================================================
--- incubator/any23/trunk/lib/README.txt (added)
+++ incubator/any23/trunk/lib/README.txt Tue Jan 10 16:32:28 2012
@@ -0,0 +1,13 @@
+
+===================
+Any23 External Repo
+===================
+
+This directory contains the libraries used by the Any23 modules that
+are not available within public Maven repositories.
+
+The install-deps.sh executes the installation of these libraries within
+the Any23 Google Code External Repo [1], which is the Any23 repository
+dedicated to host the external libraries.
+
+[1] https://any23.googlecode.com/svn/repo-ext/
\ No newline at end of file

Modified: incubator/any23/trunk/lib/install-deps.sh
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/lib/install-deps.sh?rev=1229627&r1=1229626&r2=1229627&view=diff
==============================================================================
--- incubator/any23/trunk/lib/install-deps.sh (original)
+++ incubator/any23/trunk/lib/install-deps.sh Tue Jan 10 16:32:28 2012
@@ -1,13 +1,45 @@
 #!/bin/bash
 
-mvn -f ../pom.xml      \
-     deploy:deploy-file\
-    -DgroupId=net.xeoh \
-    -DartifactId=jspf  \
-    -Dversion=0.9.0    \
-    -Dpackaging=jar    \
-    -Dfile=./lib/jspf.core-0.9.0.jar \
-    -Durl=svn:https://svn.apache.org/repos/asf/incubator/any23/repo/ \
-|| { echo "Error while installing project dep."; exit 1; }
+CURR_DIR="$(cd "$(dirname "$0")"; pwd -P)"
+
+EXT_REPO=https://svn.apache.org/repos/asf/incubator/any23/repo/
+
+echo Installing Any23 project dependencies in External Repo
+
+echo Installing [crawler4j-2.6.1]
+echo
+echo
+mvn -f $CURR_DIR/../pom.xml \
+     deploy:deploy-file    \
+    -DgroupId=edu.uci.ics  \
+    -DartifactId=crawler4j \
+    -Dversion=2.6.1        \
+    -Dpackaging=jar        \
+    -Dfile=$CURR_DIR/crawler4j-2.6.1.jar \
+    -Durl=svn:$EXT_REPO \
+    -DrepositoryId=any23-repository-external \
+|| { echo "Error while installing project dependency."; exit 1; }
+
+echo
+echo
+echo Installing [dsiutils-2.0.1]
+echo
+echo
+
+mvn -f $CURR_DIR/../pom.xml \
+     deploy:deploy-file     \
+    -DgroupId=it.unimi.dsi  \
+    -DartifactId=dsiutils   \
+    -Dversion=2.0.1         \
+    -Dpackaging=jar         \
+    -Dfile=$CURR_DIR/dsiutils-2.0.1.jar \
+    -Durl=svn:$EXT_REPO \
+    -DrepositoryId=any23-repository-external \
+|| { echo "Error while installing project dependency."; exit 1; }
+
+echo
+echo
+echo Dependencies installation completed successfully.
+echo
 
 exit 0 

Added: incubator/any23/trunk/plugins/README.txt
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/plugins/README.txt?rev=1229627&view=auto
==============================================================================
--- incubator/any23/trunk/plugins/README.txt (added)
+++ incubator/any23/trunk/plugins/README.txt Tue Jan 10 16:32:28 2012
@@ -0,0 +1,34 @@
+=============
+Any23 Plugins
+=============
+
+This is the root dir of the Any23 plugins.
+
+A plugin is an extension of the Any23 core and can be plugged using
+the Plugin Manager capabilities.
+
+Plugins
+=======
+
+basic-crawler
+-------------
+
+A CLI tool which extends the Rover CLI adding crawler specific
+capabilities.
+
+html-scraper
+------------
+
+The HTML scraper is able to convert any HTML page to triples
+containing the text scraped from the page.
+
+office-scraper
+--------------
+
+The Office scraper is able to read the main MS Office compatible
+formats and convert them to triples.
+
+integration-test
+----------------
+
+This module contains the integration tests for all the defined plugins.
\ No newline at end of file

Added: incubator/any23/trunk/plugins/basic-crawler/pom.xml
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/plugins/basic-crawler/pom.xml?rev=1229627&view=auto
==============================================================================
--- incubator/any23/trunk/plugins/basic-crawler/pom.xml (added)
+++ incubator/any23/trunk/plugins/basic-crawler/pom.xml Tue Jan 10 16:32:28 2012
@@ -0,0 +1,115 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+    Copyright 2008-2010 Digital Enterprise Research Institute (DERI)
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+
+    author: Michele Mostarda (michele.mostarda@gmail.com)
+    version: $Id$
+ -->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+
+    <parent>
+        <groupId>org.deri.any23</groupId>
+        <artifactId>any23-parent</artifactId>
+        <version>0.6.2-SNAPSHOT</version>
+        <relativePath>../../pom.xml</relativePath>
+    </parent>
+
+    <modelVersion>4.0.0</modelVersion>
+    <groupId>org.deri.any23.plugin</groupId>
+    <artifactId>basic-crawler</artifactId>
+    <version>1.0.0-SNAPSHOT</version>
+    <packaging>jar</packaging>
+    <name>Basic Crawler Plugin Module</name>
+    <description>Any23 plugin for crawling sites.</description>
+    <url>http://developers.any23.org</url>
+
+    <dependencies>
+        <!-- Sesame. -->
+        <dependency>
+            <groupId>org.openrdf.sesame</groupId>
+            <artifactId>sesame-model</artifactId>
+            <scope>provided</scope>
+        </dependency>
+
+        <!-- Any23 Core. -->
+        <dependency>
+            <groupId>org.deri.any23</groupId>
+            <artifactId>any23-core</artifactId>
+            <version>0.6.2-SNAPSHOT</version>
+            <scope>provided</scope>
+        </dependency>
+
+        <!-- BEGIN: Crawler4j -->
+        <dependency>
+            <groupId>edu.uci.ics</groupId>
+            <artifactId>crawler4j</artifactId>
+            <version>2.6.1</version>
+        </dependency>
+        <dependency>
+            <groupId>com.sleepycat</groupId>
+            <artifactId>je</artifactId>
+            <version>4.0.92</version>
+        </dependency>
+        <dependency>
+            <groupId>it.unimi.dsi</groupId>
+            <artifactId>fastutil</artifactId>
+            <version>6.4.1</version>
+        </dependency>
+        <dependency>
+            <groupId>it.unimi.dsi</groupId>
+            <artifactId>dsiutils</artifactId>
+            <version>2.0.1</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.httpcomponents</groupId>
+            <artifactId>httpclient</artifactId>
+            <version>4.1</version>
+        </dependency>
+        <!--TODO: resolve dependency conflict.-->
+        <dependency>
+            <groupId>commons-codec</groupId>
+            <artifactId>commons-codec</artifactId>
+            <version>1.4</version>
+        </dependency>
+        <!-- END: Crawler4j -->
+    </dependencies>
+
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-surefire-plugin</artifactId>
+                <configuration>
+                    <!-- Solve concurrency issues in Crawler4j internal status. -->
+                    <forkMode>always</forkMode>
+                </configuration>
+            </plugin>
+            <!-- Generates a self-contained JAR. -->
+            <plugin>
+                <artifactId>maven-assembly-plugin</artifactId>
+                <configuration>
+                    <finalName>${project.artifactId}-${project.version}-cli-plugin</finalName>
+                    <appendAssemblyId>false</appendAssemblyId>
+                    <descriptorRefs>
+                        <descriptorRef>jar-with-dependencies</descriptorRef>
+                    </descriptorRefs>
+                </configuration>
+            </plugin>
+        </plugins>
+    </build>
+</project>
+



Mime
View raw message