Return-Path: X-Original-To: apmail-incubator-any23-commits-archive@minotaur.apache.org Delivered-To: apmail-incubator-any23-commits-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id C4B509589 for ; Sat, 4 Feb 2012 18:09:17 +0000 (UTC) Received: (qmail 50284 invoked by uid 500); 4 Feb 2012 18:09:17 -0000 Delivered-To: apmail-incubator-any23-commits-archive@incubator.apache.org Received: (qmail 50248 invoked by uid 500); 4 Feb 2012 18:09:17 -0000 Mailing-List: contact any23-commits-help@incubator.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: any23-dev@incubator.apache.org Delivered-To: mailing list any23-commits@incubator.apache.org Received: (qmail 50241 invoked by uid 99); 4 Feb 2012 18:09:17 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Sat, 04 Feb 2012 18:09:17 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Sat, 04 Feb 2012 18:09:15 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id 699A2238890D; Sat, 4 Feb 2012 18:08:55 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1240571 - in /incubator/any23/trunk/core/src: main/java/org/apache/any23/extractor/rdfa/ test/java/org/apache/any23/ test/java/org/apache/any23/extractor/rdfa/ test/resources/html/rdfa/ Date: Sat, 04 Feb 2012 18:08:55 -0000 To: any23-commits@incubator.apache.org From: mostarda@apache.org X-Mailer: svnmailer-1.0.8-patched Message-Id: <20120204180855.699A2238890D@eris.apache.org> Author: mostarda Date: Sat Feb 4 18:08:54 2012 New Revision: 1240571 URL: 
http://svn.apache.org/viewvc?rev=1240571&view=rev Log: Fixed relative URI management in RDFa11Parser. Added stricter checks in RDFa11ExtractorTest (added verification of non-blocking issues). Fixed test files, added default namespace to be compliant with the RDFa 1.1 specification. This commit is related to issue #ANY23-42. Modified: incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/rdfa/RDFa11Parser.java incubator/any23/trunk/core/src/test/java/org/apache/any23/Any23Test.java incubator/any23/trunk/core/src/test/java/org/apache/any23/extractor/rdfa/RDFa11ExtractorTest.java incubator/any23/trunk/core/src/test/resources/html/rdfa/opengraph-structured-properties.html incubator/any23/trunk/core/src/test/resources/html/rdfa/rel-href.html incubator/any23/trunk/core/src/test/resources/html/rdfa/rel-rev.html incubator/any23/trunk/core/src/test/resources/html/rdfa/vocab.html Modified: incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/rdfa/RDFa11Parser.java URL: http://svn.apache.org/viewvc/incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/rdfa/RDFa11Parser.java?rev=1240571&r1=1240570&r2=1240571&view=diff ============================================================================== --- incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/rdfa/RDFa11Parser.java (original) +++ incubator/any23/trunk/core/src/main/java/org/apache/any23/extractor/rdfa/RDFa11Parser.java Sat Feb 4 18:08:54 2012 @@ -345,7 +345,12 @@ public class RDFa11Parser { * @return */ protected URI resolveURI(String uriStr) { - return RDFUtils.uri(uriStr); + return + isAbsoluteURI(uriStr) + ? 
+ RDFUtils.uri(uriStr) + : + RDFUtils.uri( this.documentBase.toExternalForm(), uriStr ); } /** @@ -505,7 +510,7 @@ public class RDFa11Parser { * @param extractionResult */ private void writeTriple(Resource s, URI p, Value o, ExtractionResult extractionResult) { - if(logger.isTraceEnabled()) logger.trace(String.format("writeTriple(%s %s %s)" , s, p, o)); + // if(logger.isTraceEnabled()) logger.trace(String.format("writeTriple(%s %s %s)" , s, p, o)); assert s != null : "subject is null."; assert p != null : "predicate is null."; assert o != null : "object is null."; @@ -524,7 +529,7 @@ public class RDFa11Parser { */ // TODO: add references to the RDFa 1.1 algorithm. private void processNode(Node currentElement, ExtractionResult extractionResult) throws Exception { - if(logger.isTraceEnabled()) logger.trace("processNode(" + DomUtils.getXPathForNode(currentElement) + ")"); + // if(logger.isTraceEnabled()) logger.trace("processNode(" + DomUtils.getXPathForNode(currentElement) + ")"); final EvaluationContext currentEvaluationContext = getContext(); try { if( @@ -883,7 +888,7 @@ public class RDFa11Parser { } private void pushMappings(Node sourceNode, List prefixMapList) { - logger.trace("pushMappings()"); + // logger.trace("pushMappings()"); final Map mapping = new HashMap(); for (PrefixMap prefixMap : prefixMapList) { @@ -896,7 +901,7 @@ public class RDFa11Parser { if(uriMappingStack.isEmpty()) return; final URIMapping peek = uriMappingStack.peek(); if( ! 
DomUtils.isAncestorOf(peek.sourceNode, node) ) { - logger.trace("popMappings()"); + // logger.trace("popMappings()"); uriMappingStack.pop(); } } Modified: incubator/any23/trunk/core/src/test/java/org/apache/any23/Any23Test.java URL: http://svn.apache.org/viewvc/incubator/any23/trunk/core/src/test/java/org/apache/any23/Any23Test.java?rev=1240571&r1=1240570&r2=1240571&view=diff ============================================================================== --- incubator/any23/trunk/core/src/test/java/org/apache/any23/Any23Test.java (original) +++ incubator/any23/trunk/core/src/test/java/org/apache/any23/Any23Test.java Sat Feb 4 18:08:54 2012 @@ -287,7 +287,7 @@ public class Any23Test extends Any23Onli final String bufferContent = byteArrayOutputStream.toString(); logger.debug(bufferContent); - Assert.assertSame("Unexpected number of triples.", 42, StringUtils.countNL(bufferContent)); + Assert.assertSame("Unexpected number of triples.", 65, StringUtils.countNL(bufferContent)); } @@ -321,8 +321,8 @@ public class Any23Test extends Any23Onli } @Test - public void testExtractionParameters() throws IOException, ExtractionException { - final int EXPECTED_TRIPLES = 3; + public void testExtractionParameters() throws IOException, ExtractionException, TripleHandlerException { + final int EXPECTED_TRIPLES = 6; Any23 runner = new Any23(); DocumentSource source = new FileDocumentSource( new File("src/test/resources/org/apache/any23/validator/missing-og-namespace.html"), @@ -336,15 +336,19 @@ public class Any23Test extends Any23Onli CompositeTripleHandler compositeTH1 = new CompositeTripleHandler(); compositeTH1.addChild(cth1); compositeTH1.addChild(ctw1); - runner.extract( - new ExtractionParameters( - DefaultConfiguration.singleton(), - ValidationMode.None - ), - source, - compositeTH1 - ); - logger.debug(baos.toString()); + try { + runner.extract( + new ExtractionParameters( + DefaultConfiguration.singleton(), + ValidationMode.None + ), + source, + compositeTH1 + ); + } finally { 
+ compositeTH1.close(); + } + logger.info(baos.toString()); Assert.assertEquals("Unexpected number of triples.", EXPECTED_TRIPLES, cth1.getCount() ); baos.reset(); @@ -366,8 +370,9 @@ public class Any23Test extends Any23Onli } @Test - public void testExtractionParametersWithNestingDisabled() throws IOException, ExtractionException, TripleHandlerException { - final int EXPECTED_TRIPLES = 20; + public void testExtractionParametersWithNestingDisabled() + throws IOException, ExtractionException, TripleHandlerException { + final int EXPECTED_TRIPLES = 21; Any23 runner = new Any23(); DocumentSource source = new FileDocumentSource( new File("src/test/resources/microformats/nested-microformats-a1.html"), Modified: incubator/any23/trunk/core/src/test/java/org/apache/any23/extractor/rdfa/RDFa11ExtractorTest.java URL: http://svn.apache.org/viewvc/incubator/any23/trunk/core/src/test/java/org/apache/any23/extractor/rdfa/RDFa11ExtractorTest.java?rev=1240571&r1=1240570&r2=1240571&view=diff ============================================================================== --- incubator/any23/trunk/core/src/test/java/org/apache/any23/extractor/rdfa/RDFa11ExtractorTest.java (original) +++ incubator/any23/trunk/core/src/test/java/org/apache/any23/extractor/rdfa/RDFa11ExtractorTest.java Sat Feb 4 18:08:54 2012 @@ -48,7 +48,7 @@ public class RDFa11ExtractorTest extends */ @Test public void testObjectResourceConversion() throws RepositoryException { - assertExtracts("html/rdfa/object-resource-test.html"); + extractsAndCheckNoIssues("html/rdfa/object-resource-test.html"); logger.debug(dumpModelToTurtle()); assertContains( null, @@ -67,7 +67,7 @@ public class RDFa11ExtractorTest extends */ @Test public void testExplicitDatatypeDeclaration() throws RepositoryException { - assertExtracts("html/rdfa/xmlliteral-datatype-test.html"); + extractsAndCheckNoIssues("html/rdfa/xmlliteral-datatype-test.html"); logger.debug(dumpModelToTurtle()); Literal literal = RDFUtils.literal( @@ -88,7 +88,7 @@ public 
class RDFa11ExtractorTest extends */ @Test public void testRelWithHref() throws RepositoryException { - assertExtracts("html/rdfa/rel-href.html"); + extractsAndCheckNoIssues("html/rdfa/rel-href.html"); logger.debug(dumpModelToTurtle()); assertContains( @@ -110,7 +110,7 @@ public class RDFa11ExtractorTest extends */ @Test public void testRelRevSupport() throws RepositoryException { - assertExtracts("html/rdfa/rel-rev.html"); + extractsAndCheckNoIssues("html/rdfa/rel-rev.html"); logger.debug(dumpModelToTurtle()); assertContains( @@ -132,7 +132,7 @@ public class RDFa11ExtractorTest extends */ @Test public void testVocabSupport() throws RepositoryException { - assertExtracts("html/rdfa/vocab.html"); + extractsAndCheckNoIssues("html/rdfa/vocab.html"); logger.debug(dumpModelToTurtle()); assertContains( @@ -170,7 +170,7 @@ public class RDFa11ExtractorTest extends throws RepositoryException, RDFHandlerException, IOException, RDFParseException { final int EXPECTED_STATEMENTS = 33; - assertExtracts("html/rdfa/goodrelations-rdfa10.html"); + extractsAndCheckNoIssues("html/rdfa/goodrelations-rdfa10.html"); logger.debug(dumpModelToNQuads()); Assert.assertEquals(EXPECTED_STATEMENTS, dumpAsListOfStatements().size()); @@ -191,7 +191,7 @@ public class RDFa11ExtractorTest extends throws RepositoryException, RDFHandlerException, IOException, RDFParseException { final int EXPECTED_STATEMENTS = 33; - assertExtracts("html/rdfa/goodrelations-rdfa11.html"); + extractsAndCheckNoIssues("html/rdfa/goodrelations-rdfa11.html"); logger.debug(dumpHumanReadableTriples()); Assert.assertEquals(EXPECTED_STATEMENTS, dumpAsListOfStatements().size()); @@ -208,7 +208,7 @@ public class RDFa11ExtractorTest extends */ @Test public void testOpenGraphStructuredProperties() throws IOException, ExtractionException, RepositoryException { - assertExtracts("html/rdfa/opengraph-structured-properties.html"); + extractsAndCheckNoIssues("html/rdfa/opengraph-structured-properties.html"); logger.info( 
dumpHumanReadableTriples() ); Assert.assertEquals(8, getStatementsSize(null, null, null) ); @@ -228,10 +228,15 @@ public class RDFa11ExtractorTest extends assertContains(baseURI, vOGP.siteName, RDFUtils.literal("IMDb") ); assertContains(baseURI, vOGP.video, RDFUtils.literal("http://example.com/bond/trailer.swf") ); } - + @Override protected ExtractorFactory getExtractorFactory() { return RDFa11Extractor.factory; } + + private void extractsAndCheckNoIssues(String resource) { + assertExtracts(resource); + assertNoIssues(); + } } Modified: incubator/any23/trunk/core/src/test/resources/html/rdfa/opengraph-structured-properties.html URL: http://svn.apache.org/viewvc/incubator/any23/trunk/core/src/test/resources/html/rdfa/opengraph-structured-properties.html?rev=1240571&r1=1240570&r2=1240571&view=diff ============================================================================== --- incubator/any23/trunk/core/src/test/resources/html/rdfa/opengraph-structured-properties.html (original) +++ incubator/any23/trunk/core/src/test/resources/html/rdfa/opengraph-structured-properties.html Sat Feb 4 18:08:54 2012 @@ -1,4 +1,4 @@ - + +
Modified: incubator/any23/trunk/core/src/test/resources/html/rdfa/rel-rev.html URL: http://svn.apache.org/viewvc/incubator/any23/trunk/core/src/test/resources/html/rdfa/rel-rev.html?rev=1240571&r1=1240570&r2=1240571&view=diff ============================================================================== --- incubator/any23/trunk/core/src/test/resources/html/rdfa/rel-rev.html (original) +++ incubator/any23/trunk/core/src/test/resources/html/rdfa/rel-rev.html Sat Feb 4 18:08:54 2012 @@ -1,4 +1,4 @@ - + As Gandalf the White said in Modified: incubator/any23/trunk/core/src/test/resources/html/rdfa/vocab.html URL: http://svn.apache.org/viewvc/incubator/any23/trunk/core/src/test/resources/html/rdfa/vocab.html?rev=1240571&r1=1240570&r2=1240571&view=diff ============================================================================== --- incubator/any23/trunk/core/src/test/resources/html/rdfa/vocab.html (original) +++ incubator/any23/trunk/core/src/test/resources/html/rdfa/vocab.html Sat Feb 4 18:08:54 2012 @@ -1,4 +1,4 @@ - +