Return-Path: X-Original-To: apmail-incubator-connectors-commits-archive@minotaur.apache.org Delivered-To: apmail-incubator-connectors-commits-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 42575762F for ; Fri, 14 Oct 2011 15:56:18 +0000 (UTC) Received: (qmail 40892 invoked by uid 500); 14 Oct 2011 15:56:18 -0000 Delivered-To: apmail-incubator-connectors-commits-archive@incubator.apache.org Received: (qmail 40857 invoked by uid 500); 14 Oct 2011 15:56:17 -0000 Mailing-List: contact connectors-commits-help@incubator.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: connectors-dev@incubator.apache.org Delivered-To: mailing list connectors-commits@incubator.apache.org Received: (qmail 40850 invoked by uid 99); 14 Oct 2011 15:56:17 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 14 Oct 2011 15:56:17 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 14 Oct 2011 15:56:14 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id CB2AD23889D7; Fri, 14 Oct 2011 15:55:52 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1183398 - in /incubator/lcf/trunk: CHANGES.txt connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/WikiConnector.java tests/wiki/src/test/java/org/apache/manifoldcf/wiki_tests/MockWikiService.java Date: Fri, 14 Oct 2011 15:55:52 -0000 To: connectors-commits@incubator.apache.org From: kwright@apache.org X-Mailer: svnmailer-1.0.8-patched Message-Id: <20111014155552.CB2AD23889D7@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: kwright Date: Fri Oct 14 15:55:52 2011 New Revision: 1183398 URL: http://svn.apache.org/viewvc?rev=1183398&view=rev Log: Fix for CONNECTORS-273. Added last-modified metadata to the Wiki connector. Modified: incubator/lcf/trunk/CHANGES.txt incubator/lcf/trunk/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/WikiConnector.java incubator/lcf/trunk/tests/wiki/src/test/java/org/apache/manifoldcf/wiki_tests/MockWikiService.java Modified: incubator/lcf/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/incubator/lcf/trunk/CHANGES.txt?rev=1183398&r1=1183397&r2=1183398&view=diff ============================================================================== --- incubator/lcf/trunk/CHANGES.txt (original) +++ incubator/lcf/trunk/CHANGES.txt Fri Oct 14 15:55:52 2011 @@ -3,6 +3,10 @@ $Id$ ======================= 0.4-dev ===================== +CONNECTORS-273: Add last-modified metadata to indexing for the +Wiki connector. +(Tobias Wunderlich, Karl Wright) + CONNECTORS-274: Fix long-standing problem with XML parsing, which affected the wiki connector in a big way. (Tobias Wunderlich, Karl Wright) Modified: incubator/lcf/trunk/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/WikiConnector.java URL: http://svn.apache.org/viewvc/incubator/lcf/trunk/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/WikiConnector.java?rev=1183398&r1=1183397&r2=1183398&view=diff ============================================================================== --- incubator/lcf/trunk/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/WikiConnector.java (original) +++ incubator/lcf/trunk/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/WikiConnector.java Fri Oct 14 15:55:52 2011 @@ -2054,6 +2054,7 @@ public class WikiConnector extends org.a String author = t.getAuthor(); String comment = t.getComment(); String title = t.getTitle(); + String lastModified = t.getLastModified(); RepositoryDocument rd = new RepositoryDocument(); dataSize = contentFile.length(); @@ -2067,6 +2068,8 @@ public class WikiConnector extends org.a rd.addField("author",author); if (title != null) rd.addField("title",title); + if (lastModified != null) + rd.addField("last-modified",lastModified); activities.ingestDocument(documentIdentifier,documentVersion,fullURL,rd); } finally @@ -2157,7 +2160,7 @@ public class WikiConnector extends org.a } } - /** Thread to execute a "get timestamp" operation. This thread both executes the operation and parses the result. */ + /** Thread to execute a "get doc info" operation. This thread both executes the operation and parses the result. */ protected static class ExecuteGetDocInfoThread extends Thread { protected HttpClient client; @@ -2168,6 +2171,7 @@ public class WikiConnector extends org.a protected String author = null; protected String title = null; protected String comment = null; + protected String lastModified = null; protected String statusCode = null; protected String errorMessage = null; @@ -2221,6 +2225,7 @@ public class WikiConnector extends org.a title = c.getTitle(); author = c.getAuthor(); comment = c.getComment(); + lastModified = c.getLastModified(); statusCode = "OK"; } catch (IOException e) @@ -2290,6 +2295,11 @@ public class WikiConnector extends org.a { return title; } + + public String getLastModified() + { + return lastModified; + } public void cleanup() { @@ -2308,7 +2318,7 @@ public class WikiConnector extends org.a protected String getGetDocInfoURL(String documentIdentifier) throws ManifoldCFException { - return baseURL + "action=query&prop=revisions&pageids="+documentIdentifier+"&rvprop=user%7ccomment%7ccontent"; + return baseURL + "action=query&prop=revisions&pageids="+documentIdentifier+"&rvprop=user%7ccomment%7ccontent%7ctimestamp"; } /** Class representing the "api" context of a "get doc info" response */ @@ -2322,6 +2332,8 @@ public class WikiConnector extends org.a protected String author = null; /** Comment */ protected String comment = null; + /** Last modified */ + protected String lastModified = null; public WikiGetDocInfoAPIContext(XMLStream theStream) { @@ -2342,6 +2354,7 @@ public class WikiConnector extends org.a contentFile = pc.getContentFile(); author = pc.getAuthor(); comment = pc.getComment(); + lastModified = pc.getLastModified(); } protected void tagCleanup() @@ -2371,6 +2384,11 @@ public class WikiConnector extends org.a { return author; } + + public String getLastModified() + { + return lastModified; + } public String getComment() { @@ -2390,6 +2408,8 @@ public class WikiConnector extends org.a protected String author = null; /** Comment */ protected String comment = null; + /** Last modified */ + protected String lastModified = null; public WikiGetDocInfoQueryContext(XMLStream theStream, String namespaceURI, String localName, String qName, Attributes atts) { @@ -2410,6 +2430,7 @@ public class WikiConnector extends org.a contentFile = pc.getContentFile(); author = pc.getAuthor(); comment = pc.getComment(); + lastModified = pc.getLastModified(); } protected void tagCleanup() @@ -2439,6 +2460,11 @@ public class WikiConnector extends org.a { return author; } + + public String getLastModified() + { + return lastModified; + } public String getComment() { @@ -2458,6 +2484,8 @@ public class WikiConnector extends org.a protected String author = null; /** Comment */ protected String comment = null; + /** Last modified */ + protected String lastModified = null; public WikiGetDocInfoPagesContext(XMLStream theStream, String namespaceURI, String localName, String qName, Attributes atts) { @@ -2477,6 +2505,7 @@ public class WikiConnector extends org.a title = pc.getTitle(); contentFile = pc.getContentFile(); author = pc.getAuthor(); + lastModified = pc.getLastModified(); comment = pc.getComment(); } @@ -2507,6 +2536,11 @@ public class WikiConnector extends org.a { return author; } + + public String getLastModified() + { + return lastModified; + } public String getComment() { @@ -2526,6 +2560,8 @@ public class WikiConnector extends org.a protected String author = null; /** Comment */ protected String comment = null; + /** Last modified */ + protected String lastModified = null; public WikiGetDocInfoPageContext(XMLStream theStream, String namespaceURI, String localName, String qName, Attributes atts) { @@ -2556,6 +2592,7 @@ public class WikiConnector extends org.a contentFile = rc.getContentFile(); author = rc.getAuthor(); comment = rc.getComment(); + lastModified = rc.getLastModified(); } super.endTag(); } @@ -2592,6 +2629,11 @@ public class WikiConnector extends org.a { return comment; } + + public String getLastModified() + { + return lastModified; + } } @@ -2601,6 +2643,7 @@ public class WikiConnector extends org.a protected File contentFile = null; protected String author = null; protected String comment = null; + protected String lastModified = null; public WikiGetDocInfoRevisionsContext(XMLStream theStream, String namespaceURI, String localName, String qName, Attributes atts) { @@ -2620,6 +2663,7 @@ public class WikiConnector extends org.a contentFile = rc.getContentFile(); author = rc.getAuthor(); comment = rc.getComment(); + lastModified = rc.getLastModified(); } protected void tagCleanup() @@ -2650,6 +2694,10 @@ public class WikiConnector extends org.a return comment; } + public String getLastModified() + { + return lastModified; + } } /** Class looking for the "api/query/pages/page/revisions/rev" context of a "get doc info" response */ @@ -2658,6 +2706,7 @@ public class WikiConnector extends org.a protected String author = null; protected String comment = null; protected File contentFile = null; + protected String lastModified = null; public WikiGetDocInfoRevContext(XMLStream theStream, String namespaceURI, String localName, String qName, Attributes atts) { @@ -2671,6 +2720,7 @@ public class WikiConnector extends org.a { author = atts.getValue("user"); comment = atts.getValue("comment"); + lastModified = atts.getValue("timestamp"); try { File tempFile = File.createTempFile("_wikidata_","tmp"); @@ -2723,6 +2773,11 @@ public class WikiConnector extends org.a { return author; } + + public String getLastModified() + { + return lastModified; + } public String getComment() { Modified: incubator/lcf/trunk/tests/wiki/src/test/java/org/apache/manifoldcf/wiki_tests/MockWikiService.java URL: http://svn.apache.org/viewvc/incubator/lcf/trunk/tests/wiki/src/test/java/org/apache/manifoldcf/wiki_tests/MockWikiService.java?rev=1183398&r1=1183397&r2=1183398&view=diff ============================================================================== --- incubator/lcf/trunk/tests/wiki/src/test/java/org/apache/manifoldcf/wiki_tests/MockWikiService.java (original) +++ incubator/lcf/trunk/tests/wiki/src/test/java/org/apache/manifoldcf/wiki_tests/MockWikiService.java Fri Oct 14 15:55:52 2011 @@ -139,7 +139,7 @@ public class MockWikiService if (resourceName == null) throw new IOException("Could not find a matching resource for the timestamp parameters; pageids = '"+pageIds+"'"); } - else if (rvprop != null && rvprop.equals("user|comment|content")) + else if (rvprop != null && rvprop.equals("user|comment|content|timestamp")) { // Doc info query if (pageIds == null) @@ -148,7 +148,7 @@ public class MockWikiService throw new IOException("cannot do more than one docinfo request at once"); resourceName = docInfoQueryResources.get(pageIds); if (resourceName == null) - throw new IOException("Could not find a matching resource for the user|comment|content parameters; pageids = '"+pageIds+"'"); + throw new IOException("Could not find a matching resource for the user|comment|content|timestamp parameters; pageids = '"+pageIds+"'"); } else throw new IOException("rvprop parameter missing or incorrect: "+rvprop);