Return-Path: X-Original-To: apmail-incubator-connectors-commits-archive@minotaur.apache.org Delivered-To: apmail-incubator-connectors-commits-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id AC85294DF for ; Wed, 5 Oct 2011 07:34:34 +0000 (UTC) Received: (qmail 80576 invoked by uid 500); 5 Oct 2011 07:34:34 -0000 Delivered-To: apmail-incubator-connectors-commits-archive@incubator.apache.org Received: (qmail 80538 invoked by uid 500); 5 Oct 2011 07:34:34 -0000 Mailing-List: contact connectors-commits-help@incubator.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: connectors-dev@incubator.apache.org Delivered-To: mailing list connectors-commits@incubator.apache.org Received: (qmail 80531 invoked by uid 99); 5 Oct 2011 07:34:34 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 05 Oct 2011 07:34:34 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 05 Oct 2011 07:34:29 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id 726CD23888E4; Wed, 5 Oct 2011 07:34:08 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1179076 - /incubator/lcf/branches/CONNECTORS-256/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/WikiConnector.java Date: Wed, 05 Oct 2011 07:34:08 -0000 To: connectors-commits@incubator.apache.org From: kwright@apache.org X-Mailer: svnmailer-1.0.8-patched Message-Id: <20111005073408.726CD23888E4@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: kwright Date: Wed Oct 5 07:34:08 2011 New Revision: 1179076 URL: http://svn.apache.org/viewvc?rev=1179076&view=rev Log: Add session management and more http transaction infrastructure Modified: incubator/lcf/branches/CONNECTORS-256/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/WikiConnector.java Modified: incubator/lcf/branches/CONNECTORS-256/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/WikiConnector.java URL: http://svn.apache.org/viewvc/incubator/lcf/branches/CONNECTORS-256/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/WikiConnector.java?rev=1179076&r1=1179075&r2=1179076&view=diff ============================================================================== --- incubator/lcf/branches/CONNECTORS-256/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/WikiConnector.java (original) +++ incubator/lcf/branches/CONNECTORS-256/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/WikiConnector.java Wed Oct 5 07:34:08 2011 @@ -31,8 +31,15 @@ import org.apache.manifoldcf.agents.comm import org.apache.manifoldcf.agents.common.XMLStringContext; import org.apache.manifoldcf.agents.common.XMLFileContext; +import org.apache.commons.httpclient.*; +import org.apache.commons.httpclient.methods.*; +import org.apache.commons.httpclient.params.*; +import org.apache.commons.httpclient.auth.*; +import org.apache.commons.httpclient.protocol.*; + import java.util.*; import java.io.*; +import java.net.*; /** This is the repository connector for a wiki. */ @@ -41,17 +48,25 @@ public class WikiConnector extends org.a public static final String _rcsid = "@(#)$Id$"; // Activities that we know about + + /** Fetch activity */ protected final static String ACTIVITY_FETCH = "fetch document"; - // Activities list + /** Activities list */ protected static final String[] activitiesList = new String[]{ACTIVITY_FETCH}; - // Parameters - protected String protocol = null; + /** Has setup been called? */ + protected boolean hasBeenSetup = false; + + /** Server name */ protected String server = null; - protected int port = -1; - protected String path = null; + /** Base URL */ + protected String baseURL = null; + + /** Connection management */ + protected MultiThreadedHttpConnectionManager connectionManager = null; + /** Constructor. */ public WikiConnector() @@ -83,7 +98,125 @@ public class WikiConnector extends org.a public void connect(ConfigParams configParameters) { super.connect(configParameters); - server = configParameters.getParameter(WikiConfig.PARAM_SERVER); + server = params.getParameter(WikiConfig.PARAM_SERVER); + } + + protected void getSession() + throws ManifoldCFException, ServiceInterruption + { + if (hasBeenSetup == false) + { + String protocol = params.getParameter(WikiConfig.PARAM_PROTOCOL); + if (protocol == null || protocol.length() == 0) + protocol = "http"; + String portString = params.getParameter(WikiConfig.PARAM_PORT); + if (portString == null || portString.length() == 0) + portString = null; + String path = params.getParameter(WikiConfig.PARAM_PATH); + if (path == null) + path = ""; + + baseURL = protocol + "://" + server + ((portString!=null)?":" + portString:"") + path + "/api.php?"; + + // Set up connection manager + connectionManager = new MultiThreadedHttpConnectionManager(); + connectionManager.getParams().setMaxTotalConnections(1); + + hasBeenSetup = true; + } + } + + /** Check status of connection. + */ + @Override + public String check() + throws ManifoldCFException + { + try + { + // Destroy saved session setup and repeat it + hasBeenSetup = false; + getSession(); + + // Now, set up trial data fetch + String checkURL = getCheckURL(); + + HttpClient client = getInitializedClient(); + try + { + // Set up fetch using our special stuff if it's https + GetMethod method = new GetMethod(checkURL); + try + { + method.getParams().setParameter("http.socket.timeout", new Integer(300000)); + int statusCode = executeMethodViaThread(client,method); + switch (statusCode) + { + case HttpStatus.SC_OK: + return super.check(); + + default: + return "Fetch test returned an unexpected response code of "+Integer.toString(statusCode); + } + } + catch (InterruptedException e) + { + // Drop the connection on the floor + method = null; + throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED); + } + catch (java.net.SocketTimeoutException e) + { + return "Fetch test timed out reading from the Wiki server: "+e.getMessage(); + } + catch (java.net.SocketException e) + { + return "Fetch test received a socket error reading from Wiki server: "+e.getMessage(); + } + catch (org.apache.commons.httpclient.ConnectTimeoutException e) + { + return "Fetch test connection timed out reading from Wiki server: "+e.getMessage(); + } + catch (InterruptedIOException e) + { + throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED); + } + catch (IOException e) + { + return "Fetch test had an IO failure: "+e.getMessage(); + } + finally + { + if (method != null) + method.releaseConnection(); + } + } + catch (IllegalStateException e) + { + return "Fetch test had a state exception talking to Livelink HTTP Server: "+e.getMessage(); + } + } + catch (ServiceInterruption e) + { + return "Transient error: "+e.getMessage(); + } + catch (ManifoldCFException e) + { + if (e.getErrorCode() == ManifoldCFException.INTERRUPTED) + throw e; + return "Error: "+e.getMessage(); + } + } + + /** This method is periodically called for all connectors that are connected but not + * in active use. + */ + @Override + public void poll() + throws ManifoldCFException + { + if (connectionManager != null) + connectionManager.closeIdleConnections(60000L); } /** Close the connection. Call this before discarding the connection. @@ -92,7 +225,16 @@ public class WikiConnector extends org.a public void disconnect() throws ManifoldCFException { + hasBeenSetup = false; server = null; + baseURL = null; + + if (connectionManager != null) + { + connectionManager.shutdown(); + connectionManager = null; + } + super.disconnect(); } @@ -480,6 +622,118 @@ public class WikiConnector extends org.a // Protected static classes and methods + /** Create and initialize an HttpClient instance */ + protected HttpClient getInitializedClient() + throws ServiceInterruption, ManifoldCFException + { + HttpClient client = new HttpClient(connectionManager); + return client; + } + + /** Execute an HttpClient method via thread, so we don't get stuck in socket wait */ + protected static int executeMethodViaThread(HttpClient client, HttpMethodBase executeMethod) + throws InterruptedException, IOException + { + ExecuteMethodThread t = new ExecuteMethodThread(client,executeMethod); + try + { + t.start(); + t.join(); + Throwable thr = t.getException(); + if (thr != null) + { + if (thr instanceof IOException) + throw (IOException)thr; + else if (thr instanceof RuntimeException) + throw (RuntimeException)thr; + else + throw (Error)thr; + } + return t.getResponse(); + } + catch (InterruptedException e) + { + t.interrupt(); + // We need the caller to abandon any connections left around, so rethrow in a way that forces them to process the event properly. + throw e; + } + } + + /** Thread to execute an HttpClient method */ + protected static class ExecuteMethodThread extends Thread + { + protected HttpClient client; + protected HttpMethodBase executeMethod; + protected Throwable exception = null; + protected int rval = 0; + + public ExecuteMethodThread(HttpClient client, HttpMethodBase executeMethod) + { + super(); + setDaemon(true); + this.client = client; + this.executeMethod = executeMethod; + } + + public void run() + { + try + { + // Call the execute method appropriately + rval = client.executeMethod(executeMethod); + } + catch (Throwable e) + { + this.exception = e; + } + } + + public Throwable getException() + { + return exception; + } + + public int getResponse() + { + return rval; + } + } + + + /** Create a URL to obtain the next 500 pages. + */ + protected String getListPagesURL(String startingTitle) + throws ManifoldCFException + { + try + { + return baseURL + "action=query&list=allpages" + + ((startingTitle!=null)?"&apfrom="+URLEncoder.encode(startingTitle,"utf-8"):"") + + "&aplimit=500"; + } + catch (UnsupportedEncodingException e) + { + throw new ManifoldCFException(e.getMessage(),e); + } + } + + /** Create a URL to obtain a page's browse URL, given the page ID. + */ + protected String getGetURLURL(String documentIdentifier) + throws ManifoldCFException + { + return baseURL + "action=query&prop=info&pageids="+documentIdentifier+"&inprop=url"; + } + + /** Parse the response to a GetURL request, of the form: + * + * + * + * + * + * + * + */ protected static String parseGetURLResponse(InputStream is, String documentIdentifier) throws ManifoldCFException, ServiceInterruption { @@ -517,6 +771,7 @@ public class WikiConnector extends org.a } } + /** Class representing the "api" context of a "get url" response */ protected static class WikiGetURLAPIContext extends SingleLevelContext { protected String fullURL = null; @@ -548,6 +803,7 @@ public class WikiConnector extends org.a } + /** Class representing the "api/query" context of a "get url" response */ protected static class WikiGetURLQueryContext extends SingleLevelContext { protected String fullURL = null; @@ -579,6 +835,7 @@ public class WikiConnector extends org.a } + /** Class representing the "api/query/pages" context of a "get url" response */ protected static class WikiGetURLPagesContext extends SingleLevelContext { protected String fullURL = null; @@ -610,6 +867,7 @@ public class WikiConnector extends org.a } + /** Class representing the "api/query/pages/page" context of a "get url" response */ protected static class WikiGetURLPageContext extends BaseProcessingContext { protected String fullURL = null; @@ -638,4 +896,28 @@ public class WikiConnector extends org.a } } + /** Create a URL to obtain a page's timestamp, given the page ID. + */ + protected String getGetTimestampURL(String documentIdentifier) + throws ManifoldCFException + { + return baseURL + "action=query&prop=revisions&pageids="+documentIdentifier+"&rvprop=timestamp"; + } + + /** Create a URL to obtain a page's metadata and content, given the page ID. + */ + protected String getGetDocinfoURL(String documentIdentifier) + throws ManifoldCFException + { + return baseURL + "action=query&prop=revisions&pageids="+documentIdentifier+"&rvprop=user|comment|content"; + } + + /** Get a URL for a check operation. + */ + protected String getCheckURL() + throws ManifoldCFException + { + return baseURL + "action=query&list=allpages&aplimit=1"; + } + }