manifoldcf-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kwri...@apache.org
Subject svn commit: r1181236 - in /incubator/lcf/branches/CONNECTORS-256/connectors/wiki/connector/src: main/java/org/apache/manifoldcf/crawler/connectors/wiki/ test/java/org/ test/java/org/apache/ test/java/org/apache/manifoldcf/ test/java/org/apache/manifold...
Date Mon, 10 Oct 2011 21:03:47 GMT
Author: kwright
Date: Mon Oct 10 21:03:46 2011
New Revision: 1181236

URL: http://svn.apache.org/viewvc?rev=1181236&view=rev
Log:
Complete the connector functionality

Added:
    incubator/lcf/branches/CONNECTORS-256/connectors/wiki/connector/src/test/java/org/
    incubator/lcf/branches/CONNECTORS-256/connectors/wiki/connector/src/test/java/org/apache/
    incubator/lcf/branches/CONNECTORS-256/connectors/wiki/connector/src/test/java/org/apache/manifoldcf/
    incubator/lcf/branches/CONNECTORS-256/connectors/wiki/connector/src/test/java/org/apache/manifoldcf/crawler/
    incubator/lcf/branches/CONNECTORS-256/connectors/wiki/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/
    incubator/lcf/branches/CONNECTORS-256/connectors/wiki/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/wiki/
    incubator/lcf/branches/CONNECTORS-256/connectors/wiki/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/wiki/tests/
    incubator/lcf/branches/CONNECTORS-256/connectors/wiki/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/wiki/tests/Base.java   (with props)
Removed:
    incubator/lcf/branches/CONNECTORS-256/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/ByteBuffer.java
Modified:
    incubator/lcf/branches/CONNECTORS-256/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/BaseProcessingContext.java
    incubator/lcf/branches/CONNECTORS-256/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/SingleLevelContext.java
    incubator/lcf/branches/CONNECTORS-256/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/WikiConnector.java

Modified: incubator/lcf/branches/CONNECTORS-256/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/BaseProcessingContext.java
URL: http://svn.apache.org/viewvc/incubator/lcf/branches/CONNECTORS-256/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/BaseProcessingContext.java?rev=1181236&r1=1181235&r2=1181236&view=diff
==============================================================================
--- incubator/lcf/branches/CONNECTORS-256/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/BaseProcessingContext.java (original)
+++ incubator/lcf/branches/CONNECTORS-256/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/BaseProcessingContext.java Mon Oct 10 21:03:46 2011
@@ -55,6 +55,8 @@ public abstract class BaseProcessingCont
   }
     
   /** Process this data */
-  protected abstract void process()
-    throws ManifoldCFException;
+  protected void process()
+    throws ManifoldCFException
+  {
+  }
 }

Modified: incubator/lcf/branches/CONNECTORS-256/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/SingleLevelContext.java
URL: http://svn.apache.org/viewvc/incubator/lcf/branches/CONNECTORS-256/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/SingleLevelContext.java?rev=1181236&r1=1181235&r2=1181236&view=diff
==============================================================================
--- incubator/lcf/branches/CONNECTORS-256/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/SingleLevelContext.java (original)
+++ incubator/lcf/branches/CONNECTORS-256/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/SingleLevelContext.java Mon Oct 10 21:03:46 2011
@@ -71,6 +71,7 @@ public abstract class SingleLevelContext
       super.endTag();
   }
   
-  protected abstract void finishChild(BaseProcessingContext child);
+  protected abstract void finishChild(BaseProcessingContext child)
+    throws ManifoldCFException;
   
 }

Modified: incubator/lcf/branches/CONNECTORS-256/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/WikiConnector.java
URL: http://svn.apache.org/viewvc/incubator/lcf/branches/CONNECTORS-256/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/WikiConnector.java?rev=1181236&r1=1181235&r2=1181236&view=diff
==============================================================================
--- incubator/lcf/branches/CONNECTORS-256/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/WikiConnector.java (original)
+++ incubator/lcf/branches/CONNECTORS-256/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/WikiConnector.java Mon Oct 10 21:03:46 2011
@@ -136,9 +136,7 @@ public class WikiConnector extends org.a
     {
       // Destroy saved session setup and repeat it
       hasBeenSetup = false;
-      getSession();
-
-      executeCheckViaThread();
+      performCheck();
       return super.check();
     }
     catch (ServiceInterruption e)
@@ -183,6 +181,14 @@ public class WikiConnector extends org.a
     super.disconnect();
   }
 
+  /** Get the maximum number of documents to amalgamate together into one batch, for this connector.
+  *@return the maximum number. 0 indicates "unlimited".
+  */
+  public int getMaxDocumentRequest()
+  {
+    return 20;
+  }
+
   /** Queue "seed" documents.  Seed documents are the starting places for crawling activity.  Documents
   * are seeded when this method calls appropriate methods in the passed in ISeedingActivity object.
   *
@@ -213,7 +219,7 @@ public class WikiConnector extends org.a
     long startTime, long endTime)
     throws ManifoldCFException, ServiceInterruption
   {
-    // MHL
+    listAllPages(activities,startTime,endTime);
   }
 
   /** Get document versions given an array of document identifiers.
@@ -238,8 +244,14 @@ public class WikiConnector extends org.a
     DocumentSpecification spec, int jobMode, boolean usesDefaultAuthority)
     throws ManifoldCFException, ServiceInterruption
   {
-    // MHL
-    return null;
+    Map<String,String> versions = new HashMap<String,String>();
+    getTimestamps(documentIdentifiers,versions,activities);
+    String[] rval = new String[documentIdentifiers.length];
+    for (int i = 0 ; i < rval.length ; i++)
+    {
+      rval[i] = versions.get(documentIdentifiers[i]);
+    }
+    return rval;
   }
   
   /** Process a set of documents.
@@ -260,7 +272,11 @@ public class WikiConnector extends org.a
     DocumentSpecification spec, boolean[] scanOnly, int jobMode)
     throws ManifoldCFException, ServiceInterruption
   {
-    // MHL
+    for (int i = 0 ; i < documentIdentifiers.length ; i++)
+    {
+      if (!scanOnly[i])
+        getDocInfo(documentIdentifiers[i], versions[i], activities);
+    }
   }
   
   // UI support methods.
@@ -577,10 +593,14 @@ public class WikiConnector extends org.a
     return method;
   }
 
-  /** Execute a check() operation via a thread */
-  protected void executeCheckViaThread()
+  // -- Methods and classes to perform a "check" operation. --
+
+  /** Do the check operation.  This throws an exception if anything is wrong.
+  */
+  protected void performCheck()
     throws ManifoldCFException, ServiceInterruption
   {
+    getSession();
     HttpClient client = getInitializedClient();
     HttpMethodBase executeMethod = getInitializedMethod(getCheckURL());
     try
@@ -637,6 +657,13 @@ public class WikiConnector extends org.a
       executeMethod = null;
       throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
     }
+    catch (ManifoldCFException e)
+    {
+      if (e.getErrorCode() == ManifoldCFException.INTERRUPTED)
+        // Drop the connection on the floor
+        executeMethod = null;
+      throw e;
+    }
     catch (java.net.SocketTimeoutException e)
     {
       long currentTime = System.currentTimeMillis();
@@ -667,14 +694,21 @@ public class WikiConnector extends org.a
         executeMethod.releaseConnection();
     }
   }
+
+  /** Get a URL for a check operation.
+  */
+  protected String getCheckURL()
+    throws ManifoldCFException
+  {
+    return baseURL + "action=query&list=allpages&aplimit=1";
+  }
   
-  /** Thread to execute a check */
+  /** Thread to execute a "check" operation.  This thread both executes the operation and parses the result. */
   protected static class ExecuteCheckThread extends Thread
   {
     protected HttpClient client;
     protected HttpMethodBase executeMethod;
     protected Throwable exception = null;
-    protected int rval = 0;
 
     public ExecuteCheckThread(HttpClient client, HttpMethodBase executeMethod)
     {
@@ -778,13 +812,9 @@ public class WikiConnector extends org.a
     }
     
     protected void finishChild(BaseProcessingContext child)
-    {
-      responseSeen |= ((WikiCheckQueryContext)child).hasResponse();
-    }
-
-    protected void process()
       throws ManifoldCFException
     {
+      responseSeen |= ((WikiCheckQueryContext)child).hasResponse();
     }
     
     public boolean hasResponse()
@@ -810,15 +840,11 @@ public class WikiConnector extends org.a
     }
 
     protected void finishChild(BaseProcessingContext child)
+      throws ManifoldCFException
     {
       responseSeen |= ((WikiCheckAllPagesContext)child).hasResponse();
     }
 
-    protected void process()
-      throws ManifoldCFException
-    {
-    }
-    
     public boolean hasResponse()
     {
       return responseSeen;
@@ -842,15 +868,11 @@ public class WikiConnector extends org.a
     }
 
     protected void finishChild(BaseProcessingContext child)
+      throws ManifoldCFException
     {
       responseSeen |= true;
     }
 
-    protected void process()
-      throws ManifoldCFException
-    {
-    }
-    
     public boolean hasResponse()
     {
       return responseSeen;
@@ -865,101 +887,29 @@ public class WikiConnector extends org.a
     {
       super(theStream,namespaceURI,localName,qName,atts);
     }
-
-    protected void process()
-      throws ManifoldCFException
-    {
-    }
-    
-  }
-
-  /** Execute an HttpClient method via thread, so we don't get stuck in socket wait */
-  protected static int executeMethodViaThread(HttpClient client, HttpMethodBase executeMethod)
-    throws InterruptedException, IOException
-  {
-    ExecuteMethodThread t = new ExecuteMethodThread(client,executeMethod);
-    try
-    {
-      t.start();
-      t.join();
-      Throwable thr = t.getException();
-      if (thr != null)
-      {
-        if (thr instanceof IOException)
-          throw (IOException)thr;
-        else if (thr instanceof RuntimeException)
-          throw (RuntimeException)thr;
-        else
-          throw (Error)thr;
-      }
-      return t.getResponse();
-    }
-    catch (InterruptedException e)
-    {
-      t.interrupt();
-      // We need the caller to abandon any connections left around, so rethrow in a way that forces them to process the event properly.
-      throw e;
-    }
-  }
-
-  /** Thread to execute an HttpClient method */
-  protected static class ExecuteMethodThread extends Thread
-  {
-    protected HttpClient client;
-    protected HttpMethodBase executeMethod;
-    protected Throwable exception = null;
-    protected int rval = 0;
-
-    public ExecuteMethodThread(HttpClient client, HttpMethodBase executeMethod)
-    {
-      super();
-      setDaemon(true);
-      this.client = client;
-      this.executeMethod = executeMethod;
-    }
-
-    public void run()
-    {
-      try
-      {
-        // Call the execute method appropriately
-        rval = client.executeMethod(executeMethod);
-      }
-      catch (Throwable e)
-      {
-        this.exception = e;
-      }
-    }
-
-    public Throwable getException()
-    {
-      return exception;
-    }
-
-    public int getResponse()
-    {
-      return rval;
-    }
   }
 
+  // -- Methods and classes to perform a "list pages" operation. --
 
-  /** Create a URL to obtain the next 500 pages.
+  /** Perform a series of listPages() operations, so that we fully obtain the documents we're looking for even though
+  * we're limited to 500 of them per request.
   */
-  protected String getListPagesURL(String startingTitle)
-    throws ManifoldCFException
+  protected void listAllPages(ISeedingActivity activities, long startTime, long endTime)
+    throws ManifoldCFException, ServiceInterruption
   {
-    try
-    {
-      return baseURL + "action=query&list=allpages" +
-        ((startingTitle!=null)?"&apfrom="+URLEncoder.encode(startingTitle,"utf-8"):"") +
-        "&aplimit=500";
-    }
-    catch (UnsupportedEncodingException e)
-    {
-      throw new ManifoldCFException(e.getMessage(),e);
+    getSession();
+    String lastTitle = null;
+    while (true)
+    {
+      // Start with the last title seen in the previous round.  This will cause a duplicate to be queued, but that's not
+      // a problem.
+      String newLastTitle = executeListPagesViaThread(lastTitle,activities);
+      if (newLastTitle == null)
+        break;
+      lastTitle = newLastTitle;
     }
   }
-
+  
   /** Execute a listPages() operation via a thread.  Returns the last page title. */
   protected String executeListPagesViaThread(String startPageTitle, ISeedingActivity activities)
     throws ManifoldCFException, ServiceInterruption
@@ -969,7 +919,7 @@ public class WikiConnector extends org.a
     try
     {
       PageBuffer pageBuffer = new PageBuffer();
-      ExecuteListPagesThread t = new ExecuteListPagesThread(client,executeMethod,pageBuffer);
+      ExecuteListPagesThread t = new ExecuteListPagesThread(client,executeMethod,pageBuffer,startPageTitle);
       try
       {
         t.start();
@@ -1039,6 +989,13 @@ public class WikiConnector extends org.a
       executeMethod = null;
       throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
     }
+    catch (ManifoldCFException e)
+    {
+      if (e.getErrorCode() == ManifoldCFException.INTERRUPTED)
+        // Drop the connection on the floor
+        executeMethod = null;
+      throw e;
+    }
     catch (java.net.SocketTimeoutException e)
     {
       long currentTime = System.currentTimeMillis();
@@ -1069,7 +1026,24 @@ public class WikiConnector extends org.a
         executeMethod.releaseConnection();
     }
   }
-  
+
+  /** Create a URL to obtain the next 500 pages.
+  */
+  protected String getListPagesURL(String startingTitle)
+    throws ManifoldCFException
+  {
+    try
+    {
+      return baseURL + "action=query&list=allpages" +
+        ((startingTitle!=null)?"&apfrom="+URLEncoder.encode(startingTitle,"utf-8"):"") +
+        "&aplimit=500";
+    }
+    catch (UnsupportedEncodingException e)
+    {
+      throw new ManifoldCFException(e.getMessage(),e);
+    }
+  }
+
   /** Thread to execute a list pages operation */
   protected static class ExecuteListPagesThread extends Thread
   {
@@ -1078,14 +1052,16 @@ public class WikiConnector extends org.a
     protected Throwable exception = null;
     protected PageBuffer pageBuffer;
     protected String lastPageTitle = null;
+    protected String startPageTitle;
 
-    public ExecuteListPagesThread(HttpClient client, HttpMethodBase executeMethod, PageBuffer pageBuffer)
+    public ExecuteListPagesThread(HttpClient client, HttpMethodBase executeMethod, PageBuffer pageBuffer, String startPageTitle)
     {
       super();
       setDaemon(true);
       this.client = client;
       this.executeMethod = executeMethod;
       this.pageBuffer = pageBuffer;
+      this.startPageTitle = startPageTitle;
     }
 
     public void run()
@@ -1100,7 +1076,7 @@ public class WikiConnector extends org.a
         InputStream is = executeMethod.getResponseBodyAsStream();
         try
         {
-          lastPageTitle = parseListPagesResponse(is,pageBuffer);
+          lastPageTitle = parseListPagesResponse(is,pageBuffer,startPageTitle);
         }
         finally
         {
@@ -1151,12 +1127,12 @@ public class WikiConnector extends org.a
   *   </query-continue>
   * </api>
   */
-  protected static String parseListPagesResponse(InputStream is, PageBuffer buffer)
+  protected static String parseListPagesResponse(InputStream is, PageBuffer buffer, String startPageTitle)
     throws ManifoldCFException, ServiceInterruption
   {
     // Parse the document.  This will cause various things to occur, within the instantiated XMLContext class.
     XMLStream x = new XMLStream();
-    WikiListPagesAPIContext c = new WikiListPagesAPIContext(x,buffer);
+    WikiListPagesAPIContext c = new WikiListPagesAPIContext(x,buffer,startPageTitle);
     x.setContext(c);
     try
     {
@@ -1170,19 +1146,6 @@ public class WikiConnector extends org.a
         long time = System.currentTimeMillis();
         throw new ServiceInterruption(e.getMessage(),e,time + 300000L,time + 12L * 60000L,-1,false);
       }
-      catch (ManifoldCFException e)
-      {
-        if (e.getErrorCode() == ManifoldCFException.INTERRUPTED)
-          throw e;
-        // Ignore XML parsing errors.
-        if (e.getMessage().indexOf("pars") >= 0)
-        {
-          if (Logging.connectors.isDebugEnabled())
-            Logging.connectors.debug("Wiki: listPages() response was unparseable ("+e.getMessage()+"), skipping");
-          return null;
-        }
-        throw e;
-      }
     }
     finally
     {
@@ -1195,26 +1158,24 @@ public class WikiConnector extends org.a
   {
     protected String lastTitle = null;
     protected PageBuffer buffer;
+    protected String startPageTitle;
     
-    public WikiListPagesAPIContext(XMLStream theStream, PageBuffer buffer)
+    public WikiListPagesAPIContext(XMLStream theStream, PageBuffer buffer, String startPageTitle)
     {
       super(theStream,"api");
       this.buffer = buffer;
+      this.startPageTitle = startPageTitle;
     }
 
     protected BaseProcessingContext createChild(String namespaceURI, String localName, String qName, Attributes atts)
     {
-      return new WikiListPagesQueryContext(theStream,namespaceURI,localName,qName,atts,buffer);
+      return new WikiListPagesQueryContext(theStream,namespaceURI,localName,qName,atts,buffer,startPageTitle);
     }
     
     protected void finishChild(BaseProcessingContext child)
-    {
-      lastTitle = ((WikiListPagesQueryContext)child).getLastTitle();
-    }
-
-    protected void process()
       throws ManifoldCFException
     {
+      lastTitle = ((WikiListPagesQueryContext)child).getLastTitle();
     }
     
     public String getLastTitle()
@@ -1229,26 +1190,25 @@ public class WikiConnector extends org.a
   {
     protected String lastTitle = null;
     protected PageBuffer buffer;
+    protected String startPageTitle;
     
-    public WikiListPagesQueryContext(XMLStream theStream, String namespaceURI, String localName, String qName, Attributes atts, PageBuffer buffer)
+    public WikiListPagesQueryContext(XMLStream theStream, String namespaceURI, String localName, String qName, Attributes atts,
+      PageBuffer buffer, String startPageTitle)
     {
       super(theStream,namespaceURI,localName,qName,atts,"query");
       this.buffer = buffer;
+      this.startPageTitle = startPageTitle;
     }
 
     protected BaseProcessingContext createChild(String namespaceURI, String localName, String qName, Attributes atts)
     {
-      return new WikiListPagesAllPagesContext(theStream,namespaceURI,localName,qName,atts,buffer);
+      return new WikiListPagesAllPagesContext(theStream,namespaceURI,localName,qName,atts,buffer,startPageTitle);
     }
 
     protected void finishChild(BaseProcessingContext child)
-    {
-      lastTitle = ((WikiListPagesAllPagesContext)child).getLastTitle();
-    }
-
-    protected void process()
       throws ManifoldCFException
     {
+      lastTitle = ((WikiListPagesAllPagesContext)child).getLastTitle();
     }
     
     public String getLastTitle()
@@ -1263,29 +1223,28 @@ public class WikiConnector extends org.a
   {
     protected String lastTitle = null;
     protected PageBuffer buffer;
+    protected String startPageTitle;
     
-    public WikiListPagesAllPagesContext(XMLStream theStream, String namespaceURI, String localName, String qName, Attributes atts, PageBuffer buffer)
+    public WikiListPagesAllPagesContext(XMLStream theStream, String namespaceURI, String localName, String qName, Attributes atts,
+      PageBuffer buffer, String startPageTitle)
     {
       super(theStream,namespaceURI,localName,qName,atts,"allpages");
       this.buffer = buffer;
+      this.startPageTitle = startPageTitle;
     }
 
     protected BaseProcessingContext createChild(String namespaceURI, String localName, String qName, Attributes atts)
     {
       // When we recognize allpages, we need to look for <p> records.
-      return new WikiListPagesPContext(theStream,namespaceURI,localName,qName,atts,buffer);
+      return new WikiListPagesPContext(theStream,namespaceURI,localName,qName,atts,buffer,startPageTitle);
     }
 
     protected void finishChild(BaseProcessingContext child)
+      throws ManifoldCFException
     {
       // Update the last title from all the <p> records we saw.
       lastTitle = ((WikiListPagesPContext)child).getLastTitle();
     }
-
-    protected void process()
-      throws ManifoldCFException
-    {
-    }
     
     public String getLastTitle()
     {
@@ -1299,11 +1258,14 @@ public class WikiConnector extends org.a
   {
     protected String lastTitle = null;
     protected PageBuffer buffer;
+    protected String startPageTitle;
     
-    public WikiListPagesPContext(XMLStream theStream, String namespaceURI, String localName, String qName, Attributes atts, PageBuffer buffer)
+    public WikiListPagesPContext(XMLStream theStream, String namespaceURI, String localName, String qName, Attributes atts,
+      PageBuffer buffer, String startPageTitle)
     {
       super(theStream,namespaceURI,localName,qName,atts);
       this.buffer = buffer;
+      this.startPageTitle = startPageTitle;
     }
 
     protected XMLContext beginTag(String namespaceURI, String localName, String qName, Attributes atts)
@@ -1311,188 +1273,981 @@ public class WikiConnector extends org.a
     {
       if (qName.equals("p"))
       {
-        lastTitle = atts.getValue("title");
-        String pageID = atts.getValue("pageid");
-        // Add the discovered page id to the page buffer
-        try
-        {
-          buffer.add(pageID);
-        }
-        catch (InterruptedException e)
+        String currentTitle = atts.getValue("title");
+        // Skip the record that matches the start page title (just pretend it isn't there)
+        if (startPageTitle == null || !currentTitle.equals(startPageTitle))
         {
-          throw new ManifoldCFException(e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+          lastTitle = currentTitle;
+          String pageID = atts.getValue("pageid");
+          // Add the discovered page id to the page buffer
+          try
+          {
+            buffer.add(pageID);
+          }
+          catch (InterruptedException e)
+          {
+            throw new ManifoldCFException(e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+          }
         }
       }
       return super.beginTag(namespaceURI,localName,qName,atts);
     }
     
-    protected void process()
-      throws ManifoldCFException
-    {
-    }
-    
     public String getLastTitle()
     {
       return lastTitle;
     }
   }
 
-  /** Create a URL to obtain a page's browse URL, given the page ID.
-  */
-  protected String getGetURLURL(String documentIdentifier)
-    throws ManifoldCFException
-  {
-    return baseURL + "action=query&prop=info&pageids="+documentIdentifier+"&inprop=url";
-  }
-  
-  /** Parse the response to a GetURL request, of the form:
-  * <api>
-  *  <query>
-  *    <pages>
-  *      <page pageid="27697087" ns="0" title="API" touched="2011-09-27T07:00:55Z" lastrevid="367741756" counter="" length="70" redirect="" fullurl="http://en.wikipedia.org/wiki/API" editurl="http://en.wikipedia.org/w/index.php?title=API&amp;action=edit" />
-  *    </pages>
-  *  </query>
-  *</api>
+
+  // -- Methods and classes to perform a "get Timestamp" operation. --
+
+  /** Obtain document versions for a set of documents.
   */
-  protected static String parseGetURLResponse(InputStream is, String documentIdentifier)
+  protected void getTimestamps(String[] documentIdentifiers, Map<String,String> versions, IVersionActivity activities)
     throws ManifoldCFException, ServiceInterruption
   {
-    // Parse the document.  This will cause various things to occur, within the instantiated XMLContext class.
-    XMLStream x = new XMLStream();
-    WikiGetURLAPIContext c = new WikiGetURLAPIContext(x);
-    x.setContext(c);
+    getSession();
+    HttpClient client = getInitializedClient();
+    HttpMethodBase executeMethod = getInitializedMethod(getGetTimestampURL(documentIdentifiers));
     try
     {
+      ExecuteGetTimestampThread t = new ExecuteGetTimestampThread(client,executeMethod,versions);
       try
       {
-        x.parse(is);
-        return c.getURL();
+        t.start();
+        t.join();
+        Throwable thr = t.getException();
+        if (thr != null)
+        {
+          if (thr instanceof ManifoldCFException)
+          {
+            if (((ManifoldCFException)thr).getErrorCode() == ManifoldCFException.INTERRUPTED)
+              throw new InterruptedException(thr.getMessage());
+            throw (ManifoldCFException)thr;
+          }
+          else if (thr instanceof ServiceInterruption)
+            throw (ServiceInterruption)thr;
+          else if (thr instanceof IOException)
+            throw (IOException)thr;
+          else if (thr instanceof RuntimeException)
+            throw (RuntimeException)thr;
+          else
+            throw (Error)thr;
+        }
+      }
+      catch (ManifoldCFException e)
+      {
+        t.interrupt();
+        throw e;
+      }
+      catch (ServiceInterruption e)
+      {
+        t.interrupt();
+        throw e;
       }
       catch (IOException e)
       {
-        long time = System.currentTimeMillis();
-        throw new ServiceInterruption(e.getMessage(),e,time + 300000L,time + 12L * 60000L,-1,false);
+        t.interrupt();
+        throw e;
       }
-      catch (ManifoldCFException e)
+      catch (InterruptedException e)
       {
-        // Ignore XML parsing errors.
-        if (e.getMessage().indexOf("pars") >= 0)
-        {
-          if (Logging.connectors.isDebugEnabled())
-            Logging.connectors.debug("Wiki: getURL() document '"+documentIdentifier+"' was unparseable ("+e.getMessage()+"), skipping");
-          return null;
-        }
+        t.interrupt();
+        // We need the caller to abandon any connections left around, so rethrow in a way that forces them to process the event properly.
         throw e;
       }
     }
-    finally
+    catch (InterruptedException e)
     {
-      x.cleanup();
+      // Drop the connection on the floor
+      executeMethod = null;
+      throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
     }
-  }
-  
-  /** Class representing the "api" context of a "get url" response */
-  protected static class WikiGetURLAPIContext extends SingleLevelContext
-  {
-    protected String fullURL = null;
-    
-    public WikiGetURLAPIContext(XMLStream theStream)
+    catch (ManifoldCFException e)
     {
-      super(theStream,"api");
+      if (e.getErrorCode() == ManifoldCFException.INTERRUPTED)
+        // Drop the connection on the floor
+        executeMethod = null;
+      throw e;
     }
-
-    protected BaseProcessingContext createChild(String namespaceURI, String localName, String qName, Attributes atts)
+    catch (java.net.SocketTimeoutException e)
     {
-      return new WikiGetURLQueryContext(theStream,namespaceURI,localName,qName,atts);
+      long currentTime = System.currentTimeMillis();
+      throw new ServiceInterruption("Version fetch timed out reading from the Wiki server: "+e.getMessage(),e,currentTime+300000L,currentTime+12L * 60000L,-1,false);
     }
-    
-    protected void finishChild(BaseProcessingContext child)
+    catch (java.net.SocketException e)
     {
-      fullURL = ((WikiGetURLQueryContext)child).getURL();
+      long currentTime = System.currentTimeMillis();
+      throw new ServiceInterruption("Version fetch received a socket error reading from Wiki server: "+e.getMessage(),e,currentTime+300000L,currentTime+12L * 60000L,-1,false);
     }
-
-    protected void process()
-      throws ManifoldCFException
+    catch (org.apache.commons.httpclient.ConnectTimeoutException e)
     {
+      long currentTime = System.currentTimeMillis();
+      throw new ServiceInterruption("Version fetch connection timed out reading from Wiki server: "+e.getMessage(),e,currentTime+300000L,currentTime+12L * 60000L,-1,false);
     }
-    
-    public String getURL()
+    catch (InterruptedIOException e)
     {
-      return fullURL;
+      executeMethod = null;
+      throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
     }
-
-  }
-
-  /** Class representing the "api/query" context of a "get url" response */
-  protected static class WikiGetURLQueryContext extends SingleLevelContext
-  {
-    protected String fullURL = null;
-    
-    public WikiGetURLQueryContext(XMLStream theStream, String namespaceURI, String localName, String qName, Attributes atts)
+    catch (IOException e)
     {
-      super(theStream,namespaceURI,localName,qName,atts,"query");
+      throw new ManifoldCFException("Version fetch had an IO failure: "+e.getMessage(),e);
+    }
+    finally
+    {
+      if (executeMethod != null)
+        executeMethod.releaseConnection();
+    }
+  }
+
+  /** Create a URL to obtain multiple page's timestamps, given the page IDs.
+  */
+  protected String getGetTimestampURL(String[] documentIdentifiers)
+    throws ManifoldCFException
+  {
+    StringBuilder sb = new StringBuilder();
+    for (int i = 0 ; i < documentIdentifiers.length ; i++)
+    {
+      if (i > 0)
+        sb.append(",");
+      sb.append(documentIdentifiers[i]);
+    }
+    return baseURL + "action=query&prop=revisions&pageids="+sb.toString()+"&rvprop=timestamp";
+  }
+
+  /** Thread to execute a "get timestamp" operation.  This thread both executes the operation and parses the result. */
+  protected static class ExecuteGetTimestampThread extends Thread
+  {
+    protected HttpClient client;
+    protected HttpMethodBase executeMethod;
+    protected Throwable exception = null;
+    protected Map<String,String> versions;
+
+    public ExecuteGetTimestampThread(HttpClient client, HttpMethodBase executeMethod, Map<String,String> versions)
+    {
+      super();
+      setDaemon(true);
+      this.client = client;
+      this.executeMethod = executeMethod;
+      this.versions = versions;
+    }
+
+    public void run()
+    {
+      try
+      {
+        // Call the execute method appropriately
+        int rval = client.executeMethod(executeMethod);
+        if (rval != 200)
+          throw new ManifoldCFException("Unexpected response code: "+rval);
+        // Read response and make sure it's valid
+        InputStream is = executeMethod.getResponseBodyAsStream();
+        try
+        {
+          parseGetTimestampResponse(is,versions);
+        }
+        finally
+        {
+          try
+          {
+            is.close();
+          }
+          catch (IllegalStateException e)
+          {
+            // Ignore this error
+          }
+        }
+      }
+      catch (Throwable e)
+      {
+        this.exception = e;
+      }
+    }
+
+    public Throwable getException()
+    {
+      return exception;
+    }
+
+  }
+
+  /** This method parses a response like the following:
+  * <api>
+  *   <query>
+  *     <pages>
+  *       <page pageid="27697087" ns="0" title="API">
+  *         <revisions>
+  *           <rev user="Graham87" timestamp="2010-06-13T08:41:17Z" />
+  *         </revisions>
+  *       </page>
+  *     </pages>
+  *   </query>
+  * </api>
+  */
+  protected static void parseGetTimestampResponse(InputStream is, Map<String,String> versions)
+    throws ManifoldCFException, ServiceInterruption
+  {
+    // Parse the document.  This will cause various things to occur, within the instantiated XMLContext class.
+    XMLStream x = new XMLStream();
+    WikiGetTimestampAPIContext c = new WikiGetTimestampAPIContext(x,versions);
+    x.setContext(c);
+    try
+    {
+      try
+      {
+        x.parse(is);
+      }
+      catch (IOException e)
+      {
+        long time = System.currentTimeMillis();
+        throw new ServiceInterruption(e.getMessage(),e,time + 300000L,time + 12L * 60000L,-1,false);
+      }
+    }
+    finally
+    {
+      x.cleanup();
+    }
+  }
+
+  /** Class representing the "api" context of a "get timestamp" response */
+  protected static class WikiGetTimestampAPIContext extends SingleLevelContext
+  {
+    protected Map<String,String> versions;
+    
+    public WikiGetTimestampAPIContext(XMLStream theStream, Map<String,String> versions)
+    {
+      super(theStream,"api");
+      this.versions = versions;
+    }
+
+    protected BaseProcessingContext createChild(String namespaceURI, String localName, String qName, Attributes atts)
+    {
+      return new WikiGetTimestampQueryContext(theStream,namespaceURI,localName,qName,atts,versions);
+    }
+    
+    protected void finishChild(BaseProcessingContext child)
+      throws ManifoldCFException
+    {
+    }
+
+  }
+
+  /** Class representing the "api/query" context of a "get timestamp" response */
+  protected static class WikiGetTimestampQueryContext extends SingleLevelContext
+  {
+    protected Map<String,String> versions;
+    
+    public WikiGetTimestampQueryContext(XMLStream theStream, String namespaceURI, String localName, String qName, Attributes atts,
+      Map<String,String> versions)
+    {
+      super(theStream,namespaceURI,localName,qName,atts,"query");
+      this.versions = versions;
+    }
+
+    protected BaseProcessingContext createChild(String namespaceURI, String localName, String qName, Attributes atts)
+    {
+      return new WikiGetTimestampPagesContext(theStream,namespaceURI,localName,qName,atts,versions);
+    }
+
+    protected void finishChild(BaseProcessingContext child)
+      throws ManifoldCFException
+    {
+    }
+    
+  }
+
+  /** Class looking for the "api/query/pages" context of a "get timestamp" response */
+  protected static class WikiGetTimestampPagesContext extends SingleLevelContext
+  {
+    protected Map<String,String> versions;
+    
+    public WikiGetTimestampPagesContext(XMLStream theStream, String namespaceURI, String localName, String qName, Attributes atts,
+      Map<String,String> versions)
+    {
+      super(theStream,namespaceURI,localName,qName,atts,"pages");
+      this.versions = versions;
+    }
+
+    protected BaseProcessingContext createChild(String namespaceURI, String localName, String qName, Attributes atts)
+    {
+      return new WikiGetTimestampPageContext(theStream,namespaceURI,localName,qName,atts);
+    }
+
+    protected void finishChild(BaseProcessingContext child)
+      throws ManifoldCFException
+    {
+      WikiGetTimestampPageContext pc = (WikiGetTimestampPageContext)child;
+      String pageID = pc.getPageID();
+      String version = pc.getLastEdit();
+      if (pageID != null && version != null)
+        versions.put(pageID,version);
+    }
+  }
+
+  /** Class looking for the "api/query/pages/page" context of a "get timestamp" response */
+  protected static class WikiGetTimestampPageContext extends BaseProcessingContext
+  {
+    protected String pageID = null;
+    protected String lastRevEdit = null;
+    
+    public WikiGetTimestampPageContext(XMLStream theStream, String namespaceURI, String localName, String qName, Attributes atts)
+    {
+      super(theStream,namespaceURI,localName,qName,atts);
+    }
+
+    protected XMLContext beginTag(String namespaceURI, String localName, String qName, Attributes atts)
+      throws ManifoldCFException, ServiceInterruption
+    {
+      if (qName.equals("page"))
+      {
+        pageID = atts.getValue("pageid");
+        return new WikiGetTimestampRevisionsContext(theStream,namespaceURI,localName,qName,atts);
+      }
+      return super.beginTag(namespaceURI,localName,qName,atts);
+    }
+    
+    protected void endTag()
+      throws ManifoldCFException, ServiceInterruption
+    {
+      XMLContext theContext = theStream.getContext();
+      String theTag = theContext.getQname();
+
+      if (theTag.equals("page"))
+        lastRevEdit = ((WikiGetTimestampRevisionsContext)theContext).getTimestamp();
+      else
+        super.endTag();
+    }
+    
+    public String getPageID()
+    {
+      return pageID;
+    }
+    
+    public String getLastEdit()
+    {
+      return lastRevEdit;
+    }
+  }
+
+  /** Class looking for the "api/query/pages/page/revisions" context of a "get timestamp" response */
+  protected static class WikiGetTimestampRevisionsContext extends SingleLevelContext
+  {
+    protected String timestamp = null;
+    
+    public WikiGetTimestampRevisionsContext(XMLStream theStream, String namespaceURI, String localName, String qName, Attributes atts)
+    {
+      super(theStream,namespaceURI,localName,qName,atts,"revisions");
+    }
+
+    protected BaseProcessingContext createChild(String namespaceURI, String localName, String qName, Attributes atts)
+    {
+      return new WikiGetTimestampRevContext(theStream,namespaceURI,localName,qName,atts);
+    }
+
+    protected void finishChild(BaseProcessingContext child)
+      throws ManifoldCFException
+    {
+      WikiGetTimestampRevContext rc = (WikiGetTimestampRevContext)child;
+      if (timestamp == null)
+        timestamp = rc.getTimestamp();
+    }
+    
+    public String getTimestamp()
+    {
+      return timestamp;
+    }
+  }
+
+  /** Class looking for the "api/query/pages/page/revisions/rev" context of a "get timestamp" response */
+  protected static class WikiGetTimestampRevContext extends BaseProcessingContext
+  {
+    protected String timestamp = null;
+    
+    public WikiGetTimestampRevContext(XMLStream theStream, String namespaceURI, String localName, String qName, Attributes atts)
+    {
+      super(theStream,namespaceURI,localName,qName,atts);
+    }
+
+    protected XMLContext beginTag(String namespaceURI, String localName, String qName, Attributes atts)
+      throws ManifoldCFException, ServiceInterruption
+    {
+      if (qName.equals("rev"))
+        timestamp = atts.getValue("timestamp");
+      return super.beginTag(namespaceURI,localName,qName,atts);
+    }
+    
+    public String getTimestamp()
+    {
+      return timestamp;
+    }
+  }
+    
+  // -- Methods and classes to perform a "get Docinfo" operation. --
+
+  /** Get document info and index the document.
+  */
+  protected void getDocInfo(String documentIdentifier, String documentVersion, IProcessActivity activities)
+    throws ManifoldCFException, ServiceInterruption
+  {
+    getSession();
+    HttpClient client = getInitializedClient();
+    HttpMethodBase executeMethod = getInitializedMethod(getGetDocInfoURL(documentIdentifier));
+    
+    String statusCode = "UNKNOWN";
+    String errorMessage = null;
+    long startTime = System.currentTimeMillis();
+    long dataSize = 0L;
+    
+    try
+    {
+      ExecuteGetDocInfoThread t = new ExecuteGetDocInfoThread(client,executeMethod,documentIdentifier);
+      try
+      {
+        t.start();
+        t.join();
+        
+        statusCode = t.getStatusCode();
+        errorMessage = t.getErrorMessage();
+          
+        Throwable thr = t.getException();
+        if (thr != null)
+        {
+          if (thr instanceof ManifoldCFException)
+          {
+            if (((ManifoldCFException)thr).getErrorCode() == ManifoldCFException.INTERRUPTED)
+              throw new InterruptedException(thr.getMessage());
+            throw (ManifoldCFException)thr;
+          }
+          else if (thr instanceof ServiceInterruption)
+            throw (ServiceInterruption)thr;
+          else if (thr instanceof IOException)
+            throw (IOException)thr;
+          else if (thr instanceof RuntimeException)
+            throw (RuntimeException)thr;
+          else
+            throw (Error)thr;
+        }
+ 
+        // Fetch all the data we need from the thread, and do the indexing.
+        File contentFile = t.getContentFile();
+        if (contentFile != null)
+        {
+          statusCode = "OK";
+          try
+          {
+            String author = t.getAuthor();
+            String comment = t.getComment();
+            String title = t.getTitle();
+            String fullURL = t.getFullURL();
+            
+            RepositoryDocument rd = new RepositoryDocument();
+            dataSize = contentFile.length();
+            InputStream is = new FileInputStream(contentFile);
+            try
+            {
+              rd.setBinary(is,dataSize);
+              if (comment != null)
+                rd.addField("comment",comment);
+              if (author != null)
+                rd.addField("author",author);
+              if (title != null)
+                rd.addField("title",title);
+              activities.ingestDocument(documentIdentifier,documentVersion,fullURL,rd);
+            }
+            finally
+            {
+              is.close();
+            }
+          }
+          finally
+          {
+            contentFile.delete();
+          }
+        }
+      }
+      catch (ManifoldCFException e)
+      {
+        t.interrupt();
+        throw e;
+      }
+      catch (ServiceInterruption e)
+      {
+        t.interrupt();
+        throw e;
+      }
+      catch (IOException e)
+      {
+        t.interrupt();
+        throw e;
+      }
+      catch (InterruptedException e)
+      {
+        t.interrupt();
+        // We need the caller to abandon any connections left around, so rethrow in a way that forces them to process the event properly.
+        throw e;
+      }
+      finally
+      {
+        t.cleanup();
+      }
+    }
+    catch (InterruptedException e)
+    {
+      // Drop the connection on the floor
+      executeMethod = null;
+      statusCode = null;
+      throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+    }
+    catch (ManifoldCFException e)
+    {
+      if (e.getErrorCode() == ManifoldCFException.INTERRUPTED)
+      {
+        // Drop the connection on the floor
+        executeMethod = null;
+        statusCode = null;
+      }
+      throw e;
+    }
+    catch (java.net.SocketTimeoutException e)
+    {
+      long currentTime = System.currentTimeMillis();
+      throw new ServiceInterruption("Get doc info timed out reading from the Wiki server: "+e.getMessage(),e,currentTime+300000L,currentTime+12L * 60000L,-1,false);
+    }
+    catch (java.net.SocketException e)
+    {
+      long currentTime = System.currentTimeMillis();
+      throw new ServiceInterruption("Get doc info received a socket error reading from Wiki server: "+e.getMessage(),e,currentTime+300000L,currentTime+12L * 60000L,-1,false);
+    }
+    catch (org.apache.commons.httpclient.ConnectTimeoutException e)
+    {
+      long currentTime = System.currentTimeMillis();
+      throw new ServiceInterruption("Get doc info connection timed out reading from Wiki server: "+e.getMessage(),e,currentTime+300000L,currentTime+12L * 60000L,-1,false);
+    }
+    catch (InterruptedIOException e)
+    {
+      executeMethod = null;
+      statusCode = null;
+      throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+    }
+    catch (IOException e)
+    {
+      throw new ManifoldCFException("Get doc info had an IO failure: "+e.getMessage(),e);
+    }
+    finally
+    {
+      if (executeMethod != null)
+        executeMethod.releaseConnection();
+      if (statusCode != null)
+        activities.recordActivity(new Long(startTime),ACTIVITY_FETCH,new Long(dataSize),documentIdentifier,statusCode,errorMessage,null);
+    }
+  }
+  
+  /** Thread to execute a "get doc info" operation.  This thread both executes the operation and parses the result. */
+  protected static class ExecuteGetDocInfoThread extends Thread
+  {
+    protected HttpClient client;
+    protected HttpMethodBase executeMethod;
+    protected Throwable exception = null;
+    protected String documentIdentifier;
+    protected File contentFile = null;
+    protected String author = null;
+    protected String title = null;
+    protected String comment = null;
+    protected String fullURL = null;
+    
+    protected String statusCode = null;
+    protected String errorMessage = null;
+
+    public ExecuteGetDocInfoThread(HttpClient client, HttpMethodBase executeMethod, String documentIdentifier)
+    {
+      super();
+      setDaemon(true);
+      this.client = client;
+      this.executeMethod = executeMethod;
+      this.documentIdentifier = documentIdentifier;
+    }
+
+    public void run()
+    {
+      try
+      {
+        // Call the execute method appropriately
+        int rval = client.executeMethod(executeMethod);
+        if (rval != 200)
+        {
+          statusCode = "HTTP code "+rval;
+          throw new ManifoldCFException("Unexpected response code "+rval+": "+executeMethod.getResponseBodyAsString());
+        }
+        // Read response and make sure it's valid
+        InputStream is = executeMethod.getResponseBodyAsStream();
+        try
+        {
+          // Parse the document.  This will cause various things to occur, within the instantiated XMLContext class.
+          // <api>
+          //  <query>
+          //    <pages>
+          //      <page pageid="27697087" ns="0" title="API" touched="2011-09-27T07:00:55Z" lastrevid="367741756" counter="" length="70" redirect="" fullurl="http://en.wikipedia.org/wiki/API" editurl="http://en.wikipedia.org/w/index.php?title=API&amp;action=edit">
+          //        <revisions>
+          //          <rev user="Graham87" timestamp="2010-06-13T08:41:17Z" comment="Protected API: restore protection ([edit=sysop] (indefinite) [move=sysop] (indefinite))" xml:space="preserve">#REDIRECT [[Application programming interface]]{{R from abbreviation}}</rev>
+          //        </revisions>
+          //      </page>
+          //    </pages>
+          //  </query>
+          //</api>
+
+          XMLStream x = new XMLStream();
+          WikiGetDocInfoAPIContext c = new WikiGetDocInfoAPIContext(x);
+          x.setContext(c);
+          try
+          {
+            try
+            {
+              x.parse(is);
+              contentFile = c.getContentFile();
+              fullURL = c.getURL();
+              title = c.getTitle();
+              author = c.getAuthor();
+              comment = c.getComment();
+              statusCode = "OK";
+            }
+            catch (IOException e)
+            {
+              long time = System.currentTimeMillis();
+              throw new ServiceInterruption(e.getMessage(),e,time + 300000L,time + 12L * 60000L,-1,false);
+            }
+          }
+          finally
+          {
+            x.cleanup();
+          }
+        }
+        finally
+        {
+          try
+          {
+            is.close();
+          }
+          catch (IllegalStateException e)
+          {
+            // Ignore this error
+          }
+        }
+      }
+      catch (Throwable e)
+      {
+        statusCode = "Exception";
+        errorMessage = e.getMessage();
+        this.exception = e;
+      }
     }
 
-    protected BaseProcessingContext createChild(String namespaceURI, String localName, String qName, Attributes atts)
+    public Throwable getException()
+    {
+      return exception;
+    }
+
+    public String getStatusCode()
+    {
+      return statusCode;
+    }
+    
+    public String getErrorMessage()
+    {
+      return errorMessage;
+    }
+    
+    public File getContentFile()
+    {
+      File rval = contentFile;
+      contentFile = null;
+      return rval;
+    }
+    
+    public String getAuthor()
+    {
+      return author;
+    }
+    
+    public String getComment()
+    {
+      return comment;
+    }
+    
+    public String getTitle()
+    {
+      return title;
+    }
+    
+    public String getFullURL()
+    {
+      return fullURL;
+    }
+    
+    public void cleanup()
+    {
+      if (contentFile != null)
+      {
+        contentFile.delete();
+        contentFile = null;
+      }
+    }
+    
+  }
+
+  /** Create a URL to obtain a page's metadata and content, given the page ID.
+  * QUESTION: Can we do multiple document identifiers at a time??
+  */
+  protected String getGetDocInfoURL(String documentIdentifier)
+    throws ManifoldCFException
+  {
+    return baseURL + "action=query&prop=revisions&pageids="+documentIdentifier+"&rvprop=user|comment|content&inprop=url";
+  }
+
+  /** Class representing the "api" context of a "get doc info" response */
+  protected static class WikiGetDocInfoAPIContext extends SingleLevelContext
+  {
+    /** Full URL */
+    protected String fullURL = null;
+    /** Title */
+    protected String title = null;
+    /** Content file */
+    protected File contentFile = null;
+    /** Author */
+    protected String author = null;
+    /** Comment */
+    protected String comment = null;
+    
+    public WikiGetDocInfoAPIContext(XMLStream theStream)
     {
-      return new WikiGetURLPagesContext(theStream,namespaceURI,localName,qName,atts);
+      super(theStream,"api");
     }
 
+    protected BaseProcessingContext createChild(String namespaceURI, String localName, String qName, Attributes atts)
+    {
+      return new WikiGetDocInfoQueryContext(theStream,namespaceURI,localName,qName,atts);
+    }
+    
     protected void finishChild(BaseProcessingContext child)
+      throws ManifoldCFException
+    {
+      WikiGetDocInfoQueryContext pc = (WikiGetDocInfoQueryContext)child;
+      tagCleanup();
+      fullURL = pc.getURL();
+      title = pc.getTitle();
+      contentFile = pc.getContentFile();
+      author = pc.getAuthor();
+      comment = pc.getComment();
+    }
+    
+    protected void tagCleanup()
+      throws ManifoldCFException
+    {
+      // Delete the contents file if it is there.
+      if (contentFile != null)
+      {
+        contentFile.delete();
+        contentFile = null;
+      }
+    }
+
+    public String getURL()
+    {
+      return fullURL;
+    }
+
+    public String getTitle()
+    {
+      return title;
+    }
+    
+    public File getContentFile()
+    {
+      File rval = contentFile;
+      contentFile = null;
+      return rval;
+    }
+    
+    public String getAuthor()
+    {
+      return author;
+    }
+    
+    public String getComment()
+    {
+      return comment;
+    }
+
+  }
+
+  /** Class representing the "api/query" context of a "get doc info" response */
+  protected static class WikiGetDocInfoQueryContext extends SingleLevelContext
+  {
+    /** Full URL */
+    protected String fullURL = null;
+    /** Title */
+    protected String title = null;
+    /** Content file */
+    protected File contentFile = null;
+    /** Author */
+    protected String author = null;
+    /** Comment */
+    protected String comment = null;
+    
+    public WikiGetDocInfoQueryContext(XMLStream theStream, String namespaceURI, String localName, String qName, Attributes atts)
+    {
+      super(theStream,namespaceURI,localName,qName,atts,"query");
+    }
+
+    protected BaseProcessingContext createChild(String namespaceURI, String localName, String qName, Attributes atts)
     {
-      fullURL = ((WikiGetURLPagesContext)child).getURL();
+      return new WikiGetDocInfoPagesContext(theStream,namespaceURI,localName,qName,atts);
     }
 
-    protected void process()
+    protected void finishChild(BaseProcessingContext child)
       throws ManifoldCFException
     {
+      WikiGetDocInfoPagesContext pc = (WikiGetDocInfoPagesContext)child;
+      tagCleanup();
+      fullURL = pc.getURL();
+      title = pc.getTitle();
+      contentFile = pc.getContentFile();
+      author = pc.getAuthor();
+      comment = pc.getComment();
     }
     
+    protected void tagCleanup()
+      throws ManifoldCFException
+    {
+      // Delete the contents file if it is there.
+      if (contentFile != null)
+      {
+        contentFile.delete();
+        contentFile = null;
+      }
+    }
+
     public String getURL()
     {
       return fullURL;
     }
+
+    public String getTitle()
+    {
+      return title;
+    }
+    
+    public File getContentFile()
+    {
+      File rval = contentFile;
+      contentFile = null;
+      return rval;
+    }
+    
+    public String getAuthor()
+    {
+      return author;
+    }
+    
+    public String getComment()
+    {
+      return comment;
+    }
     
   }
 
-  /** Class representing the "api/query/pages" context of a "get url" response */
-  protected static class WikiGetURLPagesContext extends SingleLevelContext
+  /** Class representing the "api/query/pages" context of a "get doc info" response */
+  protected static class WikiGetDocInfoPagesContext extends SingleLevelContext
   {
+    /** Full URL */
     protected String fullURL = null;
+    /** Title */
+    protected String title = null;
+    /** Content file */
+    protected File contentFile = null;
+    /** Author */
+    protected String author = null;
+    /** Comment */
+    protected String comment = null;
     
-    public WikiGetURLPagesContext(XMLStream theStream, String namespaceURI, String localName, String qName, Attributes atts)
+    public WikiGetDocInfoPagesContext(XMLStream theStream, String namespaceURI, String localName, String qName, Attributes atts)
     {
       super(theStream,namespaceURI,localName,qName,atts,"pages");
     }
 
     protected BaseProcessingContext createChild(String namespaceURI, String localName, String qName, Attributes atts)
     {
-      return new WikiGetURLPageContext(theStream,namespaceURI,localName,qName,atts);
+      return new WikiGetDocInfoPageContext(theStream,namespaceURI,localName,qName,atts);
     }
     
     protected void finishChild(BaseProcessingContext child)
+      throws ManifoldCFException
     {
-      fullURL = ((WikiGetURLPagesContext)child).getURL();
+      WikiGetDocInfoPageContext pc = (WikiGetDocInfoPageContext)child;
+      tagCleanup();
+      fullURL = pc.getURL();
+      title = pc.getTitle();
+      contentFile = pc.getContentFile();
+      author = pc.getAuthor();
+      comment = pc.getComment();
     }
-
-    protected void process()
+    
+    protected void tagCleanup()
       throws ManifoldCFException
     {
+      // Delete the contents file if it is there.
+      if (contentFile != null)
+      {
+        contentFile.delete();
+        contentFile = null;
+      }
     }
-    
+
     public String getURL()
     {
       return fullURL;
     }
 
+    public String getTitle()
+    {
+      return title;
+    }
+    
+    public File getContentFile()
+    {
+      File rval = contentFile;
+      contentFile = null;
+      return rval;
+    }
+    
+    public String getAuthor()
+    {
+      return author;
+    }
+    
+    public String getComment()
+    {
+      return comment;
+    }
+
   }
 
-  /** Class representing the "api/query/pages/page" context of a "get url" response */
-  protected static class WikiGetURLPageContext extends BaseProcessingContext
+  /** Class representing the "api/query/pages/page" context of a "get doc info" response */
+  protected static class WikiGetDocInfoPageContext extends BaseProcessingContext
   {
+    /** Full URL */
     protected String fullURL = null;
+    /** Title */
+    protected String title = null;
+    /** Content file */
+    protected File contentFile = null;
+    /** Author */
+    protected String author = null;
+    /** Comment */
+    protected String comment = null;
     
-    public WikiGetURLPageContext(XMLStream theStream, String namespaceURI, String localName, String qName, Attributes atts)
+    public WikiGetDocInfoPageContext(XMLStream theStream, String namespaceURI, String localName, String qName, Attributes atts)
     {
       super(theStream,namespaceURI,localName,qName,atts);
     }
@@ -1501,43 +2256,213 @@ public class WikiConnector extends org.a
       throws ManifoldCFException, ServiceInterruption
     {
       if (qName.equals("page"))
+      {
         fullURL = atts.getValue("fullurl");
+        title = atts.getValue("title");
+        return new WikiGetDocInfoRevisionsContext(theStream,namespaceURI,localName,qName,atts);
+      }
       return super.beginTag(namespaceURI,localName,qName,atts);
     }
     
-    protected void process()
+    protected void endTag()
+      throws ManifoldCFException, ServiceInterruption
+    {
+      XMLContext theContext = theStream.getContext();
+      String theTag = theContext.getQname();
+      if (theTag.equals("page"))
+      {
+        // Pull down the data
+        WikiGetDocInfoRevisionsContext rc = (WikiGetDocInfoRevisionsContext)theContext;
+        tagCleanup();
+        contentFile = rc.getContentFile();
+        author = rc.getAuthor();
+        comment = rc.getComment();
+      }
+      super.endTag();
+    }
+
+    protected void tagCleanup()
       throws ManifoldCFException
     {
+      // Delete the contents file if it is there.
+      if (contentFile != null)
+      {
+        contentFile.delete();
+        contentFile = null;
+      }
     }
-    
+
     public String getURL()
     {
       return fullURL;
     }
+    
+    public String getTitle()
+    {
+      return title;
+    }
+    
+    public File getContentFile()
+    {
+      File rval = contentFile;
+      contentFile = null;
+      return rval;
+    }
+    
+    public String getAuthor()
+    {
+      return author;
+    }
+    
+    public String getComment()
+    {
+      return comment;
+    }
+    
   }
 
-  /** Create a URL to obtain a page's timestamp, given the page ID.
-  */
-  protected String getGetTimestampURL(String documentIdentifier)
-    throws ManifoldCFException
+  /** Class representing the "api/query/pages/page/revisions" context of a "get doc info" response */
+  protected static class WikiGetDocInfoRevisionsContext extends SingleLevelContext
   {
-    return baseURL + "action=query&prop=revisions&pageids="+documentIdentifier+"&rvprop=timestamp";
-  }
+    protected File contentFile = null;
+    protected String author = null;
+    protected String comment = null;
+    
+    public WikiGetDocInfoRevisionsContext(XMLStream theStream, String namespaceURI, String localName, String qName, Attributes atts)
+    {
+      super(theStream,namespaceURI,localName,qName,atts,"revisions");
+    }
 
-  /** Create a URL to obtain a page's metadata and content, given the page ID.
-  */
-  protected String getGetDocinfoURL(String documentIdentifier)
-    throws ManifoldCFException
-  {
-    return baseURL + "action=query&prop=revisions&pageids="+documentIdentifier+"&rvprop=user|comment|content";
+    protected BaseProcessingContext createChild(String namespaceURI, String localName, String qName, Attributes atts)
+    {
+      // MHL to ensure that only the FIRST revision is taken.
+      return new WikiGetDocInfoRevContext(theStream,namespaceURI,localName,qName,atts);
+    }
+
+    protected void finishChild(BaseProcessingContext child)
+      throws ManifoldCFException
+    {
+      WikiGetDocInfoRevContext rc = (WikiGetDocInfoRevContext)child;
+      tagCleanup();
+      contentFile = rc.getContentFile();
+      author = rc.getAuthor();
+      comment = rc.getComment();
+    }
+    
+    protected void tagCleanup()
+      throws ManifoldCFException
+    {
+      // Delete the contents file if it is there.
+      if (contentFile != null)
+      {
+        contentFile.delete();
+        contentFile = null;
+      }
+    }
+
+    public File getContentFile()
+    {
+      File rval = contentFile;
+      contentFile = null;
+      return rval;
+    }
+    
+    public String getAuthor()
+    {
+      return author;
+    }
+    
+    public String getComment()
+    {
+      return comment;
+    }
+    
   }
 
-  /** Get a URL for a check operation.
-  */
-  protected String getCheckURL()
-    throws ManifoldCFException
+  /** Class looking for the "api/query/pages/page/revisions/rev" context of a "get doc info" response */
+  protected static class WikiGetDocInfoRevContext extends BaseProcessingContext
   {
-    return baseURL + "action=query&list=allpages&aplimit=1";
+    protected String author = null;
+    protected String comment = null;
+    protected File contentFile = null;
+    
+    public WikiGetDocInfoRevContext(XMLStream theStream, String namespaceURI, String localName, String qName, Attributes atts)
+    {
+      super(theStream,namespaceURI,localName,qName,atts);
+    }
+
+    protected XMLContext beginTag(String namespaceURI, String localName, String qName, Attributes atts)
+      throws ManifoldCFException, ServiceInterruption
+    {
+      if (qName.equals("rev"))
+      {
+        author = atts.getValue("user");
+        comment = atts.getValue("comment");
+        try
+        {
+          File tempFile = File.createTempFile("_wikidata_","tmp");
+          return new XMLFileContext(theStream,namespaceURI,localName,qName,atts,tempFile);
+        }
+        catch (java.net.SocketTimeoutException e)
+        {
+          throw new ManifoldCFException("IO exception creating temp file: "+e.getMessage(),e);
+        }
+        catch (InterruptedIOException e)
+        {
+          throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+        }
+        catch (IOException e)
+        {
+          throw new ManifoldCFException("IO exception creating temp file: "+e.getMessage(),e);
+        }
+      }
+      return super.beginTag(namespaceURI,localName,qName,atts);
+    }
+
+    protected void endTag()
+      throws ManifoldCFException, ServiceInterruption
+    {
+      XMLContext theContext = theStream.getContext();
+      String theTag = theContext.getQname();
+      if (theTag.equals("rev"))
+      {
+        // Pull down the data
+        XMLFileContext rc = (XMLFileContext)theContext;
+        tagCleanup();
+        contentFile = rc.getCompletedFile();
+      }
+      else
+        super.endTag();
+    }
+
+    protected void tagCleanup()
+      throws ManifoldCFException
+    {
+      // Delete the contents file if it is there.
+      if (contentFile != null)
+      {
+        contentFile.delete();
+        contentFile = null;
+      }
+    }
+    
+    public String getAuthor()
+    {
+      return author;
+    }
+    
+    public String getComment()
+    {
+      return comment;
+    }
+    
+    public File getContentFile()
+    {
+      File rval = contentFile;
+      contentFile = null;
+      return rval;
+    }
+    
   }
   
 }

Added: incubator/lcf/branches/CONNECTORS-256/connectors/wiki/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/wiki/tests/Base.java
URL: http://svn.apache.org/viewvc/incubator/lcf/branches/CONNECTORS-256/connectors/wiki/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/wiki/tests/Base.java?rev=1181236&view=auto
==============================================================================
--- incubator/lcf/branches/CONNECTORS-256/connectors/wiki/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/wiki/tests/Base.java (added)
+++ incubator/lcf/branches/CONNECTORS-256/connectors/wiki/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/wiki/tests/Base.java Mon Oct 10 21:03:46 2011
@@ -0,0 +1,119 @@
+/* $Id$ */
+
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.manifoldcf.crawler.connectors.wiki.tests;
+
+import org.apache.manifoldcf.core.interfaces.*;
+import org.apache.manifoldcf.agents.interfaces.*;
+import org.apache.manifoldcf.crawler.interfaces.*;
+import org.apache.manifoldcf.crawler.system.ManifoldCF;
+
+import org.mortbay.jetty.Server;
+import org.mortbay.jetty.servlet.Context;
+import org.mortbay.jetty.servlet.ServletHolder;
+
+import javax.servlet.http.HttpServlet;
+import javax.servlet.http.HttpServletRequest;
+import javax.servlet.http.HttpServletResponse;
+
+import java.io.*;
+import java.util.*;
+import org.junit.*;
+
+/** This is a testing base class that is responsible for setting up/tearing down the agents framework. */
+public class Base extends org.apache.manifoldcf.crawler.tests.ConnectorBase
+{
+  
+  /** Return the display names of the connectors registered for these tests. */
+  protected String[] getConnectorNames()
+  {
+    return new String[]{"Wiki Connector"};
+  }
+  
+  /** Return the implementation classes of the connectors registered for these tests. */
+  protected String[] getConnectorClasses()
+  {
+    return new String[]{"org.apache.manifoldcf.crawler.connectors.wiki.WikiConnector"};
+  }
+
+  /** Mock wiki service.  Runs an embedded Jetty server on port 8089 that
+  * emulates the MediaWiki API endpoint at /w/api.php. */
+  static class MockWikiService
+  {
+    Server server;
+    
+    public MockWikiService()
+    {
+      server = new Server(8089);
+      Context asContext = new Context(server,"/w",Context.SESSIONS);
+      asContext.addServlet(new ServletHolder(new WikiAPIServlet()), "/api.php");
+    }
+    
+    /** Start accepting connections. */
+    public void start() throws Exception {
+      server.start();
+    }
+    
+    /** Shut the mock service down. */
+    public void stop() throws Exception {
+      server.stop();
+    }
+
+    /** Servlet emulating just enough of the MediaWiki API for these tests.
+    * It validates the incoming query parameters and emits canned access
+    * tokens for the hard-coded test users. */
+    public static class WikiAPIServlet extends HttpServlet
+    {
+      @Override
+      public void service(HttpServletRequest req, HttpServletResponse res)
+          throws IOException
+      {
+        // Only the xml response format is supported by the connector.
+        // Null-safe compare: getParameter() returns null when absent.
+        String format = req.getParameter("format");
+        if (!"xml".equals(format))
+          throw new IOException("Format parameter incorrect: "+format);
+        // Exactly one of "action" or "list" must be supplied.
+        String list = req.getParameter("list");
+        String action = req.getParameter("action");
+        if (action == null && list == null)
+          throw new IOException("Must have either action or list");
+        if (action != null && list != null)
+          throw new IOException("Cannot have both action and list");
+        if (action != null)
+        {
+          if (!action.equals("query"))
+            throw new IOException("Action parameter incorrect: "+action);
+          String prop = req.getParameter("prop");
+          String pageIds = req.getParameter("pageids");
+          String rvprop = req.getParameter("rvprop");
+          String inprop = req.getParameter("inprop");
+          // MHL
+        }
+        else if (list != null)
+        {
+          if (!list.equals("allpages"))
+            throw new IOException("List parameter incorrect: "+list);
+          String apfrom = req.getParameter("apfrom");
+          String aplimit = req.getParameter("aplimit");
+          // MHL
+        }
+        
+        // MHL
+        // Emit the access tokens granted to each canned test user; a
+        // missing username simply yields an empty (token-less) response.
+        String user = req.getParameter("username");
+        res.setStatus(HttpServletResponse.SC_OK);
+        if (user != null)
+        {
+          if(user.equals("user1") || user.equals("user2") || user.equals("user3"))
+            res.getWriter().printf("TOKEN:token1\n");
+          if(user.equals("user2") || user.equals("user3"))
+            res.getWriter().printf("TOKEN:token2\n");
+          if(user.equals("user3"))
+            res.getWriter().printf("TOKEN:token3\n");
+        }
+      }
+    }
+  }
+
+}

Propchange: incubator/lcf/branches/CONNECTORS-256/connectors/wiki/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/wiki/tests/Base.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: incubator/lcf/branches/CONNECTORS-256/connectors/wiki/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/wiki/tests/Base.java
------------------------------------------------------------------------------
    svn:keywords = Id



Mime
View raw message