manifoldcf-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kwri...@apache.org
Subject svn commit: r1178968 - in /incubator/lcf/branches/CONNECTORS-256/connectors/wiki: ./ connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/
Date Tue, 04 Oct 2011 21:47:49 GMT
Author: kwright
Date: Tue Oct  4 21:47:48 2011
New Revision: 1178968

URL: http://svn.apache.org/viewvc?rev=1178968&view=rev
Log:
Start fleshing out code that parses the necessary XML responses.

Added:
    incubator/lcf/branches/CONNECTORS-256/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/BaseProcessingContext.java
  (with props)
    incubator/lcf/branches/CONNECTORS-256/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/SingleLevelContext.java
  (with props)
Modified:
    incubator/lcf/branches/CONNECTORS-256/connectors/wiki/   (props changed)
    incubator/lcf/branches/CONNECTORS-256/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/WikiConnector.java

Propchange: incubator/lcf/branches/CONNECTORS-256/connectors/wiki/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Tue Oct  4 21:47:48 2011
@@ -0,0 +1,3 @@
+build
+dist
+lib

Added: incubator/lcf/branches/CONNECTORS-256/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/BaseProcessingContext.java
URL: http://svn.apache.org/viewvc/incubator/lcf/branches/CONNECTORS-256/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/BaseProcessingContext.java?rev=1178968&view=auto
==============================================================================
--- incubator/lcf/branches/CONNECTORS-256/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/BaseProcessingContext.java
(added)
+++ incubator/lcf/branches/CONNECTORS-256/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/BaseProcessingContext.java
Tue Oct  4 21:47:48 2011
@@ -0,0 +1,55 @@
+/* $Id$ */
+
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.manifoldcf.crawler.connectors.wiki;
+
+import org.apache.manifoldcf.core.interfaces.*;
+import org.apache.manifoldcf.agents.interfaces.*;
+import org.apache.manifoldcf.crawler.interfaces.*;
+
+import org.xml.sax.Attributes;
+
+import org.apache.manifoldcf.agents.common.XMLStream;
+import org.apache.manifoldcf.agents.common.XMLContext;
+
+/** Common base class for the XML contexts used to parse wiki API responses.
+* Tag handling is inherited unchanged from XMLContext; concrete subclasses
+* must supply process().
+*/
+public abstract class BaseProcessingContext extends XMLContext
+{
+  /** Constructor.
+  *@param theStream is the XML stream this context is attached to.
+  */
+  public BaseProcessingContext(XMLStream theStream)
+  {
+    super(theStream);
+  }
+
+  /** Handle the start of a tag by deferring to the superclass.
+  *@return the context the superclass returns for this tag.
+  */
+  protected XMLContext beginTag(String namespaceURI, String localName, String qName, Attributes atts)
+    throws ManifoldCFException, ServiceInterruption
+  {
+    XMLContext nextContext = super.beginTag(namespaceURI,localName,qName,atts);
+    return nextContext;
+  }
+
+  /** Handle the end of a tag by deferring to the superclass. */
+  protected void endTag()
+    throws ManifoldCFException, ServiceInterruption
+  {
+    super.endTag();
+  }
+
+  /** Process this data.  Implemented by each concrete context. */
+  protected abstract void process()
+    throws ManifoldCFException;
+}

Propchange: incubator/lcf/branches/CONNECTORS-256/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/BaseProcessingContext.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: incubator/lcf/branches/CONNECTORS-256/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/BaseProcessingContext.java
------------------------------------------------------------------------------
    svn:keywords = Id

Added: incubator/lcf/branches/CONNECTORS-256/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/SingleLevelContext.java
URL: http://svn.apache.org/viewvc/incubator/lcf/branches/CONNECTORS-256/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/SingleLevelContext.java?rev=1178968&view=auto
==============================================================================
--- incubator/lcf/branches/CONNECTORS-256/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/SingleLevelContext.java
(added)
+++ incubator/lcf/branches/CONNECTORS-256/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/SingleLevelContext.java
Tue Oct  4 21:47:48 2011
@@ -0,0 +1,70 @@
+/* $Id$ */
+
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.manifoldcf.crawler.connectors.wiki;
+
+import org.apache.manifoldcf.core.interfaces.*;
+import org.apache.manifoldcf.agents.interfaces.*;
+import org.apache.manifoldcf.crawler.interfaces.*;
+
+import org.xml.sax.Attributes;
+
+import org.apache.manifoldcf.agents.common.XMLStream;
+import org.apache.manifoldcf.agents.common.XMLContext;
+
+/** Processing context that watches for a single named element.  When a tag
+* whose qualified name equals nodeName begins, the context returned by
+* createChild() takes over.  When endTag() finds that the stream's current
+* context has that qualified name, that context is cast to
+* BaseProcessingContext and handed to finishChild().  Every other tag is
+* passed to the superclass.
+*/
+public abstract class SingleLevelContext extends BaseProcessingContext
+{
+  /** Qualified name of the element this context looks for. */
+  protected String nodeName;
+
+  /** Constructor.
+  *@param theStream is the XML stream this context is attached to.
+  *@param nodeName is the qualified name of the element to match.
+  */
+  public SingleLevelContext(XMLStream theStream, String nodeName)
+  {
+    super(theStream);
+    this.nodeName = nodeName;
+  }
+
+  /** Start of a tag: hand matching tags to a child context, defer all others
+  * to the superclass.
+  */
+  protected XMLContext beginTag(String namespaceURI, String localName, String qName, Attributes atts)
+    throws ManifoldCFException, ServiceInterruption
+  {
+    if (!qName.equals(nodeName))
+      return super.beginTag(namespaceURI,localName,qName,atts);
+    return createChild(namespaceURI,localName,qName,atts);
+  }
+
+  /** Create the context that will handle the matched element.
+  *@return the child context to use for the matched element.
+  */
+  protected abstract BaseProcessingContext createChild(String namespaceURI, String localName, String qName, Attributes atts);
+
+  /** End of a tag: if the stream's current context is for the matched
+  * element, pass it to finishChild(); otherwise defer to the superclass.
+  */
+  protected void endTag()
+    throws ManifoldCFException, ServiceInterruption
+  {
+    XMLContext current = theStream.getContext();
+    if (!current.getQname().equals(nodeName))
+    {
+      super.endTag();
+      return;
+    }
+    finishChild((BaseProcessingContext)current);
+  }
+
+  /** Receive the child context once its element has ended.
+  *@param child is the context that was handling the matched element.
+  */
+  protected abstract void finishChild(BaseProcessingContext child);
+
+}

Propchange: incubator/lcf/branches/CONNECTORS-256/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/SingleLevelContext.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: incubator/lcf/branches/CONNECTORS-256/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/SingleLevelContext.java
------------------------------------------------------------------------------
    svn:keywords = Id

Modified: incubator/lcf/branches/CONNECTORS-256/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/WikiConnector.java
URL: http://svn.apache.org/viewvc/incubator/lcf/branches/CONNECTORS-256/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/WikiConnector.java?rev=1178968&r1=1178967&r2=1178968&view=diff
==============================================================================
--- incubator/lcf/branches/CONNECTORS-256/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/WikiConnector.java
(original)
+++ incubator/lcf/branches/CONNECTORS-256/connectors/wiki/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/wiki/WikiConnector.java
Tue Oct  4 21:47:48 2011
@@ -22,6 +22,15 @@ import org.apache.manifoldcf.core.interf
 import org.apache.manifoldcf.agents.interfaces.*;
 import org.apache.manifoldcf.crawler.interfaces.*;
 import org.apache.manifoldcf.crawler.system.Logging;
+
+import org.xml.sax.Attributes;
+
+import org.apache.manifoldcf.core.common.XMLDoc;
+import org.apache.manifoldcf.agents.common.XMLStream;
+import org.apache.manifoldcf.agents.common.XMLContext;
+import org.apache.manifoldcf.agents.common.XMLStringContext;
+import org.apache.manifoldcf.agents.common.XMLFileContext;
+
 import java.util.*;
 import java.io.*;
 
@@ -469,6 +478,164 @@ public class WikiConnector extends org.a
   {
   }
 
-  // Protected static methods
+  // Protected static classes and methods
+
+  /** Parse an XML response and return the URL that the parsing contexts
+  * extracted from it.
+  *@param is is the stream containing the XML response.
+  *@param documentIdentifier is used only in the debug log message.
+  *@return the value of WikiGetURLAPIContext.getURL() after parsing, or null
+  * if the document was rejected with a parsing-related ManifoldCFException.
+  *@throws ServiceInterruption if reading the stream fails with an IOException.
+  *@throws ManifoldCFException for any failure that is not a parsing error.
+  */
+  protected static String parseGetURLResponse(InputStream is, String documentIdentifier)
+    throws ManifoldCFException, ServiceInterruption
+  {
+    // Parse the document.  This will cause various things to occur, within the instantiated XMLContext class.
+    XMLStream x = new XMLStream();
+    WikiGetURLAPIContext c = new WikiGetURLAPIContext(x);
+    x.setContext(c);
+    try
+    {
+      x.parse(is);
+      return c.getURL();
+    }
+    catch (IOException e)
+    {
+      long time = System.currentTimeMillis();
+      throw new ServiceInterruption(e.getMessage(),e,time + 300000L,time + 12L * 60000L,-1,false);
+    }
+    catch (ManifoldCFException e)
+    {
+      // Ignore XML parsing errors.  The message can be null, in which case the
+      // exception cannot be classified as a parse error and is rethrown as-is
+      // instead of being masked by a NullPointerException.
+      String message = e.getMessage();
+      if (message != null && message.indexOf("pars") >= 0)
+      {
+        if (Logging.connectors.isDebugEnabled())
+          Logging.connectors.debug("Wiki: getURL() document '"+documentIdentifier+"' was unparseable ("+message+"), skipping");
+        return null;
+      }
+      throw e;
+    }
+    finally
+    {
+      // Always release the stream's resources, whatever the outcome.
+      x.cleanup();
+    }
+  }
+  
+  /** Context that matches the "api" element and delegates its contents to a
+  * WikiGetURLQueryContext, keeping the URL that child reports.
+  */
+  protected static class WikiGetURLAPIContext extends SingleLevelContext
+  {
+    /** URL copied from the child context; null until finishChild() has run. */
+    protected String fullURL = null;
+
+    public WikiGetURLAPIContext(XMLStream theStream)
+    {
+      super(theStream,"api");
+    }
+
+    /** Supply the context that handles the contents of the "api" element. */
+    protected BaseProcessingContext createChild(String namespaceURI, String localName, String qName, Attributes atts)
+    {
+      BaseProcessingContext queryContext = new WikiGetURLQueryContext(theStream);
+      return queryContext;
+    }
+
+    /** Copy the URL out of the finished query context. */
+    protected void finishChild(BaseProcessingContext child)
+    {
+      WikiGetURLQueryContext queryContext = (WikiGetURLQueryContext)child;
+      fullURL = queryContext.getURL();
+    }
+
+    /** Nothing to do at this level. */
+    protected void process()
+      throws ManifoldCFException
+    {
+    }
+
+    /** @return the URL recorded by finishChild(), or null if none was recorded. */
+    public String getURL()
+    {
+      return fullURL;
+    }
+
+  }
+
+  /** Context that matches the "query" element and delegates its contents to a
+  * WikiGetURLPagesContext, keeping the URL that child reports.
+  */
+  protected static class WikiGetURLQueryContext extends SingleLevelContext
+  {
+    /** URL copied from the child context; null until finishChild() has run. */
+    protected String fullURL = null;
+
+    public WikiGetURLQueryContext(XMLStream theStream)
+    {
+      super(theStream,"query");
+    }
+
+    /** Supply the context that handles the contents of the "query" element. */
+    protected BaseProcessingContext createChild(String namespaceURI, String localName, String qName, Attributes atts)
+    {
+      BaseProcessingContext pagesContext = new WikiGetURLPagesContext(theStream);
+      return pagesContext;
+    }
+
+    /** Copy the URL out of the finished pages context. */
+    protected void finishChild(BaseProcessingContext child)
+    {
+      WikiGetURLPagesContext pagesContext = (WikiGetURLPagesContext)child;
+      fullURL = pagesContext.getURL();
+    }
+
+    /** Nothing to do at this level. */
+    protected void process()
+      throws ManifoldCFException
+    {
+    }
+
+    /** @return the URL recorded by finishChild(), or null if none was recorded. */
+    public String getURL()
+    {
+      return fullURL;
+    }
+
+  }
+
+  /** Context that matches the "pages" element and delegates its contents to a
+  * WikiGetURLPageContext, keeping the URL that child reports.
+  */
+  protected static class WikiGetURLPagesContext extends SingleLevelContext
+  {
+    /** URL copied from the child context; null until finishChild() has run. */
+    protected String fullURL = null;
+
+    public WikiGetURLPagesContext(XMLStream theStream)
+    {
+      super(theStream,"pages");
+    }
+
+    /** Supply the context that handles the contents of the "pages" element. */
+    protected BaseProcessingContext createChild(String namespaceURI, String localName, String qName, Attributes atts)
+    {
+      return new WikiGetURLPageContext(theStream);
+    }
+
+    /** Copy the URL out of the finished page context. */
+    protected void finishChild(BaseProcessingContext child)
+    {
+      // The child is whatever createChild() produced, i.e. a WikiGetURLPageContext.
+      // Casting it to WikiGetURLPagesContext (this class) would always fail with
+      // a ClassCastException, because the two classes are unrelated siblings.
+      fullURL = ((WikiGetURLPageContext)child).getURL();
+    }
+
+    /** Nothing to do at this level. */
+    protected void process()
+      throws ManifoldCFException
+    {
+    }
+
+    /** @return the URL recorded by finishChild(), or null if none was recorded. */
+    public String getURL()
+    {
+      return fullURL;
+    }
+
+  }
+
+  protected static class WikiGetURLPageContext extends BaseProcessingContext
+  {
+    protected String fullURL = null;
+    
+    public WikiGetURLPageContext(XMLStream theStream)
+    {
+      super(theStream);
+    }
+
+    protected XMLContext beginTag(String namespaceURI, String localName, String qName, Attributes
atts)
+      throws ManifoldCFException, ServiceInterruption
+    {
+      if (qName.equals("page"))
+        fullURL = atts.getValue("fullurl");
+      return super.beginTag(namespaceURI,localName,qName,atts);
+    }
+    
+    protected void process()
+      throws ManifoldCFException
+    {
+    }
+    
+    public String getURL()
+    {
+      return fullURL;
+    }
+  }
 
 }



Mime
View raw message