manifoldcf-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kwri...@apache.org
Subject svn commit: r1423678 - /manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java
Date Tue, 18 Dec 2012 21:38:28 GMT
Author: kwright
Date: Tue Dec 18 21:38:27 2012
New Revision: 1423678

URL: http://svn.apache.org/viewvc?rev=1423678&view=rev
Log:
Move fix for CONNECTORS-589 to web connector also

Modified:
    manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java

Modified: manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java?rev=1423678&r1=1423677&r2=1423678&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java (original)
+++ manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java Tue Dec 18 21:38:27 2012
@@ -6557,7 +6557,7 @@ public class WebcrawlerConnector extends
 
   protected class FeedItemContextClass extends XMLContext
   {
-    protected String linkField = null;
+    protected List<String> linkField = new ArrayList<String>();
 
     public FeedItemContextClass(XMLStream theStream, String namespaceURI, String localName, String qName, Attributes atts)
     {
@@ -6571,7 +6571,9 @@ public class WebcrawlerConnector extends
       if (qName.equals("link"))
       {
         // "link" tag
-        linkField = atts.getValue("href");
+        String ref = atts.getValue("href");
+        if (ref != null && ref.length() > 0)
+          linkField.add(ref);
         return super.beginTag(namespaceURI,localName,qName,atts);
       }
       else
@@ -6585,15 +6587,18 @@ public class WebcrawlerConnector extends
     public void process(IXMLHandler handler)
       throws ManifoldCFException
     {
-      if (linkField != null && linkField.length() > 0)
+      if (linkField.size() > 0)
       {
-        String[] links = linkField.split(", ");
-        int l = 0;
-        while (l < links.length)
+        for (String linkValue : linkField)
         {
-          String rawURL = links[l++].trim();
-          // Process the link
-          handler.noteDiscoveredLink(rawURL);
+          String[] links = linkValue.split(", ");
+          int l = 0;
+          while (l < links.length)
+          {
+            String rawURL = links[l++].trim();
+            // Process the link
+            handler.noteDiscoveredLink(rawURL);
+          }
         }
       }
     }



Mime
View raw message