incubator-droids-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From thors...@apache.org
Subject svn commit: r939662 - /incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/parse/html/LinkExtractor.java
Date Fri, 30 Apr 2010 13:30:27 GMT
Author: thorsten
Date: Fri Apr 30 13:30:27 2010
New Revision: 939662

URL: http://svn.apache.org/viewvc?rev=939662&view=rev
Log:
DROIDS-74
Reporter: Richard Frovarp
Patch: Richard Frovarp
review: thorsten

Modified:
    incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/parse/html/LinkExtractor.java

Modified: incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/parse/html/LinkExtractor.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/parse/html/LinkExtractor.java?rev=939662&r1=939661&r2=939662&view=diff
==============================================================================
--- incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/parse/html/LinkExtractor.java	(original)
+++ incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/parse/html/LinkExtractor.java	Fri Apr 30 13:30:27 2010
@@ -61,7 +61,7 @@ public class LinkExtractor extends Defau
   /**
    * List of links
    */
-  private Collection<Link> links = new ArrayList<Link>();
+  private ArrayList<Link> links = new ArrayList<Link>();
 
   /**
    * Set of URIs visited yet
@@ -83,6 +83,11 @@ public class LinkExtractor extends Defau
    */
   private URI link = null;
 
+  /**
+   * Anchor text
+   */
+  private StringBuilder anchorText = new StringBuilder();
+
   public LinkExtractor(Link base, Map<String, String> elements) {
     super();
     this.base = base;
@@ -121,11 +126,30 @@ public class LinkExtractor extends Defau
         if (link != null) {
         	addOutlinkURI(link.toString());
         	link = null;
+                anchorText = new StringBuilder();
         }
       }
     }
   }
 
+  @Override 
+  public void characters(char[] ch, int start, int length) {
+    anchorText.append(ch, start, length);
+  }
+  
+  @Override
+  public void endElement(String uri, String loc, String raw) {
+    Iterator<String> it = elements.keySet().iterator();
+    String elem;
+    while (it.hasNext()) {
+      elem = it.next();
+      if (elem.equalsIgnoreCase(loc)) {
+        addAnchorText(anchorText.toString());
+      }
+    }
+  }
+
+
   @Override
   public void endDocument() throws SAXException 
   {
@@ -134,6 +158,19 @@ public class LinkExtractor extends Defau
   }
 
   /**
+   * Setting Anchor text of last added anchor
+   * @param anchorText Text to be added
+   */
+  private void addAnchorText(String anchorText) {
+    if(links.size() > 0) {
+      LinkTask l = (LinkTask) links.get(links.size() - 1);
+      l.setAnchorText(anchorText.replaceAll("\\s+", " ").trim());
+      log.debug("Adding anchor: " + l.getAnchorText() + " on link: " + l);
+    } 
+  }
+
+
+  /**
    * Add the outlink to the {@code links} list if the value is a valid URI.
    * @param value the outlink.
    */



Mime
View raw message