ant-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "Dominique Devienne" <ddevie...@gmail.com>
Subject Re: Ant task walk html and find broken links
Date Wed, 14 May 2008 22:02:35 GMT
On Wed, May 14, 2008 at 4:00 PM, gregsmit <gregsmit@us.ibm.com> wrote:
>  Does anyone know of an Ant task that I could use to walk through a website
>  (that I built with ant) to confirm that there are no broken links?  I found
>  one really old project on sourceforge, but it looks pretty abandoned.

I wrote one a long time ago based on NekoHTML to do the HTML parsing,
because all the ones I could find were online only, and thus checked
public internet links only. I only made mine verify the link fragments
(#id) could be found in the link target (I was checking documentation
cross-references).

Unlike Canoo, it doesn't attempt to process javascript. Mine was
simple-minded, looking only at <a href>, <link href>, and <img
src>, and had filters to avoid checking links based on patterns (to
restrict checking to local relative links, for example, and skip http:
links).

This code is old, and hasn't been compiled or run in ages, but
apparently I unit tested it, so might still be useful ;-) I'm happy to
share the code (although it uses a few utility classes, so not easy to
extract the relevant pieces).

That's assuming Canoo is not a good fit here. My stuff probably pales
in comparison, but I'm throwing it out there just in case it might be
useful.

--DD

/**
 * Checks an HTML page for bad links.
 * <p>
 * Uses <a href="http://www.apache.org/~andyc/neko/doc/html/">NekoHTML</a>
 * for HTML parsing; <a href="http://jtidy.sourceforge.net/">JTidy</a> could
 * probably be used instead.
 * <p>
 * Current limitations:
 * <ul>
 *   <li>Cannot indicate the line/column of a bad link</li>
 *   <li>Does not support rebasing of the document</li>
 *   <li>Does not check URLs in stylesheets</li>
 *   <li>Possibly slow (not measured)</li>
 * </ul>
 *
 * @version May 2004
 */
public class HtmlLinkChecker  extends ConditionalAspect.AbstractTask { ... }


<?xml version="1.0"?>

<project name="HtmlLinkCheckerTest" default="tearDown"
         xmlns:bm="antlib:buildmagic">

  <!-- Defines the scratch directory property "tmp" (located under basedir and
       named after the project) and creates that directory. -->
  <target name="setUp">
    <property name="tmp" location="${basedir}/${ant.project.name}.tmp" />
    <mkdir dir="${tmp}" />
  </target>

  <!-- Deletes the scratch directory. This is the project's default target.
       It does not depend on setUp, so the tmp property is only defined here
       if setUp has already run in this project. -->
  <target name="tearDown">
    <delete dir="${tmp}" />
  </target>

  <!-- Creates a few dummy HTML files (plus the images and stylesheet they
       reference) in the scratch directory. With the default property values
       below, none of the generated links is bad.
       To force a particular kind of bad link, set one of these properties
       before this target runs: the files on disk keep their fixed names and
       ids, so only the reference changes and it becomes dangling. -->
  <target name="setUpFiles" depends="setUp">
    <!-- Defaults for every link target / fragment id used in the pages. -->
    <property name="google.link"   value="http://www.google.com" />
    <property name="logo.file"     value="logo.gif" />
    <property name="bullet.file"   value="bullet.gif" />
    <property name="style.file"    value="style.css" />
    <property name="book.file"     value="book.html" />
    <property name="chapter1.file" value="chapter1.html" />
    <property name="section1.id"   value="section1" />
    <property name="sectionA.id"   value="sectionA" />
    <property name="coucou.id"     value="coucou" />

    <!-- Placeholder resources; only their existence matters. -->
    <echo file="${tmp}/logo.gif">I am a logo!</echo>
    <echo file="${tmp}/bullet.gif">I am a bullet!</echo>
    <echo file="${tmp}/style.css">
      p  { color: #000000 }
      ul { list-style: url(${bullet.file}) }
    </echo>

    <!-- Index page: external link, links to chapter 1 (with and without
         fragments) and a link to chapter 2. -->
    <echo file="${tmp}/book.html"><![CDATA[
      <html>
        <body>
          <a href="${google.link}">Search:</a>
          <p id="coucou">coucou</p>
          <a   href="${chapter1.file}">Chapter 1</a>
            <a href="${chapter1.file}#${section1.id}">Section 1</a>
            <a href="${chapter1.file}#section2">Section 2</a>
          <a href="chapter2.html">Chapter 2</a>
        </body>
      </html>
    ]]></echo>

    <!-- Chapter 1: stylesheet link in the head, link back into the index. -->
    <echo file="${tmp}/chapter1.html"><![CDATA[
      <html>
        <head>
          <link href="${style.file}" rel="stylesheet">
        </head>
        <body>
          <h2 id="section1">Section #1</h2>
          <h2 id="section2">Section #2</h2>
          <a href="book.html#${coucou.id}">Book Index</a>
        </body>
      </html>
    ]]></echo>

    <!-- Chapter 2: image, self-referencing fragment link, link to the index. -->
    <echo file="${tmp}/chapter2.html"><![CDATA[
      <html>
        <head>
          <link href="${style.file}" rel="stylesheet">
        </head>
        <body>
          <img src="${logo.file}">
          See <a href="#${sectionA.id}">Section A</a>
          <h2 id="sectionA">Section A</h2>
          <h2 id="sectionB">Section B</h2>
          <a href="${book.file}">Book Index</a>
        </body>
      </html>
    ]]></echo>
  </target>

  <!-- Generates the sample files, then runs checklinks in verbose mode over
       every *.html file in the scratch directory, without any link
       patterns. -->
  <target name="test-generic" depends="setUpFiles">
    <bm:checklinks verbose="true">
      <bm:fileset dir="${tmp}" includes="*.html" />
    </bm:checklinks>
  </target>

  <!-- Same file set as test-generic, but non-verbose and with link patterns.
       Each pattern carries an ifTrue attribute fed from a property (+imgs,
       -chap1, -frag, -http) that the Java tests set to "true"; presumably a
       pattern only applies when its property is true (TODO confirm against
       the ConditionalAspect implementation). -->
  <target name="test-patterns" depends="setUpFiles">
    <bm:checklinks verbose="false">
      <bm:fileset dir="${tmp}" includes="*.html" />

      <bm:linkpatterns>
        <bm:include regexp=".*/images/.*" ifTrue="${+imgs}" />

        <bm:exclude prefix="chapterOne.html" ifTrue="${-chap1}" />
        <bm:exclude regexp=".*#.*" ifTrue="${-frag}" />
        <bm:exclude prefix="http:" ifTrue="${-http}" />
      </bm:linkpatterns>
    </bm:checklinks>
  </target>

</project>

/**
 * Tests the {@code checklinks} task by running the {@code test-generic} and
 * {@code test-patterns} targets of the HtmlLinkCheckerTest build file.
 * <p>
 * Each test sets one or more project properties <em>before</em> executing a
 * target. The build file documents that overriding one of its properties
 * forces a specific kind of bad link in the generated HTML files.
 * <p>
 * Most tests point {@code google.link} at the local {@code book.html} so that
 * they do not depend on an internet connection.
 */
public class HtmlLinkCheckerTest
             extends BuildFileTestCase {

    /**
     * Tests all the links are OK.
     * Note that it doesn't tell us if some links are not checked...
     * Note also that it requires an internet connection to go to Google.
     */
    public void testGoodLinks() {
        executeTarget("test-generic");
    }

    /** An unresolvable external http link must be reported as the only bad link. */
    public void testBadExternalHttpLink() {
        setProperty("google.link", "http://zzz.google.com");
        expectSpecificBuildException("test-generic", "bad external http link",
                                     "1 bad link(s)");
        assertBadLink("http://zzz.google.com");
    }

    /**
     * Pointing the chapter 1 links at a missing file must break all three
     * links to it: the plain one and the two carrying a fragment.
     */
    public void testBadInternalFileLink() {
        setProperty("google.link", "book.html");
        setProperty("chapter1.file", "chapterOne.html");
        expectSpecificBuildException("test-generic", "bad internal file link",
                                     "3 bad link(s)");
        assertBadLink("chapterOne.html");
        assertBadLink("chapterOne.html#section1");
        assertBadLink("chapterOne.html#section2");
    }

    /** A fragment that does not exist in an existing target file is a bad link. */
    public void testBadInternalFileFragment() {
        setProperty("google.link", "book.html");
        setProperty("section1.id", "sectionOne");
        expectSpecificBuildException("test-generic", "bad internal file frag",
                                     "1 bad link(s)");
        assertBadLink("chapter1.html#sectionOne");
    }

    /** A fragment-only link to an id missing from the same page is a bad link. */
    public void testBadSelfFragment() {
        setProperty("google.link", "book.html");
        setProperty("sectionA.id", "sectionABC");
        expectSpecificBuildException("test-generic", "bad self frag",
                                     "1 bad link(s)");
        assertBadLink("#sectionABC");
    }

    /** A {@code <link href>} in the document head pointing to a missing file is a bad link. */
    public void testBadHeadLink() {
        setProperty("google.link", "book.html");
        setProperty("style.file", "stylesheet.CSS");
        expectSpecificBuildException("test-generic", "bad head link",
                                     "1 bad link(s)");
        assertBadLink("stylesheet.CSS");
    }

    /**
     * A {@code url(...)} inside the stylesheet pointing to a missing file
     * should be a bad link, but CSS checking is not implemented yet, so a
     * failed expectation is tolerated here.
     */
    public void testBadUrlInCss() {
        setProperty("google.link", "book.html");
        setProperty("bullet.file", "square.gif");
        try {
            expectSpecificBuildException("test-generic", "bad url in css",
                                         "1 bad link(s)");
            assertBadLink("square.gif");
        }
        catch (junit.framework.AssertionFailedError ignored) {
            // TODO: implement CSS link checks
            // Deliberately ignored: this is a known limitation of the checker,
            // and the test should start exercising it once it is implemented.
        }
    }

    /** An {@code <img src>} pointing to a missing file is a bad link. */
    public void testBadImage() {
        setProperty("google.link", "book.html");
        setProperty("logo.file", "logo.jpg");
        expectSpecificBuildException("test-generic", "bad image",
                                     "1 bad link(s)");
        assertBadLink("logo.jpg");
    }

    /** Broken chapter 1 links must not fail the build when the -chap1 exclude is on. */
    public void testIgnoreBadInternalFileLink() {
        setProperty("google.link", "book.html");
        setProperty("chapter1.file", "chapterOne.html");

        setProperty("-chap1", "true");

        executeTarget("test-patterns");
    }

    /** A broken http link must not fail the build when the -http exclude is on. */
    public void testIgnoreBadExternalHttpLink() {
        setProperty("-http", "true");
        setProperty("google.link", "http://zzz.google.com");
        executeTarget("test-patterns");
    }

    /** Broken fragments must not fail the build when the -frag exclude is on. */
    public void testIgnoreBadFragments() {
        setProperty("-frag", "true");
        setProperty("google.link", "book.html");
        setProperty("section1.id", "sectionOne");
        setProperty("sectionA.id", "sectionABC");
        executeTarget("test-patterns");
    }

    /**
     * With the +imgs include on, the broken non-image links created here must
     * not fail the build.
     */
    public void testCheckImagesOnly() {
        setProperty("+imgs", "true");
        setProperty("google.link", "book.html");

        // Creates a few broken links, to be ignored (since not checked)
        setProperty("section1.id", "sectionOne");
        setProperty("sectionA.id", "sectionABC");
        setProperty("chapter1.file", "chapterOne.html");

        executeTarget("test-patterns");
    }

    /**
     * Sets a property on the project under test via
     * {@code Project.setNewProperty}.
     *
     * @param name  the property name
     * @param value the property value
     */
    private void setProperty(String name, String value) {
        getProject().setNewProperty(name, value);
    }

    /**
     * Asserts that the build log contains {@code ": <link>:"}, i.e. that the
     * given link was reported. On failure the message includes the expected
     * link and the captured log, so the cause can be diagnosed without
     * re-running the test with extra print statements.
     *
     * @param link the link text exactly as it appears in the HTML source
     */
    private void assertBadLink(String link) {
        String log = getLog();
        assertTrue("Bad link '" + link + "' not reported in log:\n" + log,
                   log.indexOf(": " + link + ":") > -1);
    }

} // END class HtmlLinkCheckerTest

---------------------------------------------------------------------
To unsubscribe, e-mail: user-unsubscribe@ant.apache.org
For additional commands, e-mail: user-help@ant.apache.org


Mime
View raw message