jakarta-taglibs-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From catl...@apache.org
Subject cvs commit: jakarta-taglibs/scrape/xml scrape.xml
Date Fri, 09 Nov 2001 23:33:34 GMT
catlett     01/11/09 15:33:34

  Added:       scrape/xml scrape.xml
  Log:
  the scrape tag library is converted over to the new build
  
  Revision  Changes    Path
  1.1                  jakarta-taglibs/scrape/xml/scrape.xml
  
  Index: scrape.xml
  ===================================================================
  <?xml version="1.0" ?>
  
  <!-- Change all instances of session with the jakarta-taglib 
       name for this tag library.
  
       Change all instances of Scrape with the name to use
       for things such as titles in the tag library documentation.
  
       Change all instances of {your-name} with your name
       for things such as author name in the tag library documentation.
  
       Change <prefix>foo</prefix> below to a short prefix for this
       tag library.
       -->
  
  <document url="./scrape.xml">
  
  <!-- More properties can be added.  Good place to stick loose
       data needed elsewhere. -->
  <properties>
    <!-- The title here overrides the title generated by the 
         stylesheet for the documentation HTML <title> tag
    <title>Jakarta Project: Scrape JSP Tag Library</title>
         -->
    <!-- The name here is used in the HTML <meta name="author"...> tag -->
    <author>Rich Catlett</author>
  </properties>
  
  <!-- The following defines elements used both to create the taglib
       documentation and the tag library descriptor .tld file.  The
       elements used are those from the JSP 1.2 TLD DTD and special
       elements used when creating the taglib documentation.  Although
       the JSP 1.2 TLD DTD is used, this document can be used to
       generate both a JSP 1.1 and a JSP 1.2 TLD.
       -->
  <taglib>
    <!-- The following elements are from the JSP 1.2 TLD DTD -->
    <!-- Version number of this taglib -->
    <tlib-version>1.0</tlib-version>
    <!-- Minimum version of JSP spec required -->
    <jsp-version>1.1</jsp-version>
    <!-- jakarta-taglib name of this tag library -->
    <short-name>scrape</short-name>
    <!-- URI of taglib -->
    <uri>http://jakarta.apache.org/taglibs/scrape-1.0</uri>
    <!-- The name to use in titles, etc. for the taglib -->
    <display-name>Scrape Tag library (Pre Beta)</display-name>
  
    <description>
    <P>The scrape tag library can scrape or extract content from web
       documents and display the content in your JSP. For example, 
       you could scrape stock quotes from other web sites 
       and display them in your pages.</P>
  
    <P>After your JSP scrapes a document for the first time, 
       the results of the scrape are cached for subsequent JSP requests. 
       These results are returned unless the JSP determines that 
       the document must be rescraped. Rescraping is determined by the following 
       logic:</P>
  
    <OL>
      <LI>The status of the scrape tags and attributes in the JSP is examined. Any
          modifications to the tags or attributes trigger a rescrape. If the tags 
          have not been modified, the JSP proceeds to step 2.</LI> 
      <LI>The minimum time for rescraping, specified by the time attribute of 
          the page tag, is examined. The default time is 10 minutes. 
          If this time has not passed since the last scrape, cached results are 
          returned.  If this time has passed, the JSP proceeds to step 3.</LI>
      <LI>The Expires header of the scraped document is examined. If the
          expiration date/time has not passed, cached results are returned. If the
  	expiration date/time is not specified or the document has expired, the
  	JSP proceeds to step 4.</LI>
      <LI>The headers for the scraped document are requested and examined. If the 
          document has not been modified since the last scrape, cached results 
          are returned. If the document has been modified, it is rescraped and 
          the new results are returned.</LI>
    </OL>
    </description> 
  
    <!-- The taglib-location is used to fill in the web.xml configuration
         information in the HTML doc. -->
    <taglib-location>/WEB-INF/scrape.tld</taglib-location>
  
    <!-- The prefix is used to fill in the taglib directive
         configuration information in the HTML doc. -->
    <prefix>scrp</prefix>
  
    <!-- This element must be straight text and is copied right into
         the "Requirements" section of the HTML doc. -->
    <requirements-info>
      This custom tag library requires a servlet container that supports the 
      JavaServer Pages Specification, version 1.1 or higher.  It also requires an
      up-to-date version of the jakarta-oro package.
    </requirements-info>
  
    <tagtoc name="Scrape Tags">
      <tag>
        <name>page</name>
        <tag-class>org.apache.taglibs.scrape.PageTag</tag-class>
        <body-content>JSP</body-content>
        <display-name>page</display-name>
  
        <description>
          Specify the URL of the document to be scraped and the minimum time that
  	must pass before the document is rescraped.
        </description>
    
        <summary>Specify the URL of the document to be scraped and the minimum 
  	time that must pass before the document is rescraped.</summary>
        <availability>1.0</availability>
        <restrictions>None</restrictions>
  
        <attribute>
          <name>url</name>
          <required>no</required>
          <rtexprvalue>no</rtexprvalue>
          <description>The fully qualified URL of the document that is to
            be scraped, such as: <br/><br/>
            http://<i>domain.name/directory/document.html</i><br/><br/>
            Note that if you must dynamically generate the URL, perhaps via 
            a set of tags from a different tag library, you can omit the url 
            attribute in the page tag and instead use the url tag.
  	</description>
          <availability>1.0</availability>
        </attribute>
  
        <attribute>
          <name>time</name>
          <required>no</required>
          <rtexprvalue>no</rtexprvalue>
          <description>The length of time the JSP waits before attempting
            to rescrape the document. The value of time is specified in minutes. 
            The minimum value is 10 minutes. Note that the minimum value is used 
  	  if a time attribute is not specified.
  	</description>
          <availability>1.0</availability>
        </attribute>
          
        <example>
          <usage>
            <comment>
              Specify a document to be scraped with a rescrape time of 20 minutes.
  	    Note that a scrape tag must be nested within the body of the page tag.
            </comment>
            <code>
  <![CDATA[ 
  <scrp:page url="http://finance.yahoo.com/q?s=SUNW" time="20">
     <scrp:scrape id="qt" begin="<table border=1" end="</table>" anchors="true"/>
  </scrp:page>
  ]]>       
            </code>  
          </usage>  
        </example>
            
      </tag>
  
      <tag>
        <name>url</name>
        <tag-class>org.apache.taglibs.scrape.UrlTag</tag-class>
        <body-content>JSP</body-content>
        <display-name>url</display-name>
  
        <description>
           Specify the URL of the document that contains the content to be 
  	 scraped. Use this tag as an alternative to the page tag's url attribute 
  	 when the URL must be generated dynamically.
        </description>
        
        <summary>
  	 Specify the URL of the document that contains the content to be 
  	 scraped. Use this tag as an alternative to the page tag's url attribute 
  	 when the URL must be generated dynamically.
        </summary>
        <availability>1.0</availability> 
        <restrictions>Must be nested within a page tag.</restrictions>
  
        <example>
          <usage>
            <comment>
              Specify a document to be scraped.
    	    Note that a url tag must be nested within the body of the page tag.
            </comment>
            <code>   
  <![CDATA[      
  <scrp:page>
     <scrp:url>http://finance.yahoo.com/q?s=SUNW</scrp:url>
     <scrp:scrape id="qt" begin="<table border=1" end="</table>" anchors="true"/>
  </scrp:page>
  ]]>               
            </code>
  	  <comment>
  	      It is possible to use another tag set nested within the url tag to
  	      dynamically generate the URL.
  	  </comment>
          </usage>
        </example>
  
      </tag>
  
      <tag>
        <name>scrape</name>
        <tag-class>org.apache.taglibs.scrape.ScrapeTag</tag-class>
        <body-content>JSP</body-content>
        <display-name>scrape</display-name>
    
        <description>
           Specify the text anchors that mark the beginning and end of the content
  	 to be scraped.
        </description>
  
        <summary>
           Specify the text anchors that mark the beginning and end of the content
  	 to be scraped.
        </summary> 
        <availability>1.0</availability>                         
        <restrictions>Must be nested within a page tag.</restrictions>
  
        <variable>
  	<name-from-attribute>id</name-from-attribute>
  	<variable-class>java.lang.String</variable-class>
  	<declare>true</declare>
  	<scope>AT_BEGIN</scope>
  	<description>Name used to retrieve the scrape later in the page.
  	</description>
  	<availability>1.0</availability>
        </variable>	
  
        <attribute>
          <name>id</name>
          <required>yes</required>
          <rtexprvalue>no</rtexprvalue>
          <description>
  	   A unique identifier that distinguishes this scrape from all others. 
  	   Each scrape is unique and accessible only by this id.
  	</description>
          <availability>1.0</availability>
        </attribute>
  
        <attribute>
          <name>begin</name>
          <required>yes</required>
          <rtexprvalue>no</rtexprvalue>
          <description>
  	   The text anchor that marks the beginning of the content to be scraped
  	   from the document.
  	</description>
          <availability>1.0</availability>
        </attribute>
  
        <attribute>
          <name>end</name>
          <required>yes</required>
          <rtexprvalue>no</rtexprvalue>
          <description>
  	   The text anchor that marks the end of the content to be scraped from 
  	   the document.
  	</description>
          <availability>1.0</availability>
        </attribute>
  
        <attribute>
          <name>strip</name>
          <required>no</required>
          <rtexprvalue>no</rtexprvalue>
          <description>
  	   If strip is set to true, the output from the result tag is stripped 
  	   of HTML, XML, DHTML, etc. tags. That is, nothing within &lt; &gt; 
  	   will be included in the scrape result. The default value is false. 
  	   Note that strip can be used in conjunction with the anchors 
  	   attribute.
  	</description>
          <availability>1.0</availability>
        </attribute>
  
        <attribute>
          <name>anchors</name>
          <required>no</required>
          <rtexprvalue>no</rtexprvalue>
          <description>
  	   If anchors is set to true, the begin and end text anchors are 
  	   included in the scrape result. The default value is false. Note that 
  	   anchors can be used in conjunction with the strip attribute.
  	</description>
          <availability>1.0</availability>
        </attribute>
  
        <example>                        
          <usage>
            <comment>
              Set a scrape on a page with anchors included.
  	    Note that the page tag is first and the scrape tag is nested.
            </comment>
            <code>   
  <![CDATA[         
  <scrp:page url="http://finance.yahoo.com/q?s=SUNW" time="20">
     <scrp:scrape id="qt" begin="<table border=1" end="</table>" anchors="true"/>
  </scrp:page>
  ]]>
            </code>
  	  <comment>
  	    Set a scrape on a page with results set to have no tags.
  	  </comment>
  	  <code>
  <![CDATA[
  <scrp:page url="http://finance.yahoo.com/q?s=SUNW" time="20">
     <scrp:scrape id="qt" begin="<table border=1" end="</table>" strip="true"/>
  </scrp:page>
  ]]>
  	  </code>
          </usage>
        </example>
      
      </tag>
  
      <tag>
        <!-- result: outputs the content captured by a scrape. The usage
             example below invokes it as an empty element with a single
             scrape attribute. -->
        <name>result</name>
        <tag-class>org.apache.taglibs.scrape.ResultTag</tag-class>
        <!-- Lower-case "empty" is the literal the TLD DTD defines for
             body-content (tagdependent | JSP | empty). -->
        <body-content>empty</body-content>
        <display-name>result</display-name>
  
        <description>
           Retrieve the content from a scrape.
        </description>
        
        <summary>Retrieve the content from a scrape.</summary>
        <availability>1.0</availability> 
        <restrictions>None</restrictions>
  
        <!-- Declared so that the generated TLD and documentation list the
             scrape attribute used in the example below.
             NOTE(review): required=yes and rtexprvalue=no follow the pattern
             of the id attribute of the scrape tag; confirm against
             ResultTag before release. -->
        <attribute>
          <name>scrape</name>
          <required>yes</required>
          <rtexprvalue>no</rtexprvalue>
          <description>
            The id of the scrape whose content is to be retrieved. The value
            must match the id attribute of a scrape tag.
          </description>
          <availability>1.0</availability>
        </attribute>
  
        <example>
          <usage>
            <comment>
              Get the results of a previously performed scrape.
            </comment>
            <code>   
  <![CDATA[      
  <scrp:result scrape="qt"/>
  ]]>               
            </code>
          </usage>
        </example>
  
      </tag>
  
    </tagtoc>
  
  </taglib>
  
  <revision release="Pre Beta" date="07/22/2001">
    <description>
      Clean up of tag library prior to performing a beta
      release, moving toward an official release.  Changing tag library over to 
      the new build.
    </description>
  </revision>
         
  <revision release="Development" date="02/26/2001"> 
    <description>
      Initial version of tag library before Jakarta-Taglibs
      had an official release policy for tag libraries.
    </description>
  </revision>
  
  </document>
  
  
  

--
To unsubscribe, e-mail:   <mailto:taglibs-dev-unsubscribe@jakarta.apache.org>
For additional commands, e-mail: <mailto:taglibs-dev-help@jakarta.apache.org>


Mime
View raw message