commons-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From bur...@apache.org
Subject svn commit: r149303 - in jakarta/commons/sandbox/feedparser/trunk: TODO project.properties project.xml src/java/org/apache/commons/feedparser/RSSFeedParser.java src/java/org/apache/commons/feedparser/example/HelloFeedParser.java xdocs/index.xml
Date Mon, 31 Jan 2005 21:29:07 GMT
Author: burton
Date: Mon Jan 31 13:29:04 2005
New Revision: 149303

URL: http://svn.apache.org/viewcvs?view=rev&rev=149303
Log:
Brad's email added

Modified:
    jakarta/commons/sandbox/feedparser/trunk/TODO
    jakarta/commons/sandbox/feedparser/trunk/project.properties
    jakarta/commons/sandbox/feedparser/trunk/project.xml
    jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/RSSFeedParser.java
    jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/example/HelloFeedParser.java
    jakarta/commons/sandbox/feedparser/trunk/xdocs/index.xml

Modified: jakarta/commons/sandbox/feedparser/trunk/TODO
URL: http://svn.apache.org/viewcvs/jakarta/commons/sandbox/feedparser/trunk/TODO?view=diff&r1=149302&r2=149303
==============================================================================
--- jakarta/commons/sandbox/feedparser/trunk/TODO (original)
+++ jakarta/commons/sandbox/feedparser/trunk/TODO Mon Jan 31 13:29:04 2005
@@ -55,7 +55,10 @@
     - Do we support multiple content items in Atom?
 
 
-- Where do we store global options like USER_AGENT, STRICT_PARSING, and STRICT_SPEC
+- Where do we store global options like USER_AGENT, STRICT_PARSING, and
+  STRICT_SPEC
+
+
 
 
 - Unit tests don't need to be so LOUD!!
@@ -66,6 +69,16 @@
     - Atom content
     - Atom summary
     - xml:base expansion
+
+    - http://www.intertwingly.net/stories/2004/04/04/title.html
+
+
+- http://feedparser.org/docs/html-sanitization.html#advanced.sanitization.why
+
+    Hm...
+
+
+- FIXME: documentation on using HTTP authentication
 
 - BUG:
 

Modified: jakarta/commons/sandbox/feedparser/trunk/project.properties
URL: http://svn.apache.org/viewcvs/jakarta/commons/sandbox/feedparser/trunk/project.properties?view=diff&r1=149302&r2=149303
==============================================================================
--- jakarta/commons/sandbox/feedparser/trunk/project.properties (original)
+++ jakarta/commons/sandbox/feedparser/trunk/project.properties Mon Jan 31 13:29:04 2005
@@ -25,3 +25,5 @@
 org.xml.sax.driver=org.apache.xerces.parsers.SAXParser
 
 clover.excludes=**/Test*.java
+
+maven.username=burton
\ No newline at end of file

Modified: jakarta/commons/sandbox/feedparser/trunk/project.xml
URL: http://svn.apache.org/viewcvs/jakarta/commons/sandbox/feedparser/trunk/project.xml?view=diff&r1=149302&r2=149303
==============================================================================
--- jakarta/commons/sandbox/feedparser/trunk/project.xml (original)
+++ jakarta/commons/sandbox/feedparser/trunk/project.xml Mon Jan 31 13:29:04 2005
@@ -42,7 +42,7 @@
             <name>Brad Neuberg</name>
             
             <id></id>
-            <email></email>
+            <email>bkn3@columbia.edu</email>
             <organization>Rojo Networks Inc.</organization>
 
         </developer>

Modified: jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/RSSFeedParser.java
URL: http://svn.apache.org/viewcvs/jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/RSSFeedParser.java?view=diff&r1=149302&r2=149303
==============================================================================
--- jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/RSSFeedParser.java (original)
+++ jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/RSSFeedParser.java Mon Jan 31 13:29:04 2005
@@ -51,7 +51,10 @@
         FeedParserState state = new FeedParserState( listener );
 
         FeedVersion v = new FeedVersion();
+
         v.isRSS = true;
+        v.version = doc.getRootElement().getAttributeValue( "version" );
+        
         listener.onFeedVersion( v );
 
         listener.init();

Modified: jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/example/HelloFeedParser.java
URL: http://svn.apache.org/viewcvs/jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/example/HelloFeedParser.java?view=diff&r1=149302&r2=149303
==============================================================================
--- jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/example/HelloFeedParser.java (original)
+++ jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/example/HelloFeedParser.java Mon Jan 31 13:29:04 2005
@@ -18,6 +18,7 @@
 
 import org.apache.commons.feedparser.*;
 import org.apache.commons.feedparser.impl.*;
+import org.apache.commons.feedparser.network.*;
 
 import java.io.*;
 import java.net.*;
@@ -33,8 +34,10 @@
 
     public static void main( String[] args ) throws Exception {
 
+        //create a new FeedParser...
         FeedParser parser = FeedParserFactory.newFeedParser();
 
+        //create a listener for handling our callbacks
         FeedParserListener listener = new DefaultFeedParserListener() {
 
                 public void onItem( FeedParserState state,
@@ -43,19 +46,23 @@
                                     String description,
                                     String permalink ) throws FeedParserException {
 
-
                     System.out.println( "Found a new published article: " + permalink );
                     
                 }
 
-                
             };
 
+        //specify the feed we want to fetch
         String resource = "http://peerfear.org/rss/index.rss";
-        
-        InputStream is = new URL( resource ).openStream();
-        
-        parser.parse( listener, is, resource );    
+
+        //use the FeedParser network IO package to fetch our resource URL
+        ResourceRequest request = ResourceRequestFactory.getResourceRequest( resource );
+
+        //grab our input stream
+        InputStream is = request.getInputStream();
+
+        //start parsing our feed and have the above onItem methods called
+        parser.parse( listener, is, resource );
 
     }
 

Modified: jakarta/commons/sandbox/feedparser/trunk/xdocs/index.xml
URL: http://svn.apache.org/viewcvs/jakarta/commons/sandbox/feedparser/trunk/xdocs/index.xml?view=diff&r1=149302&r2=149303
==============================================================================
--- jakarta/commons/sandbox/feedparser/trunk/xdocs/index.xml (original)
+++ jakarta/commons/sandbox/feedparser/trunk/xdocs/index.xml Mon Jan 31 13:29:04 2005
@@ -18,7 +18,8 @@
             </p>
 
             <p>
-                FeedParser was the parser API designed from <a
+                FeedParser was the parser API designed by <a
+                href="http://peerfear.org">Kevin Burton</a> for <a
                 href="http://newsmonster.org">NewsMonster</a> and has been
                 donated to the ASF in order to continue development.
             </p>
@@ -26,30 +27,33 @@
             <p>
                 FeedParser differs from most other RSS/Atom parsers in that it
                 is not DOM based but event based (similar to SAX).  Instead of
-                the low level startElement() API present in SAX, we provide high
-                level events based on RSS parsing information.
+                the low level startElement() API present in SAX, we provide
+                higher level events based on feed parsing information.
             </p>
 
             <p>
                 Events are also given to the caller independent of the
-                underlying format.  This is accomplished by a Feed Event Model
+                underlying format.  This is accomplished with a Feed Event Model
                 that isolates your application from the underlying feed format.
-                This enables your applications to transparently support all RSS
-                versions and Atom.  We also hide format specific implementation
-                such as dates (RFC 822 in RSS 2.0 and 0.9x and ISO 8601 in RSS
-                1.0 and Atom).
+                This enables transparent support for all RSS versions including
+                Atom.  We also hide format specific implementation such as dates
+                (RFC 822 in RSS 2.0 and 0.9x and ISO 8601 in RSS 1.0 and Atom)
+                and other metadata.
             </p>
 
             <p>
                 The FeedParser distribution also includes:
-
             </p>
 
             <ol>
                 <li>An implementation of RSS and Atom autodiscovery.</li>
 
-                <li>Support for all content modules including xhtml:body,
-                    mod_content (RDF and inline), and atom:content </li>
+                <li>
+
+                    Support for all content modules including xhtml:body,
+                    mod_content (RDF and inline), atom:content, and atom:summary
+
+                </li>
 
                 <li>
                     Atom 1.0 link API as well as RSS 1.0 mod_link API
@@ -57,7 +61,8 @@
 
                 <li>
                     An HTML link parser for finding all links in an HTML source
-                    file and expanding them to become full
+                    file and expanding them to become full URLs instead of
+                    relative.
                 </li>
 
             </ol>
@@ -75,8 +80,27 @@
                 would otherwise fail.
             </p>
 
+            <p>
+                Feed location within FeedParser is simple.  Simply pass a URL to
+                <a href="apidocs/org/apache/commons/feedparser/locate/FeedLocator.html">
+                    FeedLocator
+                </a> which will parse your HTML for your weblog and return all
+                references to feeds with a
+
+                <a href="apidocs/org/apache/commons/feedparser/FeedList.html">
+                    FeedList
+                </a>
+            </p>
+
         </section>
 
+        <!-- 
+
+             Feed Type and Version Detection
+             Relative Link Expansion
+
+             -->
+
         <section name="Liberal Parsing">
 
             <p>
@@ -104,6 +128,14 @@
                 <dt>RSS 0.91</dt>
                 <dt>RSS 0.92</dt>
 
+                <!--
+                http://www.purplepages.ie/RSS/netscape/rss0.90.html
+                http://my.netscape.com/publish/formats/rss-spec-0.91.html
+                http://purl.org/rss/1.0/
+                http://backend.userland.com/rss092
+                http://backend.userland.com/rss093
+                     -->
+
                 <dt>
 
                     <a href="http://feedvalidator.org/docs/rss2.html">
@@ -196,9 +228,16 @@
             </source>
 
             <p>
-                This is a trivial example from the HelloFeedParser demo
-                distributed with the app.  Other events such as onChannel,
-                onImage, onLink can be used to obtain additional metadata.
+
+                This is a trivial example from the 
+
+                <a href="xref/org/apache/commons/feedparser/example/HelloFeedParser.html">
+                    HelloFeedParser 
+                </a>
+
+                demo distributed within FeedParser.  Other events such as
+                onChannel, onImage, onLink can be used to obtain additional
+                metadata.
             </p>
 
             <p>
@@ -206,7 +245,11 @@
                 the future as well as support for additional namespaces.  For
                 example the RSS 1.0, 2.0, and Atom specification all support
                 different date mechanisms.  The FeedParser simply passes
-                onCreated, onIssued methods via the MetaFeedParserListener
+                onCreated, onIssued methods via the 
+
+                <a href="apidocs/org/apache/commons/feedparser/MetaFeedParserListener.html">
+                    MetaFeedParserListener
+                </a>
                 interface.
             </p>
 
@@ -234,7 +277,12 @@
             </p>
 
             <p>
-                The FeedParser includes a generic ContentFeedParserListener
+                The FeedParser includes a generic 
+
+                <a href="apidocs/org/apache/commons/feedparser/ContentFeedParserListener.html">
+                    ContentFeedParserListener
+                </a>
+
                 which allows you to intercept all content markup from all RSS
                 formats including Atom.
             </p>
@@ -300,6 +348,14 @@
                 used to provide a scalable system.
             </p>
 
+            <p>
+                The Network IO sets a default user agent of:
+            </p>
+
+            <source>
+Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.2.1; aggregator:FeedParser; http://jakarta.apache.org/commons/sandbox/feedparser/) Gecko/20021130
+</source>
+
         </section>
 
         <!--
@@ -311,6 +367,42 @@
             
         </section>
              -->
+
+        <section name="Alternative RSS/Atom and Feed Parsers">
+
+            <p>
+                If for some reason FeedParser doesn't meet your needs (and we'd
+                love to find out why) there are other alternatives.
+            </p>
+
+            <dl>
+                <dt>
+                    <a href="https://rome.dev.java.net/">Rome</a>
+                </dt>
+
+                <dd>
+                    While Rome lacks autodiscovery and a networking layer it
+                    does provide a nice DOM API (if this is what you require)
+                    and the developers from both projects are friendly and
+                    cooperate.
+                </dd>
+
+                <dt>
+                    <a href="http://sourceforge.net/projects/feedparser/">
+                        Universal FeedParser
+                    </a>
+                </dt>
+
+                <dd>
+
+                    The Universal FeedParser is a python-based parser which
+                    happens to conflict somewhat in our use of names.
+
+                </dd>
+
+            </dl>
+
+        </section>
 
     </body>
 



---------------------------------------------------------------------
To unsubscribe, e-mail: commons-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: commons-dev-help@jakarta.apache.org


Mime
View raw message