commons-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From bur...@apache.org
Subject cvs commit: jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/sax RSSFeedParser.java
Date Fri, 23 Apr 2004 06:11:57 GMT
burton      2004/04/22 23:11:57

  Added:       feedparser/src/java/org/apache/commons/feedparser/sax
                        RSSFeedParser.java
  Log:
  SAX support...
  
  Revision  Changes    Path
  1.1                  jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/sax/RSSFeedParser.java
  
  Index: RSSFeedParser.java
  ===================================================================
  /*
   * Copyright 1999,2004 The Apache Software Foundation.
   * 
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
   * You may obtain a copy of the License at
   * 
   *      http://www.apache.org/licenses/LICENSE-2.0
   * 
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */
  
  package org.apache.commons.feedparser.sax;
  
  import org.apache.commons.feedparser.FeedParserListener;
  import org.apache.commons.feedparser.FeedParserState;
  import org.apache.commons.feedparser.FeedParserException;
  import org.apache.commons.feedparser.FeedVersion;
  import org.apache.commons.feedparser.MetaFeedParserListener;
  
  import java.io.*;
  import java.net.*;
  import java.util.*;
  
  import org.xml.sax.*;
  import org.xml.sax.helpers.*;
  
  /**
   * Root SAX handler for RSS feeds.  It sits at the head of a chain of element
   * templates (channel, url, mod_content items, rdf:value, image, item, title,
   * link, description).  Every SAX element event is passed down the chain and
   * the templates that match report to the registered
   * {@link FeedParserListener}.
   *
   * @author <a href="mailto:burton@apache.org">Kevin A. Burton (burtonator)</a>
   * @version $Id: RSSFeedParser.java,v 1.1 2004/04/23 06:11:57 burton Exp $
   */
  public class RSSFeedParser extends BaseDefaultHandler {

      /**
       * Listener that receives the parse callbacks.  It must be assigned
       * before the document is parsed.
       */
      public FeedParserListener listener = null;

      boolean onItem = false;

      /**
       * Content captured for closed elements, keyed by element local name.
       * Written by the templates in the chain and read through
       * <code>getProperty</code>.
       */
      HashMap properties = new HashMap();

      /** State object handed to the listener callbacks. */
      FeedParserState state = new FeedParserState();

      /** Namespace URIs accepted for the core RSS elements. */
      static HashSet RSS_NAMESPACES = new HashSet();

      /** Namespace URIs accepted for RDF elements. */
      static HashSet RDF_NAMESPACES = new HashSet();

      /** Namespace URIs accepted for mod_content elements. */
      static HashSet MOD_CONTENT_NAMESPACES = new HashSet();

      static {

          RSS_NAMESPACES.add( "http://purl.org/rss/1.0/" );

          RDF_NAMESPACES.add( "http://www.w3.org/1999/02/22-rdf-syntax-ns#" );

          MOD_CONTENT_NAMESPACES.add( "http://purl.org/rss/1.0/modules/content/" );

      }

      /**
       * Create a new <code>RSSFeedParser</code> instance and build the
       * template chain, starting with the channel template.
       *
       * @author <a href="mailto:burton@peerfear.org">Kevin Burton</a>
       */
      public RSSFeedParser() {

          super( "FIXME" );

          // The root handler is its own parser, so getProperty() works on it.
          this.parser = this;

          setNext( new ChannelTemplate( this ) );

      }

      /**
       * Return the registered listener, failing with a descriptive
       * <code>SAXException</code> instead of a bare NullPointerException when
       * none has been assigned.
       */
      private FeedParserListener requireListener() throws SAXException {

          if ( listener == null ) {
              throw new SAXException( "No FeedParserListener has been set on RSSFeedParser" );
          }

          return listener;

      }

      /**
       * Announce the feed version (flagged as RSS) to the listener and then
       * initialise it.
       *
       * @throws SAXException if no listener is set, or wrapping any
       *         <code>FeedParserException</code> raised by the listener.
       */
      public void startDocument() throws SAXException {

          FeedParserListener target = requireListener();

          try {

              FeedVersion v = new FeedVersion();
              v.isRSS = true;
              target.onFeedVersion( v );

              target.init();

          } catch ( FeedParserException f ) {
              throw new SAXException( f );
          }

      }

      /**
       * Tell the listener that the document has been fully processed.
       *
       * @throws SAXException if no listener is set, or wrapping any
       *         <code>FeedParserException</code> raised by the listener.
       */
      public void endDocument() throws SAXException {

          FeedParserListener target = requireListener();

          try {

              target.finished();

          } catch ( FeedParserException f ) {
              throw new SAXException( f );
          }

      }

      /**
       * Match rss:channel
       */
      class ChannelTemplate extends BaseDefaultHandler {

          public ChannelTemplate( RSSFeedParser parser ) {

              super( "channel", RSSFeedParser.RSS_NAMESPACES, parser );

              setNext( new URLTemplate( parser ) );

          }

          /**
           * Report the channel using the title, link and description
           * properties captured so far.
           */
          public void beginFeedElement() throws FeedParserException {

              parser.listener.onChannel( parser.state,
                                         getProperty( "title" ),
                                         getProperty( "link" ),
                                         getProperty( "description" ) );

          }

          /** Report the end of the channel. */
          public void endFeedElement() throws FeedParserException {
              parser.listener.onChannelEnd();
          }

      }

      /**
       * Match rss:url for images/etc
       */
      class URLTemplate extends BaseDefaultHandler {

          public URLTemplate( RSSFeedParser parser ) {

              super( "url", RSSFeedParser.RSS_NAMESPACES, parser );

              setNext( new ModContentTemplate( parser ) );

          }

      }

      /**
       * Match the <code>items</code> element in the mod_content namespace.
       *
       * @author <a href="mailto:burton@peerfear.org">Kevin Burton</a>
       */
      class ModContentTemplate extends BaseDefaultHandler {

          public ModContentTemplate( RSSFeedParser parser ) {

              super( "items", RSSFeedParser.MOD_CONTENT_NAMESPACES, parser );

              this.setNext( new RDFValueTemplate( parser ) );

          }

      }

      /**
       * Match the rdf:value for mod_content.  The element's markup content is
       * captured (include-content mode) rather than only its character data.
       *
       * @author <a href="mailto:burton@peerfear.org">Kevin Burton</a>
       */
      class RDFValueTemplate extends BaseDefaultHandler {

          public RDFValueTemplate( RSSFeedParser parser ) {

              super( "value", RSSFeedParser.RDF_NAMESPACES, parser );

              this.setIncludeContent( true );
              this.setNext( new RSSImageFeedParser( parser ) );

          }

          /**
           * Temporary debugging output of the captured value; no listener
           * callback is wired up for it yet.
           */
          public void endFeedElement() throws FeedParserException {
              System.out.println( " FIXME: (debug): " + getProperty( "value" ) );
          }

      }

  }
  
  /**
   * Match the <code>image</code> element in the RSS namespaces and report it
   * to the listener.
   */
  class RSSImageFeedParser extends BaseDefaultHandler {

      /**
       * @param parser the root parser that owns the listener, the state and
       *        the captured properties; also passed on to the item template.
       */
      public RSSImageFeedParser( RSSFeedParser parser ) {

          // Static set is referenced through the class, not the instance.
          super( "image", RSSFeedParser.RSS_NAMESPACES, parser );

          setNext( new RSSItemFeedParser( parser ) );

      }

      /**
       * Report the image using the title, link and url properties captured
       * so far.
       */
      public void beginFeedElement() throws FeedParserException {

          parser.listener.onImage( parser.state,
                                   getProperty( "title" ),
                                   getProperty( "link" ),
                                   getProperty( "url" ) );

      }

      /** Report the end of the image. */
      public void endFeedElement() throws FeedParserException {
          parser.listener.onImageEnd();
      }

  }
  
  /**
   * Match the <code>item</code> element in the RSS namespaces and report it
   * to the listener.
   */
  class RSSItemFeedParser extends BaseDefaultHandler {

      /**
       * @param parser the root parser that owns the listener, the state and
       *        the captured properties; also passed on to the title template.
       */
      public RSSItemFeedParser( RSSFeedParser parser ) {

          // Same three-argument constructor as the sibling templates; it sets
          // the namespace filter in one step.
          super( "item", RSSFeedParser.RSS_NAMESPACES, parser );

          setNext( new RSSTitleFeedParser( parser ) );

      }

      /**
       * Report the item using the title, link and description properties
       * captured so far.  The final listener argument is always
       * <code>null</code>.
       */
      public void beginFeedElement() throws FeedParserException {

          parser.listener.onItem( parser.state,
                                  getProperty( "title" ),
                                  getProperty( "link" ),
                                  getProperty( "description" ),
                                  null );

      }

      /** Report the end of the item. */
      public void endFeedElement() throws FeedParserException {
          parser.listener.onItemEnd();
      }

  }
  
  /**
   * Capture the character data of <code>title</code> elements (any
   * namespace) and hand events on to the link template.
   */
  class RSSTitleFeedParser extends BaseDefaultHandler {

      /**
       * @param parser the root parser whose property map receives the title.
       */
      public RSSTitleFeedParser( RSSFeedParser parser ) {
          super( "title", parser );

          RSSLinkFeedParser linkTemplate = new RSSLinkFeedParser( parser );
          this.setNext( linkTemplate );
      }

  }
  
  /**
   * Capture the character data of <code>link</code> elements (any namespace)
   * and hand events on to the description template.
   */
  class RSSLinkFeedParser extends BaseDefaultHandler {

      /**
       * @param parser the root parser whose property map receives the link.
       */
      public RSSLinkFeedParser( RSSFeedParser parser ) {

          super( "link", parser );

          RSSDescriptionFeedParser descriptionTemplate =
              new RSSDescriptionFeedParser( parser );
          this.setNext( descriptionTemplate );

      }

  }
  
  /**
   * Capture the character data of <code>description</code> elements (any
   * namespace).  This template sets no successor.
   */
  class RSSDescriptionFeedParser extends BaseDefaultHandler {

      /**
       * @param parser the root parser whose property map receives the
       *        description.
       */
      public RSSDescriptionFeedParser( RSSFeedParser parser ) {

          super( "description", parser );

      }

  }
  
  /**
   * dc:subject support.  Matches <code>subject</code> elements; both
   * callbacks are currently no-ops.
   */
  class RSSDcSubjectFeedParser extends BaseDefaultHandler {

      // TODO: hold a MetaFeedParserListener here once subject dispatch exists.

      /**
       * @param parser the root parser whose property map receives the subject.
       */
      public RSSDcSubjectFeedParser( RSSFeedParser parser ) {

          super( "subject", parser );

      }

      /**
       * Intentionally empty.  Planned behaviour, only when the element is
       * dc:subject:
       * <code>listener.onSubject( parser.state, parser.getProperty( "subject" ) )</code>
       */
      public void beginFeedElement() {
      }

      /** Intentionally empty. */
      public void endFeedElement() {
      }

  }
  
  /**
   * Base class for the chained SAX element templates.  Each handler watches
   * for one element local name (optionally restricted to a set of namespace
   * URIs), buffers that element's content, stores it in the root parser's
   * property map when the element closes, and forwards every element and
   * character event to the next handler in the chain.
   */
  class BaseDefaultHandler extends DefaultHandler {

      public static int STRING_BUFFER_CAPACITY = 100000;

      //BUG: this will break on nested code:

      //     <foo>
      //         <foo>
      //
      //         </foo>
      //
      //     </foo>

      // won't be smart enough to realize it's nested

      /**
       * The local name of the element
       */
      private String local = null;

      //FIXME: move to a FastStringBuffer that's not synchronized.
      /** Content collected for the element currently being captured. */
      private StringBuffer buff = null;

      /** True between the start and the end of a matching element. */
      private boolean onElement = false;

      /** True when markup of nested elements is captured, not only text. */
      private boolean includeContent = false;

      /** Next template in the chain, or null at the end of the chain. */
      BaseDefaultHandler next = null;

      FeedParserListener listener = null;

      /** Root parser that owns the shared property map. */
      RSSFeedParser parser = null;

      /**
       * Namespace URI to prefix mapping.  Static, so it is shared by every
       * handler instance.
       * NOTE(review): HashMap is unsynchronized - confirm that parsers are
       * never run concurrently.
       */
      static HashMap nsPrefixMapping = new HashMap();

      /**
       * The namespace URIs this handler accepts, or null to accept elements
       * from any namespace.
       */
      HashSet namespaces = null;

      /**
       * @param local local name of the element to match.
       */
      public BaseDefaultHandler( String local ) {
          this.local = local;
      }

      /**
       * @param local local name of the element to match.
       * @param parser root parser that owns the shared property map.
       */
      public BaseDefaultHandler( String local, RSSFeedParser parser ) {

          this.local = local;
          this.parser = parser;

      }

      /**
       * @param local local name of the element to match.
       * @param namespaces namespace URIs accepted for the element.
       * @param parser root parser that owns the shared property map.
       */
      public BaseDefaultHandler( String local,
                                 HashSet namespaces,
                                 RSSFeedParser parser ) {

          this.local = local;
          this.namespaces = namespaces;
          this.parser = parser;

      }

      /**
       * If true, the markup of elements nested inside the matched element is
       * re-serialized into the captured value along with the character data.
       *
       * @author <a href="mailto:burton@peerfear.org">Kevin Burton</a>
       */
      public void setIncludeContent( boolean includeContent ) {
          this.includeContent = includeContent;
      }

      /**
       * Set the next template to process in this chain.
       *
       * @author <a href="mailto:burton@peerfear.org">Kevin Burton</a>
       */
      public void setNext( BaseDefaultHandler next ) {
          this.next = next;
      }

      /**
       * Return the content captured for the element.
       *
       * @return the buffered content, or <code>null</code> when nothing has
       *         been captured (no buffer yet, or an empty buffer).
       * @author <a href="mailto:burton@peerfear.org">Kevin Burton</a>
       */
      public String toString() {

          if ( buff == null || buff.length() == 0 ) {
              return null;
          }

          return buff.toString();
      }

      /**
       * Return true if the namespace is valid and this class is handling the
       * given element name
       *
       * @param namespace namespace URI reported by the SAX parser.
       * @param local local name reported by the SAX parser.
       * @author <a href="mailto:burton@peerfear.org">Kevin Burton</a>
       */
      boolean isLocal( String namespace, String local ) {

          // See if we need to test for namespaces: the filter only applies
          // when both the event and this handler carry namespace information.
          if ( namespace != null
               && namespaces != null
               && ! namespaces.contains( namespace ) ) {
              return false;
          }

          return this.local.equals( local );
      }

      /**
       * Get the value of a string property we found while parsing
       *
       * @param name element local name the value was stored under.
       * @return the stored value, or <code>null</code> if there is none.
       * @author <a href="mailto:burton@peerfear.org">Kevin Burton</a>
       */
      public String getProperty( String name ) {
          return (String)parser.properties.get( name );
      }

      /**
       * @return true only when the named property is exactly "true".
       */
      public boolean getBoolean( String name ) {

          return "true".equals( getProperty( name ) );

      }

      /**
       * Method to call when we're finished processing this element but BEFORE
       * processing of the next element in the chain.
       *
       * @author <a href="mailto:burton@peerfear.org">Kevin Burton</a>
       */
      public void beginFeedElement() throws FeedParserException {}

      /**
       * Method to call when we're finished processing this element but AFTER
       * processing of the next element in the chain.
       *
       * @author <a href="mailto:burton@peerfear.org">Kevin Burton</a>
       */
      public void endFeedElement() throws FeedParserException {}

      /**
       * Append "prefix:" to the buffer when a prefix is known for the given
       * namespace.
       *
       * @return true if a prefix was appended.
       */
      private boolean includeContentPrefix( String namespace ) {

          if ( namespace != null ) {

              String prefix = (String)nsPrefixMapping.get( namespace );

              if ( prefix != null ) {

                  buff.append( prefix );
                  buff.append( ":" );
                  return true;
              }

          }

          return false;

      }

      /**
       * Escape the characters that may not appear literally inside a
       * double-quoted XML attribute value.
       */
      private static String escapeAttribute( String value ) {

          if ( value == null ) {
              return "";
          }

          StringBuffer escaped = new StringBuffer( value.length() + 16 );

          for ( int i = 0; i < value.length(); ++i ) {

              char c = value.charAt( i );

              switch ( c ) {
                  case '&':
                      escaped.append( "&amp;" );
                      break;
                  case '<':
                      escaped.append( "&lt;" );
                      break;
                  case '"':
                      escaped.append( "&quot;" );
                      break;
                  default:
                      escaped.append( c );
              }

          }

          return escaped.toString();

      }

      // **** SAX DefaultHandler **************************************************

      /**
       * Keep track of namespaces.  Only non-empty prefixes are recorded.
       *
       * @author <a href="mailto:burton@peerfear.org">Kevin Burton</a>
       */
      public void startPrefixMapping( String prefix,
                                      String namespace ) throws SAXException {

          if ( prefix != null && ! "".equals( prefix ) ) {

              nsPrefixMapping.put( namespace, prefix );

          }

      }

      //FIXME: it might be possible to call an item again without a member and the
      //value from the LAST item is used... this needs to be a fatal error and we
      //need to clear ...

      /**
       * Start capturing when the element matches this handler, forward the
       * event down the chain and, in include-content mode, serialize the start
       * tag of elements nested inside the matched element.
       */
      public void startElement( String namespace,
                                String local,
                                String qName,
                                Attributes attributes ) throws SAXException {

          boolean matched = isLocal( namespace, local );

          if ( matched ) {

              //FIXME: is there a more efficient way to clear a buffer than this?

              //FIXME: also only do this if it's necessary and content has
              //actually been added.  This will save some performance.

              if ( buff == null ) {
                  buff = new StringBuffer( 1000 );
              } else {
                  buff.setLength( 0 );
              }

              onElement = true;
          }

          if ( next != null ) {
              next.startElement( namespace, local, qName, attributes );
          }

          // Only nested elements are serialized.  The matched element is the
          // container: its end tag is never written, so writing its start tag
          // would leave the captured value with an unbalanced opening tag.
          if ( includeContent && onElement && ! matched ) {

              buff.append( "<" );

              boolean hasPrefix = includeContentPrefix( namespace );

              buff.append( local );

              if ( ! hasPrefix && namespace != null ) {
                  buff.append( " xmlns=\"" );
                  buff.append( namespace );
                  buff.append( "\"" );
              }

              //now include attributes

              int length = attributes.getLength();

              for ( int i = 0; i < length; ++i ) {

                  buff.append( " " );
                  buff.append( attributes.getQName( i ) );
                  buff.append( "=\"" );
                  // SAX hands over decoded values; escape them again so the
                  // serialized attribute stays well-formed.
                  buff.append( escapeAttribute( attributes.getValue( i ) ) );
                  buff.append( "\"" );

              }

              buff.append( ">" );
          }

      }

      /**
       * Buffer character data while inside a matching element, then forward
       * the event down the chain.
       * NOTE(review): text is appended exactly as delivered by the parser and
       * is not re-escaped, even in include-content mode.
       */
      public void characters( char[] ch,
                              int start,
                              int length ) throws SAXException {

          if ( onElement ) {
              buff.append( ch, start, length );
          }

          if ( next != null ) {
              next.characters( ch, start, length );
          }

      }

      /**
       * When the element matches, store the captured value in the parser's
       * property map and fire {@link #beginFeedElement()}; then forward the
       * event down the chain and fire {@link #endFeedElement()}.  In
       * include-content mode the end tag of nested elements is serialized.
       *
       * @throws SAXException wrapping any <code>FeedParserException</code>
       *         raised by the callbacks.
       */
      public void endElement( String namespace,
                              String local,
                              String qName ) throws SAXException {

          boolean matched = isLocal( namespace, local );

          try {

              if ( matched ) {

                  onElement = false;
                  parser.properties.put( local, toString() );

                  beginFeedElement();

              }

              if ( next != null ) {
                  next.endElement( namespace, local, qName );
              }

              if ( matched ) {
                  endFeedElement();
              }

              // onElement is already false for the matched element, so only
              // nested elements get an end tag here.
              if ( includeContent && onElement ) {
                  buff.append( "</" );

                  includeContentPrefix( namespace );

                  buff.append( local );

                  buff.append( ">" );
              }

          } catch ( FeedParserException fpe ) {

              throw new SAXException( fpe );

          }

      }

  }
  
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: commons-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: commons-dev-help@jakarta.apache.org


Mime
View raw message