commons-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From bur...@apache.org
Subject cvs commit: jakarta-commons-sandbox/feedparser/tests/locale rss-2.0-en-on-channel-element.xml rss-zh-on-channel-element.xml
Date Mon, 18 Oct 2004 00:00:09 GMT
burton      2004/10/17 17:00:09

  Modified:    feedparser/src/java/org/apache/commons/feedparser
                        BaseParser.java RSSFeedParser.java
  Added:       feedparser/tests/locale rss-2.0-en-on-channel-element.xml
                        rss-zh-on-channel-element.xml
  Log:
  More language support: this time for RSS 2.0 and RSS 0.91, via the dc:language and
'language' elements.
  
  Revision  Changes    Path
  1.2       +23 -12    jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/BaseParser.java
  
  Index: BaseParser.java
  ===================================================================
  RCS file: /home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/BaseParser.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- BaseParser.java	17 Oct 2004 23:43:23 -0000	1.1
  +++ BaseParser.java	18 Oct 2004 00:00:09 -0000	1.2
  @@ -49,13 +49,11 @@
           if ( state.metaFeedParserlistener == null )
               return;
   
  -        Attribute attr = getLocaleAttribute( element );
  +        String l = getLocaleString( element );
           
  -        if ( attr != null ) {
  +        if ( l != null ) {
   
  -            String v = attr.getValue();
  -
  -            Locale locale = RFC3066LocaleParser.parse( v );
  +            Locale locale = RFC3066LocaleParser.parse( l );
   
               if ( locale != null )
                   state.metaFeedParserlistener.onLocale( state, locale );
  @@ -72,23 +70,36 @@
           if ( state.metaFeedParserlistener == null )
               return;
   
  -        Attribute attr = getLocaleAttribute( element );
  +        String l = getLocaleString( element );
   
  -        if ( attr != null ) 
  +        if ( l != null ) 
               state.metaFeedParserlistener.onLocaleEnd();
   
       }
   
  -    protected static Attribute getLocaleAttribute( Element element ) {
  +    protected static String getLocaleString( Element element ) {
   
           //hm.. crap. how do we get the 'xml' namespace here?
           Attribute attr = element.getAttribute( "lang" );
   
  +        if ( attr != null )
  +            return attr.getValue();
  +        
           //when stil null see that we have dc:language
  -        if ( attr == null )
  -            attr = element.getAttribute( "language", NS.DC );
   
  -        return attr;
  +        Element lang = element.getChild( "language", NS.DC );
  +
  +        if ( lang != null )
  +            return lang.getText();
  +
  +        //fall over to just using "language" and if it isn't a local string we
  +        //won't parse it.  This is for RSS 0.91 and RSS 2.0 content.
  +        lang = element.getChild( "language" );
  +
  +        if ( lang != null )
  +            return lang.getText();
  +
  +        return null;
   
       }
       
  
  
  
  1.13      +12 -8     jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/RSSFeedParser.java
  
  Index: RSSFeedParser.java
  ===================================================================
  RCS file: /home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/RSSFeedParser.java,v
  retrieving revision 1.12
  retrieving revision 1.13
  diff -u -r1.12 -r1.13
  --- RSSFeedParser.java	3 Sep 2004 19:46:47 -0000	1.12
  +++ RSSFeedParser.java	18 Oct 2004 00:00:09 -0000	1.13
  @@ -38,7 +38,7 @@
    * @author <a href="mailto:burton@apache.org">Kevin A. Burton (burtonator)</a>
    * @version $Id$
    */
  -public class RSSFeedParser {
  +public class RSSFeedParser extends BaseParser {
   
       /**
        * Parse the given document as an OPML document.
  @@ -48,7 +48,7 @@
       public static void parse( FeedParserListener listener,
                                 org.jdom.Document doc ) throws Exception {
   
  -        FeedParserState state = new FeedParserState();
  +        FeedParserState state = new FeedParserState( listener );
   
           FeedVersion v = new FeedVersion();
           v.isRSS = true;
  @@ -60,7 +60,9 @@
           XPath xpath = new XPath( "/descendant::*[local-name() = 'channel']" );
           Element channel = (Element)xpath.selectSingleNode( doc );
           state.current = channel;
  -        doParseChannel( listener, state );
  +        doLocale( state, listener, channel );
  +        doChannel( listener, state );
  +        doLocaleEnd( state, listener, channel );
   
           //*** now process the image. ***
           xpath = new XPath( "/descendant::*[local-name() = 'image']" );
  @@ -85,11 +87,13 @@
           //update items.
           while ( i.hasNext() ) {
   
  -            Element child = (Element)i.next();
  +            Element item = (Element)i.next();
   
  -            state.current = child;
  +            state.current = item;
                   
  +            doLocale( state, listener, item );
               doParseItem( listener, state );
  +            doLocaleEnd( state, listener, item );
   
           }
   
  @@ -102,8 +106,8 @@
        *
        * @author <a href="mailto:burton@peerfear.org">Kevin A. Burton</a>
        */
  -    private static void doParseChannel( FeedParserListener listener,
  -                                        FeedParserState state ) throws Exception {
  +    private static void doChannel( FeedParserListener listener,
  +                                   FeedParserState state ) throws Exception {
   
           String link = getChildElementTextByName( state, "link" );
   
  
  
  
  1.1                  jakarta-commons-sandbox/feedparser/tests/locale/rss-2.0-en-on-channel-element.xml
  
  Index: rss-2.0-en-on-channel-element.xml
  ===================================================================
  <?xml version="1.0"?>
  <!-- RSS generated by UserLand Frontier v9.0.1 on 10/17/2004; 2:40:35 PM Pacific -->
  <rss version="2.0">
  	<channel>
  		<title>Scripting News</title>
  		<link>http://www.scripting.com/</link>
  		<description>It's even worse than it appears.</description>
  		<language>en-us</language>
  		<copyright>Copyright 1997-2004 Dave Winer</copyright>
  		<pubDate>Sun, 17 Oct 2004 07:00:00 GMT</pubDate>
  		<lastBuildDate>Sun, 17 Oct 2004 21:40:35 GMT</lastBuildDate>
  		<docs>http://blogs.law.harvard.edu/tech/rss</docs>
  		<generator>UserLand Frontier v9.0.1</generator>
  		<managingEditor>dwiner@cyber.law.harvard.edu</managingEditor>
  		<webMaster>dwiner@cyber.law.harvard.edu</webMaster>
  		<item>
  			<description>&lt;a href=&quot;http://www.dawnanddrew.com/archives/001039.php&quot;&gt;I
just listened&lt;/a&gt; to my first Dawn &amp; Drew podcast. They're awesome.
</description>
  			<pubDate>Sun, 17 Oct 2004 19:45:58 GMT</pubDate>
  			<guid>http://archive.scripting.com/2004/10/17#When:12:45:58PM</guid>
  			</item>
  		<item>
  			<description>If newspaper &lt;a href=&quot;http://www.command-post.org/2004/2_archives/016036.html&quot;&gt;endorsements&lt;/a&gt;
were votes, Kerry wins Florida in a landslide.</description>
  			<pubDate>Sun, 17 Oct 2004 21:40:33 GMT</pubDate>
  			<guid>http://archive.scripting.com/2004/10/17#When:2:40:33PM</guid>
  			</item>
  		<item>
  			<description>&lt;a href=&quot;http://www.scripting.com/images/2001/09/15/usFlag.gif&quot;
title=&quot;THINK!&quot;>&lt;img src=&quot;http://www.scripting.com/images/archiveScriptingCom/2004/05/31/think.gif&quot;
height=&quot;59&quot; width=&quot;69&quot; border=&quot;0&quot; hspace=&quot;15&quot;
vspace=&quot;15&quot; align=&quot;right&quot; alt=&quot;THINK!&quot;>&lt;/a>Watching
&lt;a href=&quot;http://frist.senate.gov/&quot;&gt;Bill Frist&lt;/a&gt;,
Senate Majority Leader, blame Kerry and Edwards for the failure of the Congress to pass tort
reform. I thought I was hearing Bush do the same thing in the last debate. I wonder how many
Americans know that Congress is controlled by the Republicans. They talk as if there were
an adversarial relationship between the legislative and executive branches, which would lead
people to draw the incorrect conclusion that the Democrats are responsible. I think today
we got a preview of the final onslaught of ads the Republicans are going to run, and there
won't be an opportunity to explain that the Democrats don't run Congress. Like the lie that
Saddam Hussein was in league with Osama bin Laden, the Republicans don't mind if you draw
the wrong conclusion, in fact, they'll help you do it. </description>
  			<pubDate>Sun, 17 Oct 2004 17:44:57 GMT</pubDate>
  			<guid>http://archive.scripting.com/2004/10/17#When:10:44:57AM</guid>
  			</item>
  		<item>
  			<description>&lt;a href=&quot;http://www.nytimes.com/2004/10/17/magazine/17BUSH.html?ei=5088&amp;en=6a9ce1d022952b10&amp;ex=1255752000&amp;partner=rssnyt&amp;pagewanted=print&amp;position=&quot;&gt;NY
Times Magazine&lt;/a&gt;, quoting a senior White House official, in 2002: &quot;We're
an empire now, and when we act, we create our own reality.&quot;</description>
  			<pubDate>Sun, 17 Oct 2004 18:46:03 GMT</pubDate>
  			<guid>http://archive.scripting.com/2004/10/17#When:11:46:03AM</guid>
  			</item>
  		<item>
  			<description>&lt;a href=&quot;http://www.bloggercon.org/2004/10/17#a1605&quot;&gt;Adam
Curry&lt;/a&gt;: &quot;A lot of people have been questioning the use of licensed
music in Podcasts and I too feel that the time has come to face any legal ramifications of
this audio wave we're riding now, and not let it take us by surprise.&quot;</description>
  			<pubDate>Sun, 17 Oct 2004 17:24:24 GMT</pubDate>
  			<guid>http://archive.scripting.com/2004/10/17#When:10:24:24AM</guid>
  			</item>
  		<item>
  			<description>&lt;a href=&quot;http://www.sfgate.com/cgi-bin/article.cgi?file=/chronicle/archive/2004/10/17/EDG8O98IQ01.DTL&quot;&gt;Mitch
Kapor&lt;/a&gt;: &quot;We were never meant to have a highly centralized government.&quot;</description>
  			<pubDate>Sun, 17 Oct 2004 17:21:02 GMT</pubDate>
  			<guid>http://archive.scripting.com/2004/10/17#When:10:21:02AM</guid>
  			</item>
  		<item>
  			<description>&lt;img src=&quot;http://images.scripting.com/archiveScriptingCom/2004/10/17/lessig.jpg&quot;
width=&quot;45&quot; height=&quot;57&quot; border=&quot;0&quot; align=&quot;right&quot;
hspace=&quot;15&quot; vspace=&quot;5&quot; alt=&quot;A picture named lessig.jpg&quot;&gt;Emailing
with Larry Lessig today, he said something surprising about &lt;a href=&quot;http://creativecommons.org/&quot;&gt;Creative
Commons&lt;/a&gt;. &quot;No author gives up his copyright when putting content
under a CC license. A CC license is just permissions given up front. It rests upon a copyright
(without the copyright, you couldn't impose the permissions). But the copyright owner holds
the copyright, and just says, 'here's how you're free to use my work.'&quot; </description>
  			<pubDate>Sun, 17 Oct 2004 15:57:45 GMT</pubDate>
  			<guid>http://archive.scripting.com/2004/10/17#When:8:57:45AM</guid>
  			</item>
  		<item>
  			<description>Doug Kaye &lt;a href=&quot;http://www.itconversations.com/shows/detail225.html&quot;&gt;interview&lt;/a&gt;
with Adam Curry.</description>
  			<pubDate>Sun, 17 Oct 2004 12:15:27 GMT</pubDate>
  			<guid>http://archive.scripting.com/2004/10/17#When:5:15:27AM</guid>
  			</item>
  		<item>
  			<description>&lt;a href=&quot;http://www.iol.co.za/index.php?set_id=1&amp;click_id=2969&quot;&gt;RSS
news feeds&lt;/a&gt; from South Africa.</description>
  			<pubDate>Sun, 17 Oct 2004 14:21:26 GMT</pubDate>
  			<guid>http://archive.scripting.com/2004/10/17#When:7:21:26AM</guid>
  			<category>/Technology/Formats and Protocols/RSS</category>
  			</item>
  		<item>
  			<description>&lt;a href=&quot;http://static.podcatch.com/manila/gems/un/eps.mp3&quot;&gt;This
is a test&lt;/a&gt;. For the next sixty seconds this station will conduct a test of
the Emergency Podcast System. </description>
  			<pubDate>Sun, 17 Oct 2004 17:36:38 GMT</pubDate>
  			<guid>http://archive.scripting.com/2004/10/17#When:10:36:38AM</guid>
  			<enclosure url="http://static.podcatch.com/manila/gems/un/eps.mp3" length="189455"
type="audio/mpeg" />
  			</item>
  		<item>
  			<description>&lt;a href=&quot;http://static.podcatch.com/manila/gems/un/anotherTestAudioBlogPost.mp3&quot;&gt;I
got another&lt;/a&gt; test blog post. An audio test blog post. Pay no attention to
the man behind the curtain.</description>
  			<pubDate>Sun, 17 Oct 2004 19:11:14 GMT</pubDate>
  			<guid>http://archive.scripting.com/2004/10/17#When:12:11:14PM</guid>
  			<enclosure url="http://static.podcatch.com/manila/gems/un/anotherTestAudioBlogPost.mp3"
length="106423" type="audio/mpeg" />
  			</item>
  		<item>
  			<description>&lt;a href=&quot;http://www.mediainfo.com/eandp/news/article_display.jsp?vnu_content_id=1000671941&quot;&gt;Editor
&amp; Publisher&lt;/a&gt; has a list of presidential endorsements.</description>
  			<pubDate>Sun, 17 Oct 2004 12:42:24 GMT</pubDate>
  			<guid>http://archive.scripting.com/2004/10/17#When:5:42:24AM</guid>
  			</item>
  		<item>
  			<description>&lt;a href=&quot;http://www.undergroundclips.com/undergroundclips/2004/10/richard_clark_o.html&quot;&gt;Undergroundclips&lt;/a&gt;
has the 60 Minutes interview with Richard Clark.</description>
  			<pubDate>Sun, 17 Oct 2004 12:28:53 GMT</pubDate>
  			<guid>http://archive.scripting.com/2004/10/17#When:5:28:53AM</guid>
  			</item>
  		<item>
  			<description>&lt;a href=&quot;http://www.nytimes.com/2004/10/17/arts/17rich.html?ex=1255752000&amp;en=ca7f76fa80642517&amp;ei=5088&amp;partner=rssnyt&quot;&gt;Frank
Rich&lt;/a&gt;: &quot;Like the Nixon administration before it, the current White
House has kneecapped with impunity any news organization that challenges its message.&quot;</description>
  			<pubDate>Sun, 17 Oct 2004 14:16:02 GMT</pubDate>
  			<guid>http://archive.scripting.com/2004/10/17#When:7:16:02AM</guid>
  			</item>
  		<item>
  			<description>The Boston Globe &lt;a href=&quot;http://www.boston.com/business/articles/2004/10/17/harvards12_billion_man/?rss_id=Boston%20Globe%20--%20Business%20News&quot;&gt;profiles&lt;/a&gt;
Jack Meyer, the investment banker who's in charge of Harvard's $22 billion endowment.</description>
  			<pubDate>Sun, 17 Oct 2004 12:30:44 GMT</pubDate>
  			<guid>http://archive.scripting.com/2004/10/17#When:5:30:44AM</guid>
  			</item>
  		<item>
  			<description>&lt;img src=&quot;http://images.scripting.com/archiveScriptingCom/2004/10/17/dubya.jpg&quot;
width=&quot;45&quot; height=&quot;61&quot; border=&quot;0&quot; align=&quot;right&quot;
hspace=&quot;15&quot; vspace=&quot;5&quot; alt=&quot;A picture named dubya.jpg&quot;&gt;In
a speech yesterday Bush said we will not have an all-volunteer army. A few in the audience
shouted, and he &lt;a href=&quot;http://www.turkishpress.com/turkishpress/news.asp?ID=30912&quot;&gt;flipped&lt;/a&gt;
it around. They chuckle when Bush makes a mistake, but what if Kerry had said it? Do you think
the Republicans would have mocked him? Yeah, I think so. I think the Dems should run that
flip-flop as an ad. Fair is fair. And unfair is fair in this election.</description>
  			<pubDate>Sun, 17 Oct 2004 12:03:56 GMT</pubDate>
  			<guid>http://archive.scripting.com/2004/10/17#When:5:03:56AM</guid>
  			</item>
  		<item>
  			<description>&lt;a href=&quot;http://jeremy.zawodny.com/blog/archives/002826.html&quot;&gt;Jeremy
Zawodny&lt;/a&gt;, who works at Yahoo, says MSNBC ripped them off. </description>
  			<pubDate>Sun, 17 Oct 2004 12:01:55 GMT</pubDate>
  			<guid>http://archive.scripting.com/2004/10/17#When:5:01:55AM</guid>
  			</item>
  		</channel>
  	</rss>
  
  
  
  1.1                  jakarta-commons-sandbox/feedparser/tests/locale/rss-zh-on-channel-element.xml
  
  Index: rss-zh-on-channel-element.xml
  ===================================================================
  <rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:trackback="http://madskills.com/public/xml/rss/module/trackback/"
xmlns:wfw="http://wellformedweb.org/CommentAPI/" xmlns:slash="http://purl.org/rss/1.0/modules/slash/"><channel><title>beiqiao的专栏</title><link>http://blog.csdn.net/beiqiao/</link><description
/><dc:language>zh-CHS</dc:language><generator>.Text Version 0.958.2004.2001</generator><item><dc:creator>beiqiao</dc:creator><title>修复IE</title><link>http://blog.csdn.net/beiqiao/archive/2004/10/12/132900.aspx</link><pubDate>Tue,
12 Oct 2004 00:43:00 GMT</pubDate><guid>http://blog.csdn.net/beiqiao/archive/2004/10/12/132900.aspx</guid><wfw:comment>http://blog.csdn.net/beiqiao/comments/132900.aspx</wfw:comment><comments>http://blog.csdn.net/beiqiao/archive/2004/10/12/132900.aspx#Feedback</comments><slash:comments>0</slash:comments><wfw:commentRss>http://blog.csdn.net/beiqiao/comments/commentRss/132900.aspx</wfw:commentRss><trackback:ping>http://blog.csdn.net/beiqiao/services/trackbacks/132900.aspx</trackback:ping><description>发现中毒了,每次启动IE,会打开本地一个sp.html文件。这个文件放在windows临时目录下,即使被删除,IE启动后,又被生成。同时注册表中HKEY_LOCAL_MECHINE\Microsoft\Internet
Explorer\main\search bar、search page;HKEY_LOCAL_MECHINE\Microsoft\Internet Explorer\search键值被修改成指向本地的sp.html文件,修改掉这些键值后,打开IE,又被改回来了。&lt;img
src ="http://blog.csdn.net/beiqiao/aggbug/132900.aspx" width = "1" height = "1" /&gt;</description></item><item><dc:creator>beiqiao</dc:creator><title>Linux相关命令</title><link>http://blog.csdn.net/beiqiao/archive/2004/10/11/131800.aspx</link><pubDate>Mon,
11 Oct 2004 10:55:00 GMT</pubDate><guid>http://blog.csdn.net/beiqiao/archive/2004/10/11/131800.aspx</guid><wfw:comment>http://blog.csdn.net/beiqiao/comments/131800.aspx</wfw:comment><comments>http://blog.csdn.net/beiqiao/archive/2004/10/11/131800.aspx#Feedback</comments><slash:comments>0</slash:comments><wfw:commentRss>http://blog.csdn.net/beiqiao/comments/commentRss/131800.aspx</wfw:commentRss><trackback:ping>http://blog.csdn.net/beiqiao/services/trackbacks/131800.aspx</trackback:ping><description>列出常用Linux命令以备查询&lt;img
src ="http://blog.csdn.net/beiqiao/aggbug/131800.aspx" width = "1" height = "1" /&gt;</description></item><item><dc:creator>beiqiao</dc:creator><title>使用Jmeter对Mysql进行压力测试无法执行多条sql语句问题</title><link>http://blog.csdn.net/beiqiao/archive/2004/10/10/130966.aspx</link><pubDate>Sun,
10 Oct 2004 16:49:00 GMT</pubDate><guid>http://blog.csdn.net/beiqiao/archive/2004/10/10/130966.aspx</guid><wfw:comment>http://blog.csdn.net/beiqiao/comments/130966.aspx</wfw:comment><comments>http://blog.csdn.net/beiqiao/archive/2004/10/10/130966.aspx#Feedback</comments><slash:comments>0</slash:comments><wfw:commentRss>http://blog.csdn.net/beiqiao/comments/commentRss/130966.aspx</wfw:commentRss><trackback:ping>http://blog.csdn.net/beiqiao/services/trackbacks/130966.aspx</trackback:ping><description>使用Jmeter对Mysql进行压力测试无法执行多条sql语句问题&lt;img
src ="http://blog.csdn.net/beiqiao/aggbug/130966.aspx" width = "1" height = "1" /&gt;</description></item></channel></rss>
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: commons-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: commons-dev-help@jakarta.apache.org


Mime
View raw message