Return-Path: Delivered-To: apmail-jakarta-commons-dev-archive@www.apache.org Received: (qmail 64961 invoked from network); 13 Aug 2004 23:17:26 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (209.237.227.199) by minotaur-2.apache.org with SMTP; 13 Aug 2004 23:17:26 -0000 Received: (qmail 40502 invoked by uid 500); 13 Aug 2004 23:17:22 -0000 Delivered-To: apmail-jakarta-commons-dev-archive@jakarta.apache.org Received: (qmail 40351 invoked by uid 500); 13 Aug 2004 23:17:21 -0000 Mailing-List: contact commons-dev-help@jakarta.apache.org; run by ezmlm Precedence: bulk List-Unsubscribe: List-Subscribe: List-Help: List-Post: List-Id: "Jakarta Commons Developers List" Reply-To: "Jakarta Commons Developers List" Delivered-To: mailing list commons-dev@jakarta.apache.org Received: (qmail 40338 invoked by uid 500); 13 Aug 2004 23:17:21 -0000 Received: (qmail 40335 invoked by uid 99); 13 Aug 2004 23:17:21 -0000 X-ASF-Spam-Status: No, hits=-2.8 required=10.0 tests=ALL_TRUSTED,NO_REAL_NAME X-Spam-Check-By: apache.org Received: from [209.237.227.194] (HELO minotaur.apache.org) (209.237.227.194) by apache.org (qpsmtpd/0.27.1) with SMTP; Fri, 13 Aug 2004 16:17:19 -0700 Received: (qmail 64860 invoked by uid 1110); 13 Aug 2004 23:17:19 -0000 Date: 13 Aug 2004 23:17:19 -0000 Message-ID: <20040813231719.64859.qmail@minotaur.apache.org> From: burton@apache.org To: jakarta-commons-sandbox-cvs@apache.org Subject: cvs commit: jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/test TestFeedLocator.java X-Virus-Checked: Checked X-Spam-Rating: minotaur-2.apache.org 1.6.2 0/1000/N burton 2004/08/13 16:17:19 Modified: feedparser/src/java/org/apache/commons/feedparser/locate DiscoveryLocator.java feedparser/src/java/org/apache/commons/feedparser/test TestFeedLocator.java Log: Attribute order is no longer required ... more unit tests... 
Revision Changes Path 1.11 +49 -6 jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/DiscoveryLocator.java Index: DiscoveryLocator.java =================================================================== RCS file: /home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/DiscoveryLocator.java,v retrieving revision 1.10 retrieving revision 1.11 diff -u -r1.10 -r1.11 --- DiscoveryLocator.java 4 Aug 2004 22:17:01 -0000 1.10 +++ DiscoveryLocator.java 13 Aug 2004 23:17:18 -0000 1.11 @@ -28,9 +28,18 @@ */ public class DiscoveryLocator { - //NOTE: this will break if the attributes aren't in the right order. - static Pattern pattern = - Pattern.compile( "<link[^>]+type=[\"']([^\"']+)[\"'][^>]+href=[\"']([^\"']+)" ); + /** + * Get a FULL link within the content. We then pull the attributes out of + * this. + */ + static Pattern element_pattern = + Pattern.compile( "<link[^>]+" ); + + /** + * Regex to match on + */ + static Pattern attr_pattern = + Pattern.compile( "([a-zA-Z]+)=[\"']([^\"']+)[\"']" ); static HashSet mediatypes = new HashSet(); @@ -58,16 +67,27 @@ //elements forward until I discover . Also note that this isn't //doing all feed URLs just the first ones it finds. - Matcher m = pattern.matcher( content ); + Matcher m = element_pattern.matcher( content ); while( m.find() ) { - String type=m.group( 1 ); + //the value of the link element XML... 
example: + + // + + String element = m.group( 0 ); + + HashMap attributes = getAttributes( element ); + + String type = (String)attributes.get( "type" ); if ( mediatypes.contains( type ) ) { //expand the href - String href = m.group( 2 ); + String href = (String)attributes.get( "href" ); href = ResourceExpander.expand( resource, href ); FeedReference feedReference = new FeedReference( href, type ); @@ -85,6 +105,29 @@ } return list; + + } + + public static HashMap getAttributes( String link ) { + + HashMap map = new HashMap(); + + Matcher m = attr_pattern.matcher( link ); + + int index = 0; + + while ( m.find( index ) ) { + + String name = m.group( 1 ); + String value = m.group( 2 ); + + map.put( name, value ); + + index = m.end(); + + } + + return map; } 1.2 +10 -7 jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/test/TestFeedLocator.java Index: TestFeedLocator.java =================================================================== RCS file: /home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/test/TestFeedLocator.java,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- TestFeedLocator.java 13 Aug 2004 21:53:57 -0000 1.1 +++ TestFeedLocator.java 13 Aug 2004 23:17:19 -0000 1.2 @@ -60,15 +60,18 @@ throw new Exception( "NO LINKS FOUND" ); } + System.out.println( "Atom: " + l.getAdAtomFeed() ); + System.out.println( "RSS: " + l.getAdRSSFeed() ); + } public void test1() throws Exception { -// doTest( "file:///projects/feedparser/tests/locate1.html" ); -// doTest( "file:///projects/feedparser/tests/locate2.html" ); -// doTest( "file:///projects/feedparser/tests/locate3.html" ); -// doTest( "file:///projects/feedparser/tests/locate4.html" ); -// doTest( "file:///projects/feedparser/tests/locate5.html" ); -// doTest( "file:///projects/feedparser/tests/locate6.html" ); + doTest( "file:///projects/feedparser/tests/locate1.html" ); + doTest( "file:///projects/feedparser/tests/locate2.html" ); + 
doTest( "file:///projects/feedparser/tests/locate3.html" ); + doTest( "file:///projects/feedparser/tests/locate4.html" ); + doTest( "file:///projects/feedparser/tests/locate5.html" ); + doTest( "file:///projects/feedparser/tests/locate6.html" ); doTest( "file:///projects/feedparser/tests/locate7.html" ); } --------------------------------------------------------------------- To unsubscribe, e-mail: commons-dev-unsubscribe@jakarta.apache.org For additional commands, e-mail: commons-dev-help@jakarta.apache.org