lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From rm...@apache.org
Subject svn commit: r1031467 - in /lucene/dev/trunk/lucene/contrib: CHANGES.txt demo/src/java/org/apache/lucene/demo/html/HTMLParser.java demo/src/java/org/apache/lucene/demo/html/HTMLParser.jj demo/src/test/org/apache/lucene/demo/html/TestHtmlParser.java
Date Fri, 05 Nov 2010 08:19:34 GMT
Author: rmuir
Date: Fri Nov  5 08:19:34 2010
New Revision: 1031467

URL: http://svn.apache.org/viewvc?rev=1031467&view=rev
Log:
LUCENE-590: Demo HTML parser gives incorrect summaries when title is repeated as a heading

Modified:
    lucene/dev/trunk/lucene/contrib/CHANGES.txt
    lucene/dev/trunk/lucene/contrib/demo/src/java/org/apache/lucene/demo/html/HTMLParser.java
    lucene/dev/trunk/lucene/contrib/demo/src/java/org/apache/lucene/demo/html/HTMLParser.jj
    lucene/dev/trunk/lucene/contrib/demo/src/test/org/apache/lucene/demo/html/TestHtmlParser.java

Modified: lucene/dev/trunk/lucene/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/CHANGES.txt?rev=1031467&r1=1031466&r2=1031467&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/contrib/CHANGES.txt Fri Nov  5 08:19:34 2010
@@ -140,6 +140,9 @@ Bug fixes
   
 * LUCENE-2246: Fix contrib/demo for Turkish html documents.
   (Selim Nadi via Robert Muir)  
+  
+* LUCENE-590: Demo HTML parser gives incorrect summaries when title is repeated as a heading
+  (Curtis d'Entremont via Robert Muir)
    
 API Changes
 

Modified: lucene/dev/trunk/lucene/contrib/demo/src/java/org/apache/lucene/demo/html/HTMLParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/demo/src/java/org/apache/lucene/demo/html/HTMLParser.java?rev=1031467&r1=1031466&r2=1031467&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/demo/src/java/org/apache/lucene/demo/html/HTMLParser.java (original)
+++ lucene/dev/trunk/lucene/contrib/demo/src/java/org/apache/lucene/demo/html/HTMLParser.java Fri Nov  5 08:19:34 2010
@@ -84,7 +84,7 @@ InterruptedException {
 
     String sum = summary.toString().trim();
     String tit = getTitle();
-    if (sum.startsWith(tit) || sum.equals(""))
+    if (sum.equals(""))
       return tit;
     else
       return sum;

Modified: lucene/dev/trunk/lucene/contrib/demo/src/java/org/apache/lucene/demo/html/HTMLParser.jj
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/demo/src/java/org/apache/lucene/demo/html/HTMLParser.jj?rev=1031467&r1=1031466&r2=1031467&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/demo/src/java/org/apache/lucene/demo/html/HTMLParser.jj (original)
+++ lucene/dev/trunk/lucene/contrib/demo/src/java/org/apache/lucene/demo/html/HTMLParser.jj Fri Nov  5 08:19:34 2010
@@ -111,7 +111,7 @@ InterruptedException {
 
     String sum = summary.toString().trim();
     String tit = getTitle();
-    if (sum.startsWith(tit) || sum.equals(""))
+    if (sum.equals(""))
       return tit;
     else
       return sum;

Modified: lucene/dev/trunk/lucene/contrib/demo/src/test/org/apache/lucene/demo/html/TestHtmlParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/demo/src/test/org/apache/lucene/demo/html/TestHtmlParser.java?rev=1031467&r1=1031466&r2=1031467&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/demo/src/test/org/apache/lucene/demo/html/TestHtmlParser.java (original)
+++ lucene/dev/trunk/lucene/contrib/demo/src/test/org/apache/lucene/demo/html/TestHtmlParser.java Fri Nov  5 08:19:34 2010
@@ -105,6 +105,13 @@ public class TestHtmlParser extends Luce
     assertEquals(200, parser.getSummary().length());
   }
   
+  // LUCENE-590
+  public void testSummaryTitle() throws Exception {
+    String text = "<html><head><title>Summary</title></head><body>Summary of the document</body></html>";
+    HTMLParser parser = new HTMLParser(new StringReader(text));
+    assertEquals("Summary of the document", parser.getSummary());
+  }
+  
   // LUCENE-2246
   public void testTurkish() throws Exception {
     String text = "<html><body>" +



Mime
View raw message