lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From nightowl...@apache.org
Subject [1/6] lucenenet git commit: Lucene.Net.Tests.Analysis.Common.Analysis.CharFilter.HTMLStripCharFilterTest: Added note about version compatibility level 4.8.1 and reformatted tests for easier reading
Date Sun, 26 Mar 2017 03:52:43 GMT
Repository: lucenenet
Updated Branches:
  refs/heads/api-work 54bad2c2d -> 548e768cc


Lucene.Net.Tests.Analysis.Common.Analysis.CharFilter.HTMLStripCharFilterTest: Added note about
version compatibility level 4.8.1 and reformatted tests for easier reading


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/31ceeb20
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/31ceeb20
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/31ceeb20

Branch: refs/heads/api-work
Commit: 31ceeb20653ba84a26606f28c7f51e7baebe361b
Parents: 54bad2c
Author: Shad Storhaug <shad@shadstorhaug.com>
Authored: Sun Mar 26 04:19:18 2017 +0700
Committer: Shad Storhaug <shad@shadstorhaug.com>
Committed: Sun Mar 26 04:19:18 2017 +0700

----------------------------------------------------------------------
 .../Analysis/CharFilter/HTMLStripCharFilter.cs  |   1 +
 .../CharFilters/HTMLStripCharFilterTest.cs      | 228 ++++++++++++++++++-
 2 files changed, 217 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/31ceeb20/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/HTMLStripCharFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/HTMLStripCharFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/HTMLStripCharFilter.cs
index 7dba4f6..7184212 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/HTMLStripCharFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/HTMLStripCharFilter.cs
@@ -29,6 +29,7 @@ namespace Lucene.Net.Analysis.CharFilters
     /// <summary>
     /// A <see cref="CharFilter"/> that wraps another <see cref="TextReader"/> and attempts to strip out HTML constructs.
     /// </summary>
+    // LUCENENET NOTE: Version compatibility level 4.8.1 (added fix for SOLR-5983: HTMLStripCharFilter is treating CDATA sections incorrectly)
     public sealed class HTMLStripCharFilter : BaseCharFilter
     {
         /// <summary>This character denotes the end of file</summary>

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/31ceeb20/src/Lucene.Net.Tests.Analysis.Common/Analysis/CharFilters/HTMLStripCharFilterTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/CharFilters/HTMLStripCharFilterTest.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/CharFilters/HTMLStripCharFilterTest.cs
index 2b07c0d..0db491d 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/CharFilters/HTMLStripCharFilterTest.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/CharFilters/HTMLStripCharFilterTest.cs
@@ -25,6 +25,7 @@ namespace Lucene.Net.Analysis.CharFilters
 	 * limitations under the License.
 	 */
 
+    // LUCENENET NOTE: Version compatibility level 4.8.1 (added fix for SOLR-5983: HTMLStripCharFilter is treating CDATA sections incorrectly)
     public class HTMLStripCharFilterTest : BaseTokenStreamTestCase
     {
 
@@ -56,8 +57,12 @@ namespace Lucene.Net.Analysis.CharFilters
         [Test]
         public virtual void Test()
         {
-            string html = "<div class=\"foo\">this is some text</div> here is
a <a href=\"#bar\">link</a> and " + "another <a href=\"http://lucene.apache.org/\">link</a>.
" + "This is an entity: &amp; plus a &lt;.  Here is an &. <!-- is a comment
-->";
-            string gold = "\nthis is some text\n here is a link and " + "another link. "
+ "This is an entity: & plus a <.  Here is an &. ";
+            string html = "<div class=\"foo\">this is some text</div> here is
a <a href=\"#bar\">link</a> and " + 
+                "another <a href=\"http://lucene.apache.org/\">link</a>. " +

+                "This is an entity: &amp; plus a &lt;.  Here is an &. <!--
is a comment -->";
+            string gold = "\nthis is some text\n here is a link and " + 
+                "another link. " + 
+                "This is an entity: & plus a <.  Here is an &. ";
             AssertHTMLStripsTo(html, gold, null);
         }
 
@@ -95,7 +100,8 @@ namespace Lucene.Net.Analysis.CharFilters
                 builder.Append((char)ch);
             }
             // Compare trim()'d output to gold
-            assertEquals("'" + builder.ToString().Trim() + "' is not equal to '" + gold +
"'", gold, builder.ToString().Trim());
+            assertEquals("'" + builder.ToString().Trim() + "' is not equal to '" + gold +
"'", 
+                gold, builder.ToString().Trim());
         }
 
         [Test]
@@ -144,7 +150,169 @@ namespace Lucene.Net.Analysis.CharFilters
         [Test]
         public virtual void TestMalformedHTML()
         {
-            string[] testGold = new string[] { "a <a hr<ef=aa<a>> </close</a>",
"a <a hr<ef=aa> </close", "<a href=http://dmoz.org/cgi-bin/add.cgi?where=/arts/\"
class=lu style=\"font-size: 9px\" target=dmoz>Submit a Site</a>", "Submit a Site",
"<a href=javascript:ioSwitch('p8','http://www.csmonitor.com/') title=expand id=e8 class=expanded
rel=http://www.csmonitor.com/>Christian Science", "Christian Science", "<link rel=\"alternate\"
type=\"application/rss+xml\" title=\"San Francisco \" 2008 RSS Feed\" href=\"http://2008.sf.wordcamp.org/feed/\"
/>", "\n", "<a href=\" http://www.surgery4was.happyhost.org/video-of-arthroscopic-knee-surgery
symptoms.html, heat congestive heart failure <a href=\" http://www.symptoms1bad.happyhost.org/canine",
"<a href=\" http://www.surgery4was.happyhost.org/video-of-arthroscopic-knee-surgery symptoms.html,
heat congestive heart failure <a href=\" http://www.symptoms1bad.happyhost.org/canine",
"<a href=\"http://ucblibraries.colorado.edu/how/index.htm\"cl
 ass=\"pageNavAreaText\">", "", "<link title=\"^\\\" 21Sta's Blog\" rel=\"search\" 
type=\"application/opensearchdescription+xml\"  href=\"http://21sta.com/blog/inc/opensearch.php\"
/>", "\n", "<a href=\"#postcomment\" title=\"\"Leave a comment\";\">?", "?", "<a
href='/modern-furniture'   ' id='21txt' class='offtab'   onMouseout=\"this.className='offtab';
 return true;\" onMouseover=\"this.className='ontab';  return true;\">", "", "<a href='http://alievi.wordpress.com/category/01-todos-posts/'
style='font-size: 275%; padding: 1px; margin: 1px;' title='01 - Todos Post's (83)'>", "",
"The <a href=<a href=\"http://www.advancedmd.com>medical\">http://www.advancedmd.com>medical</a>
practice software</a>", "The <a href=medical\">http://www.advancedmd.com>medical
practice software", "<a href=\"node/21426\" class=\"clipTitle2\" title=\"Levi.com/BMX 2008
Clip of the Week 29 \"Morgan Wade Leftover Clips\"\">Levi.com/BMX 2008 Clip of the Week
29...", "Levi.com/BMX 2008 Clip of the Week 29...", 
 "<a href=\"printer_friendly.php?branch=&year=&submit=go&screen=\";\">Printer
Friendly", "Printer Friendly", "<a href=#\" ondragstart=\"return false\" onclick=\"window.external.AddFavorite('http://www.amazingtextures.com',
'Amazing Textures');return false\" onmouseover=\"window.status='Add to Favorites';return true\">Add
to Favorites", "Add to Favorites", "<a href=\"../at_home/at_home_search.html\"../_home/at_home_search.html\">At",
"At", "E-mail: <a href=\"\"mailto:XXXXXX@example.com\" \">XXXXXX@example.com </a>",
"E-mail: XXXXXX@example.com ", "<li class=\"farsi\"><a title=\"A'13?\" alt=\"A'13?\"
href=\"http://www.america.gov/persian\" alt=\"\" name=\"A'13?\"A'13? title=\"A'13?\">A'13?</a></li>",
"\nA'13?\n", "<li><a href=\"#28\" title=\"Hubert \"Geese\" Ausby\">Hubert \"Geese\"
Ausby</a></li>", "\nHubert \"Geese\" Ausby\n", "<href=\"http://anbportal.com/mms/login.asp\">",
"\n", "<a href=\"", "<a href=\"", "<a href=\">", "", "<a rel=\"nofollow\" href=\"http://anissanina31.skyrock.c
 om/1895039493-Hi-tout-le-monde.html\" title=\" Hi, tout le monde !>#</a>", "#",
"<a href=\"http://annunciharleydavidsonusate.myblog.it/\" title=\"Annunci Moto e Accessori
Harley Davidson\" target=\"_blank\"><img src=\"http://annunciharleydavidsonusate.myblog.it/images/Antipixel.gif\"
/></a>", "", "<a href=\"video/addvideo&v=120838887181\" onClick=\"return confirm('Are
you sure you want  add this video to your profile? If it exists some video in your profile
will be overlapped by this video!!')\" \" onmouseover=\"this.className='border2'\" onmouseout=\"this.className=''\">",
"", "<a href=#Services & Support>", "", "<input type=\"image\" src=\"http://apologyindex.com/ThemeFiles/83401-72905/images/btn_search.gif\"value=\"Search\"
name=\"Search\" alt=\"Search\" class=\"searchimage\" onclick=\"incom ='&sc=' + document.getElementById('sel').value
; var dt ='&dt=' + document.getElementById('dt').value; var searchKeyword = document.getElementById('q').value
; searchKeyword = searchKeyword.r
 eplace(/\\s/g,''); if (searchKeyword.length < 3){alert('Nothing to search. Search keyword
should contain atleast 3 chars.'); return false; } var al='&al=' +  document.getElementById('advancedlink').style.display
;  document.location.href='http://apologyindex.com/search.aspx?q=' + document.getElementById('q').value
+ incom + dt + al;\" />", "", "<input type=\"image\" src=\"images/afbe.gif\" width=\"22\"
height=\"22\"  hspace=\"4\" title=\"Add to Favorite\" alt=\"Add to Favorite\"onClick=\" if(window.sidebar){
window.sidebar.addPanel(document.title,location.href,''); }else if(window.external){ window.external.AddFavorite(location.href,document.title);
}else if(window.opera&&window.print) { return true; }\">", "", "<area shape=\"rect\"
coords=\"12,153,115,305\" href=\"http://statenislandtalk.com/v-web/gallery/Osmundsen-family\"Art's
Norwegian Roots in Rogaland\">", "\n", "<a rel=\"nofollow\" href=\"http://arth26.skyrock.com/660188240-bonzai.html\"
title=\"bonza>#", "#", "<a href=  >", 
 "", "<ahref=http:..", "<ahref=http:..", "<ahref=http:..>", "\n", "<ahref=\"http://aseigo.bddf.ca/cms/1025\">A",
"\nA", "<a href=\"javascript:calendar_window=window.open('/calendar.aspx?formname=frmCalendar.txtDate','calendar_window','width=154,height=188');calendar_window.focus()\">",
"", "<a href=\"/applications/defenseaerospace/19+rackmounts\" title=\"19\" Rackmounts\">",
"", "<a href=http://www.azimprimerie.fr/flash/backup/lewes-zip-code/savage-model-110-manual.html
title=savage model 110 manual rel=dofollow>", "", "<a class=\"at\" name=\"Lamborghini
 href=\"http://lamborghini.coolbegin.com\">Lamborghini /a>", "Lamborghini /a>", "<A
href='newslink.php?news_link=http%3A%2F%2Fwww.worldnetdaily.com%2Findex.php%3Ffa%3DPAGE.view%26pageId%3D85729&news_title=Florida
QB makes 'John 3:16' hottest Google search Tebow inscribed Bible reference on eye black for
championship game' TARGET=_blank>", "", "<a href=/myspace !style='color:#993333'>",
"", "<meta name=3DProgId content=3DExcel.Sheet>"
 , "\n", "<link id=3D\"shLink\" href=3D\"PSABrKelly-BADMINTONCupResults08FINAL2008_09_19=_files/sheet004.htm\">",
"\n", "<td bgcolor=3D\"#FFFFFF\" nowrap>", "\n", "<a href=\"http://basnect.info/usersearch/\"predicciones-mundiales-2009\".html\">\"predicciones
mundiales 2009\"</a>", "\"predicciones mundiales 2009\"", "<a class=\"comment-link\"
href=\"https://www.blogger.com/comment.g?blogID=19402125&postID=114070605958684588\"location.href=https://www.blogger.com/comment.g?blogID=19402125&postID=114070605958684588;>",
"", "<a href = \"/videos/Bishop\"/\" title = \"click to see more Bishop\" videos\">Bishop\"</a>",
"Bishop\"", "<a href=\"http://bhaa.ie/calendar/event.php?eid=20081203150127531\"\">BHAA
Eircom 2 &amp; 5 miles CC combined start</a>", "BHAA Eircom 2 & 5 miles CC combined
start", "<a href=\"http://people.tribe.net/wolfmana\" onClick='setClick(\"Application[tribe].Person[bb7df210-9dc0-478c-917f-436b896bcb79]\")'\"
title=\"Mana\">", "", "<a  href=\"http://blog.edu-cyberpg.com/
 ct.ashx?id=6143c528-080c-4bb2-b765-5ec56c8256d3&url=http%3a%2f%2fwww.gsa.ac.uk%2fmackintoshsketchbook%2f\"\"
eudora=\"autourl\">", "", "<input type=\"text\" value=\"<search here>\">",
"<input type=\"text\" value=\"\n\">", "<input type=\"text\" value=\"<search here\">",
"<input type=\"text\" value=\"\n", "<input type=\"text\" value=\"search here>\">",
"\">", "<input type=\"text\" value=\"&lt;search here&gt;\" onFocus=\"this.value='<search
here>'\">", "", "<![if ! IE]>\n<link href=\"http://i.deviantart.com/icons/favicon.png\"
rel=\"shortcut icon\"/>\n<![endif]>", "\n\n\n", "<![if supportMisalignedColumns]>\n<tr
height=0 style='display:none'>\n<td width=64 style='width:48pt'></td>\n</tr>\n<![endif]>",
"\n\n\n\n\n\n\n\n" };
+            string[] testGold = {
+                "a <a hr<ef=aa<a>> </close</a>",
+                "a <a hr<ef=aa> </close",
+
+                "<a href=http://dmoz.org/cgi-bin/add.cgi?where=/arts/\" class=lu style=\"font-size:
9px\" target=dmoz>Submit a Site</a>",
+                "Submit a Site",
+
+                "<a href=javascript:ioSwitch('p8','http://www.csmonitor.com/') title=expand
id=e8 class=expanded rel=http://www.csmonitor.com/>Christian Science",
+                "Christian Science",
+
+                "<link rel=\"alternate\" type=\"application/rss+xml\" title=\"San Francisco
\" 2008 RSS Feed\" href=\"http://2008.sf.wordcamp.org/feed/\" />",
+                "\n",
+
+                "<a href=\" http://www.surgery4was.happyhost.org/video-of-arthroscopic-knee-surgery
symptoms.html, heat congestive heart failure <a href=\" http://www.symptoms1bad.happyhost.org/canine",
+                "<a href=\" http://www.surgery4was.happyhost.org/video-of-arthroscopic-knee-surgery
symptoms.html, heat congestive heart failure <a href=\" http://www.symptoms1bad.happyhost.org/canine",
+
+                "<a href=\"http://ucblibraries.colorado.edu/how/index.htm\"class=\"pageNavAreaText\">",
+                "",
+
+                "<link title=\"^\\\" 21Sta's Blog\" rel=\"search\"  type=\"application/opensearchdescription+xml\"
 href=\"http://21sta.com/blog/inc/opensearch.php\" />",
+                "\n",
+
+                "<a href=\"#postcomment\" title=\"\"Leave a comment\";\">?",
+                "?",
+
+                "<a href='/modern-furniture'   ' id='21txt' class='offtab'   onMouseout=\"this.className='offtab';
 return true;\" onMouseover=\"this.className='ontab';  return true;\">",
+                "",
+
+                "<a href='http://alievi.wordpress.com/category/01-todos-posts/' style='font-size:
275%; padding: 1px; margin: 1px;' title='01 - Todos Post's (83)'>",
+                "",
+
+                "The <a href=<a href=\"http://www.advancedmd.com>medical\">http://www.advancedmd.com>medical</a>
practice software</a>",
+                "The <a href=medical\">http://www.advancedmd.com>medical practice
software",
+
+                "<a href=\"node/21426\" class=\"clipTitle2\" title=\"Levi.com/BMX 2008
Clip of the Week 29 \"Morgan Wade Leftover Clips\"\">Levi.com/BMX 2008 Clip of the Week
29...",
+                "Levi.com/BMX 2008 Clip of the Week 29...",
+
+                "<a href=\"printer_friendly.php?branch=&year=&submit=go&screen=\";\">Printer
Friendly",
+                "Printer Friendly",
+
+                "<a href=#\" ondragstart=\"return false\" onclick=\"window.external.AddFavorite('http://www.amazingtextures.com',
'Amazing Textures');return false\" onmouseover=\"window.status='Add to Favorites';return true\">Add
to Favorites",
+                "Add to Favorites",
+
+                "<a href=\"../at_home/at_home_search.html\"../_home/at_home_search.html\">At",
+                "At",
+
+                "E-mail: <a href=\"\"mailto:XXXXXX@example.com\" \">XXXXXX@example.com
</a>",
+                "E-mail: XXXXXX@example.com ",
+
+                "<li class=\"farsi\"><a title=\"A'13?\" alt=\"A'13?\" href=\"http://www.america.gov/persian\"
alt=\"\" name=\"A'13?\"A'13? title=\"A'13?\">A'13?</a></li>",
+                "\nA'13?\n",
+
+                "<li><a href=\"#28\" title=\"Hubert \"Geese\" Ausby\">Hubert
\"Geese\" Ausby</a></li>",
+                "\nHubert \"Geese\" Ausby\n",
+
+                "<href=\"http://anbportal.com/mms/login.asp\">",
+                "\n",
+
+                "<a href=\"",
+                "<a href=\"",
+
+                "<a href=\">",
+                "",
+
+                "<a rel=\"nofollow\" href=\"http://anissanina31.skyrock.com/1895039493-Hi-tout-le-monde.html\"
title=\" Hi, tout le monde !>#</a>",
+                "#",
+
+                "<a href=\"http://annunciharleydavidsonusate.myblog.it/\" title=\"Annunci
Moto e Accessori Harley Davidson\" target=\"_blank\"><img src=\"http://annunciharleydavidsonusate.myblog.it/images/Antipixel.gif\"
/></a>",
+                "",
+
+                "<a href=\"video/addvideo&v=120838887181\" onClick=\"return confirm('Are
you sure you want  add this video to your profile? If it exists some video in your profile
will be overlapped by this video!!')\" \" onmouseover=\"this.className='border2'\" onmouseout=\"this.className=''\">",
+                "",
+
+                "<a href=#Services & Support>",
+                "",
+
+                "<input type=\"image\" src=\"http://apologyindex.com/ThemeFiles/83401-72905/images/btn_search.gif\"value=\"Search\"
name=\"Search\" alt=\"Search\" class=\"searchimage\" onclick=\"incom ='&sc=' + document.getElementById('sel').value
; var dt ='&dt=' + document.getElementById('dt').value; var searchKeyword = document.getElementById('q').value
; searchKeyword = searchKeyword.replace(/\\s/g,''); if (searchKeyword.length < 3){alert('Nothing
to search. Search keyword should contain atleast 3 chars.'); return false; } var al='&al='
+  document.getElementById('advancedlink').style.display ;  document.location.href='http://apologyindex.com/search.aspx?q='
+ document.getElementById('q').value + incom + dt + al;\" />",
+                "",
+
+                "<input type=\"image\" src=\"images/afbe.gif\" width=\"22\" height=\"22\"
 hspace=\"4\" title=\"Add to Favorite\" alt=\"Add to Favorite\"onClick=\" if(window.sidebar){
window.sidebar.addPanel(document.title,location.href,''); }else if(window.external){ window.external.AddFavorite(location.href,document.title);
}else if(window.opera&&window.print) { return true; }\">",
+                "",
+
+                "<area shape=\"rect\" coords=\"12,153,115,305\" href=\"http://statenislandtalk.com/v-web/gallery/Osmundsen-family\"Art's
Norwegian Roots in Rogaland\">",
+                "\n",
+
+                "<a rel=\"nofollow\" href=\"http://arth26.skyrock.com/660188240-bonzai.html\"
title=\"bonza>#",
+                "#",
+
+                "<a href=  >",
+                "",
+
+                "<ahref=http:..",
+                "<ahref=http:..",
+
+                "<ahref=http:..>",
+                "\n",
+
+                "<ahref=\"http://aseigo.bddf.ca/cms/1025\">A",
+                "\nA",
+
+                "<a href=\"javascript:calendar_window=window.open('/calendar.aspx?formname=frmCalendar.txtDate','calendar_window','width=154,height=188');calendar_window.focus()\">",
+                "",
+
+                "<a href=\"/applications/defenseaerospace/19+rackmounts\" title=\"19\"
Rackmounts\">",
+                "",
+
+                "<a href=http://www.azimprimerie.fr/flash/backup/lewes-zip-code/savage-model-110-manual.html
title=savage model 110 manual rel=dofollow>",
+                "",
+
+                "<a class=\"at\" name=\"Lamborghini  href=\"http://lamborghini.coolbegin.com\">Lamborghini
/a>",
+                "Lamborghini /a>",
+
+                "<A href='newslink.php?news_link=http%3A%2F%2Fwww.worldnetdaily.com%2Findex.php%3Ffa%3DPAGE.view%26pageId%3D85729&news_title=Florida
QB makes 'John 3:16' hottest Google search Tebow inscribed Bible reference on eye black for
championship game' TARGET=_blank>",
+                "",
+
+                "<a href=/myspace !style='color:#993333'>",
+                "",
+
+                "<meta name=3DProgId content=3DExcel.Sheet>",
+                "\n",
+
+                "<link id=3D\"shLink\" href=3D\"PSABrKelly-BADMINTONCupResults08FINAL2008_09_19=_files/sheet004.htm\">",
+                "\n",
+
+                "<td bgcolor=3D\"#FFFFFF\" nowrap>",
+                "\n",
+
+                "<a href=\"http://basnect.info/usersearch/\"predicciones-mundiales-2009\".html\">\"predicciones
mundiales 2009\"</a>",
+                "\"predicciones mundiales 2009\"",
+
+                "<a class=\"comment-link\" href=\"https://www.blogger.com/comment.g?blogID=19402125&postID=114070605958684588\"location.href=https://www.blogger.com/comment.g?blogID=19402125&postID=114070605958684588;>",
+                "",
+
+                "<a href = \"/videos/Bishop\"/\" title = \"click to see more Bishop\"
videos\">Bishop\"</a>",
+                "Bishop\"",
+
+                "<a href=\"http://bhaa.ie/calendar/event.php?eid=20081203150127531\"\">BHAA
Eircom 2 &amp; 5 miles CC combined start</a>",
+                "BHAA Eircom 2 & 5 miles CC combined start",
+
+                "<a href=\"http://people.tribe.net/wolfmana\" onClick='setClick(\"Application[tribe].Person[bb7df210-9dc0-478c-917f-436b896bcb79]\")'\"
title=\"Mana\">",
+                "",
+
+                "<a  href=\"http://blog.edu-cyberpg.com/ct.ashx?id=6143c528-080c-4bb2-b765-5ec56c8256d3&url=http%3a%2f%2fwww.gsa.ac.uk%2fmackintoshsketchbook%2f\"\"
eudora=\"autourl\">",
+                "",
+
+                "<input type=\"text\" value=\"<search here>\">",
+                "<input type=\"text\" value=\"\n\">",
+
+                "<input type=\"text\" value=\"<search here\">",
+                "<input type=\"text\" value=\"\n",
+
+                "<input type=\"text\" value=\"search here>\">",
+                "\">",
+
+                "<input type=\"text\" value=\"&lt;search here&gt;\" onFocus=\"this.value='<search
here>'\">",
+                "",
+
+                "<![if ! IE]>\n<link href=\"http://i.deviantart.com/icons/favicon.png\"
rel=\"shortcut icon\"/>\n<![endif]>",
+                "\n\n\n",
+
+                "<![if supportMisalignedColumns]>\n<tr height=0 style='display:none'>\n<td
width=64 style='width:48pt'></td>\n</tr>\n<![endif]>",
+                "\n\n\n\n\n\n\n\n"
+            };
             for (int i = 0; i < testGold.Length; i += 2)
             {
                 AssertHTMLStripsTo(testGold[i], testGold[i + 1], null);
@@ -249,7 +417,8 @@ namespace Lucene.Net.Analysis.CharFilters
             while ((ch = reader.Read()) > 0)
             {
                 int correction = reader.CorrectOffset(off);
-                assertTrue("invalid offset correction: " + off + "->" + correction + "
for doc of length: " + length, correction <= length);
+                assertTrue("invalid offset correction: " + off + "->" + correction + "
for doc of length: " + length, 
+                    correction <= length);
                 off++;
             }
         }
@@ -284,7 +453,9 @@ namespace Lucene.Net.Analysis.CharFilters
         [Test]
         public virtual void TestServerSideIncludes()
         {
-            string test = "one<img src=\"image.png\"\n" + " alt =  \"Alt: <!--#echo
var='${IMAGE_CAPTION:<!--comment-->\\'Comment\\'}'  -->\"\n\n" + " title=\"Title:
<!--#echo var=\"IMAGE_CAPTION\"-->\">two";
+            string test = "one<img src=\"image.png\"\n" + 
+                " alt =  \"Alt: <!--#echo var='${IMAGE_CAPTION:<!--comment-->\\'Comment\\'}'
 -->\"\n\n" + 
+                " title=\"Title: <!--#echo var=\"IMAGE_CAPTION\"-->\">two";
             string gold = "onetwo";
             AssertHTMLStripsTo(test, gold, null);
 
@@ -317,7 +488,11 @@ namespace Lucene.Net.Analysis.CharFilters
         [Test]
         public virtual void TestStyle()
         {
-            string test = "one<style type=\"text/css\">\n" + "<!--\n" + "@import
url('http://www.lasletrasdecanciones.com/css.css');\n" + "-->\n" + "</style>two";
+            string test = "one<style type=\"text/css\">\n" + 
+                "<!--\n" + 
+                "@import url('http://www.lasletrasdecanciones.com/css.css');\n" + 
+                "-->\n" + 
+                "</style>two";
             string gold = "one\ntwo";
             AssertHTMLStripsTo(test, gold, null);
         }
@@ -334,7 +509,13 @@ namespace Lucene.Net.Analysis.CharFilters
         [Test]
         public virtual void TestBR()
         {
-            string[] testGold = new string[] { "one<BR />two<br>three", "one\ntwo\nthree",
"one<BR some stuff here too>two</BR>", "one\ntwo\n" };
+            string[] testGold = {
+                "one<BR />two<br>three",
+                "one\ntwo\nthree",
+
+                "one<BR some stuff here too>two</BR>",
+                "one\ntwo\n"
+            };
             for (int i = 0; i < testGold.Length; i += 2)
             {
                 AssertHTMLStripsTo(testGold[i], testGold[i + 1], null);
@@ -361,13 +542,37 @@ namespace Lucene.Net.Analysis.CharFilters
         public virtual void TestCDATA()
         {
             int maxNumElems = 100;
-            string randomHtmlishString1 = TestUtil.RandomHtmlishString(Random(), maxNumElems).Replace(">",
" ").replaceFirst("^--", "__"); // Don't create a comment (disallow "<!--") and don't include
a closing ">"
+            string randomHtmlishString1 // Don't create a comment (disallow "<!--") and
don't include a closing ">"
+                = TestUtil.RandomHtmlishString(Random(), maxNumElems).Replace(">", " ").replaceFirst("^--",
"__");
             string closedAngleBangNonCDATA = "<!" + randomHtmlishString1 + "-[CDATA[&]]>";
 
-            string randomHtmlishString2 = TestUtil.RandomHtmlishString(Random(), maxNumElems).Replace(">",
" ").replaceFirst("^--", "__"); // Don't create a comment (disallow "<!--") and don't include
a closing ">"
+            string randomHtmlishString2 // Don't create a comment (disallow "<!--") and
don't include a closing ">"
+                = TestUtil.RandomHtmlishString(Random(), maxNumElems).Replace(">", " ").replaceFirst("^--",
"__");
             string unclosedAngleBangNonCDATA = "<!" + randomHtmlishString1 + "-[CDATA[";
 
-            string[] testGold = new string[] { "one<![CDATA[<one><two>three<four></four></two></one>]]>two",
"one<one><two>three<four></four></two></one>two", "one<![CDATA[two<![CDATA[three]]]]><![CDATA[>four]]>five",
"onetwo<![CDATA[three]]>fourfive", "<! [CDATA[&]]>", "", "<! [CDATA[&]
] >", "", "<! [CDATA[&]]", "<! [CDATA[&]]", "<!\u2009[CDATA[&]]>",
"", "<!\u2009[CDATA[&]\u2009]\u2009>", "", "<!\u2009[CDATA[&]\u2009]\u2009",
"<!\u2009[CDATA[&]\u2009]\u2009", closedAngleBangNonCDATA, "", "<![CDATA[", "",
"<![CDATA[<br>", "<br>", "<![CDATA[<br>]]", "<br>]]", "<![CDATA[<br>]]>",
"<br>", "<![CDATA[<br>] ] >", "<br>] ] >", "<![CDATA[<br>]\u2009]\u2009>",
"<br>]\u2009]\u2009>", "<!\u2009[CDATA[", "<!\u2009[CDATA[", unclosedAngleBangNonCDATA,
unclosedAngleBangNonCDATA };
+            string[] testGold = {
+                "one<![CDATA[<one><two>three<four></four></two></one>]]>two",
+                "one<one><two>three<four></four></two></one>two",
+
+                "one<![CDATA[two<![CDATA[three]]]]><![CDATA[>four]]>five",
+                "onetwo<![CDATA[three]]>fourfive",
+
+                "<! [CDATA[&]]>", "",
+                "<! [CDATA[&] ] >", "",
+                "<! [CDATA[&]]", "<! [CDATA[&]]", // unclosed angle bang -
all input is output
+                "<!\u2009[CDATA[&]]>", "",
+                "<!\u2009[CDATA[&]\u2009]\u2009>", "",
+                "<!\u2009[CDATA[&]\u2009]\u2009", "<!\u2009[CDATA[&]\u2009]\u2009",
// unclosed angle bang - all input is output
+                closedAngleBangNonCDATA, "",
+                "<![CDATA[", "",
+                "<![CDATA[<br>", "<br>",
+                "<![CDATA[<br>]]", "<br>]]",
+                "<![CDATA[<br>]]>", "<br>",
+                "<![CDATA[<br>] ] >", "<br>] ] >",
+                "<![CDATA[<br>]\u2009]\u2009>", "<br>]\u2009]\u2009>",
+                "<!\u2009[CDATA[", "<!\u2009[CDATA[",
+                unclosedAngleBangNonCDATA, unclosedAngleBangNonCDATA
+            };
             for (int i = 0; i < testGold.Length; i += 2)
             {
                 AssertHTMLStripsTo(testGold[i], testGold[i + 1], null);
@@ -525,5 +730,4 @@ namespace Lucene.Net.Analysis.CharFilters
             assertEquals("'" + builder.ToString() + "' is not equal to '" + gold + "'", gold,
builder.ToString());
         }
     }
-
 }
\ No newline at end of file


Mime
View raw message