lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From synhers...@apache.org
Subject [25/27] lucenenet git commit: adding converted analysis common tests
Date Thu, 10 Dec 2015 18:39:14 GMT
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Charfilter/HTMLStripCharFilterTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Charfilter/HTMLStripCharFilterTest.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Charfilter/HTMLStripCharFilterTest.cs
new file mode 100644
index 0000000..81bdd31
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Charfilter/HTMLStripCharFilterTest.cs
@@ -0,0 +1,570 @@
+using System;
+using System.Collections.Generic;
+using System.Text;
+
+namespace org.apache.lucene.analysis.charfilter
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using TestUtil = org.apache.lucene.util.TestUtil;
+
+	public class HTMLStripCharFilterTest : BaseTokenStreamTestCase
+	{
+
+	  /// <summary>Creates a new <c>AnalyzerAnonymousInnerClassHelper</c> for the analyzer-based tests in this class.</summary>
+	  private static Analyzer newTestAnalyzer()
+	  {
+		Analyzer testAnalyzer = new AnalyzerAnonymousInnerClassHelper();
+		return testAnalyzer;
+	  }
+
+	  /// <summary>
+	  /// Analyzer that wraps its input reader in an <c>HTMLStripCharFilter</c> and tokenizes the
+	  /// result with a <c>MockTokenizer</c> configured with <c>MockTokenizer.WHITESPACE</c>.
+	  /// </summary>
+	  private class AnalyzerAnonymousInnerClassHelper : Analyzer
+	  {
+		  /// <summary>Wraps <paramref name="reader"/> in an <c>HTMLStripCharFilter</c>.</summary>
+		  protected internal override Reader initReader(string fieldName, Reader reader)
+		  {
+			Reader stripped = new HTMLStripCharFilter(reader);
+			return stripped;
+		  }
+
+		  /// <summary>Uses one <c>MockTokenizer</c> as both the source and the sink of the components.</summary>
+		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		  {
+			Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+			return new TokenStreamComponents(source, source);
+		  }
+	  }
+
+	  //this is some text  here is a  link  and another  link . This is an entity: & plus a <.  Here is an &
+	  //
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void test() throws Exception
+	  /// <summary>Strips a short snippet containing tags, entities and a comment and compares the result with the expected plain text.</summary>
+	  public virtual void test()
+	  {
+		string markup = "<div class=\"foo\">this is some text</div> here is a <a href=\"#bar\">link</a> and "
+			+ "another <a href=\"http://lucene.apache.org/\">link</a>. "
+			+ "This is an entity: &amp; plus a &lt;.  Here is an &. <!-- is a comment -->";
+		string expected = "\nthis is some text\n here is a link and "
+			+ "another link. "
+			+ "This is an entity: & plus a <.  Here is an &. ";
+		assertHTMLStripsTo(markup, expected, null);
+	  }
+
+	  //Some sanity checks, but not a full-fledged check
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testHTML() throws Exception
+	  /// <summary>
+	  /// Sanity checks (not a full-fledged check) on the stripped output of the
+	  /// <c>htmlStripReaderTest.html</c> resource: the raw <c>&amp;lt;</c> entity text is absent,
+	  /// "forrest"/"Forrest" is absent, and the trimmed text starts with "Welcome to Solr"
+	  /// and ends with "Foundation.".
+	  /// </summary>
+	  public virtual void testHTML()
+	  {
+		System.IO.Stream stream = this.GetType().getResourceAsStream("htmlStripReaderTest.html");
+		HTMLStripCharFilter reader = new HTMLStripCharFilter(new System.IO.StreamReader(stream, Encoding.UTF8));
+		StringBuilder builder = new StringBuilder();
+		int ch = -1;
+		while ((ch = reader.read()) != -1)
+		{
+		  builder.Append((char)ch);
+		}
+		string str = builder.ToString();
+		string trimmed = str.Trim();
+		// The output must not contain the raw "&lt;" entity text.
+		assertTrue("Entity not properly escaped", str.IndexOf("&lt;", StringComparison.Ordinal) == -1);
+		assertTrue("Forrest should have been stripped out", str.IndexOf("forrest", StringComparison.Ordinal) == -1 && str.IndexOf("Forrest", StringComparison.Ordinal) == -1);
+		assertTrue("File should start with 'Welcome to Solr' after trimming", trimmed.StartsWith("Welcome to Solr", StringComparison.Ordinal));
+
+		// This check is EndsWith, so the failure message says "end with" (it previously said "start with").
+		assertTrue("File should end with 'Foundation.' after trimming", trimmed.EndsWith("Foundation.", StringComparison.Ordinal));
+
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testMSWord14GeneratedHTML() throws Exception
+	  /// <summary>Checks that the "MS-Word 14 generated.htm" resource strips down to "This is a test" once trimmed.</summary>
+	  public virtual void testMSWord14GeneratedHTML()
+	  {
+		string expected = "This is a test";
+		System.IO.Stream resource = this.GetType().getResourceAsStream("MS-Word 14 generated.htm");
+		HTMLStripCharFilter filter = new HTMLStripCharFilter(new System.IO.StreamReader(resource, Encoding.UTF8));
+		StringBuilder stripped = new StringBuilder();
+		for (int c = filter.read(); c != -1; c = filter.read())
+		{
+		  stripped.Append((char)c);
+		}
+		// Compare the trimmed output to the expected text
+		string actual = stripped.ToString().Trim();
+		assertEquals("'" + actual + "' is not equal to '" + expected + "'", expected, actual);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testGamma() throws Exception
+	  /// <summary>Checks that <c>&amp;Gamma;</c> is decoded to U+0393 when "reserved" is passed as an escaped tag.</summary>
+	  public virtual void testGamma()
+	  {
+		// A collection initializer replaces the Java-only "new HashSet<>(Arrays.asList(...))" form, which is not valid C#.
+		ISet<string> escapedTags = new HashSet<string> { "reserved" };
+		assertHTMLStripsTo("&Gamma;", "\u0393", escapedTags);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testEntities() throws Exception
+	  /// <summary>Checks decoding of named, decimal and hexadecimal character entities when "reserved" is passed as an escaped tag.</summary>
+	  public virtual void testEntities()
+	  {
+		string test = "&nbsp; &lt;foo&gt; &Uuml;bermensch &#61; &Gamma; bar &#x393;";
+		string gold = "  <foo> \u00DCbermensch = \u0393 bar \u0393";
+		// A collection initializer replaces the Java-only "new HashSet<>(Arrays.asList(...))" form, which is not valid C#.
+		ISet<string> escapedTags = new HashSet<string> { "reserved" };
+		assertHTMLStripsTo(test, gold, escapedTags);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testMoreEntities() throws Exception
+	  /// <summary>Checks decoding of a further mix of named and numeric entities when "reserved" is passed as an escaped tag.</summary>
+	  public virtual void testMoreEntities()
+	  {
+		string test = "&nbsp; &lt;junk/&gt; &nbsp; &#33; &#64; and &#8217;";
+		string gold = "  <junk/>   ! @ and ’";
+		// A collection initializer replaces the Java-only "new HashSet<>(Arrays.asList(...))" form, which is not valid C#.
+		ISet<string> escapedTags = new HashSet<string> { "reserved" };
+		assertHTMLStripsTo(test, gold, escapedTags);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testReserved() throws Exception
+	  /// <summary>Verifies that the escaped tag "reserved" is found at the expected offsets in the output and that the "other" tag is removed.</summary>
+	  public virtual void testReserved()
+	  {
+		string test = "aaa bbb <reserved ccc=\"ddddd\"> eeee </reserved> ffff <reserved ggg=\"hhhh\"/> <other/>";
+		ISet<string> escaped = new HashSet<string>();
+		escaped.Add("reserved");
+		Reader filter = new HTMLStripCharFilter(new StringReader(test), escaped);
+		StringBuilder sb = new StringBuilder();
+		for (int c = filter.read(); c != -1; c = filter.read())
+		{
+		  sb.Append((char)c);
+		}
+		string result = sb.ToString();
+
+		// Each lookup is done once and reused for both the failure message and the check.
+		int first = result.IndexOf("reserved", StringComparison.Ordinal);
+		assertTrue("Escaped tag not preserved: " + first, first == 9);
+		int second = result.IndexOf("reserved", 15, StringComparison.Ordinal);
+		assertTrue("Escaped tag not preserved: " + second, second == 38);
+		int third = result.IndexOf("reserved", 41, StringComparison.Ordinal);
+		assertTrue("Escaped tag not preserved: " + third, third == 54);
+		assertTrue("Other tag should be removed", result.IndexOf("other", StringComparison.Ordinal) == -1);
+	  }
+
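+	  // Malformed-HTML cases: each even index of testGold holds an input and the following odd index holds the expected stripped output.
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testMalformedHTML() throws Exception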
+	  public virtual void testMalformedHTML()
+	  {
+		string[] testGold = new string[] {"a <a hr<ef=aa<a>> </close</a>", "a <a hr<ef=aa> </close", "<a href=http://dmoz.org/cgi-bin/add.cgi?where=/arts/\" class=lu style=\"font-size: 9px\" target=dmoz>Submit a Site</a>", "Submit a Site", "<a href=javascript:ioSwitch('p8','http://www.csmonitor.com/') title=expand id=e8 class=expanded rel=http://www.csmonitor.com/>Christian Science", "Christian Science", "<link rel=\"alternate\" type=\"application/rss+xml\" title=\"San Francisco \" 2008 RSS Feed\" href=\"http://2008.sf.wordcamp.org/feed/\" />", "\n", "<a href=\" http://www.surgery4was.happyhost.org/video-of-arthroscopic-knee-surgery symptoms.html, heat congestive heart failure <a href=\" http://www.symptoms1bad.happyhost.org/canine", "<a href=\" http://www.surgery4was.happyhost.org/video-of-arthroscopic-knee-surgery symptoms.html, heat congestive heart failure <a href=\" http://www.symptoms1bad.happyhost.org/canine", "<a href=\"http://ucblibraries.colorado.edu/how/index.htm\"class=\"pageN
 avAreaText\">", "", "<link title=\"^\\\" 21Sta's Blog\" rel=\"search\"  type=\"application/opensearchdescription+xml\"  href=\"http://21sta.com/blog/inc/opensearch.php\" />", "\n", "<a href=\"#postcomment\" title=\"\"Leave a comment\";\">?", "?", "<a href='/modern-furniture'   ' id='21txt' class='offtab'   onMouseout=\"this.className='offtab';  return true;\" onMouseover=\"this.className='ontab';  return true;\">", "", "<a href='http://alievi.wordpress.com/category/01-todos-posts/' style='font-size: 275%; padding: 1px; margin: 1px;' title='01 - Todos Post's (83)'>", "", "The <a href=<a href=\"http://www.advancedmd.com>medical\">http://www.advancedmd.com>medical</a> practice software</a>", "The <a href=medical\">http://www.advancedmd.com>medical practice software", "<a href=\"node/21426\" class=\"clipTitle2\" title=\"Levi.com/BMX 2008 Clip of the Week 29 \"Morgan Wade Leftover Clips\"\">Levi.com/BMX 2008 Clip of the Week 29...", "Levi.com/BMX 2008 Clip of the Week 29...", "<a href=\"
 printer_friendly.php?branch=&year=&submit=go&screen=\";\">Printer Friendly", "Printer Friendly", "<a href=#\" ondragstart=\"return false\" onclick=\"window.external.AddFavorite('http://www.amazingtextures.com', 'Amazing Textures');return false\" onmouseover=\"window.status='Add to Favorites';return true\">Add to Favorites", "Add to Favorites", "<a href=\"../at_home/at_home_search.html\"../_home/at_home_search.html\">At", "At", "E-mail: <a href=\"\"mailto:XXXXXX@example.com\" \">XXXXXX@example.com </a>", "E-mail: XXXXXX@example.com ", "<li class=\"farsi\"><a title=\"A'13?\" alt=\"A'13?\" href=\"http://www.america.gov/persian\" alt=\"\" name=\"A'13?\"A'13? title=\"A'13?\">A'13?</a></li>", "\nA'13?\n", "<li><a href=\"#28\" title=\"Hubert \"Geese\" Ausby\">Hubert \"Geese\" Ausby</a></li>", "\nHubert \"Geese\" Ausby\n", "<href=\"http://anbportal.com/mms/login.asp\">", "\n", "<a href=\"", "<a href=\"", "<a href=\">", "", "<a rel=\"nofollow\" href=\"http://anissanina31.skyrock.com/18950394
 93-Hi-tout-le-monde.html\" title=\" Hi, tout le monde !>#</a>", "#", "<a href=\"http://annunciharleydavidsonusate.myblog.it/\" title=\"Annunci Moto e Accessori Harley Davidson\" target=\"_blank\"><img src=\"http://annunciharleydavidsonusate.myblog.it/images/Antipixel.gif\" /></a>", "", "<a href=\"video/addvideo&v=120838887181\" onClick=\"return confirm('Are you sure you want  add this video to your profile? If it exists some video in your profile will be overlapped by this video!!')\" \" onmouseover=\"this.className='border2'\" onmouseout=\"this.className=''\">", "", "<a href=#Services & Support>", "", "<input type=\"image\" src=\"http://apologyindex.com/ThemeFiles/83401-72905/images/btn_search.gif\"value=\"Search\" name=\"Search\" alt=\"Search\" class=\"searchimage\" onclick=\"incom ='&sc=' + document.getElementById('sel').value ; var dt ='&dt=' + document.getElementById('dt').value; var searchKeyword = document.getElementById('q').value ; searchKeyword = searchKeyword.replace(/\\s
 /g,''); if (searchKeyword.length < 3){alert('Nothing to search. Search keyword should contain atleast 3 chars.'); return false; } var al='&al=' +  document.getElementById('advancedlink').style.display ;  document.location.href='http://apologyindex.com/search.aspx?q=' + document.getElementById('q').value + incom + dt + al;\" />", "", "<input type=\"image\" src=\"images/afbe.gif\" width=\"22\" height=\"22\"  hspace=\"4\" title=\"Add to Favorite\" alt=\"Add to Favorite\"onClick=\" if(window.sidebar){ window.sidebar.addPanel(document.title,location.href,''); }else if(window.external){ window.external.AddFavorite(location.href,document.title); }else if(window.opera&&window.print) { return true; }\">", "", "<area shape=\"rect\" coords=\"12,153,115,305\" href=\"http://statenislandtalk.com/v-web/gallery/Osmundsen-family\"Art's Norwegian Roots in Rogaland\">", "\n", "<a rel=\"nofollow\" href=\"http://arth26.skyrock.com/660188240-bonzai.html\" title=\"bonza>#", "#", "<a href=  >", "", "<ahref
 =http:..", "<ahref=http:..", "<ahref=http:..>", "\n", "<ahref=\"http://aseigo.bddf.ca/cms/1025\">A", "\nA", "<a href=\"javascript:calendar_window=window.open('/calendar.aspx?formname=frmCalendar.txtDate','calendar_window','width=154,height=188');calendar_window.focus()\">", "", "<a href=\"/applications/defenseaerospace/19+rackmounts\" title=\"19\" Rackmounts\">", "", "<a href=http://www.azimprimerie.fr/flash/backup/lewes-zip-code/savage-model-110-manual.html title=savage model 110 manual rel=dofollow>", "", "<a class=\"at\" name=\"Lamborghini  href=\"http://lamborghini.coolbegin.com\">Lamborghini /a>", "Lamborghini /a>", "<A href='newslink.php?news_link=http%3A%2F%2Fwww.worldnetdaily.com%2Findex.php%3Ffa%3DPAGE.view%26pageId%3D85729&news_title=Florida QB makes 'John 3:16' hottest Google search Tebow inscribed Bible reference on eye black for championship game' TARGET=_blank>", "", "<a href=/myspace !style='color:#993333'>", "", "<meta name=3DProgId content=3DExcel.Sheet>", "\n", "<l
 ink id=3D\"shLink\" href=3D\"PSABrKelly-BADMINTONCupResults08FINAL2008_09_19=_files/sheet004.htm\">", "\n", "<td bgcolor=3D\"#FFFFFF\" nowrap>", "\n", "<a href=\"http://basnect.info/usersearch/\"predicciones-mundiales-2009\".html\">\"predicciones mundiales 2009\"</a>", "\"predicciones mundiales 2009\"", "<a class=\"comment-link\" href=\"https://www.blogger.com/comment.g?blogID=19402125&postID=114070605958684588\"location.href=https://www.blogger.com/comment.g?blogID=19402125&postID=114070605958684588;>", "", "<a href = \"/videos/Bishop\"/\" title = \"click to see more Bishop\" videos\">Bishop\"</a>", "Bishop\"", "<a href=\"http://bhaa.ie/calendar/event.php?eid=20081203150127531\"\">BHAA Eircom 2 &amp; 5 miles CC combined start</a>", "BHAA Eircom 2 & 5 miles CC combined start", "<a href=\"http://people.tribe.net/wolfmana\" onClick='setClick(\"Application[tribe].Person[bb7df210-9dc0-478c-917f-436b896bcb79]\")'\" title=\"Mana\">", "", "<a  href=\"http://blog.edu-cyberpg.com/ct.ashx?id=
 6143c528-080c-4bb2-b765-5ec56c8256d3&url=http%3a%2f%2fwww.gsa.ac.uk%2fmackintoshsketchbook%2f\"\" eudora=\"autourl\">", "", "<input type=\"text\" value=\"<search here>\">", "<input type=\"text\" value=\"\n\">", "<input type=\"text\" value=\"<search here\">", "<input type=\"text\" value=\"\n", "<input type=\"text\" value=\"search here>\">", "\">", "<input type=\"text\" value=\"&lt;search here&gt;\" onFocus=\"this.value='<search here>'\">", "", "<![if ! IE]>\n<link href=\"http://i.deviantart.com/icons/favicon.png\" rel=\"shortcut icon\"/>\n<![endif]>", "\n\n\n", "<![if supportMisalignedColumns]>\n<tr height=0 style='display:none'>\n<td width=64 style='width:48pt'></td>\n</tr>\n<![endif]>", "\n\n\n\n\n\n\n\n"};
+		for (int i = 0 ; i < testGold.Length ; i += 2)
+		{
+		  assertHTMLStripsTo(testGold[i], testGold[i + 1], null);
+		}
+	  }
+
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testBufferOverflow() throws Exception
+	  /// <summary>
+	  /// Feeds inputs sized past <c>HTMLStripCharFilter.InitialBufferSize</c> through the filter:
+	  /// pass-through text, a long comment, a long processing instruction and a long tag.
+	  /// </summary>
+	  public virtual void testBufferOverflow()
+	  {
+		int padding = HTMLStripCharFilter.InitialBufferSize + 500;
+		StringBuilder sb = new StringBuilder(HTMLStripCharFilter.InitialBufferSize + 50);
+
+		// Case 1: the whole input is expected back unchanged.
+		sb.Append("ah<?> ??????");
+		appendChars(sb, padding);
+		string passthrough = sb.ToString();
+		Reader buffered = new HTMLStripCharFilter(new System.IO.StreamReader(new StringReader(passthrough))); //force the use of BufferedReader
+		assertHTMLStripsTo(buffered, passthrough, null);
+
+		// Case 2: comments
+		sb.Length = 0;
+		sb.Append("<!--");
+		appendChars(sb, 3 * HTMLStripCharFilter.InitialBufferSize + 500); //comments have two lookaheads
+		sb.Append("-->foo");
+		assertHTMLStripsTo(sb.ToString(), "foo", null);
+
+		// Case 3: a long "<? ... ?>" span
+		sb.Length = 0;
+		sb.Append("<?");
+		appendChars(sb, padding);
+		sb.Append("?>");
+		assertHTMLStripsTo(sb.ToString(), "", null);
+
+		// Case 4: a long "<b ... />" tag
+		sb.Length = 0;
+		sb.Append("<b ");
+		appendChars(sb, padding);
+		sb.Append("/>");
+		assertHTMLStripsTo(sb.ToString(), "", null);
+	  }
+
+	  /// <summary>Appends <c>numChars / 2</c> repetitions of "a " to <paramref name="testBuilder"/>.</summary>
+	  private void appendChars(StringBuilder testBuilder, int numChars)
+	  {
+		// tack on enough to go beyond the mark readahead limit, since <?> makes HTMLStripCharFilter think it is a processing instruction
+		for (int remaining = numChars / 2; remaining > 0; remaining--)
+		{
+		  testBuilder.Append("a ");
+		}
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testComment() throws Exception
+	  /// <summary>Exercises comment handling: a triple-dash comment, a bang declaration that is not a comment, and an unterminated comment of random length.</summary>
+	  public virtual void testComment()
+	  {
+		// three dashes, still a valid comment
+		assertHTMLStripsTo("<!--- three dashes, still a valid comment ---> ", " ", null);
+
+		// should not be recognized as a comment
+		assertHTMLStripsTo("<! -- blah > ", " ", null);
+
+		// comment opener that is never closed; the expected output is empty
+		StringBuilder unterminated = new StringBuilder("<!--");
+		appendChars(unterminated, TestUtil.Next(random(), 0, 1000));
+		assertHTMLStripsTo(unterminated.ToString(), "", null);
+	  }
+
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void doTestOffsets(String in) throws Exception
+	  /// <summary>
+	  /// Reads <paramref name="in"/> through the filter and, for every 'X' read, asserts that
+	  /// <c>correctOffset</c> of the current output position equals the index of the next 'X'
+	  /// in the original string.
+	  /// </summary>
+	  public virtual void doTestOffsets(string @in)
+	  {
+		HTMLStripCharFilter filter = new HTMLStripCharFilter(new System.IO.StreamReader(new StringReader(@in)));
+		int expectedPos = -1; // offset in the original string
+		int c;
+		// outputPos is the offset in the reader
+		for (int outputPos = 0; (c = filter.read()) != -1; outputPos++)
+		{
+		  int corrected = filter.correctOffset(outputPos);
+		  if (c != 'X')
+		  {
+			continue;
+		  }
+
+		  expectedPos = @in.IndexOf('X', expectedPos + 1);
+		  assertEquals(expectedPos, corrected);
+		}
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testOffsets() throws Exception
+	  /// <summary>Runs <c>doTestOffsets</c> over inputs containing tags, entities and sequences that force backtracking.</summary>
+	  public virtual void testOffsets()
+	  {
+		// Disabled in the original: "hello X how X are you"
+		string[] inputs = new string[]
+		{
+		  "hello <p> X<p> how <p>X are you",
+		  "X &amp; X &#40; X &lt; &gt; X",
+		  // test backtracking
+		  "X < &zz >X &# < X > < &l > &g < X"
+		};
+		foreach (string input in inputs)
+		{
+		  doTestOffsets(input);
+		}
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: static void assertLegalOffsets(String in) throws Exception
+	  /// <summary>Asserts that, for every character read from the filter, the corrected offset does not exceed the length of <paramref name="in"/>.</summary>
+	  internal static void assertLegalOffsets(string @in)
+	  {
+		int docLength = @in.Length;
+		HTMLStripCharFilter filter = new HTMLStripCharFilter(new System.IO.StreamReader(new StringReader(@in)));
+		for (int off = 0; filter.read() != -1; off++)
+		{
+		  int corrected = filter.correctOffset(off);
+		  assertTrue("invalid offset correction: " + off + "->" + corrected + " for doc of length: " + docLength, corrected <= docLength);
+		}
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testLegalOffsets() throws Exception
+	  /// <summary>Runs <c>assertLegalOffsets</c> on plain text and on text containing an incomplete numeric entity.</summary>
+	  public virtual void testLegalOffsets()
+	  {
+		string[] docs = new string[] {"hello world", "hello &#x world"};
+		foreach (string doc in docs)
+		{
+		  assertLegalOffsets(doc);
+		}
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testRandom() throws Exception
+	  /// <summary>Runs <c>checkRandomData</c> on the test analyzer for <c>RANDOM_MULTIPLIER * 1000</c> rounds.</summary>
+	  public virtual void testRandom()
+	  {
+		int rounds = RANDOM_MULTIPLIER * 1000;
+
+		checkRandomData(
+			random(),
+			newTestAnalyzer(),
+			rounds);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testRandomHugeStrings() throws Exception
+	  /// <summary>Runs <c>checkRandomData</c> on the test analyzer for <c>RANDOM_MULTIPLIER * 100</c> rounds, passing 8192 as the extra size argument.</summary>
+	  public virtual void testRandomHugeStrings()
+	  {
+		int rounds = RANDOM_MULTIPLIER * 100;
+
+		checkRandomData(
+			random(),
+			newTestAnalyzer(),
+			rounds,
+			8192);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testCloseBR() throws Exception
+	  /// <summary>Runs <c>checkAnalysisConsistency</c> on a fixed input containing a closing <c>br</c> tag.</summary>
+	  public virtual void testCloseBR()
+	  {
+		string input = " Secretary)</br> [[M";
+		checkAnalysisConsistency(random(), newTestAnalyzer(), random().nextBoolean(), input);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testServerSideIncludes() throws Exception
+	  /// <summary>Checks stripping of server-side-include style comments nested inside attribute values and inside a script element.</summary>
+	  public virtual void testServerSideIncludes()
+	  {
+		// SSI directives embedded in the attribute values of an img tag
+		string inAttributes = "one<img src=\"image.png\"\n"
+			+ " alt =  \"Alt: <!--#echo var='${IMAGE_CAPTION:<!--comment-->\\'Comment\\'}'  -->\"\n\n"
+			+ " title=\"Title: <!--#echo var=\"IMAGE_CAPTION\"-->\">two";
+		assertHTMLStripsTo(inAttributes, "onetwo", null);
+
+		// SSI directive inside a commented-out script body
+		string inScript = "one<script><!-- <!--#config comment=\"<!-- \\\"comment\\\"-->\"--> --></script>two";
+		assertHTMLStripsTo(inScript, "one\ntwo", null);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testScriptQuotes() throws Exception
+	  /// <summary>Checks that quoted strings containing comment markers or closing tags inside a script element are stripped as expected.</summary>
+	  public virtual void testScriptQuotes()
+	  {
+		string withAttribute = "one<script attr= bare><!-- action('<!-- comment -->', \"\\\"-->\\\"\"); --></script>two";
+		assertHTMLStripsTo(withAttribute, "one\ntwo", null);
+
+		string withInnerClose = "hello<script><!-- f('<!--internal--></script>'); --></script>";
+		assertHTMLStripsTo(withInnerClose, "hello\n", null);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testEscapeScript() throws Exception
+	  /// <summary>Checks that with "SCRIPT" as an escaped tag the script tags are kept while the script body is removed.</summary>
+	  public virtual void testEscapeScript()
+	  {
+		string test = "one<script no-value-attr>callSomeMethod();</script>two";
+		string gold = "one<script no-value-attr></script>two";
+		// Collection initializer instead of the Java-only Arrays.asList(...), which does not exist in .NET.
+		ISet<string> escapedTags = new HashSet<string> { "SCRIPT" };
+		assertHTMLStripsTo(test, gold, escapedTags);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testStyle() throws Exception
+	  /// <summary>Checks that a style element whose body is wrapped in a comment is replaced by a single newline.</summary>
+	  public virtual void testStyle()
+	  {
+		string markup = "one<style type=\"text/css\">\n"
+			+ "<!--\n"
+			+ "@import url('http://www.lasletrasdecanciones.com/css.css');\n"
+			+ "-->\n"
+			+ "</style>two";
+		assertHTMLStripsTo(markup, "one\ntwo", null);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testEscapeStyle() throws Exception
+	  /// <summary>Checks that with "STYLE" as an escaped tag the style tags are kept while the style body is removed.</summary>
+	  public virtual void testEscapeStyle()
+	  {
+		string test = "one<style type=\"text/css\"> body,font,a { font-family:arial; } </style>two";
+		string gold = "one<style type=\"text/css\"></style>two";
+		// Collection initializer instead of the Java-only Arrays.asList(...), which does not exist in .NET.
+		ISet<string> escapedTags = new HashSet<string> { "STYLE" };
+		assertHTMLStripsTo(test, gold, escapedTags);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testBR() throws Exception
+	  /// <summary>Checks that opening, self-closing and closing BR tags each strip to a newline.</summary>
+	  public virtual void testBR()
+	  {
+		// Laid out as (input, expected) pairs.
+		string[] cases = new string[]
+		{
+		  "one<BR />two<br>three", "one\ntwo\nthree",
+		  "one<BR some stuff here too>two</BR>", "one\ntwo\n"
+		};
+		for (int pair = 0; pair < cases.Length / 2; pair++)
+		{
+		  string input = cases[2 * pair];
+		  string expected = cases[2 * pair + 1];
+		  assertHTMLStripsTo(input, expected, null);
+		}
+	  }
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testEscapeBR() throws Exception
+	  /// <summary>Checks that with "BR" as an escaped tag the input, including a closing tag split across lines, comes through unchanged.</summary>
+	  public virtual void testEscapeBR()
+	  {
+		string test = "one<BR class='whatever'>two</\nBR\n>";
+		string gold = "one<BR class='whatever'>two</\nBR\n>";
+		// Collection initializer instead of the Java-only Arrays.asList(...), which does not exist in .NET.
+		ISet<string> escapedTags = new HashSet<string> { "BR" };
+		assertHTMLStripsTo(test, gold, escapedTags);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testInlineTagsNoSpace() throws Exception
+	  /// <summary>Checks that span and sup tags are removed without inserting any whitespace.</summary>
+	  public virtual void testInlineTagsNoSpace()
+	  {
+		string markup = "one<sPAn class=\"invisible\">two<sup>2<sup>e</sup></sup>.</SpaN>three";
+		string expected = "onetwo2e.three";
+
+		assertHTMLStripsTo(markup, expected, null);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testCDATA() throws Exception
+	  /// <summary>
+	  /// Checks CDATA handling over a table of (input, expected) pairs, including two randomized
+	  /// "&lt;!"-prefixed inputs that are not CDATA sections: one closed with "&gt;" and one left unclosed.
+	  /// </summary>
+	  public virtual void testCDATA()
+	  {
+		int maxNumElems = 100;
+		string randomHtmlishString1 = TestUtil.randomHtmlishString(random(), maxNumElems).replaceAll(">", " ").replaceFirst("^--","__"); // Don't create a comment (disallow "<!--") and don't include a closing ">"
+		string closedAngleBangNonCDATA = "<!" + randomHtmlishString1 + "-[CDATA[&]]>";
+
+		string randomHtmlishString2 = TestUtil.randomHtmlishString(random(), maxNumElems).replaceAll(">", " ").replaceFirst("^--","__"); // Don't create a comment (disallow "<!--") and don't include a closing ">"
+		// Uses the second random string; the original reused randomHtmlishString1 and left randomHtmlishString2 unused.
+		string unclosedAngleBangNonCDATA = "<!" + randomHtmlishString2 + "-[CDATA[";
+
+		// Even index: input, following odd index: expected output.
+		string[] testGold = new string[]
+		{
+		  "one<![CDATA[<one><two>three<four></four></two></one>]]>two", "one<one><two>three<four></four></two></one>two",
+		  "one<![CDATA[two<![CDATA[three]]]]><![CDATA[>four]]>five", "onetwo<![CDATA[three]]>fourfive",
+		  "<! [CDATA[&]]>", "",
+		  "<! [CDATA[&] ] >", "",
+		  "<! [CDATA[&]]", "<! [CDATA[&]]",
+		  "<!\u2009[CDATA[&]]>", "",
+		  "<!\u2009[CDATA[&]\u2009]\u2009>", "",
+		  "<!\u2009[CDATA[&]\u2009]\u2009", "<!\u2009[CDATA[&]\u2009]\u2009",
+		  closedAngleBangNonCDATA, "",
+		  "<![CDATA[", "",
+		  "<![CDATA[<br>", "<br>",
+		  "<![CDATA[<br>]]", "<br>]]",
+		  "<![CDATA[<br>]]>", "<br>",
+		  "<![CDATA[<br>] ] >", "<br>] ] >",
+		  "<![CDATA[<br>]\u2009]\u2009>", "<br>]\u2009]\u2009>",
+		  "<!\u2009[CDATA[", "<!\u2009[CDATA[",
+		  unclosedAngleBangNonCDATA, unclosedAngleBangNonCDATA
+		};
+		for (int i = 0 ; i < testGold.Length ; i += 2)
+		{
+		  assertHTMLStripsTo(testGold[i], testGold[i + 1], null);
+		}
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testUnclosedAngleBang() throws Exception
+	  /// <summary>Checks that an unclosed bang declaration is passed through unchanged.</summary>
+	  public virtual void testUnclosedAngleBang()
+	  {
+		string input = "<![endif]";
+		string expected = "<![endif]";
+		assertHTMLStripsTo(input, expected, null);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testUppercaseCharacterEntityVariants() throws Exception
+	  /// <summary>Checks that the upper-case entity names QUOT, COPY, GT, LT, REG and AMP are decoded.</summary>
+	  public virtual void testUppercaseCharacterEntityVariants()
+	  {
+		string encoded = " &QUOT;-&COPY;&GT;>&LT;<&REG;&AMP;";
+		string decoded = " \"-\u00A9>><<\u00AE&";
+
+		assertHTMLStripsTo(encoded, decoded, null);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testMSWordMalformedProcessingInstruction() throws Exception
+	  /// <summary>Checks that an xml:namespace processing instruction terminated with "/&gt;" is removed entirely.</summary>
+	  public virtual void testMSWordMalformedProcessingInstruction()
+	  {
+		string markup = "one<?xml:namespace prefix = o ns = \"urn:schemas-microsoft-com:office:office\" />two";
+		string expected = "onetwo";
+
+		assertHTMLStripsTo(markup, expected, null);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testSupplementaryCharsInTags() throws Exception
+	  /// <summary>Checks stripping of tags whose names contain CJK and supplementary-plane characters; each such tag is expected to become a newline.</summary>
+	  public virtual void testSupplementaryCharsInTags()
+	  {
+		string markup = "one<𩬅艱鍟䇹愯瀛>two<瀛愯𩬅>three 瀛愯𩬅</瀛愯𩬅>four</𩬅艱鍟䇹愯瀛>five<𠀀𠀀>six<𠀀𠀀/>seven";
+		string expected = "one\ntwo\nthree 瀛愯𩬅\nfour\nfive\nsix\nseven";
+
+		assertHTMLStripsTo(markup, expected, null);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testRandomBrokenHTML() throws Exception
+	  /// <summary>Runs <c>checkAnalysisConsistency</c> on a string produced by <c>TestUtil.randomHtmlishString</c> with a limit of 10000.</summary>
+	  public virtual void testRandomBrokenHTML()
+	  {
+		string htmlish = TestUtil.randomHtmlishString(random(), 10000);
+		checkAnalysisConsistency(random(), newTestAnalyzer(), random().nextBoolean(), htmlish);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testRandomText() throws Exception
+	  /// <summary>
+	  /// Builds a space-separated text of random words (unicode, realistic unicode, or simple
+	  /// strings, chosen at random) and reads it through the filter to the end. There are no
+	  /// assertions; the test only requires that reading completes.
+	  /// </summary>
+	  public virtual void testRandomText()
+	  {
+		StringBuilder words = new StringBuilder();
+		int minNumWords = 10;
+		int maxNumWords = 10000;
+		int minWordLength = 3;
+		int maxWordLength = 20;
+		int wordCount = TestUtil.Next(random(), minNumWords, maxNumWords);
+		int flavor = TestUtil.Next(random(), 0, 4);
+		if (flavor == 0)
+		{
+		  for (int n = 0; n < wordCount; n++)
+		  {
+			words.Append(TestUtil.randomUnicodeString(random(), maxWordLength));
+			words.Append(' ');
+		  }
+		}
+		else if (flavor == 1)
+		{
+		  for (int n = 0; n < wordCount; n++)
+		  {
+			words.Append(TestUtil.randomRealisticUnicodeString(random(), minWordLength, maxWordLength));
+			words.Append(' ');
+		  }
+		}
+		else
+		{
+		  // every other selector value: simple strings
+		  for (int n = 0; n < wordCount; n++)
+		  {
+			words.Append(TestUtil.randomSimpleString(random()));
+			words.Append(' ');
+		  }
+		}
+		Reader filter = new HTMLStripCharFilter(new StringReader(words.ToString()));
+		while (filter.read() != -1)
+		{
+		  // drain the filter; nothing is checked per character
+		}
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testUTF16Surrogates() throws Exception
+	  /// <summary>
+	  /// Checks how numeric character references for UTF-16 surrogates are analyzed: properly
+	  /// paired, improperly paired, and unpaired high/low surrogates, in decimal and hexadecimal form.
+	  /// </summary>
+	  public virtual void testUTF16Surrogates()
+	  {
+		Analyzer a = newTestAnalyzer();
+
+		// Paired surrogates
+		assertAnalyzesTo(a, " one two &#xD86C;&#XdC01;three", new[] {"one", "two", "\uD86C\uDC01three"});
+		assertAnalyzesTo(a, " &#55404;&#XdC01;", new[] {"\uD86C\uDC01"});
+		assertAnalyzesTo(a, " &#xD86C;&#56321;", new[] {"\uD86C\uDC01"});
+		assertAnalyzesTo(a, " &#55404;&#56321;", new[] {"\uD86C\uDC01"});
+
+		// Improperly paired surrogates
+		assertAnalyzesTo(a, " &#55404;&#57999;", new[] {"\uFFFD\uE28F"});
+		assertAnalyzesTo(a, " &#xD86C;&#57999;", new[] {"\uFFFD\uE28F"});
+		assertAnalyzesTo(a, " &#55002;&#XdC01;", new[] {"\uD6DA\uFFFD"});
+		assertAnalyzesTo(a, " &#55002;&#56321;", new[] {"\uD6DA\uFFFD"});
+
+		// Unpaired high surrogates
+		assertAnalyzesTo(a, " &#Xd921;", new[] {"\uFFFD"});
+		assertAnalyzesTo(a, " &#Xd921", new[] {"\uFFFD"});
+		assertAnalyzesTo(a, " &#Xd921<br>", new[] {"&#Xd921"});
+		assertAnalyzesTo(a, " &#55528;", new[] {"\uFFFD"});
+		assertAnalyzesTo(a, " &#55528", new[] {"\uFFFD"});
+		assertAnalyzesTo(a, " &#55528<br>", new[] {"&#55528"});
+
+		// Unpaired low surrogates
+		assertAnalyzesTo(a, " &#xdfdb;", new[] {"\uFFFD"});
+		assertAnalyzesTo(a, " &#xdfdb", new[] {"\uFFFD"});
+		assertAnalyzesTo(a, " &#xdfdb<br>", new[] {"&#xdfdb"});
+		assertAnalyzesTo(a, " &#57209;", new[] {"\uFFFD"});
+		assertAnalyzesTo(a, " &#57209", new[] {"\uFFFD"});
+		assertAnalyzesTo(a, " &#57209<br>", new[] {"&#57209"});
+	  }
+
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public static void assertHTMLStripsTo(String input, String gold, java.util.Set<String> escapedTags) throws Exception
+	  /// <summary>Wraps <paramref name="input"/> in a <c>StringReader</c> and delegates to the reader-based overload.</summary>
+	  public static void assertHTMLStripsTo(string input, string gold, ISet<string> escapedTags)
+	  {
+		Reader source = new StringReader(input);
+		assertHTMLStripsTo(source, gold, escapedTags);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public static void assertHTMLStripsTo(java.io.Reader input, String gold, java.util.Set<String> escapedTags) throws Exception
+	  /// <summary>
+	  /// Reads <paramref name="input"/> through an <c>HTMLStripCharFilter</c> and asserts that the
+	  /// collected output equals <paramref name="gold"/>.
+	  /// </summary>
+	  /// <param name="input">Source to strip.</param>
+	  /// <param name="gold">Expected output.</param>
+	  /// <param name="escapedTags">Tags passed to the filter constructor; when null the single-argument constructor is used.</param>
+	  /// <exception cref="Exception">
+	  /// If reading throws: the original exception is rethrown when the output collected so far
+	  /// already equals <paramref name="gold"/>; otherwise it is wrapped with a message showing the mismatch.
+	  /// </exception>
+	  public static void assertHTMLStripsTo(Reader input, string gold, ISet<string> escapedTags)
+	  {
+		HTMLStripCharFilter reader;
+		if (null == escapedTags)
+		{
+		  reader = new HTMLStripCharFilter(input);
+		}
+		else
+		{
+		  reader = new HTMLStripCharFilter(input, escapedTags);
+		}
+		int ch = 0;
+		StringBuilder builder = new StringBuilder();
+		try
+		{
+		  while ((ch = reader.read()) != -1)
+		  {
+			builder.Append((char)ch);
+		  }
+		}
+		catch (Exception e)
+		{
+		  if (gold.Equals(builder.ToString()))
+		  {
+			// Bare "throw" keeps the original stack trace; "throw e" would reset it to this frame.
+			throw;
+		  }
+		  throw new Exception("('" + builder.ToString() + "' is not equal to '" + gold + "').  " + e.Message, e);
+		}
+		assertEquals("'" + builder.ToString() + "' is not equal to '" + gold + "'", gold, builder.ToString());
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Charfilter/TestHTMLStripCharFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Charfilter/TestHTMLStripCharFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Charfilter/TestHTMLStripCharFilterFactory.cs
new file mode 100644
index 0000000..08adf1b
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Charfilter/TestHTMLStripCharFilterFactory.cs
@@ -0,0 +1,121 @@
+namespace org.apache.lucene.analysis.charfilter
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using BaseTokenStreamFactoryTestCase = org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
+
+	/// <summary>
+	/// Simple tests to ensure this factory is working
+	/// </summary>
+	public class TestHTMLStripCharFilterFactory : BaseTokenStreamFactoryTestCase
+	{
+
+	  /// <summary>
+	  /// Text without any markup must come through unchanged (tokens and offsets),
+	  /// even when escapedTags is configured. </summary>
+	  public virtual void testNothingChanged()
+	  {
+		//                             11111111112
+		//                   012345678901234567890
+		const string text = "this is only a test.";
+		Reader cs = charFilterFactory("HTMLStrip", "escapedTags", "a, Title").create(new StringReader(text));
+		TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
+		assertTokenStreamContents(ts, new string[] {"this", "is", "only", "a", "test."}, new int[] {0, 5, 8, 13, 15}, new int[] {4, 7, 12, 14, 20});
+	  }
+
+	  /// <summary>
+	  /// With no escapedTags argument every tag is expected to be stripped. </summary>
+	  public virtual void testNoEscapedTags()
+	  {
+		//                             11111111112222222222333333333344
+		//                   012345678901234567890123456789012345678901
+		const string text = "<u>this</u> is <b>only</b> a <I>test</I>.";
+		Reader cs = charFilterFactory("HTMLStrip").create(new StringReader(text));
+		TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
+		assertTokenStreamContents(ts, new string[] {"this", "is", "only", "a", "test."}, new int[] {3, 12, 18, 27, 32}, new int[] {11, 14, 26, 28, 41});
+	  }
+
+	  /// <summary>
+	  /// escapedTags "U i": the u and I tags are expected to stay in the output
+	  /// while the b tag is stripped. </summary>
+	  public virtual void testEscapedTags()
+	  {
+		//                             11111111112222222222333333333344
+		//                   012345678901234567890123456789012345678901
+		const string text = "<u>this</u> is <b>only</b> a <I>test</I>.";
+		Reader cs = charFilterFactory("HTMLStrip", "escapedTags", "U i").create(new StringReader(text));
+		TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
+		assertTokenStreamContents(ts, new string[] {"<u>this</u>", "is", "only", "a", "<I>test</I>."}, new int[] {0, 12, 18, 27, 29}, new int[] {11, 14, 26, 28, 41});
+	  }
+
+	  /// <summary>
+	  /// An escapedTags value made only of separators is expected to behave like
+	  /// no escaped tags at all. </summary>
+	  public virtual void testSeparatorOnlyEscapedTags()
+	  {
+		//                             11111111112222222222333333333344
+		//                   012345678901234567890123456789012345678901
+		const string text = "<u>this</u> is <b>only</b> a <I>test</I>.";
+		Reader cs = charFilterFactory("HTMLStrip", "escapedTags", ",, , ").create(new StringReader(text));
+		TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
+		assertTokenStreamContents(ts, new string[] {"this", "is", "only", "a", "test."}, new int[] {3, 12, 18, 27, 32}, new int[] {11, 14, 26, 28, 41});
+	  }
+
+	  /// <summary>
+	  /// An empty escapedTags value is expected to behave like no escaped tags at all. </summary>
+	  public virtual void testEmptyEscapedTags()
+	  {
+		//                             11111111112222222222333333333344
+		//                   012345678901234567890123456789012345678901
+		const string text = "<u>this</u> is <b>only</b> a <I>test</I>.";
+		Reader cs = charFilterFactory("HTMLStrip", "escapedTags", "").create(new StringReader(text));
+		TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
+		assertTokenStreamContents(ts, new string[] {"this", "is", "only", "a", "test."}, new int[] {3, 12, 18, 27, 32}, new int[] {11, 14, 26, 28, 41});
+	  }
+
+	  /// <summary>
+	  /// escapedTags ", B\r\n\t": only the b tag is expected to stay in the output. </summary>
+	  public virtual void testSingleEscapedTag()
+	  {
+		//                             11111111112222222222333333333344
+		//                   012345678901234567890123456789012345678901
+		const string text = "<u>this</u> is <b>only</b> a <I>test</I>.";
+		Reader cs = charFilterFactory("HTMLStrip", "escapedTags", ", B\r\n\t").create(new StringReader(text));
+		TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
+		assertTokenStreamContents(ts, new string[] {"this", "is", "<b>only</b>", "a", "test."}, new int[] {3, 12, 15, 27, 32}, new int[] {11, 14, 26, 28, 41});
+	  }
+
+	  /// <summary>
+	  /// Test that bogus arguments result in exception </summary>
+	  public virtual void testBogusArguments()
+	  {
+		try
+		{
+		  charFilterFactory("HTMLStrip", "bogusArg", "bogusValue");
+		  fail();
+		}
+		catch (System.ArgumentException expected)
+		{
+		  // System.String exposes Contains (Pascal case); the Java-style
+		  // "contains" left by the converter does not exist in .NET.
+		  assertTrue(expected.Message.Contains("Unknown parameters"));
+		}
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Charfilter/TestMappingCharFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Charfilter/TestMappingCharFilter.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Charfilter/TestMappingCharFilter.cs
new file mode 100644
index 0000000..8b3d5fa
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Charfilter/TestMappingCharFilter.cs
@@ -0,0 +1,636 @@
+using System;
+using System.Diagnostics;
+using System.Collections.Generic;
+using System.Text;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace org.apache.lucene.analysis.charfilter
+{
+
+
+	using TestUtil = org.apache.lucene.util.TestUtil;
+	using UnicodeUtil = org.apache.lucene.util.UnicodeUtil;
+
+	public class TestMappingCharFilter : BaseTokenStreamTestCase
+	{
+
+	  internal NormalizeCharMap normMap;
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void setUp() throws Exception
+	  /// <summary>
+	  /// Builds the shared <c>normMap</c> used by the tests of this class. </summary>
+	  public override void setUp()
+	  {
+		base.setUp();
+		NormalizeCharMap.Builder mappings = new NormalizeCharMap.Builder();
+
+		// replacement shorter than the matched input
+		mappings.add("aa", "a");
+		mappings.add("bbb", "b");
+		mappings.add("cccc", "cc");
+
+		// replacement as long as, or longer than, the matched input
+		mappings.add("h", "i");
+		mappings.add("j", "jj");
+		mappings.add("k", "kkk");
+		mappings.add("ll", "llll");
+
+		// match that is deleted entirely
+		mappings.add("empty", "");
+
+		// non-BMP code point U+1D122 (a surrogate pair in UTF-16):
+		string fclefKey = UnicodeUtil.newString(new int[] {0x1D122}, 0, 1);
+		mappings.add(fclefKey, "fclef");
+
+		mappings.add("\uff01", "full-width-exclamation");
+
+		normMap = mappings.build();
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testReaderReset() throws Exception
+	  /// <summary>
+	  /// Reads the filter to exhaustion, resets it, and checks the same content
+	  /// can be read again. </summary>
+	  public virtual void testReaderReset()
+	  {
+		CharFilter filter = new MappingCharFilter(normMap, new StringReader("x"));
+		char[] chars = new char[10];
+
+		// first pass: one char, then end of stream
+		int count = filter.read(chars, 0, 10);
+		assertEquals(1, count);
+		assertEquals('x', chars[0]);
+		count = filter.read(chars, 0, 10);
+		assertEquals(-1, count);
+
+		// rewind and read the same char once more
+		filter.reset();
+		count = filter.read(chars, 0, 10);
+		assertEquals(1, count);
+		assertEquals('x', chars[0]);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testNothingChange() throws Exception
+	  /// <summary>
+	  /// "x" has no rule in normMap, so it is expected to pass through unchanged. </summary>
+	  public virtual void testNothingChange()
+	  {
+		CharFilter filter = new MappingCharFilter(normMap, new StringReader("x"));
+		TokenStream tokens = new MockTokenizer(filter, MockTokenizer.WHITESPACE, false);
+		string[] terms = {"x"};
+		int[] startOffsets = {0};
+		int[] endOffsets = {1};
+		assertTokenStreamContents(tokens, terms, startOffsets, endOffsets, 1);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void test1to1() throws Exception
+	  /// <summary>
+	  /// One input char mapped to one output char: "h" becomes "i". </summary>
+	  public virtual void test1to1()
+	  {
+		CharFilter filter = new MappingCharFilter(normMap, new StringReader("h"));
+		TokenStream tokens = new MockTokenizer(filter, MockTokenizer.WHITESPACE, false);
+		string[] terms = {"i"};
+		int[] startOffsets = {0};
+		int[] endOffsets = {1};
+		assertTokenStreamContents(tokens, terms, startOffsets, endOffsets, 1);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void test1to2() throws Exception
+	  /// <summary>
+	  /// One input char mapped to two output chars: "j" becomes "jj". </summary>
+	  public virtual void test1to2()
+	  {
+		CharFilter filter = new MappingCharFilter(normMap, new StringReader("j"));
+		TokenStream tokens = new MockTokenizer(filter, MockTokenizer.WHITESPACE, false);
+		string[] terms = {"jj"};
+		int[] startOffsets = {0};
+		int[] endOffsets = {1};
+		assertTokenStreamContents(tokens, terms, startOffsets, endOffsets, 1);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void test1to3() throws Exception
+	  /// <summary>
+	  /// One input char mapped to three output chars: "k" becomes "kkk". </summary>
+	  public virtual void test1to3()
+	  {
+		CharFilter filter = new MappingCharFilter(normMap, new StringReader("k"));
+		TokenStream tokens = new MockTokenizer(filter, MockTokenizer.WHITESPACE, false);
+		string[] terms = {"kkk"};
+		int[] startOffsets = {0};
+		int[] endOffsets = {1};
+		assertTokenStreamContents(tokens, terms, startOffsets, endOffsets, 1);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void test2to4() throws Exception
+	  /// <summary>
+	  /// Two input chars mapped to four output chars: "ll" becomes "llll". </summary>
+	  public virtual void test2to4()
+	  {
+		CharFilter filter = new MappingCharFilter(normMap, new StringReader("ll"));
+		TokenStream tokens = new MockTokenizer(filter, MockTokenizer.WHITESPACE, false);
+		string[] terms = {"llll"};
+		int[] startOffsets = {0};
+		int[] endOffsets = {2};
+		assertTokenStreamContents(tokens, terms, startOffsets, endOffsets, 2);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void test2to1() throws Exception
+	  /// <summary>
+	  /// Two input chars mapped to one output char: "aa" becomes "a". </summary>
+	  public virtual void test2to1()
+	  {
+		CharFilter filter = new MappingCharFilter(normMap, new StringReader("aa"));
+		TokenStream tokens = new MockTokenizer(filter, MockTokenizer.WHITESPACE, false);
+		string[] terms = {"a"};
+		int[] startOffsets = {0};
+		int[] endOffsets = {2};
+		assertTokenStreamContents(tokens, terms, startOffsets, endOffsets, 2);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void test3to1() throws Exception
+	  /// <summary>
+	  /// Three input chars mapped to one output char: "bbb" becomes "b". </summary>
+	  public virtual void test3to1()
+	  {
+		CharFilter filter = new MappingCharFilter(normMap, new StringReader("bbb"));
+		TokenStream tokens = new MockTokenizer(filter, MockTokenizer.WHITESPACE, false);
+		string[] terms = {"b"};
+		int[] startOffsets = {0};
+		int[] endOffsets = {3};
+		assertTokenStreamContents(tokens, terms, startOffsets, endOffsets, 3);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void test4to2() throws Exception
+	  /// <summary>
+	  /// Four input chars mapped to two output chars: "cccc" becomes "cc". </summary>
+	  public virtual void test4to2()
+	  {
+		CharFilter filter = new MappingCharFilter(normMap, new StringReader("cccc"));
+		TokenStream tokens = new MockTokenizer(filter, MockTokenizer.WHITESPACE, false);
+		string[] terms = {"cc"};
+		int[] startOffsets = {0};
+		int[] endOffsets = {4};
+		assertTokenStreamContents(tokens, terms, startOffsets, endOffsets, 4);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void test5to0() throws Exception
+	  /// <summary>
+	  /// Five input chars mapped to nothing: "empty" yields no tokens, with a
+	  /// final offset of 5. </summary>
+	  public virtual void test5to0()
+	  {
+		CharFilter filter = new MappingCharFilter(normMap, new StringReader("empty"));
+		TokenStream tokens = new MockTokenizer(filter, MockTokenizer.WHITESPACE, false);
+		string[] noTerms = new string[0];
+		int[] noStartOffsets = {};
+		int[] noEndOffsets = {};
+		assertTokenStreamContents(tokens, noTerms, noStartOffsets, noEndOffsets, 5);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testNonBMPChar() throws Exception
+	  /// <summary>
+	  /// The non-BMP code point U+1D122 is mapped to "fclef"; the expected end
+	  /// offset of 2 reflects its two UTF-16 code units. </summary>
+	  public virtual void testNonBMPChar()
+	  {
+		string fclef = UnicodeUtil.newString(new int[] {0x1D122}, 0, 1);
+		CharFilter filter = new MappingCharFilter(normMap, new StringReader(fclef));
+		TokenStream tokens = new MockTokenizer(filter, MockTokenizer.WHITESPACE, false);
+		string[] terms = {"fclef"};
+		int[] startOffsets = {0};
+		int[] endOffsets = {2};
+		assertTokenStreamContents(tokens, terms, startOffsets, endOffsets, 2);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testFullWidthChar() throws Exception
+	  /// <summary>
+	  /// The single char U+FF01 is mapped to "full-width-exclamation". </summary>
+	  public virtual void testFullWidthChar()
+	  {
+		CharFilter filter = new MappingCharFilter(normMap, new StringReader("\uff01"));
+		TokenStream tokens = new MockTokenizer(filter, MockTokenizer.WHITESPACE, false);
+		string[] terms = {"full-width-exclamation"};
+		int[] startOffsets = {0};
+		int[] endOffsets = {1};
+		assertTokenStreamContents(tokens, terms, startOffsets, endOffsets, 1);
+	  }
+
+	  //
+	  //                1111111111222
+	  //      01234567890123456789012
+	  //(in)  h i j k ll cccc bbb aa
+	  //
+	  //                1111111111222
+	  //      01234567890123456789012
+	  //(out) i i jj kkk llll cc b a
+	  //
+	  //    h, 0, 1 =>    i, 0, 1
+	  //    i, 2, 3 =>    i, 2, 3
+	  //    j, 4, 5 =>   jj, 4, 5
+	  //    k, 6, 7 =>  kkk, 6, 7
+	  //   ll, 8,10 => llll, 8,10
+	  // cccc,11,15 =>   cc,11,15
+	  //  bbb,16,19 =>    b,16,19
+	  //   aa,20,22 =>    a,20,22
+	  //
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testTokenStream() throws Exception
+	  /// <summary>
+	  /// Runs a whitespace-separated mix of mapped inputs through the filter and
+	  /// checks every token together with its offsets into the original input. </summary>
+	  public virtual void testTokenStream()
+	  {
+		string input = "h i j k ll cccc bbb aa";
+		CharFilter filter = new MappingCharFilter(normMap, new StringReader(input));
+		TokenStream tokens = new MockTokenizer(filter, MockTokenizer.WHITESPACE, false);
+		string[] terms = {"i","i","jj","kkk","llll","cc","b","a"};
+		int[] startOffsets = {0,2,4,6,8,11,16,20};
+		int[] endOffsets = {1,3,5,7,10,15,19,22};
+		assertTokenStreamContents(tokens, terms, startOffsets, endOffsets, input.Length);
+	  }
+
+	  //
+	  //
+	  //        0123456789
+	  //(in)    aaaa ll h
+	  //(out-1) aa llll i
+	  //(out-2) a llllllll i
+	  //
+	  // aaaa,0,4 => a,0,4
+	  //   ll,5,7 => llllllll,5,7
+	  //    h,8,9 => i,8,9
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testChained() throws Exception
+	  /// <summary>
+	  /// Stacks two MappingCharFilters built from the same map and checks that the
+	  /// reported offsets still refer to the original input. </summary>
+	  public virtual void testChained()
+	  {
+		string input = "aaaa ll h";
+		MappingCharFilter inner = new MappingCharFilter(normMap, new StringReader(input));
+		CharFilter outer = new MappingCharFilter(normMap, inner);
+		TokenStream tokens = new MockTokenizer(outer, MockTokenizer.WHITESPACE, false);
+		string[] terms = {"a","llllllll","i"};
+		int[] startOffsets = {0,5,8};
+		int[] endOffsets = {4,7,9};
+		assertTokenStreamContents(tokens, terms, startOffsets, endOffsets, input.Length);
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testRandom() throws Exception
+	  /// <summary>
+	  /// Feeds random data through an analyzer whose reader is wrapped with
+	  /// <c>normMap</c>. </summary>
+	  public virtual void testRandom()
+	  {
+		Analyzer randomized = new AnalyzerAnonymousInnerClassHelper(this);
+
+		int iterations = RANDOM_MULTIPLIER * 10000;
+		checkRandomData(random(), randomized, iterations);
+	  }
+
+	  /// <summary>
+	  /// Analyzer that tokenizes on whitespace and wraps every reader in a
+	  /// MappingCharFilter using the enclosing test's <c>normMap</c>.
+	  /// </summary>
+	  private class AnalyzerAnonymousInnerClassHelper : Analyzer
+	  {
+		  private readonly TestMappingCharFilter owner;
+
+		  public AnalyzerAnonymousInnerClassHelper(TestMappingCharFilter outerInstance)
+		  {
+			  owner = outerInstance;
+		  }
+
+
+		  // The same tokenizer serves as both source and sink of the components.
+		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		  {
+			Tokenizer whitespace = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+			return new TokenStreamComponents(whitespace, whitespace);
+		  }
+
+		  // Applies the owner's character mappings before tokenization.
+		  protected internal override Reader initReader(string fieldName, Reader reader)
+		  {
+			return new MappingCharFilter(owner.normMap, reader);
+		  }
+	  }
+
+	  //@Ignore("wrong finalOffset: https://issues.apache.org/jira/browse/LUCENE-3971")
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testFinalOffsetSpecialCase() throws Exception
+	  /// <summary>
+	  /// Runs checkAnalysisConsistency on a fixed text with a map that deletes "t"
+	  /// and also contains a longer rule starting with "t". </summary>
+	  public virtual void testFinalOffsetSpecialCase()
+	  {
+		NormalizeCharMap.Builder rules = new NormalizeCharMap.Builder();
+		rules.add("t", "");
+		// even though this below rule has no effect, the test passes if you remove it!!
+		rules.add("tmakdbl", "c");
+		NormalizeCharMap charMap = rules.build();
+
+		Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper2(this, charMap);
+
+		checkAnalysisConsistency(random(), analyzer, false, "gzw f quaxot");
+	  }
+
+	  /// <summary>
+	  /// Analyzer that tokenizes on whitespace and wraps every reader in a
+	  /// MappingCharFilter using the map supplied at construction.
+	  /// </summary>
+	  private class AnalyzerAnonymousInnerClassHelper2 : Analyzer
+	  {
+		  private readonly TestMappingCharFilter owner;
+
+		  private NormalizeCharMap charMap;
+
+		  public AnalyzerAnonymousInnerClassHelper2(TestMappingCharFilter outerInstance, NormalizeCharMap map)
+		  {
+			  owner = outerInstance;
+			  charMap = map;
+		  }
+
+		  // The same tokenizer serves as both source and sink of the components.
+		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		  {
+			Tokenizer whitespace = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+			return new TokenStreamComponents(whitespace, whitespace);
+		  }
+
+		  // Applies the supplied character mappings before tokenization.
+		  protected internal override Reader initReader(string fieldName, Reader reader)
+		  {
+			return new MappingCharFilter(charMap, reader);
+		  }
+	  }
+
+	  //@Ignore("wrong finalOffset: https://issues.apache.org/jira/browse/LUCENE-3971")
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testRandomMaps() throws Exception
+	  /// <summary>
+	  /// Repeatedly builds a random map and pushes random data through an analyzer
+	  /// that applies it. </summary>
+	  public virtual void testRandomMaps()
+	  {
+		for (int remaining = atLeast(3); remaining > 0; remaining--)
+		{
+		  NormalizeCharMap charMap = randomMap();
+		  Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper3(this, charMap);
+		  checkRandomData(random(), analyzer, 100);
+		}
+	  }
+
+	  /// <summary>
+	  /// Analyzer that tokenizes on whitespace and wraps every reader in a
+	  /// MappingCharFilter using the map supplied at construction.
+	  /// </summary>
+	  private class AnalyzerAnonymousInnerClassHelper3 : Analyzer
+	  {
+		  private readonly TestMappingCharFilter owner;
+
+		  private NormalizeCharMap charMap;
+
+		  public AnalyzerAnonymousInnerClassHelper3(TestMappingCharFilter outerInstance, NormalizeCharMap map)
+		  {
+			  owner = outerInstance;
+			  charMap = map;
+		  }
+
+		  // The same tokenizer serves as both source and sink of the components.
+		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		  {
+			Tokenizer whitespace = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+			return new TokenStreamComponents(whitespace, whitespace);
+		  }
+
+		  // Applies the supplied character mappings before tokenization.
+		  protected internal override Reader initReader(string fieldName, Reader reader)
+		  {
+			return new MappingCharFilter(charMap, reader);
+		  }
+	  }
+
+	  /// <summary>
+	  /// Builds a NormalizeCharMap from at most four random, distinct, non-empty
+	  /// keys, each mapped to a random replacement string.
+	  /// </summary>
+	  /// <returns>the built map; it holds no rules when no usable key was drawn</returns>
+	  private NormalizeCharMap randomMap()
+	  {
+		// The local must not be named "random": inside its own initializer C# would
+		// bind that name to the local instead of the random() method and fail to compile.
+		Random rnd = random();
+		NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
+		// we can't add duplicate keys, or NormalizeCharMap gets angry
+		ISet<string> keys = new HashSet<string>();
+		int num = rnd.Next(5);
+		//System.out.println("NormalizeCharMap=");
+		for (int i = 0; i < num; i++)
+		{
+		  string key = TestUtil.randomSimpleString(rnd);
+		  if (!keys.Contains(key) && key.Length != 0)
+		  {
+			string value = TestUtil.randomSimpleString(rnd);
+			builder.add(key, value);
+			keys.Add(key);
+			//System.out.println("mapping: '" + key + "' => '" + value + "'");
+		  }
+		}
+		return builder.build();
+	  }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void testRandomMaps2() throws Exception
+	  /// <summary>
+	  /// Cross-checks MappingCharFilter against a slow reference implementation.
+	  /// Each iteration builds a random set of mappings, then for 100 random
+	  /// documents computes the expected output and the expected output-to-input
+	  /// offsets by brute force (longest match wins) and compares both with what
+	  /// the filter produces when it is consumed through a random mix of
+	  /// single-char and buffered reads.
+	  /// </summary>
+	  public virtual void testRandomMaps2()
+	  {
+		// The local must not be named "random": inside its own initializer C# would
+		// bind that name to the local instead of the random() method and fail to compile.
+		Random rnd = random();
+		int numIterations = atLeast(3);
+		for (int iter = 0;iter < numIterations;iter++)
+		{
+
+		  if (VERBOSE)
+		  {
+			Console.WriteLine("\nTEST iter=" + iter);
+		  }
+
+		  char endLetter = (char) TestUtil.Next(rnd, 'b', 'z');
+
+		  IDictionary<string, string> map = new Dictionary<string, string>();
+		  NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
+		  int numMappings = atLeast(5);
+		  if (VERBOSE)
+		  {
+			Console.WriteLine("  mappings:");
+		  }
+		  while (map.Count < numMappings)
+		  {
+			string key = TestUtil.randomSimpleStringRange(rnd, 'a', endLetter, 7);
+			if (key.Length != 0 && !map.ContainsKey(key))
+			{
+			  string value = TestUtil.randomSimpleString(rnd);
+			  map[key] = value;
+			  builder.add(key, value);
+			  if (VERBOSE)
+			  {
+				Console.WriteLine("    " + key + " -> " + value);
+			  }
+			}
+		  }
+
+		  NormalizeCharMap charMap = builder.build();
+
+		  if (VERBOSE)
+		  {
+			Console.WriteLine("  test random documents...");
+		  }
+
+		  for (int iter2 = 0;iter2 < 100;iter2++)
+		  {
+			string content = TestUtil.randomSimpleStringRange(rnd, 'a', endLetter, atLeast(1000));
+
+			if (VERBOSE)
+			{
+			  Console.WriteLine("  content=" + content);
+			}
+
+			// Do stupid dog-slow mapping:
+
+			// Output string:
+			StringBuilder output = new StringBuilder();
+
+			// Maps output offset to input offset:
+			IList<int?> inputOffsets = new List<int?>();
+
+			int cumDiff = 0;
+			int charIdx = 0;
+			while (charIdx < content.Length)
+			{
+
+			  int matchLen = -1;
+			  string matchRepl = null;
+
+			  // A dictionary enumerates its KeyValuePair entries directly; no
+			  // converter helper method is needed.
+			  foreach (KeyValuePair<string, string> ent in map)
+			  {
+				string match = ent.Key;
+				if (charIdx + match.Length <= content.Length)
+				{
+				  int limit = charIdx + match.Length;
+				  bool matches = true;
+				  for (int charIdx2 = charIdx;charIdx2 < limit;charIdx2++)
+				  {
+					if (match[charIdx2 - charIdx] != content[charIdx2])
+					{
+					  matches = false;
+					  break;
+					}
+				  }
+
+				  if (matches)
+				  {
+					string repl = ent.Value;
+					if (match.Length > matchLen)
+					{
+					  // Greedy: longer match wins
+					  matchLen = match.Length;
+					  matchRepl = repl;
+					}
+				  }
+				}
+			  }
+
+			  if (matchLen != -1)
+			  {
+				// We found a match here!
+				if (VERBOSE)
+				{
+				  Console.WriteLine("    match=" + content.Substring(charIdx, matchLen) + " @ off=" + charIdx + " repl=" + matchRepl);
+				}
+				output.Append(matchRepl);
+				int minLen = Math.Min(matchLen, matchRepl.Length);
+
+				// Common part, directly maps back to input
+				// offset:
+				for (int outIdx = 0;outIdx < minLen;outIdx++)
+				{
+				  inputOffsets.Add(output.Length - matchRepl.Length + outIdx + cumDiff);
+				}
+
+				cumDiff += matchLen - matchRepl.Length;
+				charIdx += matchLen;
+
+				if (matchRepl.Length < matchLen)
+				{
+				  // Replacement string is shorter than matched
+				  // input: nothing to do
+				}
+				else if (matchRepl.Length > matchLen)
+				{
+				  // Replacement string is longer than matched
+				  // input: for all the "extra" chars we map
+				  // back to a single input offset:
+				  for (int outIdx = matchLen;outIdx < matchRepl.Length;outIdx++)
+				  {
+					inputOffsets.Add(output.Length + cumDiff - 1);
+				  }
+				}
+				else
+				{
+				  // Same length: no change to offset
+				}
+
+				Debug.Assert(inputOffsets.Count == output.Length, "inputOffsets.size()=" + inputOffsets.Count + " vs output.length()=" + output.Length);
+			  }
+			  else
+			  {
+				inputOffsets.Add(output.Length + cumDiff);
+				output.Append(content[charIdx]);
+				charIdx++;
+			  }
+			}
+
+			string expected = output.ToString();
+			if (VERBOSE)
+			{
+			  Console.Write("    expected:");
+			  for (int charIdx2 = 0;charIdx2 < expected.Length;charIdx2++)
+			  {
+				Console.Write(" " + expected[charIdx2] + "/" + inputOffsets[charIdx2]);
+			  }
+			  Console.WriteLine();
+			}
+
+			MappingCharFilter mapFilter = new MappingCharFilter(charMap, new StringReader(content));
+
+			StringBuilder actualBuilder = new StringBuilder();
+			IList<int?> actualInputOffsets = new List<int?>();
+
+			// Now consume the actual mapFilter, somewhat randomly:
+			while (true)
+			{
+			  // System.Random has no nextBoolean(); Next(2) == 0 is a fair coin flip.
+			  if (rnd.Next(2) == 0)
+			  {
+				int ch = mapFilter.read();
+				if (ch == -1)
+				{
+				  break;
+				}
+				actualBuilder.Append((char) ch);
+			  }
+			  else
+			  {
+				char[] buffer = new char[TestUtil.Next(rnd, 1, 100)];
+				int off = buffer.Length == 1 ? 0 : rnd.Next(buffer.Length - 1);
+				int count = mapFilter.read(buffer, off, buffer.Length - off);
+				if (count == -1)
+				{
+				  break;
+				}
+				else
+				{
+				  actualBuilder.Append(buffer, off, count);
+				}
+			  }
+
+			  if (rnd.Next(10) == 7)
+			  {
+				// Map offsets
+				while (actualInputOffsets.Count < actualBuilder.Length)
+				{
+				  actualInputOffsets.Add(mapFilter.correctOffset(actualInputOffsets.Count));
+				}
+			  }
+			}
+
+			// Finish mapping offsets
+			while (actualInputOffsets.Count < actualBuilder.Length)
+			{
+			  actualInputOffsets.Add(mapFilter.correctOffset(actualInputOffsets.Count));
+			}
+
+			string actual = actualBuilder.ToString();
+
+			// Verify:
+			assertEquals(expected, actual);
+			// NOTE(review): assumes assertEquals compares the two lists element by
+			// element, as the Java original did -- confirm for the .NET test base.
+			assertEquals(inputOffsets, actualInputOffsets);
+		  }
+		}
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Charfilter/TestMappingCharFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Charfilter/TestMappingCharFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Charfilter/TestMappingCharFilterFactory.cs
new file mode 100644
index 0000000..078707f
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Charfilter/TestMappingCharFilterFactory.cs
@@ -0,0 +1,82 @@
+namespace org.apache.lucene.analysis.charfilter
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+	using BaseTokenStreamFactoryTestCase = org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
+
+	/// <summary>
+	/// Tests for the "Mapping" char filter factory: escape-sequence parsing via
+	/// <c>parseString</c> and rejection of unknown factory arguments.
+	/// </summary>
+	public class TestMappingCharFilterFactory : BaseTokenStreamFactoryTestCase
+	{
+	  /// <summary>
+	  /// Checks that <c>parseString</c> decodes backslash escapes and <c>\uXXXX</c>
+	  /// sequences, and that malformed escapes are rejected with an exception.
+	  /// </summary>
+	  public virtual void testParseString()
+	  {
+
+		MappingCharFilterFactory f = (MappingCharFilterFactory)charFilterFactory("Mapping");
+
+		try
+		{
+		  f.parseString("\\");
+		  fail("escape character cannot be alone.");
+		}
+		catch (System.ArgumentException)
+		{
+		  // expected: a trailing lone backslash is not a valid escape
+		}
+
+		assertEquals("unexpected escaped characters", "\\\"\n\t\r\b\f", f.parseString("\\\\\\\"\\n\\t\\r\\b\\f"));
+		assertEquals("unexpected escaped characters", "A", f.parseString("\\u0041"));
+		assertEquals("unexpected escaped characters", "AB", f.parseString("\\u0041\\u0042"));
+
+		try
+		{
+		  f.parseString("\\u000");
+		  fail("invalid length check.");
+		}
+		catch (System.ArgumentException)
+		{
+		  // expected: only three hex digits follow \u
+		}
+
+		try
+		{
+		  f.parseString("\\u123x");
+		  fail("invalid hex number check.");
+		}
+		catch (System.FormatException)
+		{
+		  // expected: 'x' is not a hexadecimal digit
+		}
+	  }
+
+	  /// <summary>
+	  /// Test that bogus arguments result in exception </summary>
+	  public virtual void testBogusArguments()
+	  {
+		try
+		{
+		  charFilterFactory("Mapping", "bogusArg", "bogusValue");
+		  fail();
+		}
+		catch (System.ArgumentException expected)
+		{
+		  // System.String exposes Contains (PascalCase); the Java-style
+		  // lowercase 'contains' left by the converter does not exist in .NET.
+		  assertTrue(expected.Message.Contains("Unknown parameters"));
+		}
+	  }
+	}
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/c64856a7/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cjk/TestCJKAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cjk/TestCJKAnalyzer.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cjk/TestCJKAnalyzer.cs
new file mode 100644
index 0000000..0b2a3b1
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Cjk/TestCJKAnalyzer.cs
@@ -0,0 +1,289 @@
+using System;
+
+namespace org.apache.lucene.analysis.cjk
+{
+
+	/*
+	 * Licensed to the Apache Software Foundation (ASF) under one or more
+	 * contributor license agreements.  See the NOTICE file distributed with
+	 * this work for additional information regarding copyright ownership.
+	 * The ASF licenses this file to You under the Apache License, Version 2.0
+	 * (the "License"); you may not use this file except in compliance with
+	 * the License.  You may obtain a copy of the License at
+	 *
+	 *     http://www.apache.org/licenses/LICENSE-2.0
+	 *
+	 * Unless required by applicable law or agreed to in writing, software
+	 * distributed under the License is distributed on an "AS IS" BASIS,
+	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	 * See the License for the specific language governing permissions and
+	 * limitations under the License.
+	 */
+
+
+	using MappingCharFilter = org.apache.lucene.analysis.charfilter.MappingCharFilter;
+	using NormalizeCharMap = org.apache.lucene.analysis.charfilter.NormalizeCharMap;
+	using KeywordTokenizer = org.apache.lucene.analysis.core.KeywordTokenizer;
+	using StopFilter = org.apache.lucene.analysis.core.StopFilter;
+	using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
+	using TypeAttribute = org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+	using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+
+	/// <summary>
+	/// Most tests adapted from TestCJKTokenizer
+	/// </summary>
+	public class TestCJKAnalyzer : BaseTokenStreamTestCase
+	{
+	  private Analyzer analyzer = new CJKAnalyzer(TEST_VERSION_CURRENT);
+
+	  /// <summary>
+	  /// Ten ideographs with no separators produce nine overlapping bigrams, all typed &lt;DOUBLE&gt;.
+	  /// </summary>
+	  public virtual void testJa1()
+	  {
+		assertAnalyzesTo(analyzer, "一二三四五六七八九十", new string[] {"一二", "二三", "三四", "四五", "五六", "六七", "七八", "八九", "九十"}, new int[] {0, 1, 2, 3, 4, 5, 6, 7, 8}, new int[] {2, 3, 4, 5, 6, 7, 8, 9, 10}, new string[] {"<DOUBLE>", "<DOUBLE>", "<DOUBLE>", "<DOUBLE>", "<DOUBLE>", "<DOUBLE>", "<DOUBLE>", "<DOUBLE>", "<DOUBLE>"}, new int[] {1, 1, 1, 1, 1, 1, 1, 1, 1});
+	  }
+
+	  /// <summary>
+	  /// Space-separated ideograph groups: bigrams do not span the spaces, and an
+	  /// isolated ideograph is emitted alone as &lt;SINGLE&gt;.
+	  /// </summary>
+	  public virtual void testJa2()
+	  {
+		assertAnalyzesTo(analyzer, "一 二三四 五六七八九 十", new string[] {"一", "二三", "三四", "五六", "六七", "七八", "八九", "十"}, new int[] {0, 2, 3, 6, 7, 8, 9, 12}, new int[] {1, 4, 5, 8, 9, 10, 11, 13}, new string[] {"<SINGLE>", "<DOUBLE>", "<DOUBLE>", "<DOUBLE>", "<DOUBLE>", "<DOUBLE>", "<DOUBLE>", "<SINGLE>"}, new int[] {1, 1, 1, 1, 1, 1, 1, 1});
+	  }
+
+	  /// <summary>
+	  /// Latin words are emitted whole as &lt;ALPHANUM&gt; tokens (no bigramming).
+	  /// </summary>
+	  public virtual void testC()
+	  {
+		assertAnalyzesTo(analyzer, "abc defgh ijklmn opqrstu vwxy z", new string[] {"abc", "defgh", "ijklmn", "opqrstu", "vwxy", "z"}, new int[] {0, 4, 10, 17, 25, 30}, new int[] {3, 9, 16, 24, 29, 31}, new string[] {"<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>"}, new int[] {1, 1, 1, 1, 1, 1});
+	  }
+
+	  /// <summary>
+	  /// LUCENE-2207: wrong offset calculated by end()
+	  /// </summary>
+	  public virtual void testFinalOffset()
+	  {
+		assertAnalyzesTo(analyzer, "あい", new string[] {"あい"}, new int[] {0}, new int[] {2}, new string[] {"<DOUBLE>"}, new int[] {1});
+
+		assertAnalyzesTo(analyzer, "あい   ", new string[] {"あい"}, new int[] {0}, new int[] {2}, new string[] {"<DOUBLE>"}, new int[] {1});
+
+		assertAnalyzesTo(analyzer, "test", new string[] {"test"}, new int[] {0}, new int[] {4}, new string[] {"<ALPHANUM>"}, new int[] {1});
+
+		assertAnalyzesTo(analyzer, "test   ", new string[] {"test"}, new int[] {0}, new int[] {4}, new string[] {"<ALPHANUM>"}, new int[] {1});
+
+		assertAnalyzesTo(analyzer, "あいtest", new string[] {"あい", "test"}, new int[] {0, 2}, new int[] {2, 6}, new string[] {"<DOUBLE>", "<ALPHANUM>"}, new int[] {1, 1});
+
+		assertAnalyzesTo(analyzer, "testあい    ", new string[] {"test", "あい"}, new int[] {0, 4}, new int[] {4, 6}, new string[] {"<ALPHANUM>", "<DOUBLE>"}, new int[] {1, 1});
+	  }
+
+	  /// <summary>
+	  /// Kana runs on either side of a Latin word are bigrammed separately; the Latin
+	  /// word stays a single &lt;ALPHANUM&gt; token.
+	  /// </summary>
+	  public virtual void testMix()
+	  {
+		assertAnalyzesTo(analyzer, "あいうえおabcかきくけこ", new string[] {"あい", "いう", "うえ", "えお", "abc", "かき", "きく", "くけ", "けこ"}, new int[] {0, 1, 2, 3, 5, 8, 9, 10, 11}, new int[] {2, 3, 4, 5, 8, 10, 11, 12, 13}, new string[] {"<DOUBLE>", "<DOUBLE>", "<DOUBLE>", "<DOUBLE>", "<ALPHANUM>", "<DOUBLE>", "<DOUBLE>", "<DOUBLE>", "<DOUBLE>"}, new int[] {1, 1, 1, 1, 1, 1, 1, 1, 1});
+	  }
+
+	  /// <summary>
+	  /// Like <see cref="testMix"/>, but with a kana embedded between Latin letters and a
+	  /// space before the last kana; both isolated kana come out as &lt;SINGLE&gt;.
+	  /// </summary>
+	  public virtual void testMix2()
+	  {
+		assertAnalyzesTo(analyzer, "あいうえおabんcかきくけ こ", new string[] {"あい", "いう", "うえ", "えお", "ab", "ん", "c", "かき", "きく", "くけ", "こ"}, new int[] {0, 1, 2, 3, 5, 7, 8, 9, 10, 11, 14}, new int[] {2, 3, 4, 5, 7, 8, 9, 11, 12, 13, 15}, new string[] {"<DOUBLE>", "<DOUBLE>", "<DOUBLE>", "<DOUBLE>", "<ALPHANUM>", "<SINGLE>", "<ALPHANUM>", "<DOUBLE>", "<DOUBLE>", "<DOUBLE>", "<SINGLE>"}, new int[] {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1});
+	  }
+
+	  /// <summary>
+	  /// Non-english text (outside of CJK) is treated normally, according to unicode rules
+	  /// </summary>
+	  public virtual void testNonIdeographic()
+	  {
+		assertAnalyzesTo(analyzer, "一 روبرت موير", new string[] {"一", "روبرت", "موير"}, new int[] {0, 2, 8}, new int[] {1, 7, 12}, new string[] {"<SINGLE>", "<ALPHANUM>", "<ALPHANUM>"}, new int[] {1, 1, 1});
+	  }
+
+	  /// <summary>
+	  /// Same as the above, except with a nonspacing mark to show correctness.
+	  /// </summary>
+	  public virtual void testNonIdeographicNonLetter()
+	  {
+		assertAnalyzesTo(analyzer, "一 رُوبرت موير", new string[] {"一", "رُوبرت", "موير"}, new int[] {0, 2, 9}, new int[] {1, 8, 13}, new string[] {"<SINGLE>", "<ALPHANUM>", "<ALPHANUM>"}, new int[] {1, 1, 1});
+	  }
+
+	  /// <summary>
+	  /// Bigramming when the first character takes two UTF-16 code units: the first
+	  /// bigram is expected to span offsets 0 to 3.
+	  /// </summary>
+	  public virtual void testSurrogates()
+	  {
+		assertAnalyzesTo(analyzer, "𩬅艱鍟䇹愯瀛", new string[] {"𩬅艱", "艱鍟", "鍟䇹", "䇹愯", "愯瀛"}, new int[] {0, 2, 3, 4, 5}, new int[] {3, 4, 5, 6, 7}, new string[] {"<DOUBLE>", "<DOUBLE>", "<DOUBLE>", "<DOUBLE>", "<DOUBLE>"}, new int[] {1, 1, 1, 1, 1});
+	  }
+
+	  /// <summary>
+	  /// Runs two different inputs through the same analyzer instance, one after the other.
+	  /// </summary>
+	  public virtual void testReusableTokenStream()
+	  {
+		assertAnalyzesTo(analyzer, "あいうえおabcかきくけこ", new string[] {"あい", "いう", "うえ", "えお", "abc", "かき", "きく", "くけ", "けこ"}, new int[] {0, 1, 2, 3, 5, 8, 9, 10, 11}, new int[] {2, 3, 4, 5, 8, 10, 11, 12, 13}, new string[] {"<DOUBLE>", "<DOUBLE>", "<DOUBLE>", "<DOUBLE>", "<ALPHANUM>", "<DOUBLE>", "<DOUBLE>", "<DOUBLE>", "<DOUBLE>"}, new int[] {1, 1, 1, 1, 1, 1, 1, 1, 1});
+
+		assertAnalyzesTo(analyzer, "あいうえおabんcかきくけ こ", new string[] {"あい", "いう", "うえ", "えお", "ab", "ん", "c", "かき", "きく", "くけ", "こ"}, new int[] {0, 1, 2, 3, 5, 7, 8, 9, 10, 11, 14}, new int[] {2, 3, 4, 5, 7, 8, 9, 11, 12, 13, 15}, new string[] {"<DOUBLE>", "<DOUBLE>", "<DOUBLE>", "<DOUBLE>", "<ALPHANUM>", "<SINGLE>", "<ALPHANUM>", "<DOUBLE>", "<DOUBLE>", "<DOUBLE>", "<SINGLE>"}, new int[] {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1});
+	  }
+
+	  /// <summary>
+	  /// A lone ideograph is emitted as one &lt;SINGLE&gt; token.
+	  /// </summary>
+	  public virtual void testSingleChar()
+	  {
+		assertAnalyzesTo(analyzer, "一", new string[] {"一"}, new int[] {0}, new int[] {1}, new string[] {"<SINGLE>"}, new int[] {1});
+	  }
+
+	  /// <summary>
+	  /// Three ideographs produce two overlapping &lt;DOUBLE&gt; bigrams.
+	  /// </summary>
+	  public virtual void testTokenStream()
+	  {
+		assertAnalyzesTo(analyzer, "一丁丂", new string[] {"一丁", "丁丂"}, new int[] {0, 1}, new int[] {2, 3}, new string[] {"<DOUBLE>", "<DOUBLE>"}, new int[] {1, 1});
+	  }
+
+	  /// <summary>
+	  /// test that offsets are correct when mappingcharfilter is previously applied </summary>
+	  public virtual void testChangedOffsets()
+	  {
+		NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
+		builder.add("a", "一二");
+		builder.add("b", "二三");
+		NormalizeCharMap norm = builder.build();
+		Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this, norm);
+
+		assertAnalyzesTo(analyzer, "ab", new string[] {"一二", "二二", "二三"}, new int[] {0, 0, 1}, new int[] {1, 1, 2});
+
+		// note: offsets are strange since this is how the charfilter maps them... 
+		// before bigramming, the 4 tokens look like:
+		//   { 0, 0, 1, 1 },
+		//   { 0, 1, 1, 2 }
+	  }
+
+	  /// <summary>
+	  /// Analyzer for <see cref="testChangedOffsets"/>: StandardTokenizer followed by
+	  /// CJKBigramFilter, with the input reader wrapped in a MappingCharFilter.
+	  /// </summary>
+	  private class AnalyzerAnonymousInnerClassHelper : Analyzer
+	  {
+		  private readonly TestCJKAnalyzer outerInstance;
+
+		  private readonly NormalizeCharMap norm;
+
+		  public AnalyzerAnonymousInnerClassHelper(TestCJKAnalyzer outerInstance, NormalizeCharMap norm)
+		  {
+			  this.outerInstance = outerInstance;
+			  this.norm = norm;
+		  }
+
+		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		  {
+			Tokenizer tokenizer = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
+			return new TokenStreamComponents(tokenizer, new CJKBigramFilter(tokenizer));
+		  }
+
+		  protected internal override Reader initReader(string fieldName, Reader reader)
+		  {
+			return new MappingCharFilter(norm, reader);
+		  }
+	  }
+
+	  /// <summary>
+	  /// Token filter that sets the type of every token passing through it to the
+	  /// StandardTokenizer IDEOGRAPHIC type name.
+	  /// </summary>
+	  private class FakeStandardTokenizer : TokenFilter
+	  {
+		internal readonly TypeAttribute typeAtt;
+
+		public FakeStandardTokenizer(TokenStream input) : base(input)
+		{
+		  // Assigned here rather than in a field initializer: a C# field
+		  // initializer cannot call an instance member such as addAttribute.
+		  typeAtt = addAttribute(typeof(TypeAttribute));
+		}
+
+		/// <summary>
+		/// Advances the wrapped stream and, when a token is available, overwrites its type.
+		/// </summary>
+		/// <returns>true if a token was produced, false at end of stream</returns>
+		public override bool incrementToken()
+		{
+		  if (input.incrementToken())
+		  {
+			typeAtt.Type = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.IDEOGRAPHIC];
+			return true;
+		  }
+		  else
+		  {
+			return false;
+		  }
+		}
+	  }
+
+	  /// <summary>
+	  /// A lone ideograph still comes out as &lt;SINGLE&gt; when a StopFilter sits between
+	  /// the tokenizer and the bigram filter.
+	  /// </summary>
+	  public virtual void testSingleChar2()
+	  {
+		Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper3(this);
+
+		assertAnalyzesTo(analyzer, "一", new string[] {"一"}, new int[] {0}, new int[] {1}, new string[] {"<SINGLE>"}, new int[] {1});
+	  }
+
+	  /// <summary>
+	  /// Analyzer for <see cref="testSingleChar2"/>: whitespace MockTokenizer, then
+	  /// FakeStandardTokenizer, an empty-set StopFilter and CJKBigramFilter.
+	  /// Named with the suffix 3 because the unsuffixed and 2-suffixed helper names
+	  /// are already taken by other nested classes of this test.
+	  /// </summary>
+	  private class AnalyzerAnonymousInnerClassHelper3 : Analyzer
+	  {
+		  private readonly TestCJKAnalyzer outerInstance;
+
+		  public AnalyzerAnonymousInnerClassHelper3(TestCJKAnalyzer outerInstance)
+		  {
+			  this.outerInstance = outerInstance;
+		  }
+
+
+		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		  {
+			Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+			TokenFilter filter = new FakeStandardTokenizer(tokenizer);
+			filter = new StopFilter(TEST_VERSION_CURRENT, filter, CharArraySet.EMPTY_SET);
+			filter = new CJKBigramFilter(filter);
+			return new TokenStreamComponents(tokenizer, filter);
+		  }
+	  }
+
+	  /// <summary>
+	  /// blast some random strings through the analyzer </summary>
+	  public virtual void testRandomStrings()
+	  {
+		checkRandomData(random(), new CJKAnalyzer(TEST_VERSION_CURRENT), 1000 * RANDOM_MULTIPLIER);
+	  }
+
+	  /// <summary>
+	  /// blast some random huge strings (maximum length argument 8192) through the analyzer </summary>
+	  public virtual void testRandomHugeStrings()
+	  {
+		// random() is passed directly: a local named 'random' would hide the
+		// random() method inside its own initializer and fail to compile.
+		checkRandomData(random(), new CJKAnalyzer(TEST_VERSION_CURRENT), 100 * RANDOM_MULTIPLIER, 8192);
+	  }
+
+	  /// <summary>
+	  /// An empty input through KeywordTokenizer + CJKBigramFilter yields a single empty term.
+	  /// </summary>
+	  public virtual void testEmptyTerm()
+	  {
+		Analyzer a = new AnalyzerAnonymousInnerClassHelper2(this);
+		checkOneTerm(a, "", "");
+	  }
+
+	  /// <summary>
+	  /// Analyzer for <see cref="testEmptyTerm"/>: KeywordTokenizer followed by CJKBigramFilter.
+	  /// </summary>
+	  private class AnalyzerAnonymousInnerClassHelper2 : Analyzer
+	  {
+		  private readonly TestCJKAnalyzer outerInstance;
+
+		  public AnalyzerAnonymousInnerClassHelper2(TestCJKAnalyzer outerInstance)
+		  {
+			  this.outerInstance = outerInstance;
+		  }
+
+		  protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+		  {
+			Tokenizer tokenizer = new KeywordTokenizer(reader);
+			return new TokenStreamComponents(tokenizer, new CJKBigramFilter(tokenizer));
+		  }
+	  }
+	}
+
+}
\ No newline at end of file


Mime
View raw message