lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From aro...@apache.org
Subject svn commit: r534192 [4/19] - in /incubator/lucene.net/trunk/C#: ./ src/ src/Demo/ src/Demo/DeleteFiles/ src/Demo/DemoLib/ src/Demo/DemoLib/HTML/ src/Demo/IndexFiles/ src/Demo/IndexHtml/ src/Demo/SearchFiles/ src/Lucene.Net/ src/Lucene.Net/Analysis/ src...
Date Tue, 01 May 2007 18:45:35 GMT
Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizer.jj
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/Standard/StandardTokenizer.jj?view=diff&rev=534192&r1=534191&r2=534192
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizer.jj (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizer.jj Tue May  1 11:45:26 2007
@@ -1,178 +1,196 @@
-/**f
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-options {
-  STATIC = false;
-//IGNORE_CASE = true;
-//BUILD_PARSER = false;
-  UNICODE_INPUT = true;
-  USER_CHAR_STREAM = true;
-  OPTIMIZE_TOKEN_MANAGER = true;
-//DEBUG_TOKEN_MANAGER = true;
-}
-PARSER_BEGIN(StandardTokenizer)
-
-package org.apache.lucene.analysis.standard;
-
-import java.io.*;
-
-/** A grammar-based tokenizer constructed with JavaCC.
- *
- * <p> This should be a good tokenizer for most European-language documents:
- *
- * <ul>
- *   <li>Splits words at punctuation characters, removing punctuation. However, a 
- *     dot that's not followed by whitespace is considered part of a token.
- *   <li>Splits words at hyphens, unless there's a number in the token, in which case
- *     the whole token is interpreted as a product number and is not split.
- *   <li>Recognizes email addresses and internet hostnames as one token.
- * </ul>
- *
- * <p>Many applications have specific tokenizer needs.  If this tokenizer does
- * not suit your application, please consider copying this source code
- * directory to your project and maintaining your own grammar-based tokenizer.
- */
-public class StandardTokenizer extends org.apache.lucene.analysis.Tokenizer {
-
-  /** Constructs a tokenizer for this Reader. */
-  public StandardTokenizer(Reader reader) {
-    this(new FastCharStream(reader));
-    this.input = reader;
-  }
-}
-
-PARSER_END(StandardTokenizer)
-
-TOKEN : {					  // token patterns
-
-  // basic word: a sequence of digits & letters
-  <ALPHANUM: (<LETTER>|<DIGIT>|<KOREAN>)+ >
-
-  // internal apostrophes: O'Reilly, you're, O'Reilly's
-  // use a post-filter to remove possesives
-| <APOSTROPHE: <ALPHA> ("'" <ALPHA>)+ >
-
-  // acronyms: U.S.A., I.B.M., etc.
-  // use a post-filter to remove dots
-| <ACRONYM: <ALPHA> "." (<ALPHA> ".")+ >
-
-  // company names like AT&T and Excite@Home.
-| <COMPANY: <ALPHA> ("&"|"@") <ALPHA> >
-
-  // email addresses
-| <EMAIL: <ALPHANUM> (("."|"-"|"_") <ALPHANUM>)* "@" <ALPHANUM> (("."|"-") <ALPHANUM>)+ >
-
-  // hostname
-| <HOST: <ALPHANUM> ("." <ALPHANUM>)+ >
-
-  // floating point, serial, model numbers, ip addresses, etc.
-  // every other segment must have at least one digit
-| <NUM: (<ALPHANUM> <P> <HAS_DIGIT>
-       | <HAS_DIGIT> <P> <ALPHANUM>
-       | <ALPHANUM> (<P> <HAS_DIGIT> <P> <ALPHANUM>)+
-       | <HAS_DIGIT> (<P> <ALPHANUM> <P> <HAS_DIGIT>)+
-       | <ALPHANUM> <P> <HAS_DIGIT> (<P> <ALPHANUM> <P> <HAS_DIGIT>)+
-       | <HAS_DIGIT> <P> <ALPHANUM> (<P> <HAS_DIGIT> <P> <ALPHANUM>)+
-        )
-  >
-| <#P: ("_"|"-"|"/"|"."|",") >
-| <#HAS_DIGIT:					  // at least one digit
-    (<LETTER>|<DIGIT>)*
-    <DIGIT>
-    (<LETTER>|<DIGIT>)*
-  >
-
-| < #ALPHA: (<LETTER>)+>
-| < #LETTER:					  // unicode letters
-      [
-       "\u0041"-"\u005a",
-       "\u0061"-"\u007a",
-       "\u00c0"-"\u00d6",
-       "\u00d8"-"\u00f6",
-       "\u00f8"-"\u00ff",
-       "\u0100"-"\u1fff"
-      ]
-  >
-| < CJ:                                          // Chinese, Japanese
-      [
-       "\u3040"-"\u318f",
-       "\u3300"-"\u337f",
-       "\u3400"-"\u3d2d",
-       "\u4e00"-"\u9fff",
-       "\uf900"-"\ufaff"
-      ]
-  >
-| < KOREAN:                                          // Korean
-      [
-       "\uac00"-"\ud7af"
-      ]
-  >
-| < #DIGIT:					  // unicode digits
-      [
-       "\u0030"-"\u0039",
-       "\u0660"-"\u0669",
-       "\u06f0"-"\u06f9",
-       "\u0966"-"\u096f",
-       "\u09e6"-"\u09ef",
-       "\u0a66"-"\u0a6f",
-       "\u0ae6"-"\u0aef",
-       "\u0b66"-"\u0b6f",
-       "\u0be7"-"\u0bef",
-       "\u0c66"-"\u0c6f",
-       "\u0ce6"-"\u0cef",
-       "\u0d66"-"\u0d6f",
-       "\u0e50"-"\u0e59",
-       "\u0ed0"-"\u0ed9",
-       "\u1040"-"\u1049"
-      ]
-  >
-}
-
-SKIP : {					  // skip unrecognized chars
- <NOISE: ~[] >
-}
-
-/** Returns the next token in the stream, or null at EOS.
- * <p>The returned token's type is set to an element of {@link
- * StandardTokenizerConstants#tokenImage}.
- */
-org.apache.lucene.analysis.Token next() throws IOException :
-{
-  Token token = null;
-}
-{
-  ( token = <ALPHANUM> |
-    token = <APOSTROPHE> |
-    token = <ACRONYM> |
-    token = <COMPANY> |
-    token = <EMAIL> |
-    token = <HOST> |
-    token = <NUM> |
-    token = <CJ> |
-    token = <EOF>
-   )
-    {
-      if (token.kind == EOF) {
-	return null;
-      } else {
-	return
-	  new org.apache.lucene.analysis.Token(token.image,
-					token.beginColumn,token.endColumn,
-					tokenImage[token.kind]);
-      }
-    }
-}
+/**f
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+options {
+  STATIC = false;
+//IGNORE_CASE = true;
+//BUILD_PARSER = false;
+  UNICODE_INPUT = true;
+  USER_CHAR_STREAM = true;
+  OPTIMIZE_TOKEN_MANAGER = true;
+//DEBUG_TOKEN_MANAGER = true;
+}
+PARSER_BEGIN(StandardTokenizer)
+
+package Lucene.Net.Analysis.Standard;
+
+import java.io.*;
+
+/** A grammar-based tokenizer constructed with JavaCC.
+ *
+ * <p> This should be a good tokenizer for most European-language documents:
+ *
+ * <ul>
+ *   <li>Splits words at punctuation characters, removing punctuation. However, a 
+ *     dot that's not followed by whitespace is considered part of a token.
+ *   <li>Splits words at hyphens, unless there's a number in the token, in which case
+ *     the whole token is interpreted as a product number and is not split.
+ *   <li>Recognizes email addresses and internet hostnames as one token.
+ * </ul>
+ *
+ * <p>Many applications have specific tokenizer needs.  If this tokenizer does
+ * not suit your application, please consider copying this source code
+ * directory to your project and maintaining your own grammar-based tokenizer.
+ */
+public class StandardTokenizer extends Lucene.Net.Analysis.Tokenizer {
+
+  /** Constructs a tokenizer for this Reader. */
+  public StandardTokenizer(Reader reader) {
+    this(new FastCharStream(reader));
+    this.input = reader;
+  }
+}
+
+PARSER_END(StandardTokenizer)
+
+TOKEN : {					  // token patterns
+
+  // basic word: a sequence of digits & letters
+  <ALPHANUM: (<LETTER>|<DIGIT>|<KOREAN>)+ >
+
+  // internal apostrophes: O'Reilly, you're, O'Reilly's
+  // use a post-filter to remove possesives
+| <APOSTROPHE: <ALPHA> ("'" <ALPHA>)+ >
+
+  // acronyms: U.S.A., I.B.M., etc.
+  // use a post-filter to remove dots
+| <ACRONYM: <ALPHA> "." (<ALPHA> ".")+ >
+
+  // company names like AT&T and Excite@Home.
+| <COMPANY: <ALPHA> ("&"|"@") <ALPHA> >
+
+  // email addresses
+| <EMAIL: <ALPHANUM> (("."|"-"|"_") <ALPHANUM>)* "@" <ALPHANUM> (("."|"-") <ALPHANUM>)+ >
+
+  // hostname
+| <HOST: <ALPHANUM> ("." <ALPHANUM>)+ >
+
+  // floating point, serial, model numbers, ip addresses, etc.
+  // every other segment must have at least one digit
+| <NUM: (<ALPHANUM> <P> <HAS_DIGIT>
+       | <HAS_DIGIT> <P> <ALPHANUM>
+       | <ALPHANUM> (<P> <HAS_DIGIT> <P> <ALPHANUM>)+
+       | <HAS_DIGIT> (<P> <ALPHANUM> <P> <HAS_DIGIT>)+
+       | <ALPHANUM> <P> <HAS_DIGIT> (<P> <ALPHANUM> <P> <HAS_DIGIT>)+
+       | <HAS_DIGIT> <P> <ALPHANUM> (<P> <HAS_DIGIT> <P> <ALPHANUM>)+
+        )
+  >
+| <#P: ("_"|"-"|"/"|"."|",") >
+| <#HAS_DIGIT:					  // at least one digit
+    (<LETTER>|<DIGIT>)*
+    <DIGIT>
+    (<LETTER>|<DIGIT>)*
+  >
+
+| < #ALPHA: (<LETTER>)+>
+| < #LETTER:					  // unicode letters
+      [
+       "\u0041"-"\u005a",
+       "\u0061"-"\u007a",
+       "\u00c0"-"\u00d6",
+       "\u00d8"-"\u00f6",
+       "\u00f8"-"\u00ff",
+       "\u0100"-"\u1fff",
+       "\uffa0"-"\uffdc"
+      ]
+  >
+| < CJ:                                          // Chinese, Japanese
+      [
+       "\u3040"-"\u318f",
+       "\u3100"-"\u312f",    // BaPoMoFo (aka ZhuYin)
+       "\u3040"-"\u309F",    // Japanese: Hiragana
+       "\u30A0"-"\u30FF",    // Japanese: Katakana
+       "\u31F0"-"\u31FF",    // Japanese: Katakana Phonetic Extensions
+       "\u3300"-"\u337f",
+       "\u3400"-"\u4dbf",    // CJK Unified Ideographs Ext. A
+       "\u4e00"-"\u9fff",
+       "\uf900"-"\ufaff",
+       "\uff65"-"\uff9f"
+
+// Otis: consider adding these, too
+//
+// 2E80-2EFF: CJK Radicals Supplement
+// 2F00-2FDF: Kangxi Radicals
+// 3190-319F: Kanbun
+// 31C0-31EF: CJK Strokes
+// 4E00-9FBF: CJK Unified
+// F900-FAFF: CJK Compatibility Ideographs
+
+      ]
+  >
+| < KOREAN:                                          // Korean
+      [
+       "\uac00"-"\ud7af",     // Hangul Syllables
+       "\u1100"-"\u11ff"      // Hangul Jamo
+       // "\uac00"-"\ud7a3"
+      ]
+  >
+| < #DIGIT:					  // unicode digits
+      [
+       "\u0030"-"\u0039",
+       "\u0660"-"\u0669",
+       "\u06f0"-"\u06f9",
+       "\u0966"-"\u096f",
+       "\u09e6"-"\u09ef",
+       "\u0a66"-"\u0a6f",
+       "\u0ae6"-"\u0aef",
+       "\u0b66"-"\u0b6f",
+       "\u0be7"-"\u0bef",
+       "\u0c66"-"\u0c6f",
+       "\u0ce6"-"\u0cef",
+       "\u0d66"-"\u0d6f",
+       "\u0e50"-"\u0e59",
+       "\u0ed0"-"\u0ed9",
+       "\u1040"-"\u1049"
+      ]
+  >
+}
+
+SKIP : {					  // skip unrecognized chars
+ <NOISE: ~[] >
+}
+
+/** Returns the next token in the stream, or null at EOS.
+ * <p>The returned token's type is set to an element of {@link
+ * StandardTokenizerConstants#tokenImage}.
+ */
+Lucene.Net.Analysis.Token next() throws IOException :
+{
+  Token token = null;
+}
+{
+  ( token = <ALPHANUM> |
+    token = <APOSTROPHE> |
+    token = <ACRONYM> |
+    token = <COMPANY> |
+    token = <EMAIL> |
+    token = <HOST> |
+    token = <NUM> |
+    token = <CJ> |
+    token = <EOF>
+   )
+    {
+      if (token.kind == EOF) {
+	return null;
+      } else {
+	return
+	  new Lucene.Net.Analysis.Token(token.image,
+					token.beginColumn,token.endColumn,
+					tokenImage[token.kind]);
+      }
+    }
+}

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizerConstants.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/Standard/StandardTokenizerConstants.cs?view=diff&rev=534192&r1=534191&r2=534192
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizerConstants.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizerConstants.cs Tue May  1 11:45:26 2007
@@ -21,25 +21,25 @@
 namespace Lucene.Net.Analysis.Standard
 {
 	
-	public class StandardTokenizerConstants
+    public class StandardTokenizerConstants
     {
-		public const int EOF = 0;
-		public const int ALPHANUM = 1;
-		public const int APOSTROPHE = 2;
-		public const int ACRONYM = 3;
-		public const int COMPANY = 4;
-		public const int EMAIL = 5;
-		public const int HOST = 6;
-		public const int NUM = 7;
-		public const int P = 8;
-		public const int HAS_DIGIT = 9;
-		public const int ALPHA = 10;
-		public const int LETTER = 11;
-		public const int CJ = 12;
-		public const int KOREAN = 13;
-		public const int DIGIT = 14;
-		public const int NOISE = 15;
-		public const int DEFAULT = 0;
-		public static System.String[] tokenImage = new System.String[]{"<EOF>", "<ALPHANUM>", "<APOSTROPHE>", "<ACRONYM>", "<COMPANY>", "<EMAIL>", "<HOST>", "<NUM>", "<P>", "<HAS_DIGIT>", "<ALPHA>", "<LETTER>", "<CJ>", "<KOREAN>", "<DIGIT>", "<NOISE>"};
-	}
+        public const int EOF = 0;
+        public const int ALPHANUM = 1;
+        public const int APOSTROPHE = 2;
+        public const int ACRONYM = 3;
+        public const int COMPANY = 4;
+        public const int EMAIL = 5;
+        public const int HOST = 6;
+        public const int NUM = 7;
+        public const int P = 8;
+        public const int HAS_DIGIT = 9;
+        public const int ALPHA = 10;
+        public const int LETTER = 11;
+        public const int CJ = 12;
+        public const int KOREAN = 13;
+        public const int DIGIT = 14;
+        public const int NOISE = 15;
+        public const int DEFAULT = 0;
+        public static System.String[] tokenImage = new System.String[]{"<EOF>", "<ALPHANUM>", "<APOSTROPHE>", "<ACRONYM>", "<COMPANY>", "<EMAIL>", "<HOST>", "<NUM>", "<P>", "<HAS_DIGIT>", "<ALPHA>", "<LETTER>", "<CJ>", "<KOREAN>", "<DIGIT>", "<NOISE>"};
+    }
 }

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizerTokenManager.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/Standard/StandardTokenizerTokenManager.cs?view=diff&rev=534192&r1=534191&r2=534192
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizerTokenManager.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/StandardTokenizerTokenManager.cs Tue May  1 11:45:26 2007
@@ -30,7 +30,7 @@
             input_stream.Done(); 
         }
 
-		private void  InitBlock()
+        private void  InitBlock()
 		{
 			System.IO.StreamWriter temp_writer;
 			temp_writer = new System.IO.StreamWriter(System.Console.OpenStandardOutput(), System.Console.Out.Encoding);
@@ -80,23 +80,25 @@
 			JjCheckNAdd(jjnextStates[start]);
 			JjCheckNAdd(jjnextStates[start + 1]);
 		}
-		internal static readonly ulong[] jjbitVec0 = new ulong[]{0x1ff0000000000000L, 0xffffffffffffc000L, 0xffffffffL, 0x600000000000000L};
+		internal static readonly ulong[] jjbitVec0 = new ulong[]{0xfff0000000000000L, 0xffffffffffffdfffL, 0xffffffffL, 0x600000000000000L};
 		internal static readonly ulong[] jjbitVec2 = new ulong[]{0x0L, 0xffffffffffffffffL, 0xffffffffffffffffL, 0xffffffffffffffffL};
-		internal static readonly ulong[] jjbitVec3 = new ulong[]{0xffffffffffffffffL, 0xffffffffffffffffL, 0xffffL, 0x0L};
+		internal static readonly ulong[] jjbitVec3 = new ulong[]{0xffffffffffffffffL, 0xffffffffffffffffL, 0xffffL, 0xffff000000000000L};
 		internal static readonly ulong[] jjbitVec4 = new ulong[]{0xffffffffffffffffL, 0xffffffffffffffffL, 0x0L, 0x0L};
-		internal static readonly ulong[] jjbitVec5 = new ulong[]{0x3fffffffffffL, 0x0L, 0x0L, 0x0L};
-		internal static readonly ulong[] jjbitVec6 = new ulong[]{0x0L, 0x0L, 0xfffff00000000000L, 0x7fffffL};
-		internal static readonly ulong[] jjbitVec7 = new ulong[]{0xffffffffffffffffL, 0xffffffffffffffffL, 0xffffffffffffL, 0x0L};
-		internal static readonly ulong[] jjbitVec8 = new ulong[]{0xfffffffeL, 0x0L, 0x0L, 0x0L};
-		internal static readonly ulong[] jjbitVec9 = new ulong[]{0x0L, 0x0L, 0x0L, 0xff7fffffff7fffffL};
-		internal static readonly ulong[] jjbitVec10 = new ulong[]{0x1600L, 0x0L, 0x0L, 0x0L};
-		internal static readonly ulong[] jjbitVec11 = new ulong[]{0x0L, 0xffc000000000L, 0x0L, 0xffc000000000L};
-		internal static readonly ulong[] jjbitVec12 = new ulong[]{0x0L, 0x3ff00000000L, 0x0L, 0x3ff000000000000L};
-		internal static readonly ulong[] jjbitVec13 = new ulong[]{0x0L, 0xffc000000000L, 0x0L, 0xff8000000000L};
-		internal static readonly ulong[] jjbitVec14 = new ulong[]{0x0L, 0xffc000000000L, 0x0L, 0x0L};
-		internal static readonly ulong[] jjbitVec15 = new ulong[]{0x0L, 0x3ff0000L, 0x0L, 0x3ff0000L};
-		internal static readonly ulong[] jjbitVec16 = new ulong[]{0x0L, 0x3ffL, 0x0L, 0x0L};
-		internal static readonly ulong[] jjbitVec17 = new ulong[]{0xfffffffeL, 0x0L, 0xfffff00000000000L, 0x7fffffL};
+		internal static readonly ulong[] jjbitVec5 = new ulong[]{0xffffffffffffffffL, 0xffffffffffffffffL, 0xffffffffffffffffL, 0x0L};
+		internal static readonly ulong[] jjbitVec6 = new ulong[]{0x0L, 0xffffffe000000000L, 0xffffffffL, 0x0L};
+		internal static readonly ulong[] jjbitVec7 = new ulong[]{0x20000L, 0x0L, 0xfffff00000000000L, 0x7fffffL};
+		internal static readonly ulong[] jjbitVec8 = new ulong[]{0xffffffffffffffffL, 0xffffffffffffffffL, 0xffffffffffffL, 0x0L};
+		internal static readonly ulong[] jjbitVec9 = new ulong[]{0xfffffffeL, 0x0L, 0x0L, 0x0L};
+		internal static readonly ulong[] jjbitVec10 = new ulong[]{0x0L, 0x0L, 0x0L, 0xff7fffffff7fffffL};
+		internal static readonly ulong[] jjbitVec11 = new ulong[]{0x0L, 0x0L, 0xffffffff00000000L, 0x1fffffffL};
+		internal static readonly ulong[] jjbitVec12 = new ulong[]{0x1600L, 0x0L, 0x0L, 0x0L};
+		internal static readonly ulong[] jjbitVec13 = new ulong[]{0x0L, 0xffc000000000L, 0x0L, 0xffc000000000L};
+		internal static readonly ulong[] jjbitVec14 = new ulong[]{0x0L, 0x3ff00000000L, 0x0L, 0x3ff000000000000L};
+		internal static readonly ulong[] jjbitVec15 = new ulong[]{0x0L, 0xffc000000000L, 0x0L, 0xff8000000000L};
+		internal static readonly ulong[] jjbitVec16 = new ulong[]{0x0L, 0xffc000000000L, 0x0L, 0x0L};
+		internal static readonly ulong[] jjbitVec17 = new ulong[]{0x0L, 0x3ff0000L, 0x0L, 0x3ff0000L};
+		internal static readonly ulong[] jjbitVec18 = new ulong[]{0x0L, 0x3ffL, 0x0L, 0x0L};
+		internal static readonly ulong[] jjbitVec19 = new ulong[]{0xfffffffeL, 0x0L, 0xfffff00000000000L, 0x7fffffL};
 		private int JjMoveNfa_0(int startState, int curPos)
 		{
 			int[] nextStates;
@@ -111,7 +113,7 @@
 					ReInitRounds();
 				if (curChar < 64)
 				{
-                    ulong l = ((ulong) 1L) << curChar;
+					ulong l = ((ulong) 1L) << curChar;
 MatchLoop: 
 					do 
 					{
@@ -1206,9 +1208,12 @@
 				case 51: 
 					return ((jjbitVec4[i2] & l2) != (ulong) 0L);
 				
-				case 61: 
+				case 77: 
 					return ((jjbitVec5[i2] & l2) != (ulong) 0L);
 				
+				case 255: 
+					return ((jjbitVec6[i2] & l2) != (ulong) 0L);
+				
 				default: 
 					if ((jjbitVec0[i1] & l1) != (ulong) 0L)
 						return true;
@@ -1222,10 +1227,10 @@
 			{
 				
 				case 215: 
-					return ((jjbitVec7[i2] & l2) != (ulong) 0L);
+					return ((jjbitVec8[i2] & l2) != (ulong) 0L);
 				
 				default: 
-					if ((jjbitVec6[i1] & l1) != (ulong) 0L)
+					if ((jjbitVec7[i1] & l1) != (ulong) 0L)
 						return true;
 					return false;
 				
@@ -1237,10 +1242,13 @@
 			{
 				
 				case 0: 
-					return ((jjbitVec9[i2] & l2) != (ulong) 0L);
+					return ((jjbitVec10[i2] & l2) != (ulong) 0L);
+				
+				case 255: 
+					return ((jjbitVec11[i2] & l2) != (ulong) 0L);
 				
 				default: 
-					if ((jjbitVec8[i1] & l1) != (ulong) 0L)
+					if ((jjbitVec9[i1] & l1) != (ulong) 0L)
 						return true;
 					return false;
 				
@@ -1252,23 +1260,23 @@
 			{
 				
 				case 6: 
-					return ((jjbitVec12[i2] & l2) != (ulong) 0L);
+					return ((jjbitVec14[i2] & l2) != (ulong) 0L);
 				
 				case 11: 
-					return ((jjbitVec13[i2] & l2) != (ulong) 0L);
+					return ((jjbitVec15[i2] & l2) != (ulong) 0L);
 				
 				case 13: 
-					return ((jjbitVec14[i2] & l2) != (ulong) 0L);
+					return ((jjbitVec16[i2] & l2) != (ulong) 0L);
 				
 				case 14: 
-					return ((jjbitVec15[i2] & l2) != (ulong) 0L);
+					return ((jjbitVec17[i2] & l2) != (ulong) 0L);
 				
 				case 16: 
-					return ((jjbitVec16[i2] & l2) != (ulong) 0L);
+					return ((jjbitVec18[i2] & l2) != (ulong) 0L);
 				
 				default: 
-					if ((jjbitVec10[i1] & l1) != (ulong) 0L)
-						if ((jjbitVec11[i2] & l2) == (ulong) 0L)
+					if ((jjbitVec12[i1] & l1) != (ulong) 0L)
+						if ((jjbitVec13[i2] & l2) == (ulong) 0L)
 							return false;
 						else
 							return true;
@@ -1282,13 +1290,16 @@
 			{
 				
 				case 0: 
-					return ((jjbitVec9[i2] & l2) != (ulong) 0L);
+					return ((jjbitVec10[i2] & l2) != (ulong) 0L);
 				
 				case 215: 
-					return ((jjbitVec7[i2] & l2) != (ulong) 0L);
+					return ((jjbitVec8[i2] & l2) != (ulong) 0L);
+				
+				case 255: 
+					return ((jjbitVec11[i2] & l2) != (ulong) 0L);
 				
 				default: 
-					if ((jjbitVec17[i1] & l1) != (ulong) 0L)
+					if ((jjbitVec19[i1] & l1) != (ulong) 0L)
 						return true;
 					return false;
 				

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/Token.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/Standard/Token.cs?view=diff&rev=534192&r1=534191&r2=534192
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/Token.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/Token.cs Tue May  1 11:45:26 2007
@@ -21,73 +21,73 @@
 namespace Lucene.Net.Analysis.Standard
 {
 	
-	/// <summary> Describes the input token stream.</summary>
+    /// <summary> Describes the input token stream.</summary>
 	
-	public class Token
-	{
+    public class Token
+    {
 		
-		/// <summary> An integer that describes the kind of this token.  This numbering
-		/// system is determined by JavaCCParser, and a table of these numbers is
-		/// stored in the file ...Constants.java.
-		/// </summary>
-		public int kind;
-		
-		/// <summary> beginLine and beginColumn describe the position of the first character
-		/// of this token; endLine and endColumn describe the position of the
-		/// last character of this token.
-		/// </summary>
-		public int beginLine, beginColumn, endLine, endColumn;
-		
-		/// <summary> The string image of the token.</summary>
-		public System.String image;
-		
-		/// <summary> A reference to the next regular (non-special) token from the input
-		/// stream.  If this is the last token from the input stream, or if the
-		/// token manager has not read tokens beyond this one, this field is
-		/// set to null.  This is true only if this token is also a regular
-		/// token.  Otherwise, see below for a description of the contents of
-		/// this field.
-		/// </summary>
-		public Token next;
-		
-		/// <summary> This field is used to access special tokens that occur prior to this
-		/// token, but after the immediately preceding regular (non-special) token.
-		/// If there are no such special tokens, this field is set to null.
-		/// When there are more than one such special token, this field refers
-		/// to the last of these special tokens, which in turn refers to the next
-		/// previous special token through its specialToken field, and so on
-		/// until the first special token (whose specialToken field is null).
-		/// The next fields of special tokens refer to other special tokens that
-		/// immediately follow it (without an intervening regular token).  If there
-		/// is no such token, this field is null.
-		/// </summary>
-		public Token specialToken;
-		
-		/// <summary> Returns the image.</summary>
-		public override System.String ToString()
-		{
-			return image;
-		}
-		
-		/// <summary> Returns a new Token object, by default. However, if you want, you
-		/// can create and return subclass objects based on the value of ofKind.
-		/// Simply add the cases to the switch for all those special cases.
-		/// For example, if you have a subclass of Token called IDToken that
-		/// you want to create if ofKind is ID, simlpy add something like :
-		/// 
-		/// case MyParserConstants.ID : return new IDToken();
-		/// 
-		/// to the following switch statement. Then you can cast matchedToken
-		/// variable to the appropriate type and use it in your lexical actions.
-		/// </summary>
-		public static Token NewToken(int ofKind)
-		{
-			switch (ofKind)
-			{
+        /// <summary> An integer that describes the kind of this token.  This numbering
+        /// system is determined by JavaCCParser, and a table of these numbers is
+        /// stored in the file ...Constants.java.
+        /// </summary>
+        public int kind;
+		
+        /// <summary> beginLine and beginColumn describe the position of the first character
+        /// of this token; endLine and endColumn describe the position of the
+        /// last character of this token.
+        /// </summary>
+        public int beginLine, beginColumn, endLine, endColumn;
+		
+        /// <summary> The string image of the token.</summary>
+        public System.String image;
+		
+        /// <summary> A reference to the next regular (non-special) token from the input
+        /// stream.  If this is the last token from the input stream, or if the
+        /// token manager has not read tokens beyond this one, this field is
+        /// set to null.  This is true only if this token is also a regular
+        /// token.  Otherwise, see below for a description of the contents of
+        /// this field.
+        /// </summary>
+        public Token next;
+		
+        /// <summary> This field is used to access special tokens that occur prior to this
+        /// token, but after the immediately preceding regular (non-special) token.
+        /// If there are no such special tokens, this field is set to null.
+        /// When there are more than one such special token, this field refers
+        /// to the last of these special tokens, which in turn refers to the next
+        /// previous special token through its specialToken field, and so on
+        /// until the first special token (whose specialToken field is null).
+        /// The next fields of special tokens refer to other special tokens that
+        /// immediately follow it (without an intervening regular token).  If there
+        /// is no such token, this field is null.
+        /// </summary>
+        public Token specialToken;
+		
+        /// <summary> Returns the image.</summary>
+        public override System.String ToString()
+        {
+            return image;
+        }
+		
+        /// <summary> Returns a new Token object, by default. However, if you want, you
+        /// can create and return subclass objects based on the value of ofKind.
+        /// Simply add the cases to the switch for all those special cases.
+        /// For example, if you have a subclass of Token called IDToken that
+        /// you want to create if ofKind is ID, simlpy add something like :
+        /// 
+        /// case MyParserConstants.ID : return new IDToken();
+        /// 
+        /// to the following switch statement. Then you can cast matchedToken
+        /// variable to the appropriate type and use it in your lexical actions.
+        /// </summary>
+        public static Token NewToken(int ofKind)
+        {
+            switch (ofKind)
+            {
 				
-				default:  return new Token();
+                default:  return new Token();
 				
-			}
-		}
-	}
+            }
+        }
+    }
 }

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/TokenMgrError.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/Standard/TokenMgrError.cs?view=diff&rev=534192&r1=534191&r2=534192
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/TokenMgrError.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Standard/TokenMgrError.cs Tue May  1 11:45:26 2007
@@ -21,141 +21,141 @@
 namespace Lucene.Net.Analysis.Standard
 {
 	
-	[Serializable]
-	public class TokenMgrError:System.ApplicationException
-	{
-		/// <summary> You can also modify the body of this method to customize your error messages.
-		/// For example, cases like LOOP_DETECTED and INVALID_LEXICAL_STATE are not
-		/// of end-users concern, so you can return something like : 
-		/// 
-		/// "Internal Error : Please file a bug report .... "
-		/// 
-		/// from this method for such cases in the release version of your parser.
-		/// </summary>
-		public override System.String Message
-		{
-			get
-			{
-				return base.Message;
-			}
+    [Serializable]
+    public class TokenMgrError:System.ApplicationException
+    {
+        /// <summary> You can also modify the body of this method to customize your error messages.
+        /// For example, cases like LOOP_DETECTED and INVALID_LEXICAL_STATE are not
+        /// of end-users concern, so you can return something like : 
+        /// 
+        /// "Internal Error : Please file a bug report .... "
+        /// 
+        /// from this method for such cases in the release version of your parser.
+        /// </summary>
+        public override System.String Message
+        {
+            get
+            {
+                return base.Message;
+            }
 			
-		}
-		/*
-		* Ordinals for various reasons why an Error of this type can be thrown.
-		*/
-		
-		/// <summary> Lexical error occured.</summary>
-		internal const int LEXICAL_ERROR = 0;
-		
-		/// <summary> An attempt wass made to create a second instance of a static token manager.</summary>
-		internal const int STATIC_LEXER_ERROR = 1;
-		
-		/// <summary> Tried to change to an invalid lexical state.</summary>
-		internal const int INVALID_LEXICAL_STATE = 2;
-		
-		/// <summary> Detected (and bailed out of) an infinite loop in the token manager.</summary>
-		internal const int LOOP_DETECTED = 3;
-		
-		/// <summary> Indicates the reason why the exception is thrown. It will have
-		/// one of the above 4 values.
-		/// </summary>
-		internal int errorCode;
-		
-		/// <summary> Replaces unprintable characters by their espaced (or unicode escaped)
-		/// equivalents in the given string
-		/// </summary>
-		protected internal static System.String addEscapes(System.String str)
-		{
-			System.Text.StringBuilder retval = new System.Text.StringBuilder();
-			char ch;
-			for (int i = 0; i < str.Length; i++)
-			{
-				switch (str[i])
-				{
-					
-					case (char) (0): 
-						continue;
-					
-					case '\b': 
-						retval.Append("\\b");
-						continue;
-					
-					case '\t': 
-						retval.Append("\\t");
-						continue;
-					
-					case '\n': 
-						retval.Append("\\n");
-						continue;
-					
-					case '\f': 
-						retval.Append("\\f");
-						continue;
-					
-					case '\r': 
-						retval.Append("\\r");
-						continue;
-					
-					case '\"': 
-						retval.Append("\\\"");
-						continue;
-					
-					case '\'': 
-						retval.Append("\\\'");
-						continue;
-					
-					case '\\': 
-						retval.Append("\\\\");
-						continue;
-					
-					default: 
-						if ((ch = str[i]) < 0x20 || ch > 0x7e)
-						{
-							System.String s = "0000" + System.Convert.ToString(ch, 16);
-							retval.Append("\\u" + s.Substring(s.Length - 4, (s.Length) - (s.Length - 4)));
-						}
-						else
-						{
-							retval.Append(ch);
-						}
-						continue;
-					
-				}
-			}
-			return retval.ToString();
-		}
-		
-		/// <summary> Returns a detailed message for the Error when it is thrown by the
-		/// token manager to indicate a lexical error.
-		/// Parameters : 
-		/// EOFSeen     : indicates if EOF caused the lexicl error
-		/// curLexState : lexical state in which this error occured
-		/// errorLine   : line number when the error occured
-		/// errorColumn : column number when the error occured
-		/// errorAfter  : prefix that was seen before this error occured
-		/// curchar     : the offending character
-		/// Note: You can customize the lexical error message by modifying this method.
-		/// </summary>
-		protected internal static System.String LexicalError(bool EOFSeen, int lexState, int errorLine, int errorColumn, System.String errorAfter, char curChar)
-		{
-			return ("Lexical error at line " + errorLine + ", column " + errorColumn + ".  Encountered: " + (EOFSeen?"<EOF> ":("\"" + addEscapes(System.Convert.ToString(curChar)) + "\"") + " (" + (int) curChar + "), ") + "after : \"" + addEscapes(errorAfter) + "\"");
-		}
-		
-		/*
-		* Constructors of various flavors follow.
-		*/
-		
-		public TokenMgrError()
-		{
-		}
-		
-		public TokenMgrError(System.String message, int reason):base(message)
-		{
-			errorCode = reason;
-		}
-		
-		public TokenMgrError(bool EOFSeen, int lexState, int errorLine, int errorColumn, System.String errorAfter, char curChar, int reason):this(LexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar), reason)
-		{
-		}
-	}
+        }
+        /*
+        * Ordinals for various reasons why an Error of this type can be thrown.
+        */
+		
+        /// <summary> Lexical error occurred.</summary>
+        internal const int LEXICAL_ERROR = 0;
+		
+        /// <summary> An attempt was made to create a second instance of a static token manager.</summary>
+        internal const int STATIC_LEXER_ERROR = 1;
+		
+        /// <summary> Tried to change to an invalid lexical state.</summary>
+        internal const int INVALID_LEXICAL_STATE = 2;
+		
+        /// <summary> Detected (and bailed out of) an infinite loop in the token manager.</summary>
+        internal const int LOOP_DETECTED = 3;
+		
+        /// <summary> Indicates the reason why the exception is thrown. It will have
+        /// one of the above 4 values.
+        /// </summary>
+        internal int errorCode;
+		
+        /// <summary> Replaces unprintable characters by their escaped (or unicode escaped)
+        /// equivalents in the given string
+        /// </summary>
+        protected internal static System.String addEscapes(System.String str)
+        {
+            System.Text.StringBuilder retval = new System.Text.StringBuilder();
+            char ch;
+            for (int i = 0; i < str.Length; i++)
+            {
+                switch (str[i])
+                {
+					
+                    case (char) (0): 
+                        continue;
+					
+                    case '\b': 
+                        retval.Append("\\b");
+                        continue;
+					
+                    case '\t': 
+                        retval.Append("\\t");
+                        continue;
+					
+                    case '\n': 
+                        retval.Append("\\n");
+                        continue;
+					
+                    case '\f': 
+                        retval.Append("\\f");
+                        continue;
+					
+                    case '\r': 
+                        retval.Append("\\r");
+                        continue;
+					
+                    case '\"': 
+                        retval.Append("\\\"");
+                        continue;
+					
+                    case '\'': 
+                        retval.Append("\\\'");
+                        continue;
+					
+                    case '\\': 
+                        retval.Append("\\\\");
+                        continue;
+					
+                    default: 
+                        if ((ch = str[i]) < 0x20 || ch > 0x7e)
+                        {
+                            System.String s = "0000" + System.Convert.ToString(ch, 16);
+                            retval.Append("\\u" + s.Substring(s.Length - 4, (s.Length) - (s.Length - 4)));
+                        }
+                        else
+                        {
+                            retval.Append(ch);
+                        }
+                        continue;
+					
+                }
+            }
+            return retval.ToString();
+        }
+		
+        /// <summary> Returns a detailed message for the Error when it is thrown by the
+        /// token manager to indicate a lexical error.
+        /// Parameters : 
+        /// EOFSeen     : indicates if EOF caused the lexical error
+        /// curLexState : lexical state in which this error occurred
+        /// errorLine   : line number when the error occurred
+        /// errorColumn : column number when the error occurred
+        /// errorAfter  : prefix that was seen before this error occurred
+        /// curchar     : the offending character
+        /// Note: You can customize the lexical error message by modifying this method.
+        /// </summary>
+        protected internal static System.String LexicalError(bool EOFSeen, int lexState, int errorLine, int errorColumn, System.String errorAfter, char curChar)
+        {
+            return ("Lexical error at line " + errorLine + ", column " + errorColumn + ".  Encountered: " + (EOFSeen?"<EOF> ":("\"" + addEscapes(System.Convert.ToString(curChar)) + "\"") + " (" + (int) curChar + "), ") + "after : \"" + addEscapes(errorAfter) + "\"");
+        }
+		
+        /*
+        * Constructors of various flavors follow.
+        */
+		
+        public TokenMgrError()
+        {
+        }
+		
+        public TokenMgrError(System.String message, int reason):base(message)
+        {
+            errorCode = reason;
+        }
+		
+        public TokenMgrError(bool EOFSeen, int lexState, int errorLine, int errorColumn, System.String errorAfter, char curChar, int reason):this(LexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar), reason)
+        {
+        }
+    }
 }

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/StopAnalyzer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/StopAnalyzer.cs?view=diff&rev=534192&r1=534191&r2=534192
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/StopAnalyzer.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/StopAnalyzer.cs Tue May  1 11:45:26 2007
@@ -20,55 +20,55 @@
 namespace Lucene.Net.Analysis
 {
 	
-	/// <summary>Filters LetterTokenizer with LowerCaseFilter and StopFilter. </summary>
+    /// <summary>Filters LetterTokenizer with LowerCaseFilter and StopFilter. </summary>
 	
-	public sealed class StopAnalyzer : Analyzer
-	{
-		private System.Collections.Hashtable stopWords;
-		
-		/// <summary>An array containing some common English words that are not usually useful
-		/// for searching. 
-		/// </summary>
-		public static readonly System.String[] ENGLISH_STOP_WORDS = new System.String[]{"a", "an", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "no", "not", "of", "on", "or", "s", "such", "t", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with"};
-		
-		/// <summary>Builds an analyzer which removes words in ENGLISH_STOP_WORDS. </summary>
-		public StopAnalyzer()
-		{
-			stopWords = StopFilter.MakeStopSet(ENGLISH_STOP_WORDS);
-		}
-		
-		/// <summary>Builds an analyzer with the stop words from the given set.</summary>
-		public StopAnalyzer(System.Collections.Hashtable stopWords)
-		{
-			this.stopWords = stopWords;
-		}
-		
-		/// <summary>Builds an analyzer which removes words in the provided array. </summary>
-		public StopAnalyzer(System.String[] stopWords)
-		{
-			this.stopWords = StopFilter.MakeStopSet(stopWords);
-		}
-		
-		/// <summary>Builds an analyzer with the stop words from the given file.</summary>
-		/// <seealso cref="WordlistLoader.GetWordSet(File)">
-		/// </seealso>
-		public StopAnalyzer(System.IO.FileInfo stopwordsFile)
-		{
-			stopWords = WordlistLoader.GetWordSet(stopwordsFile);
-		}
-		
-		/// <summary>Builds an analyzer with the stop words from the given reader.</summary>
-		/// <seealso cref="WordlistLoader.GetWordSet(Reader)">
-		/// </seealso>
-		public StopAnalyzer(System.IO.TextReader stopwords)
-		{
-			stopWords = WordlistLoader.GetWordSet(stopwords);
-		}
-		
-		/// <summary>Filters LowerCaseTokenizer with StopFilter. </summary>
-		public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
-		{
-			return new StopFilter(new LowerCaseTokenizer(reader), stopWords);
-		}
-	}
+    public sealed class StopAnalyzer : Analyzer
+    {
+        private System.Collections.Hashtable stopWords;
+		
+        /// <summary>An array containing some common English words that are not usually useful
+        /// for searching. 
+        /// </summary>
+        public static readonly System.String[] ENGLISH_STOP_WORDS = new System.String[]{"a", "an", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "no", "not", "of", "on", "or", "such", "t", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with"};
+		
+        /// <summary>Builds an analyzer which removes words in ENGLISH_STOP_WORDS. </summary>
+        public StopAnalyzer()
+        {
+            stopWords = StopFilter.MakeStopSet(ENGLISH_STOP_WORDS);
+        }
+		
+        /// <summary>Builds an analyzer with the stop words from the given set.</summary>
+        public StopAnalyzer(System.Collections.Hashtable stopWords)
+        {
+            this.stopWords = stopWords;
+        }
+		
+        /// <summary>Builds an analyzer which removes words in the provided array. </summary>
+        public StopAnalyzer(System.String[] stopWords)
+        {
+            this.stopWords = StopFilter.MakeStopSet(stopWords);
+        }
+		
+        /// <summary>Builds an analyzer with the stop words from the given file.</summary>
+        /// <seealso cref="WordlistLoader.GetWordSet(File)">
+        /// </seealso>
+        public StopAnalyzer(System.IO.FileInfo stopwordsFile)
+        {
+            stopWords = WordlistLoader.GetWordSet(stopwordsFile);
+        }
+		
+        /// <summary>Builds an analyzer with the stop words from the given reader.</summary>
+        /// <seealso cref="WordlistLoader.GetWordSet(Reader)">
+        /// </seealso>
+        public StopAnalyzer(System.IO.TextReader stopwords)
+        {
+            stopWords = WordlistLoader.GetWordSet(stopwords);
+        }
+		
+        /// <summary>Filters LowerCaseTokenizer with StopFilter. </summary>
+        public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
+        {
+            return new StopFilter(new LowerCaseTokenizer(reader), stopWords);
+        }
+    }
 }

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/StopFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/StopFilter.cs?view=diff&rev=534192&r1=534191&r2=534192
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/StopFilter.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/StopFilter.cs Tue May  1 11:45:26 2007
@@ -20,97 +20,97 @@
 namespace Lucene.Net.Analysis
 {
 	
-	/// <summary> Removes stop words from a token stream.</summary>
+    /// <summary> Removes stop words from a token stream.</summary>
 	
-	public sealed class StopFilter : TokenFilter
-	{
+    public sealed class StopFilter : TokenFilter
+    {
 		
-		private System.Collections.Hashtable stopWords;
-		private bool ignoreCase;
+        private System.Collections.Hashtable stopWords;
+        private bool ignoreCase;
 		
-		/// <summary> Construct a token stream filtering the given input.</summary>
-		public StopFilter(TokenStream input, System.String[] stopWords) : this(input, stopWords, false)
-		{
-		}
-		
-		/// <summary> Constructs a filter which removes words from the input
-		/// TokenStream that are named in the array of words.
-		/// </summary>
-		public StopFilter(TokenStream in_Renamed, System.String[] stopWords, bool ignoreCase) : base(in_Renamed)
-		{
-			this.ignoreCase = ignoreCase;
-			this.stopWords = MakeStopSet(stopWords, ignoreCase);
-		}
+        /// <summary> Construct a token stream filtering the given input.</summary>
+        public StopFilter(TokenStream input, System.String[] stopWords) : this(input, stopWords, false)
+        {
+        }
+		
+        /// <summary> Constructs a filter which removes words from the input
+        /// TokenStream that are named in the array of words.
+        /// </summary>
+        public StopFilter(TokenStream in_Renamed, System.String[] stopWords, bool ignoreCase) : base(in_Renamed)
+        {
+            this.ignoreCase = ignoreCase;
+            this.stopWords = MakeStopSet(stopWords, ignoreCase);
+        }
 		
 
-		/// <summary> Construct a token stream filtering the given input.</summary>
-		/// <param name="input">
-		/// </param>
-		/// <param name="stopWords">The set of Stop Words, as Strings.  If ignoreCase is true, all strings should be lower cased
-		/// </param>
-		/// <param name="ignoreCase">-Ignore case when stopping.  The stopWords set must be setup to contain only lower case words 
-		/// </param>
-		public StopFilter(TokenStream input, System.Collections.Hashtable stopWords, bool ignoreCase) : base(input)
-		{
-			this.ignoreCase = ignoreCase;
-			this.stopWords = stopWords;
-		}
-		
-		/// <summary> Constructs a filter which removes words from the input
-		/// TokenStream that are named in the Set.
-		/// It is crucial that an efficient Set implementation is used
-		/// for maximum performance.
-		/// 
-		/// </summary>
-		/// <seealso cref="MakeStopSet(String[])">
-		/// </seealso>
-		public StopFilter(TokenStream in_Renamed, System.Collections.Hashtable stopWords) : this(in_Renamed, stopWords, false)
-		{
-		}
-		
-		/// <summary> Builds a Set from an array of stop words,
-		/// appropriate for passing into the StopFilter constructor.
-		/// This permits this stopWords construction to be cached once when
-		/// an Analyzer is constructed.
-		/// 
-		/// </summary>
-		/// <seealso cref="MakeStopSet(String[], boolean) passing false to ignoreCase">
-		/// </seealso>
-		public static System.Collections.Hashtable MakeStopSet(System.String[] stopWords)
-		{
-			return MakeStopSet(stopWords, false);
-		}
-		
-		/// <summary> </summary>
-		/// <param name="stopWords">
-		/// </param>
-		/// <param name="ignoreCase">If true, all words are lower cased first.  
-		/// </param>
-		/// <returns> a Set containing the words
-		/// </returns>
-		public static System.Collections.Hashtable MakeStopSet(System.String[] stopWords, bool ignoreCase)
-		{
-			System.Collections.Hashtable stopTable = new System.Collections.Hashtable(stopWords.Length);
+        /// <summary> Construct a token stream filtering the given input.</summary>
+        /// <param name="input">
+        /// </param>
+        /// <param name="stopWords">The set of Stop Words, as Strings.  If ignoreCase is true, all strings should be lower cased
+        /// </param>
+        /// <param name="ignoreCase">-Ignore case when stopping.  The stopWords set must be setup to contain only lower case words 
+        /// </param>
+        public StopFilter(TokenStream input, System.Collections.Hashtable stopWords, bool ignoreCase) : base(input)
+        {
+            this.ignoreCase = ignoreCase;
+            this.stopWords = stopWords;
+        }
+		
+        /// <summary> Constructs a filter which removes words from the input
+        /// TokenStream that are named in the Set.
+        /// It is crucial that an efficient Set implementation is used
+        /// for maximum performance.
+        /// 
+        /// </summary>
+        /// <seealso cref="MakeStopSet(String[])">
+        /// </seealso>
+        public StopFilter(TokenStream in_Renamed, System.Collections.Hashtable stopWords) : this(in_Renamed, stopWords, false)
+        {
+        }
+		
+        /// <summary> Builds a Set from an array of stop words,
+        /// appropriate for passing into the StopFilter constructor.
+        /// This permits this stopWords construction to be cached once when
+        /// an Analyzer is constructed.
+        /// 
+        /// </summary>
+        /// <seealso cref="MakeStopSet(String[], boolean) passing false to ignoreCase">
+        /// </seealso>
+        public static System.Collections.Hashtable MakeStopSet(System.String[] stopWords)
+        {
+            return MakeStopSet(stopWords, false);
+        }
+		
+        /// <summary> </summary>
+        /// <param name="stopWords">
+        /// </param>
+        /// <param name="ignoreCase">If true, all words are lower cased first.  
+        /// </param>
+        /// <returns> a Set containing the words
+        /// </returns>
+        public static System.Collections.Hashtable MakeStopSet(System.String[] stopWords, bool ignoreCase)
+        {
+            System.Collections.Hashtable stopTable = new System.Collections.Hashtable(stopWords.Length);
             for (int i = 0; i < stopWords.Length; i++)
             {
                 System.String tmp = ignoreCase ? stopWords[i].ToLower() : stopWords[i];
                 stopTable.Add(tmp, tmp);
             }
-			return stopTable;
-		}
+            return stopTable;
+        }
 		
-		/// <summary> Returns the next input Token whose termText() is not a stop word.</summary>
-		public override Token Next()
-		{
-			// return the first non-stop word found
-			for (Token token = input.Next(); token != null; token = input.Next())
-			{
-				System.String termText = ignoreCase ? token.termText.ToLower() : token.termText;
-				if (!stopWords.Contains(termText))
-					return token;
-			}
-			// reached EOS -- return null
-			return null;
-		}
-	}
+        /// <summary> Returns the next input Token whose termText() is not a stop word.</summary>
+        public override Token Next()
+        {
+            // return the first non-stop word found
+            for (Token token = input.Next(); token != null; token = input.Next())
+            {
+                System.String termText = ignoreCase ? token.termText.ToLower() : token.termText;
+                if (!stopWords.Contains(termText))
+                    return token;
+            }
+            // reached EOS -- return null
+            return null;
+        }
+    }
 }

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Token.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/Token.cs?view=diff&rev=534192&r1=534191&r2=534192
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Token.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Token.cs Tue May  1 11:45:26 2007
@@ -20,129 +20,147 @@
 namespace Lucene.Net.Analysis
 {
 	
-	/// <summary>A Token is an occurence of a term from the text of a field.  It consists of
-	/// a term's text, the start and end offset of the term in the text of the field,
-	/// and a type string.
-	/// The start and end offsets permit applications to re-associate a token with
-	/// its source text, e.g., to display highlighted query terms in a document
-	/// browser, or to show matching text fragments in a KWIC (KeyWord In Context)
-	/// display, etc.
-	/// The type is an interned string, assigned by a lexical analyzer
-	/// (a.k.a. tokenizer), naming the lexical or syntactic class that the token
-	/// belongs to.  For example an end of sentence marker token might be implemented
-	/// with type "eos".  The default token type is "word".  
-	/// </summary>
+    /// <summary>A Token is an occurrence of a term from the text of a field.  It consists of
+    /// a term's text, the start and end offset of the term in the text of the field,
+    /// and a type string.
+    /// The start and end offsets permit applications to re-associate a token with
+    /// its source text, e.g., to display highlighted query terms in a document
+    /// browser, or to show matching text fragments in a KWIC (KeyWord In Context)
+    /// display, etc.
+    /// The type is an interned string, assigned by a lexical analyzer
+    /// (a.k.a. tokenizer), naming the lexical or syntactic class that the token
+    /// belongs to.  For example an end of sentence marker token might be implemented
+    /// with type "eos".  The default token type is "word".  
+    /// </summary>
 	
-	public sealed class Token
-	{
-		internal System.String termText; // the text of the term
-		internal int startOffset; // start in source text
-		internal int endOffset; // end in source text
-		internal System.String type = "word"; // lexical type
-		
-		private int positionIncrement = 1;
-		
-		/// <summary>Constructs a Token with the given term text, and start & end offsets.
-		/// The type defaults to "word." 
-		/// </summary>
-		public Token(System.String text, int start, int end)
-		{
-			termText = text;
-			startOffset = start;
-			endOffset = end;
-		}
-		
-		/// <summary>Constructs a Token with the given text, start and end offsets, & type. </summary>
-		public Token(System.String text, int start, int end, System.String typ)
-		{
-			termText = text;
-			startOffset = start;
-			endOffset = end;
-			type = typ;
-		}
-		
-		/// <summary>Set the position increment.  This determines the position of this token
-		/// relative to the previous Token in a {@link TokenStream}, used in phrase
-		/// searching.
-		/// 
-		/// <p>The default value is one.
-		/// 
-		/// <p>Some common uses for this are:<ul>
-		/// 
-		/// <li>Set it to zero to put multiple terms in the same position.  This is
-		/// useful if, e.g., a word has multiple stems.  Searches for phrases
-		/// including either stem will match.  In this case, all but the first stem's
-		/// increment should be set to zero: the increment of the first instance
-		/// should be one.  Repeating a token with an increment of zero can also be
-		/// used to boost the scores of matches on that token.
-		/// 
-		/// <li>Set it to values greater than one to inhibit exact phrase matches.
-		/// If, for example, one does not want phrases to match across removed stop
-		/// words, then one could build a stop word filter that removes stop words and
-		/// also sets the increment to the number of stop words removed before each
-		/// non-stop word.  Then exact phrase queries will only match when the terms
-		/// occur with no intervening stop words.
-		/// 
-		/// </ul>
-		/// </summary>
-		/// <seealso cref="Lucene.Net.index.TermPositions">
-		/// </seealso>
-		public void  SetPositionIncrement(int positionIncrement)
-		{
-			if (positionIncrement < 0)
-				throw new System.ArgumentException("Increment must be zero or greater: " + positionIncrement);
-			this.positionIncrement = positionIncrement;
-		}
-		
-		/// <summary>Returns the position increment of this Token.</summary>
-		/// <seealso cref="setPositionIncrement">
-		/// </seealso>
-		public int GetPositionIncrement()
-		{
-			return positionIncrement;
-		}
-		
-		/// <summary>Returns the Token's term text. </summary>
-		public System.String TermText()
-		{
-			return termText;
-		}
-		
-		/// <summary>Returns this Token's starting offset, the position of the first character
-		/// corresponding to this token in the source text.
-		/// Note that the difference between endOffset() and startOffset() may not be
-		/// equal to termText.length(), as the term text may have been altered by a
-		/// stemmer or some other filter. 
-		/// </summary>
-		public int StartOffset()
-		{
-			return startOffset;
-		}
-		
-		/// <summary>Returns this Token's ending offset, one greater than the position of the
-		/// last character corresponding to this token in the source text. 
-		/// </summary>
-		public int EndOffset()
-		{
-			return endOffset;
-		}
-		
-		/// <summary>Returns this Token's lexical type.  Defaults to "word". </summary>
-		public System.String Type()
-		{
-			return type;
-		}
-		
-		public override System.String ToString()
-		{
-			System.Text.StringBuilder sb = new System.Text.StringBuilder();
-			sb.Append("(" + termText + "," + startOffset + "," + endOffset);
-			if (!type.Equals("word"))
-				sb.Append(",type=" + type);
-			if (positionIncrement != 1)
-				sb.Append(",posIncr=" + positionIncrement);
-			sb.Append(")");
-			return sb.ToString();
-		}
-	}
+    public class Token : System.ICloneable
+    {
+        internal System.String termText; // the text of the term
+        internal int startOffset; // start in source text
+        internal int endOffset; // end in source text
+        internal System.String type = "word"; // lexical type
+		
+        private int positionIncrement = 1;
+		
+        /// <summary>Constructs a Token with the given term text, and start & end offsets.
+        /// The type defaults to "word." 
+        /// </summary>
+        public Token(System.String text, int start, int end)
+        {
+            termText = text;
+            startOffset = start;
+            endOffset = end;
+        }
+		
+        /// <summary>Constructs a Token with the given text, start and end offsets, & type. </summary>
+        public Token(System.String text, int start, int end, System.String typ)
+        {
+            termText = text;
+            startOffset = start;
+            endOffset = end;
+            type = typ;
+        }
+		
+        /// <summary>Set the position increment.  This determines the position of this token
+        /// relative to the previous Token in a {@link TokenStream}, used in phrase
+        /// searching.
+        /// 
+        /// <p>The default value is one.
+        /// 
+        /// <p>Some common uses for this are:<ul>
+        /// 
+        /// <li>Set it to zero to put multiple terms in the same position.  This is
+        /// useful if, e.g., a word has multiple stems.  Searches for phrases
+        /// including either stem will match.  In this case, all but the first stem's
+        /// increment should be set to zero: the increment of the first instance
+        /// should be one.  Repeating a token with an increment of zero can also be
+        /// used to boost the scores of matches on that token.
+        /// 
+        /// <li>Set it to values greater than one to inhibit exact phrase matches.
+        /// If, for example, one does not want phrases to match across removed stop
+        /// words, then one could build a stop word filter that removes stop words and
+        /// also sets the increment to the number of stop words removed before each
+        /// non-stop word.  Then exact phrase queries will only match when the terms
+        /// occur with no intervening stop words.
+        /// 
+        /// </ul>
+        /// </summary>
+        /// <seealso cref="Lucene.Net.index.TermPositions">
+        /// </seealso>
+        public void  SetPositionIncrement(int positionIncrement)
+        {
+            if (positionIncrement < 0)
+                throw new System.ArgumentException("Increment must be zero or greater: " + positionIncrement);
+            this.positionIncrement = positionIncrement;
+        }
+		
+        /// <summary>Returns the position increment of this Token.</summary>
+        /// <seealso cref="setPositionIncrement">
+        /// </seealso>
+        public int GetPositionIncrement()
+        {
+            return positionIncrement;
+        }
+		
+        /// <summary>Sets the Token's term text. </summary>
+        public virtual void  SetTermText(System.String text)
+        {
+            termText = text;
+        }
+		
+        /// <summary>Returns the Token's term text. </summary>
+        public System.String TermText()
+        {
+            return termText;
+        }
+		
+        /// <summary>Returns this Token's starting offset, the position of the first character
+        /// corresponding to this token in the source text.
+        /// Note that the difference between endOffset() and startOffset() may not be
+        /// equal to termText.length(), as the term text may have been altered by a
+        /// stemmer or some other filter. 
+        /// </summary>
+        public int StartOffset()
+        {
+            return startOffset;
+        }
+		
+        /// <summary>Returns this Token's ending offset, one greater than the position of the
+        /// last character corresponding to this token in the source text. 
+        /// </summary>
+        public int EndOffset()
+        {
+            return endOffset;
+        }
+		
+        /// <summary>Returns this Token's lexical type.  Defaults to "word". </summary>
+        public System.String Type()
+        {
+            return type;
+        }
+		
+        public override System.String ToString()
+        {
+            System.Text.StringBuilder sb = new System.Text.StringBuilder();
+            sb.Append("(" + termText + "," + startOffset + "," + endOffset);
+            if (!type.Equals("word"))
+                sb.Append(",type=" + type);
+            if (positionIncrement != 1)
+                sb.Append(",posIncr=" + positionIncrement);
+            sb.Append(")");
+            return sb.ToString();
+        }
+		
+        public virtual System.Object Clone()
+        {
+            try
+            {
+                return base.MemberwiseClone();
+            }
+            catch (System.Exception e)
+            {
+                throw new System.SystemException("", e); // shouldn't happen since we implement Cloneable
+            }
+        }
+    }
 }

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/TokenFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/TokenFilter.cs?view=diff&rev=534192&r1=534191&r2=534192
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/TokenFilter.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/TokenFilter.cs Tue May  1 11:45:26 2007
@@ -20,26 +20,26 @@
 namespace Lucene.Net.Analysis
 {
 	
-	/// <summary>A TokenFilter is a TokenStream whose input is another token stream.
-	/// <p>
-	/// This is an abstract class.
-	/// </summary>
+    /// <summary>A TokenFilter is a TokenStream whose input is another token stream.
+    /// <p>
+    /// This is an abstract class.
+    /// </summary>
 	
-	public abstract class TokenFilter : TokenStream
-	{
-		/// <summary>The source of tokens for this filter. </summary>
-		protected internal TokenStream input;
+    public abstract class TokenFilter : TokenStream
+    {
+        /// <summary>The source of tokens for this filter. </summary>
+        protected internal TokenStream input;
 		
-		/// <summary>Construct a token stream filtering the given input. </summary>
-		protected internal TokenFilter(TokenStream input)
-		{
-			this.input = input;
-		}
+        /// <summary>Construct a token stream filtering the given input. </summary>
+        protected internal TokenFilter(TokenStream input)
+        {
+            this.input = input;
+        }
 		
-		/// <summary>Close the input TokenStream. </summary>
-		public override void  Close()
-		{
-			input.Close();
-		}
-	}
+        /// <summary>Close the input TokenStream. </summary>
+        public override void  Close()
+        {
+            input.Close();
+        }
+    }
 }

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/TokenStream.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/TokenStream.cs?view=diff&rev=534192&r1=534191&r2=534192
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/TokenStream.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/TokenStream.cs Tue May  1 11:45:26 2007
@@ -20,26 +20,26 @@
 namespace Lucene.Net.Analysis
 {
 	
-	/// <summary>A TokenStream enumerates the sequence of tokens, either from
-	/// fields of a document or from query text.
-	/// <p>
-	/// This is an abstract class.  Concrete subclasses are:
-	/// <ul>
-	/// <li>{@link Tokenizer}, a TokenStream
-	/// whose input is a Reader; and
-	/// <li>{@link TokenFilter}, a TokenStream
-	/// whose input is another TokenStream.
-	/// </ul>
-	/// </summary>
+    /// <summary>A TokenStream enumerates the sequence of tokens, either from
+    /// fields of a document or from query text.
+    /// <p>
+    /// This is an abstract class.  Concrete subclasses are:
+    /// <ul>
+    /// <li>{@link Tokenizer}, a TokenStream
+    /// whose input is a Reader; and
+    /// <li>{@link TokenFilter}, a TokenStream
+    /// whose input is another TokenStream.
+    /// </ul>
+    /// </summary>
 	
-	public abstract class TokenStream
-	{
-		/// <summary>Returns the next token in the stream, or null at EOS. </summary>
-		public abstract Token Next();
+    public abstract class TokenStream
+    {
+        /// <summary>Returns the next token in the stream, or null at EOS. </summary>
+        public abstract Token Next();
 		
-		/// <summary>Releases resources associated with this stream. </summary>
-		public virtual void  Close()
-		{
-		}
-	}
+        /// <summary>Releases resources associated with this stream. </summary>
+        public virtual void  Close()
+        {
+        }
+    }
 }

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Tokenizer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/Tokenizer.cs?view=diff&rev=534192&r1=534191&r2=534192
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Tokenizer.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/Tokenizer.cs Tue May  1 11:45:26 2007
@@ -20,34 +20,34 @@
 namespace Lucene.Net.Analysis
 {
 	
-	/// <summary>A Tokenizer is a TokenStream whose input is a Reader.
-	/// <p>
-	/// This is an abstract class.
-	/// </summary>
+    /// <summary>A Tokenizer is a TokenStream whose input is a Reader.
+    /// <p>
+    /// This is an abstract class.
+    /// </summary>
 	
-	public abstract class Tokenizer : TokenStream
-	{
-		/// <summary>The text source for this Tokenizer. </summary>
-		protected internal System.IO.TextReader input;
+    public abstract class Tokenizer : TokenStream
+    {
+        /// <summary>The text source for this Tokenizer. </summary>
+        protected internal System.IO.TextReader input;
 		
-		/// <summary>Construct a tokenizer with null input. </summary>
-		protected internal Tokenizer()
-		{
-		}
+        /// <summary>Construct a tokenizer with null input. </summary>
+        protected internal Tokenizer()
+        {
+        }
 		
-		/// <summary>Construct a token stream processing the given input. </summary>
-		protected internal Tokenizer(System.IO.TextReader input)
-		{
-			this.input = input;
-		}
+        /// <summary>Construct a token stream processing the given input. </summary>
+        protected internal Tokenizer(System.IO.TextReader input)
+        {
+            this.input = input;
+        }
 		
-		/// <summary>By default, closes the input Reader. </summary>
-		public override void  Close()
-		{
-			if (input != null)
-			{
-				input.Close();
-			}
-		}
-	}
+        /// <summary>By default, closes the input Reader. </summary>
+        public override void  Close()
+        {
+            if (input != null)
+            {
+                input.Close();
+            }
+        }
+    }
 }

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/WhitespaceAnalyzer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/WhitespaceAnalyzer.cs?view=diff&rev=534192&r1=534191&r2=534192
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/WhitespaceAnalyzer.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/WhitespaceAnalyzer.cs Tue May  1 11:45:26 2007
@@ -20,13 +20,13 @@
 namespace Lucene.Net.Analysis
 {
 	
-	/// <summary>An Analyzer that uses WhitespaceTokenizer. </summary>
+    /// <summary>An Analyzer that uses WhitespaceTokenizer. </summary>
 	
-	public sealed class WhitespaceAnalyzer : Analyzer
-	{
-		public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
-		{
-			return new WhitespaceTokenizer(reader);
-		}
-	}
+    public sealed class WhitespaceAnalyzer : Analyzer
+    {
+        public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
+        {
+            return new WhitespaceTokenizer(reader);
+        }
+    }
 }

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/WhitespaceTokenizer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/WhitespaceTokenizer.cs?view=diff&rev=534192&r1=534191&r2=534192
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/WhitespaceTokenizer.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/WhitespaceTokenizer.cs Tue May  1 11:45:26 2007
@@ -20,23 +20,23 @@
 namespace Lucene.Net.Analysis
 {
 	
-	/// <summary>A WhitespaceTokenizer is a tokenizer that divides text at whitespace.
-	/// Adjacent sequences of non-Whitespace characters form tokens. 
-	/// </summary>
+    /// <summary>A WhitespaceTokenizer is a tokenizer that divides text at whitespace.
+    /// Adjacent sequences of non-Whitespace characters form tokens. 
+    /// </summary>
 	
-	public class WhitespaceTokenizer : CharTokenizer
-	{
-		/// <summary>Construct a new WhitespaceTokenizer. </summary>
-		public WhitespaceTokenizer(System.IO.TextReader in_Renamed) : base(in_Renamed)
-		{
-		}
+    public class WhitespaceTokenizer : CharTokenizer
+    {
+        /// <summary>Construct a new WhitespaceTokenizer. </summary>
+        public WhitespaceTokenizer(System.IO.TextReader in_Renamed) : base(in_Renamed)
+        {
+        }
 		
-		/// <summary>Collects only characters which do not satisfy
-		/// {@link Character#isWhitespace(char)}.
-		/// </summary>
-		protected internal override bool IsTokenChar(char c)
-		{
-			return !System.Char.IsWhiteSpace(c);
-		}
-	}
+        /// <summary>Collects only characters which do not satisfy
+        /// {@link Character#isWhitespace(char)}.
+        /// </summary>
+        protected internal override bool IsTokenChar(char c)
+        {
+            return !System.Char.IsWhiteSpace(c);
+        }
+    }
 }

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/WordlistLoader.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/Analysis/WordlistLoader.cs?view=diff&rev=534192&r1=534191&r2=534192
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/WordlistLoader.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/Analysis/WordlistLoader.cs Tue May  1 11:45:26 2007
@@ -20,91 +20,110 @@
 namespace Lucene.Net.Analysis
 {
 	
-	/// <summary> Loader for text files that represent a list of stopwords.
-	/// 
-	/// </summary>
-	/// <author>  Gerhard Schwarz
-	/// </author>
-	/// <version>  $Id: WordlistLoader.java 192989 2005-06-22 19:59:03Z dnaber $
-	/// </version>
-	public class WordlistLoader
-	{
+    /// <summary> Loader for text files that represent a list of stopwords.
+    /// 
+    /// </summary>
+    /// <author>  Gerhard Schwarz
+    /// </author>
+    /// <version>  $Id: WordlistLoader.java 192989 2005-06-22 19:59:03Z dnaber $
+    /// </version>
+    public class WordlistLoader
+    {
 		
-		/// <summary> Loads a text file and adds every line as an entry to a HashSet (omitting
-		/// leading and trailing whitespace). Every line of the file should contain only
-		/// one word. The words need to be in lowercase if you make use of an
-		/// Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
-		/// 
-		/// </summary>
-		/// <param name="wordfile">File containing the wordlist
-		/// </param>
-		/// <returns> A HashSet with the file's words
-		/// </returns>
-		public static System.Collections.Hashtable GetWordSet(System.IO.FileInfo wordfile)
-		{
-			System.Collections.Hashtable result = new System.Collections.Hashtable();
-			System.IO.TextReader reader = null;
-			try
-			{
-				reader = new System.IO.StreamReader(wordfile.FullName, System.Text.Encoding.Default);
-				result = GetWordSet(reader);
-			}
-			finally
-			{
-				if (reader != null)
-					reader.Close();
-			}
-			return result;
-		}
+        /// <summary> Loads a text file and adds every line as an entry to a HashSet (omitting
+        /// leading and trailing whitespace). Every line of the file should contain only
+        /// one word. The words need to be in lowercase if you make use of an
+        /// Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
+        /// 
+        /// </summary>
+        /// <param name="wordfile">File containing the wordlist
+        /// </param>
+        /// <returns> A HashSet with the file's words
+        /// </returns>
+        public static System.Collections.Hashtable GetWordSet(System.IO.FileInfo wordfile)
+        {
+            System.Collections.Hashtable result = new System.Collections.Hashtable();
+            System.IO.TextReader reader = null;
+            try
+            {
+                reader = new System.IO.StreamReader(wordfile.FullName, System.Text.Encoding.Default);
+                result = GetWordSet(reader);
+            }
+            finally
+            {
+                if (reader != null)
+                    reader.Close();
+            }
+            return result;
+        }
 		
-		/// <summary> Reads lines from a Reader and adds every line as an entry to a HashSet (omitting
-		/// leading and trailing whitespace). Every line of the Reader should contain only
-		/// one word. The words need to be in lowercase if you make use of an
-		/// Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
-		/// 
-		/// </summary>
-		/// <param name="reader">Reader containing the wordlist
-		/// </param>
-		/// <returns> A HashSet with the reader's words
-		/// </returns>
-		public static System.Collections.Hashtable GetWordSet(System.IO.TextReader reader)
-		{
-			System.Collections.Hashtable result = new System.Collections.Hashtable();
-			System.IO.TextReader br = null;
-			try
-			{
-				br = (System.IO.TextReader) reader;
-				System.String word = null;
-				while ((word = br.ReadLine()) != null)
-				{
+        /// <summary> Reads lines from a Reader and adds every line as an entry to a HashSet (omitting
+        /// leading and trailing whitespace). Every line of the Reader should contain only
+        /// one word. The words need to be in lowercase if you make use of an
+        /// Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
+        /// 
+        /// </summary>
+        /// <param name="reader">Reader containing the wordlist
+        /// </param>
+        /// <returns> A HashSet with the reader's words
+        /// </returns>
+        public static System.Collections.Hashtable GetWordSet(System.IO.TextReader reader)
+        {
+            System.Collections.Hashtable result = new System.Collections.Hashtable();
+            System.IO.TextReader br = null;
+            try
+            {
+                br = (System.IO.TextReader) reader;
+                System.String word = null;
+                while ((word = br.ReadLine()) != null)
+                {
                     System.String tmp = word.Trim();
-					result.Add(tmp, tmp);
-				}
-			}
-			finally
-			{
-				if (br != null)
-					br.Close();
-			}
-			return result;
-		}
+                    result.Add(tmp, tmp);
+                }
+            }
+            finally
+            {
+                if (br != null)
+                    br.Close();
+            }
+            return result;
+        }
 		
-		
-		/// <summary> Builds a wordlist table, using words as both keys and values
-		/// for backward compatibility.
-		/// 
-		/// </summary>
-		/// <param name="wordSet">  stopword set
-		/// </param>
-		private static System.Collections.Hashtable MakeWordTable(System.Collections.Hashtable wordSet)
-		{
-			System.Collections.Hashtable table = System.Collections.Hashtable.Synchronized(new System.Collections.Hashtable());
-			for (System.Collections.IEnumerator iter = wordSet.GetEnumerator(); iter.MoveNext(); )
-			{
-				System.String word = (System.String) iter.Current;
-				table[word] = word;
-			}
-			return table;
-		}
-	}
+        /// <summary> Reads a stem dictionary. Each line contains:
+        /// <pre>word<b>\t</b>stem</pre>
+        /// (i.e. two tab separated words)
+        /// 
+        /// </summary>
+        /// <returns> stem dictionary that overrules the stemming algorithm
+        /// </returns>
+        /// <throws>  IOException  </throws>
+        public static System.Collections.Hashtable GetStemDict(System.IO.FileInfo wordstemfile)
+        {
+            if (wordstemfile == null)
+                throw new System.NullReferenceException("wordstemfile may not be null");
+            System.Collections.Hashtable result = new System.Collections.Hashtable();
+            System.IO.StreamReader br = null;
+            System.IO.StreamReader fr = null;
+            try
+            {
+                fr = new System.IO.StreamReader(wordstemfile.FullName, System.Text.Encoding.Default);
+                br = new System.IO.StreamReader(fr.BaseStream, fr.CurrentEncoding);
+                System.String line;
+                char[] tab = {'\t'};
+                while ((line = br.ReadLine()) != null)
+                {
+                    System.String[] wordstem = line.Split(tab, 2);
+                    result[wordstem[0]] = wordstem[1];
+                }
+            }
+            finally
+            {
+                if (fr != null)
+                    fr.Close();
+                if (br != null)
+                    br.Close();
+            }
+            return result;
+        }
+    }
 }

Modified: incubator/lucene.net/trunk/C#/src/Lucene.Net/AssemblyInfo.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/C%23/src/Lucene.Net/AssemblyInfo.cs?view=diff&rev=534192&r1=534191&r2=534192
==============================================================================
--- incubator/lucene.net/trunk/C#/src/Lucene.Net/AssemblyInfo.cs (original)
+++ incubator/lucene.net/trunk/C#/src/Lucene.Net/AssemblyInfo.cs Tue May  1 11:45:26 2007
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 using System.Reflection;
 using System.Runtime.CompilerServices;
 
@@ -7,7 +24,7 @@
 // associated with an assembly.
 //
 [assembly: AssemblyTitle("Apache Lucene.Net")]
-[assembly: AssemblyDescription("The Apache Software Foundation Lucene.Net a full-text search engine library")]
+[assembly: AssemblyDescription("The Apache Software Foundation Lucene.Net text search library")]
 [assembly: AssemblyConfiguration("")]
 [assembly: AssemblyCompany("The Apache Software Foundation")]
 [assembly: AssemblyProduct("Lucene.Net")]
@@ -18,6 +35,7 @@
 
 [assembly: AssemblyInformationalVersionAttribute("2.0")]
 
+
 //
 // Version information for an assembly consists of the following four values:
 //
@@ -29,7 +47,8 @@
 // You can specify all the values or you can default the Revision and Build Numbers 
 // by using the '*' as shown below:
 
-[assembly: AssemblyVersion("2.0.0.005")]
+[assembly: AssemblyVersion("2.1.0.001")]
+
 
 //
 // In order to sign your assembly you must specify a key to use. Refer to the 



Mime
View raw message