lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From synhers...@apache.org
Subject [20/52] [abbrv] lucenenet git commit: Ported Analysis.Compound namespace + tests
Date Thu, 01 Sep 2016 14:39:41 GMT
Ported Analysis.Compound namespace + tests


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/87c1d606
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/87c1d606
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/87c1d606

Branch: refs/heads/master
Commit: 87c1d6068e578fde46351ec59f9e2696175b7839
Parents: a153b02
Author: Shad Storhaug <shad@shadstorhaug.com>
Authored: Mon Aug 15 10:02:08 2016 +0700
Committer: Shad Storhaug <shad@shadstorhaug.com>
Committed: Sun Aug 21 23:22:50 2016 +0700

----------------------------------------------------------------------
 .../Compound/CompoundWordTokenFilterBase.cs     |   14 +-
 .../DictionaryCompoundWordTokenFilter.cs        |    4 +-
 .../DictionaryCompoundWordTokenFilterFactory.cs |   22 +-
 .../HyphenationCompoundWordTokenFilter.cs       |   87 +-
 ...HyphenationCompoundWordTokenFilterFactory.cs |  179 +-
 .../Analysis/Compound/hyphenation/ByteVector.cs |  301 ++--
 .../Analysis/Compound/hyphenation/CharVector.cs |  324 ++--
 .../Analysis/Compound/hyphenation/Hyphen.cs     |  126 +-
 .../Compound/hyphenation/Hyphenation.cs         |   94 +-
 .../Compound/hyphenation/HyphenationTree.cs     | 1084 ++++++------
 .../Compound/hyphenation/PatternConsumer.cs     |   56 +-
 .../Compound/hyphenation/PatternParser.cs       |  927 +++++-----
 .../Compound/hyphenation/TernaryTree.cs         | 1578 +++++++++---------
 .../Compound/hyphenation/hyphenation.dtd        |   68 +
 .../Lucene.Net.Analysis.Common.csproj           |   16 +
 .../Tokenattributes/ICharTermAttribute.cs       |    9 +
 .../Compound/TestCompoundWordTokenFilter.cs     |  766 +++++----
 ...tDictionaryCompoundWordTokenFilterFactory.cs |   79 +-
 ...HyphenationCompoundWordTokenFilterFactory.cs |  106 +-
 .../Analysis/Compound/compoundDictionary.txt    |   19 +
 .../Analysis/Compound/da_UTF8.xml               | 1208 ++++++++++++++
 .../Analysis/Compound/da_compoundDictionary.txt |   19 +
 .../Lucene.Net.Tests.Analysis.Common.csproj     |    9 +-
 23 files changed, 4292 insertions(+), 2803 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/87c1d606/src/Lucene.Net.Analysis.Common/Analysis/Compound/CompoundWordTokenFilterBase.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/CompoundWordTokenFilterBase.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/CompoundWordTokenFilterBase.cs
index c6bc4cd..4731b79 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/CompoundWordTokenFilterBase.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/CompoundWordTokenFilterBase.cs
@@ -1,13 +1,12 @@
-using System.Collections.Generic;
-using System.Diagnostics;
-using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Analysis.Tokenattributes;
 using Lucene.Net.Analysis.Util;
 using Lucene.Net.Support;
 using Lucene.Net.Util;
+using System.Collections.Generic;
+using System.Diagnostics;
 
 namespace Lucene.Net.Analysis.Compound
 {
-
     /*
      * Licensed to the Apache Software Foundation (ASF) under one or more
      * contributor license agreements.  See the NOTICE file distributed with
@@ -24,6 +23,7 @@ namespace Lucene.Net.Analysis.Compound
      * See the License for the specific language governing permissions and
      * limitations under the License.
      */
+
     /// <summary>
     /// Base class for decomposition token filters.
     /// <para>
@@ -64,7 +64,7 @@ namespace Lucene.Net.Analysis.Compound
         protected internal readonly int maxSubwordSize;
         protected internal readonly bool onlyLongestMatch;
 
-        protected internal readonly CharTermAttribute termAtt;
+        protected internal readonly ICharTermAttribute termAtt;
         protected internal readonly IOffsetAttribute offsetAtt;
         private readonly IPositionIncrementAttribute posIncAtt;
 
@@ -83,7 +83,7 @@ namespace Lucene.Net.Analysis.Compound
         protected CompoundWordTokenFilterBase(LuceneVersion matchVersion, TokenStream input, CharArraySet dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, bool onlyLongestMatch)
             : base(input)
         {
-            termAtt = AddAttribute<ICharTermAttribute>() as CharTermAttribute;
+            termAtt = AddAttribute<ICharTermAttribute>();
             offsetAtt = AddAttribute<IOffsetAttribute>();
             posIncAtt = AddAttribute<IPositionIncrementAttribute>();
 
@@ -108,7 +108,7 @@ namespace Lucene.Net.Analysis.Compound
             this.dictionary = dictionary;
         }
 
-        public override bool IncrementToken()
+        public override sealed bool IncrementToken()
         {
             if (tokens.Count > 0)
             {

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/87c1d606/src/Lucene.Net.Analysis.Common/Analysis/Compound/DictionaryCompoundWordTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/DictionaryCompoundWordTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/DictionaryCompoundWordTokenFilter.cs
index 09c67fd..849b5b3 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/DictionaryCompoundWordTokenFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/DictionaryCompoundWordTokenFilter.cs
@@ -3,7 +3,6 @@ using Lucene.Net.Util;
 
 namespace Lucene.Net.Analysis.Compound
 {
-
     /*
      * Licensed to the Apache Software Foundation (ASF) under one or more
      * contributor license agreements.  See the NOTICE file distributed with
@@ -20,6 +19,7 @@ namespace Lucene.Net.Analysis.Compound
      * See the License for the specific language governing permissions and
      * limitations under the License.
      */
+
     /// <summary>
     /// A <seealso cref="TokenFilter"/> that decomposes compound words found in many Germanic languages.
     /// <para>
@@ -90,7 +90,7 @@ namespace Lucene.Net.Analysis.Compound
             }
         }
 
-        protected internal override void Decompose()
+        protected override void Decompose()
         {
             int len = termAtt.Length;
             for (int i = 0; i <= len - this.minSubwordSize; ++i)

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/87c1d606/src/Lucene.Net.Analysis.Common/Analysis/Compound/DictionaryCompoundWordTokenFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/DictionaryCompoundWordTokenFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/DictionaryCompoundWordTokenFilterFactory.cs
index ef8f1dc..f3c116a 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/DictionaryCompoundWordTokenFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/DictionaryCompoundWordTokenFilterFactory.cs
@@ -1,9 +1,8 @@
-using System.Collections.Generic;
-using Lucene.Net.Analysis.Util;
+using Lucene.Net.Analysis.Util;
+using System.Collections.Generic;
 
 namespace Lucene.Net.Analysis.Compound
 {
-
     /*
      * Licensed to the Apache Software Foundation (ASF) under one or more
      * contributor license agreements.  See the NOTICE file distributed with
@@ -20,6 +19,7 @@ namespace Lucene.Net.Analysis.Compound
      * See the License for the specific language governing permissions and
      * limitations under the License.
      */
+
     /// <summary>
     /// Factory for <seealso cref="DictionaryCompoundWordTokenFilter"/>. 
     /// <pre class="prettyprint">
@@ -31,7 +31,7 @@ namespace Lucene.Net.Analysis.Compound
     ///   &lt;/analyzer&gt;
     /// &lt;/fieldType&gt;</pre>
     /// </summary>
-    public class DictionaryCompoundWordTokenFilterFactory : TokenFilterFactory, ResourceLoaderAware
+    public class DictionaryCompoundWordTokenFilterFactory : TokenFilterFactory, IResourceLoaderAware
     {
         private CharArraySet dictionary;
         private readonly string dictFile;
@@ -45,19 +45,19 @@ namespace Lucene.Net.Analysis.Compound
         public DictionaryCompoundWordTokenFilterFactory(IDictionary<string, string> args)
             : base(args)
         {
-            assureMatchVersion();
-            dictFile = require(args, "dictionary");
-            minWordSize = getInt(args, "minWordSize", CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE);
-            minSubwordSize = getInt(args, "minSubwordSize", CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE);
-            maxSubwordSize = getInt(args, "maxSubwordSize", CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE);
-            onlyLongestMatch = getBoolean(args, "onlyLongestMatch", true);
+            AssureMatchVersion();
+            dictFile = Require(args, "dictionary");
+            minWordSize = GetInt(args, "minWordSize", CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE);
+            minSubwordSize = GetInt(args, "minSubwordSize", CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE);
+            maxSubwordSize = GetInt(args, "maxSubwordSize", CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE);
+            onlyLongestMatch = GetBoolean(args, "onlyLongestMatch", true);
             if (args.Count > 0)
             {
                 throw new System.ArgumentException("Unknown parameters: " + args);
             }
         }
 
-        public virtual void Inform(ResourceLoader loader)
+        public virtual void Inform(IResourceLoader loader)
         {
             dictionary = base.GetWordSet(loader, dictFile, false);
         }

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/87c1d606/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilter.cs
index 38518ed..edeee5e 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilter.cs
@@ -1,12 +1,11 @@
-using System.IO;
-using Lucene.Net.Analysis.Compound.Hyphenation;
+using Lucene.Net.Analysis.Compound.Hyphenation;
 using Lucene.Net.Analysis.Util;
 using Lucene.Net.Util;
-using org.apache.lucene.analysis.compound;
+using System.IO;
+using System.Text;
 
 namespace Lucene.Net.Analysis.Compound
 {
-
     /*
      * Licensed to the Apache Software Foundation (ASF) under one or more
      * contributor license agreements.  See the NOTICE file distributed with
@@ -23,6 +22,7 @@ namespace Lucene.Net.Analysis.Compound
      * See the License for the specific language governing permissions and
      * limitations under the License.
      */
+
     /// <summary>
     /// A <seealso cref="TokenFilter"/> that decomposes compound words found in many Germanic languages.
     /// <para>
@@ -58,8 +58,10 @@ namespace Lucene.Net.Analysis.Compound
         ///          the hyphenation pattern tree to use for hyphenation </param>
         /// <param name="dictionary">
         ///          the word dictionary to match against. </param>
-        public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStream input, HyphenationTree hyphenator, CharArraySet dictionary)
-            : this(matchVersion, input, hyphenator, dictionary, DEFAULT_MIN_WORD_SIZE, DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE, false)
+        public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStream input, 
+            HyphenationTree hyphenator, CharArraySet dictionary)
+            : this(matchVersion, input, hyphenator, dictionary, DEFAULT_MIN_WORD_SIZE, 
+                  DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE, false)
         {
         }
 
@@ -85,8 +87,11 @@ namespace Lucene.Net.Analysis.Compound
         ///          only subwords shorter than this get to the output stream </param>
         /// <param name="onlyLongestMatch">
         ///          Add only the longest matching subword to the stream </param>
-        public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStream input, HyphenationTree hyphenator, CharArraySet dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, bool onlyLongestMatch)
-            : base(matchVersion, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch)
+        public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStream input, 
+            HyphenationTree hyphenator, CharArraySet dictionary, int minWordSize, int minSubwordSize, 
+            int maxSubwordSize, bool onlyLongestMatch)
+            : base(matchVersion, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize, 
+                  onlyLongestMatch)
         {
 
             this.hyphenator = hyphenator;
@@ -100,8 +105,11 @@ namespace Lucene.Net.Analysis.Compound
         /// null, minWordSize, minSubwordSize, maxSubwordSize }
         /// </para>
         /// </summary>
-        public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStream input, HyphenationTree hyphenator, int minWordSize, int minSubwordSize, int maxSubwordSize)
-            : this(matchVersion, input, hyphenator, null, minWordSize, minSubwordSize, maxSubwordSize, false)
+        public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStream input, 
+            HyphenationTree hyphenator, int minWordSize, int minSubwordSize, 
+            int maxSubwordSize)
+            : this(matchVersion, input, hyphenator, null, minWordSize, minSubwordSize, 
+                  maxSubwordSize, false)
         {
         }
 
@@ -113,8 +121,10 @@ namespace Lucene.Net.Analysis.Compound
         /// DEFAULT_MIN_WORD_SIZE, DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE }
         /// </para>
         /// </summary>
-        public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStream input, HyphenationTree hyphenator)
-            : this(matchVersion, input, hyphenator, DEFAULT_MIN_WORD_SIZE, DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE)
+        public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStream input, 
+            HyphenationTree hyphenator)
+            : this(matchVersion, input, hyphenator, DEFAULT_MIN_WORD_SIZE, DEFAULT_MIN_SUBWORD_SIZE, 
+                  DEFAULT_MAX_SUBWORD_SIZE)
         {
         }
 
@@ -126,7 +136,18 @@ namespace Lucene.Net.Analysis.Compound
         /// <exception cref="IOException"> If there is a low-level I/O error. </exception>
         public static HyphenationTree GetHyphenationTree(string hyphenationFilename)
         {
-            return getHyphenationTree(new InputSource(hyphenationFilename));
+            return GetHyphenationTree(hyphenationFilename, Encoding.UTF8);
+        }
+
+        /// <summary>
+        /// Create a hyphenator tree
+        /// </summary>
+        /// <param name="hyphenationFilename"> the filename of the XML grammar to load </param>
+        /// <returns> An object representing the hyphenation patterns </returns>
+        /// <exception cref="IOException"> If there is a low-level I/O error. </exception>
+        public static HyphenationTree GetHyphenationTree(string hyphenationFilename, Encoding encoding)
+        {
+            return GetHyphenationTree(new FileStream(hyphenationFilename, FileMode.Open, FileAccess.Read), encoding);
         }
 
         /// <summary>
@@ -135,9 +156,31 @@ namespace Lucene.Net.Analysis.Compound
         /// <param name="hyphenationFile"> the file of the XML grammar to load </param>
         /// <returns> An object representing the hyphenation patterns </returns>
         /// <exception cref="IOException"> If there is a low-level I/O error. </exception>
-        public static HyphenationTree GetHyphenationTree(File hyphenationFile)
+        public static HyphenationTree GetHyphenationTree(FileInfo hyphenationFile)
+        {
+            return GetHyphenationTree(hyphenationFile, Encoding.UTF8);
+        }
+
+        /// <summary>
+        /// Create a hyphenator tree
+        /// </summary>
+        /// <param name="hyphenationFile"> the file of the XML grammar to load </param>
+        /// <returns> An object representing the hyphenation patterns </returns>
+        /// <exception cref="IOException"> If there is a low-level I/O error. </exception>
+        public static HyphenationTree GetHyphenationTree(FileInfo hyphenationFile, Encoding encoding)
+        {
+            return GetHyphenationTree(new FileStream(hyphenationFile.FullName, FileMode.Open, FileAccess.Read), encoding);
+        }
+
+        /// <summary>
+        /// Create a hyphenator tree
+        /// </summary>
+        /// <param name="hyphenationSource"> the stream containing the XML grammar </param>
+        /// <returns> An object representing the hyphenation patterns </returns>
+        /// <exception cref="IOException"> If there is a low-level I/O error. </exception>
+        public static HyphenationTree GetHyphenationTree(Stream hyphenationSource)
         {
-            return getHyphenationTree(new InputSource(hyphenationFile.ToURI().toASCIIString()));
+            return GetHyphenationTree(hyphenationSource, Encoding.UTF8);
         }
 
         /// <summary>
@@ -146,17 +189,17 @@ namespace Lucene.Net.Analysis.Compound
         /// <param name="hyphenationSource"> the InputSource pointing to the XML grammar </param>
         /// <returns> An object representing the hyphenation patterns </returns>
         /// <exception cref="IOException"> If there is a low-level I/O error. </exception>
-        public static HyphenationTree getHyphenationTree(InputSource hyphenationSource)
+        public static HyphenationTree GetHyphenationTree(Stream hyphenationSource, Encoding encoding)
         {
             var tree = new HyphenationTree();
-            tree.loadPatterns(hyphenationSource);
+            tree.LoadPatterns(hyphenationSource, encoding);
             return tree;
         }
 
-        protected internal override void decompose()
+        protected override void Decompose()
         {
             // get the hyphenation points
-            Hyphenation hyphens = hyphenator.hyphenate(termAtt.Buffer(), 0, termAtt.Length(), 1, 1);
+            Hyphenation.Hyphenation hyphens = hyphenator.Hyphenate(termAtt.Buffer(), 0, termAtt.Length, 1, 1);
             // No hyphen points found -> exit
             if (hyphens == null)
             {
@@ -197,7 +240,7 @@ namespace Lucene.Net.Analysis.Compound
                         {
                             if (longestMatchToken != null)
                             {
-                                if (longestMatchToken.txt.Length() < partLength)
+                                if (longestMatchToken.txt.Length < partLength)
                                 {
                                     longestMatchToken = new CompoundToken(this, start, partLength);
                                 }
@@ -212,7 +255,7 @@ namespace Lucene.Net.Analysis.Compound
                             tokens.AddLast(new CompoundToken(this, start, partLength));
                         }
                     }
-                    else if (dictionary.contains(termAtt.buffer(), start, partLength - 1))
+                    else if (dictionary.Contains(termAtt.Buffer(), start, partLength - 1))
                     {
                         // check the dictionary again with a word that is one character
                         // shorter
@@ -222,7 +265,7 @@ namespace Lucene.Net.Analysis.Compound
                         {
                             if (longestMatchToken != null)
                             {
-                                if (longestMatchToken.txt.Length() < partLength - 1)
+                                if (longestMatchToken.txt.Length < partLength - 1)
                                 {
                                     longestMatchToken = new CompoundToken(this, start, partLength - 1);
                                 }

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/87c1d606/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilterFactory.cs
index d1cdeee..4de8724 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilterFactory.cs
@@ -1,12 +1,13 @@
-using System.Collections.Generic;
-using Lucene.Net.Analysis.Compound.Hyphenation;
+using Lucene.Net.Analysis.Compound.Hyphenation;
 using Lucene.Net.Analysis.Util;
 using Lucene.Net.Util;
+using System.Collections.Generic;
+using System.IO;
+using System.Text;
 
 namespace Lucene.Net.Analysis.Compound
 {
-
-	/*
+    /*
 	 * Licensed to the Apache Software Foundation (ASF) under one or more
 	 * contributor license agreements.  See the NOTICE file distributed with
 	 * this work for additional information regarding copyright ownership.
@@ -22,92 +23,96 @@ namespace Lucene.Net.Analysis.Compound
 	 * See the License for the specific language governing permissions and
 	 * limitations under the License.
 	 */
+
     /// <summary>
-	/// Factory for <seealso cref="HyphenationCompoundWordTokenFilter"/>.
-	/// <para>
-	/// This factory accepts the following parameters:
-	/// <ul>
-	///  <li><code>hyphenator</code> (mandatory): path to the FOP xml hyphenation pattern. 
-	///  See <a href="http://offo.sourceforge.net/hyphenation/">http://offo.sourceforge.net/hyphenation/</a>.
-	///  <li><code>encoding</code> (optional): encoding of the xml hyphenation file. defaults to UTF-8.
-	///  <li><code>dictionary</code> (optional): dictionary of words. defaults to no dictionary.
-	///  <li><code>minWordSize</code> (optional): minimal word length that gets decomposed. defaults to 5.
-	///  <li><code>minSubwordSize</code> (optional): minimum length of subwords. defaults to 2.
-	///  <li><code>maxSubwordSize</code> (optional): maximum length of subwords. defaults to 15.
-	///  <li><code>onlyLongestMatch</code> (optional): if true, adds only the longest matching subword 
-	///    to the stream. defaults to false.
-	/// </ul>
-	/// </para>
-	/// <para>
-	/// <pre class="prettyprint">
-	/// &lt;fieldType name="text_hyphncomp" class="solr.TextField" positionIncrementGap="100"&gt;
-	///   &lt;analyzer&gt;
-	///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
-	///     &lt;filter class="solr.HyphenationCompoundWordTokenFilterFactory" hyphenator="hyphenator.xml" encoding="UTF-8"
-	///         dictionary="dictionary.txt" minWordSize="5" minSubwordSize="2" maxSubwordSize="15" onlyLongestMatch="false"/&gt;
-	///   &lt;/analyzer&gt;
-	/// &lt;/fieldType&gt;</pre>
-	/// 
-	/// </para>
-	/// </summary>
-	/// <seealso cref= HyphenationCompoundWordTokenFilter </seealso>
-	public class HyphenationCompoundWordTokenFilterFactory : TokenFilterFactory, ResourceLoaderAware
-	{
-	  private CharArraySet dictionary;
-	  private HyphenationTree hyphenator;
-	  private readonly string dictFile;
-	  private readonly string hypFile;
-	  private readonly string encoding;
-	  private readonly int minWordSize;
-	  private readonly int minSubwordSize;
-	  private readonly int maxSubwordSize;
-	  private readonly bool onlyLongestMatch;
+    /// Factory for <seealso cref="HyphenationCompoundWordTokenFilter"/>.
+    /// <para>
+    /// This factory accepts the following parameters:
+    /// <ul>
+    ///  <li><code>hyphenator</code> (mandatory): path to the FOP xml hyphenation pattern. 
+    ///  See <a href="http://offo.sourceforge.net/hyphenation/">http://offo.sourceforge.net/hyphenation/</a>.</li>
+    ///  <li><code>encoding</code> (optional): encoding of the xml hyphenation file. defaults to UTF-8.</li>
+    ///  <li><code>dictionary</code> (optional): dictionary of words. defaults to no dictionary.</li>
+    ///  <li><code>minWordSize</code> (optional): minimal word length that gets decomposed. defaults to 5.</li>
+    ///  <li><code>minSubwordSize</code> (optional): minimum length of subwords. defaults to 2.</li>
+    ///  <li><code>maxSubwordSize</code> (optional): maximum length of subwords. defaults to 15.</li>
+    ///  <li><code>onlyLongestMatch</code> (optional): if true, adds only the longest matching subword 
+    ///    to the stream. defaults to false.</li>
+    /// </ul>
+    /// </para>
+    /// <para>
+    /// <pre class="prettyprint">
+    /// &lt;fieldType name="text_hyphncomp" class="solr.TextField" positionIncrementGap="100"&gt;
+    ///   &lt;analyzer&gt;
+    ///     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
+    ///     &lt;filter class="solr.HyphenationCompoundWordTokenFilterFactory" hyphenator="hyphenator.xml" encoding="UTF-8"
+    ///         dictionary="dictionary.txt" minWordSize="5" minSubwordSize="2" maxSubwordSize="15" onlyLongestMatch="false"/&gt;
+    ///   &lt;/analyzer&gt;
+    /// &lt;/fieldType&gt;</pre>
+    /// 
+    /// </para>
+    /// </summary>
+    /// <seealso cref="HyphenationCompoundWordTokenFilter"/>
+    public class HyphenationCompoundWordTokenFilterFactory : TokenFilterFactory, IResourceLoaderAware
+    {
+        private CharArraySet dictionary;
+        private HyphenationTree hyphenator;
+        private readonly string dictFile;
+        private readonly string hypFile;
+        private readonly string encoding;
+        private readonly int minWordSize;
+        private readonly int minSubwordSize;
+        private readonly int maxSubwordSize;
+        private readonly bool onlyLongestMatch;
+
+        /// <summary>
+        /// Creates a new HyphenationCompoundWordTokenFilterFactory </summary>
+        public HyphenationCompoundWordTokenFilterFactory(IDictionary<string, string> args) : base(args)
+        {
+            AssureMatchVersion();
+            dictFile = Get(args, "dictionary");
+            encoding = Get(args, "encoding");
+            hypFile = Require(args, "hyphenator"); // LUCENENET TODO: Not sure what to do with this
+            minWordSize = GetInt(args, "minWordSize", CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE);
+            minSubwordSize = GetInt(args, "minSubwordSize", CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE);
+            maxSubwordSize = GetInt(args, "maxSubwordSize", CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE);
+            onlyLongestMatch = GetBoolean(args, "onlyLongestMatch", false);
+            if (args.Count > 0)
+            {
+                throw new System.ArgumentException("Unknown parameters: " + args);
+            }
+        }
+
+        public virtual void Inform(IResourceLoader loader)
+        {
+            Stream stream = null;
+            try
+            {
+                if (dictFile != null) // the dictionary can be empty.
+                {
+                    dictionary = GetWordSet(loader, dictFile, false);
+                }
+                // TODO: Broken, because we cannot resolve real system id
+                // ResourceLoader should also supply method like ClassLoader to get resource URL
+                stream = loader.OpenResource(hypFile);
+                //InputSource @is = new InputSource(stream);
+                //@is.Encoding = encoding; // if it's null let xml parser decide
+                //@is.SystemId = hypFile;
 
-	  /// <summary>
-	  /// Creates a new HyphenationCompoundWordTokenFilterFactory </summary>
-	  public HyphenationCompoundWordTokenFilterFactory(IDictionary<string, string> args) : base(args)
-	  {
-		assureMatchVersion();
-		dictFile = get(args, "dictionary");
-		encoding = get(args, "encoding");
-		hypFile = require(args, "hyphenator");
-		minWordSize = getInt(args, "minWordSize", CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE);
-		minSubwordSize = getInt(args, "minSubwordSize", CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE);
-		maxSubwordSize = getInt(args, "maxSubwordSize", CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE);
-		onlyLongestMatch = getBoolean(args, "onlyLongestMatch", false);
-		if (args.Count > 0)
-		{
-		  throw new System.ArgumentException("Unknown parameters: " + args);
-		}
-	  }
+                var xmlEncoding = string.IsNullOrEmpty(encoding) ? Encoding.UTF8 : Encoding.GetEncoding(encoding);
 
-	  public virtual void Inform(ResourceLoader loader)
-	  {
-		InputStream stream = null;
-		try
-		{
-		  if (dictFile != null) // the dictionary can be empty.
-		  {
-			dictionary = getWordSet(loader, dictFile, false);
-		  }
-		  // TODO: Broken, because we cannot resolve real system id
-		  // ResourceLoader should also supply method like ClassLoader to get resource URL
-		  stream = loader.openResource(hypFile);
-		  InputSource @is = new InputSource(stream);
-		  @is.Encoding = encoding; // if it's null let xml parser decide
-		  @is.SystemId = hypFile;
-		  hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(@is);
-		}
-		finally
-		{
-		  IOUtils.CloseWhileHandlingException(stream);
-		}
-	  }
+                hyphenator = HyphenationCompoundWordTokenFilter.GetHyphenationTree(stream, xmlEncoding);
 
-	  public override TokenStream Create(TokenStream input)
-	  {
-		return new HyphenationCompoundWordTokenFilter(luceneMatchVersion, input, hyphenator, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
-	  }
-	}
+            }
+            finally
+            {
+                IOUtils.CloseWhileHandlingException(stream);
+            }
+        }
 
+        public override TokenStream Create(TokenStream input)
+        {
+            return new HyphenationCompoundWordTokenFilter(luceneMatchVersion, input, hyphenator, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
+        }
+    }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/87c1d606/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/ByteVector.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/ByteVector.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/ByteVector.cs
index c59a69d..6442d11 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/ByteVector.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/ByteVector.cs
@@ -1,149 +1,156 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- * 
- *      http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-namespace Lucene.Net.Analysis.Compound.Hyphenation
+namespace Lucene.Net.Analysis.Compound.Hyphenation
 {
-
-	/// <summary>
-	/// This class implements a simple byte vector with access to the underlying
-	/// array.
-	/// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). They have been slightly modified. 
-	/// </summary>
-	public class ByteVector
-	{
-
-	  /// <summary>
-	  /// Capacity increment size
-	  /// </summary>
-	  private const int DEFAULT_BLOCK_SIZE = 2048;
-
-	  private int blockSize;
-
-	  /// <summary>
-	  /// The encapsulated array
-	  /// </summary>
-	  private sbyte[] array;
-
-	  /// <summary>
-	  /// Points to next free item
-	  /// </summary>
-	  private int n;
-
-	  public ByteVector() : this(DEFAULT_BLOCK_SIZE)
-	  {
-	  }
-
-	  public ByteVector(int capacity)
-	  {
-		if (capacity_Renamed > 0)
-		{
-		  blockSize = capacity_Renamed;
-		}
-		else
-		{
-		  blockSize = DEFAULT_BLOCK_SIZE;
-		}
-		array = new sbyte[blockSize];
-		n = 0;
-	  }
-
-	  public ByteVector(sbyte[] a)
-	  {
-		blockSize = DEFAULT_BLOCK_SIZE;
-		array = a;
-		n = 0;
-	  }
-
-	  public ByteVector(sbyte[] a, int capacity)
-	  {
-		if (capacity_Renamed > 0)
-		{
-		  blockSize = capacity_Renamed;
-		}
-		else
-		{
-		  blockSize = DEFAULT_BLOCK_SIZE;
-		}
-		array = a;
-		n = 0;
-	  }
-
-	  public virtual sbyte[] Array
-	  {
-		  get
-		  {
-			return array;
-		  }
-	  }
-
-	  /// <summary>
-	  /// return number of items in array
-	  /// </summary>
-	  public virtual int length()
-	  {
-		return n;
-	  }
-
-	  /// <summary>
-	  /// returns current capacity of array
-	  /// </summary>
-	  public virtual int capacity()
-	  {
-		return array.Length;
-	  }
-
-	  public virtual void put(int index, sbyte val)
-	  {
-		array[index] = val;
-	  }
-
-	  public virtual sbyte get(int index)
-	  {
-		return array[index];
-	  }
-
-	  /// <summary>
-	  /// This is to implement memory allocation in the array. Like malloc().
-	  /// </summary>
-	  public virtual int alloc(int size)
-	  {
-		int index = n;
-		int len = array.Length;
-		if (n + size >= len)
-		{
-		  sbyte[] aux = new sbyte[len + blockSize];
-		  Array.Copy(array, 0, aux, 0, len);
-		  array = aux;
-		}
-		n += size;
-		return index;
-	  }
-
-	  public virtual void trimToSize()
-	  {
-		if (n < array.Length)
-		{
-		  sbyte[] aux = new sbyte[n];
-		  Array.Copy(array, 0, aux, 0, n);
-		  array = aux;
-		}
-	  }
-
-	}
-
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     * 
+     *      http://www.apache.org/licenses/LICENSE-2.0
+     * 
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// This class implements a simple byte vector with access to the underlying
+    /// array.
+    /// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). It has been slightly modified.
+    /// </summary>
+    public class ByteVector
+    {
+
+        /// <summary>
+        /// Capacity increment size
+        /// </summary>
+        private const int DEFAULT_BLOCK_SIZE = 2048;
+
+        /// <summary>
+        /// Number of elements added each time the array has to grow
+        /// </summary>
+        private int blockSize;
+
+        /// <summary>
+        /// The encapsulated array
+        /// </summary>
+        private sbyte[] array;
+
+        /// <summary>
+        /// Points to next free item
+        /// </summary>
+        private int n;
+
+        /// <summary>
+        /// Creates an empty vector whose initial capacity and growth increment
+        /// are both the default block size.
+        /// </summary>
+        public ByteVector() : this(DEFAULT_BLOCK_SIZE)
+        {
+        }
+
+        /// <summary>
+        /// Creates an empty vector backed by a freshly allocated array.
+        /// </summary>
+        /// <param name="capacity">Initial capacity and growth increment; values
+        /// that are not positive fall back to the default block size.</param>
+        public ByteVector(int capacity)
+        {
+            blockSize = capacity > 0 ? capacity : DEFAULT_BLOCK_SIZE;
+            array = new sbyte[blockSize];
+            n = 0;
+        }
+
+        /// <summary>
+        /// Wraps <paramref name="a"/> without copying it, using the default
+        /// block size as growth increment. The vector starts with a
+        /// <see cref="Length"/> of 0.
+        /// </summary>
+        /// <param name="a">The array to use as backing storage.</param>
+        public ByteVector(sbyte[] a) : this(a, DEFAULT_BLOCK_SIZE)
+        {
+        }
+
+        /// <summary>
+        /// Wraps <paramref name="a"/> without copying it. The vector starts with
+        /// a <see cref="Length"/> of 0.
+        /// </summary>
+        /// <param name="a">The array to use as backing storage.</param>
+        /// <param name="capacity">Growth increment; values that are not positive
+        /// fall back to the default block size.</param>
+        public ByteVector(sbyte[] a, int capacity)
+        {
+            blockSize = capacity > 0 ? capacity : DEFAULT_BLOCK_SIZE;
+            array = a;
+            n = 0;
+        }
+
+        /// <summary>
+        /// Gets the underlying array itself (not a copy).
+        /// </summary>
+        public virtual sbyte[] Array
+        {
+            get
+            {
+                return array;
+            }
+        }
+
+        /// <summary>
+        /// LUCENENET indexer for .NET. Reads or writes the element at
+        /// <paramref name="index"/> directly in the underlying array; it takes
+        /// the place of separate Put/Get accessor methods.
+        /// </summary>
+        /// <param name="index">Position in the underlying array.</param>
+        /// <returns>The element stored at <paramref name="index"/>.</returns>
+        public virtual sbyte this[int index]
+        {
+            get { return array[index]; }
+            set { array[index] = value; }
+        }
+
+        /// <summary>
+        /// return number of items in array
+        /// </summary>
+        public virtual int Length
+        {
+            get { return n; }
+        }
+
+        /// <summary>
+        /// returns current capacity of array
+        /// </summary>
+        public virtual int Capacity
+        {
+            get { return array.Length; }
+        }
+
+        /// <summary>
+        /// This is to implement memory allocation in the array. Like malloc().
+        /// Reserves <paramref name="size"/> consecutive items and grows the
+        /// underlying array when they do not fit.
+        /// </summary>
+        /// <param name="size">Number of items to reserve.</param>
+        /// <returns>Index of the first reserved item.</returns>
+        public virtual int Alloc(int size)
+        {
+            int index = n;
+            int len = array.Length;
+            if (n + size >= len)
+            {
+                // Grow in whole blocks. One block is not enough when the request
+                // is larger than blockSize, so keep adding blocks until the
+                // reserved range fits with at least one item to spare.
+                int newLen = len + blockSize;
+                while (n + size >= newLen)
+                {
+                    newLen += blockSize;
+                }
+                sbyte[] aux = new sbyte[newLen];
+                System.Array.Copy(array, 0, aux, 0, len);
+                array = aux;
+            }
+            n += size;
+            return index;
+        }
+
+        /// <summary>
+        /// Shrinks the underlying array so that its capacity equals
+        /// <see cref="Length"/>. Does nothing when the array is already full.
+        /// </summary>
+        public virtual void TrimToSize()
+        {
+            if (n < array.Length)
+            {
+                sbyte[] aux = new sbyte[n];
+                System.Array.Copy(array, 0, aux, 0, n);
+                array = aux;
+            }
+        }
+    }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/87c1d606/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/CharVector.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/CharVector.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/CharVector.cs
index 568b50b..26fcea5 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/CharVector.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/CharVector.cs
@@ -1,163 +1,171 @@
 using System;
 
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- * 
- *      http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
 namespace Lucene.Net.Analysis.Compound.Hyphenation
 {
-
-	/// <summary>
-	/// This class implements a simple char vector with access to the underlying
-	/// array.
-	/// 
-	/// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). They have been slightly modified. 
-	/// </summary>
-	public class CharVector : ICloneable
-	{
-
-	  /// <summary>
-	  /// Capacity increment size
-	  /// </summary>
-	  private const int DEFAULT_BLOCK_SIZE = 2048;
-
-	  private int blockSize;
-
-	  /// <summary>
-	  /// The encapsulated array
-	  /// </summary>
-	  private char[] array;
-
-	  /// <summary>
-	  /// Points to next free item
-	  /// </summary>
-	  private int n;
-
-	  public CharVector() : this(DEFAULT_BLOCK_SIZE)
-	  {
-	  }
-
-	  public CharVector(int capacity)
-	  {
-		if (capacity_Renamed > 0)
-		{
-		  blockSize = capacity_Renamed;
-		}
-		else
-		{
-		  blockSize = DEFAULT_BLOCK_SIZE;
-		}
-		array = new char[blockSize];
-		n = 0;
-	  }
-
-	  public CharVector(char[] a)
-	  {
-		blockSize = DEFAULT_BLOCK_SIZE;
-		array = a;
-		n = a.Length;
-	  }
-
-	  public CharVector(char[] a, int capacity)
-	  {
-		if (capacity_Renamed > 0)
-		{
-		  blockSize = capacity_Renamed;
-		}
-		else
-		{
-		  blockSize = DEFAULT_BLOCK_SIZE;
-		}
-		array = a;
-		n = a.Length;
-	  }
-
-	  /// <summary>
-	  /// Reset Vector but don't resize or clear elements
-	  /// </summary>
-	  public virtual void clear()
-	  {
-		n = 0;
-	  }
-
-	  public override CharVector clone()
-	  {
-		CharVector cv = new CharVector(array.Clone(), blockSize);
-		cv.n = this.n;
-		return cv;
-	  }
-
-	  public virtual char[] Array
-	  {
-		  get
-		  {
-			return array;
-		  }
-	  }
-
-	  /// <summary>
-	  /// return number of items in array
-	  /// </summary>
-	  public virtual int length()
-	  {
-		return n;
-	  }
-
-	  /// <summary>
-	  /// returns current capacity of array
-	  /// </summary>
-	  public virtual int capacity()
-	  {
-		return array.Length;
-	  }
-
-	  public virtual void put(int index, char val)
-	  {
-		array[index] = val;
-	  }
-
-	  public virtual char get(int index)
-	  {
-		return array[index];
-	  }
-
-	  public virtual int alloc(int size)
-	  {
-		int index = n;
-		int len = array.Length;
-		if (n + size >= len)
-		{
-		  char[] aux = new char[len + blockSize];
-		  Array.Copy(array, 0, aux, 0, len);
-		  array = aux;
-		}
-		n += size;
-		return index;
-	  }
-
-	  public virtual void trimToSize()
-	  {
-		if (n < array.Length)
-		{
-		  char[] aux = new char[n];
-		  Array.Copy(array, 0, aux, 0, n);
-		  array = aux;
-		}
-	  }
-
-	}
-
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     * 
+     *      http://www.apache.org/licenses/LICENSE-2.0
+     * 
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// This class implements a simple char vector with access to the underlying
+    /// array.
+    /// 
+    /// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). It has been slightly modified.
+    /// </summary>
+    public class CharVector : ICloneable
+    {
+
+        /// <summary>
+        /// Capacity increment size
+        /// </summary>
+        private const int DEFAULT_BLOCK_SIZE = 2048;
+
+        /// <summary>
+        /// Number of elements added each time the array has to grow
+        /// </summary>
+        private int blockSize;
+
+        /// <summary>
+        /// The encapsulated array
+        /// </summary>
+        private char[] array;
+
+        /// <summary>
+        /// Points to next free item
+        /// </summary>
+        private int n;
+
+        /// <summary>
+        /// Creates an empty vector whose initial capacity and growth increment
+        /// are both the default block size.
+        /// </summary>
+        public CharVector() : this(DEFAULT_BLOCK_SIZE)
+        {
+        }
+
+        /// <summary>
+        /// Creates an empty vector backed by a freshly allocated array.
+        /// </summary>
+        /// <param name="capacity">Initial capacity and growth increment; values
+        /// that are not positive fall back to the default block size.</param>
+        public CharVector(int capacity)
+        {
+            blockSize = capacity > 0 ? capacity : DEFAULT_BLOCK_SIZE;
+            array = new char[blockSize];
+            n = 0;
+        }
+
+        /// <summary>
+        /// Wraps <paramref name="a"/> without copying it, using the default
+        /// block size as growth increment. Every element of
+        /// <paramref name="a"/> counts as in use.
+        /// </summary>
+        /// <param name="a">The array to use as backing storage.</param>
+        public CharVector(char[] a) : this(a, DEFAULT_BLOCK_SIZE)
+        {
+        }
+
+        /// <summary>
+        /// Wraps <paramref name="a"/> without copying it. Every element of
+        /// <paramref name="a"/> counts as in use.
+        /// </summary>
+        /// <param name="a">The array to use as backing storage.</param>
+        /// <param name="capacity">Growth increment; values that are not positive
+        /// fall back to the default block size.</param>
+        public CharVector(char[] a, int capacity)
+        {
+            blockSize = capacity > 0 ? capacity : DEFAULT_BLOCK_SIZE;
+            array = a;
+            n = a.Length;
+        }
+
+        /// <summary>
+        /// Reset Vector but don't resize or clear elements
+        /// </summary>
+        public virtual void Clear()
+        {
+            n = 0;
+        }
+
+        /// <summary>
+        /// Creates an independent copy of this vector with the same block size
+        /// and the same number of items in use.
+        /// </summary>
+        /// <returns>A new <see cref="CharVector"/> with its own backing array.</returns>
+        public virtual object Clone()
+        {
+            // Copy the backing array; handing over the live array would make
+            // the clone and this instance overwrite each other's contents.
+            CharVector cv = new CharVector((char[])array.Clone(), blockSize);
+            cv.n = this.n;
+            return cv;
+        }
+
+        /// <summary>
+        /// Gets the underlying array itself (not a copy).
+        /// </summary>
+        public virtual char[] Array
+        {
+            get
+            {
+                return array;
+            }
+        }
+
+        /// <summary>
+        /// LUCENENET indexer for .NET. Reads or writes the element at
+        /// <paramref name="index"/> directly in the underlying array; it takes
+        /// the place of separate Put/Get accessor methods.
+        /// </summary>
+        /// <param name="index">Position in the underlying array.</param>
+        /// <returns>The element stored at <paramref name="index"/>.</returns>
+        public virtual char this[int index]
+        {
+            get { return array[index]; }
+            set { array[index] = value; }
+        }
+
+        /// <summary>
+        /// return number of items in array
+        /// </summary>
+        public virtual int Length()
+        {
+            return n;
+        }
+
+        /// <summary>
+        /// returns current capacity of array
+        /// </summary>
+        public virtual int Capacity
+        {
+            get { return array.Length; }
+        }
+
+        /// <summary>
+        /// Reserves <paramref name="size"/> consecutive items and grows the
+        /// underlying array when they do not fit.
+        /// </summary>
+        /// <param name="size">Number of items to reserve.</param>
+        /// <returns>Index of the first reserved item.</returns>
+        public virtual int Alloc(int size)
+        {
+            int index = n;
+            int len = array.Length;
+            if (n + size >= len)
+            {
+                // Grow in whole blocks. One block is not enough when the request
+                // is larger than blockSize, so keep adding blocks until the
+                // reserved range fits with at least one item to spare.
+                int newLen = len + blockSize;
+                while (n + size >= newLen)
+                {
+                    newLen += blockSize;
+                }
+                char[] aux = new char[newLen];
+                System.Array.Copy(array, 0, aux, 0, len);
+                array = aux;
+            }
+            n += size;
+            return index;
+        }
+
+        /// <summary>
+        /// Shrinks the underlying array so that its capacity equals the number
+        /// of items in use. Does nothing when the array is already full.
+        /// </summary>
+        public virtual void TrimToSize()
+        {
+            if (n < array.Length)
+            {
+                char[] aux = new char[n];
+                System.Array.Copy(array, 0, aux, 0, n);
+                array = aux;
+            }
+        }
+    }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/87c1d606/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/Hyphen.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/Hyphen.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/Hyphen.cs
index 8d73bd8..91009b1 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/Hyphen.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/Hyphen.cs
@@ -1,76 +1,72 @@
 using System.Text;
 
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- * 
- *      http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
 namespace Lucene.Net.Analysis.Compound.Hyphenation
 {
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     * 
+     *      http://www.apache.org/licenses/LICENSE-2.0
+     * 
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
 
-	/// <summary>
-	/// This class represents a hyphen. A 'full' hyphen is made of 3 parts: the
-	/// pre-break text, post-break text and no-break. If no line-break is generated
-	/// at this position, the no-break text is used, otherwise, pre-break and
-	/// post-break are used. Typically, pre-break is equal to the hyphen character
-	/// and the others are empty. However, this general scheme allows support for
-	/// cases in some languages where words change spelling if they're split across
-	/// lines, like german's 'backen' which hyphenates 'bak-ken'. BTW, this comes
-	/// from TeX.
-	/// 
-	/// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). They have been slightly modified. 
-	/// </summary>
-
-	public class Hyphen
-	{
-	  public string preBreak;
-
-	  public string noBreak;
-
-	  public string postBreak;
+    /// <summary>
+    /// This class represents a hyphen. A 'full' hyphen is made of 3 parts: the
+    /// pre-break text, post-break text and no-break. If no line-break is generated
+    /// at this position, the no-break text is used, otherwise, pre-break and
+    /// post-break are used. Typically, pre-break is equal to the hyphen character
+    /// and the others are empty. However, this general scheme allows support for
+    /// cases in some languages where words change spelling if they're split across
+    /// lines, like german's 'backen' which hyphenates 'bak-ken'. BTW, this comes
+    /// from TeX.
+    /// 
+    /// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). They have been slightly modified. 
+    /// </summary>
+    public class Hyphen
+    {
+        public string preBreak;
 
-	  internal Hyphen(string pre, string no, string post)
-	  {
-		preBreak = pre;
-		noBreak = no;
-		postBreak = post;
-	  }
+        public string noBreak;
 
-	  internal Hyphen(string pre)
-	  {
-		preBreak = pre;
-		noBreak = null;
-		postBreak = null;
-	  }
+        public string postBreak;
 
-	  public override string ToString()
-	  {
-		if (noBreak == null && postBreak == null && preBreak != null && preBreak.Equals("-"))
-		{
-		  return "-";
-		}
-		StringBuilder res = new StringBuilder("{");
-		res.Append(preBreak);
-		res.Append("}{");
-		res.Append(postBreak);
-		res.Append("}{");
-		res.Append(noBreak);
-		res.Append('}');
-		return res.ToString();
-	  }
+        /// <summary>
+        /// Creates a hyphen from its three parts.
+        /// </summary>
+        /// <param name="pre">Value stored in <see cref="preBreak"/>.</param>
+        /// <param name="no">Value stored in <see cref="noBreak"/>.</param>
+        /// <param name="post">Value stored in <see cref="postBreak"/>.</param>
+        internal Hyphen(string pre, string no, string post)
+        {
+            this.preBreak = pre;
+            this.noBreak = no;
+            this.postBreak = post;
+        }
 
-	}
+        /// <summary>
+        /// Creates a hyphen that has only pre-break text; <see cref="noBreak"/>
+        /// and <see cref="postBreak"/> are left null.
+        /// </summary>
+        /// <param name="pre">Value stored in <see cref="preBreak"/>.</param>
+        internal Hyphen(string pre)
+            : this(pre, null, null)
+        {
+        }
 
+        /// <summary>
+        /// Returns "-" for a plain hyphen (pre-break text "-" with no no-break
+        /// and no post-break text); otherwise the three parts in the form
+        /// {preBreak}{postBreak}{noBreak}. Null parts are rendered as empty.
+        /// </summary>
+        public override string ToString()
+        {
+            bool isPlainHyphen = noBreak == null && postBreak == null && preBreak == "-";
+            if (isPlainHyphen)
+            {
+                return "-";
+            }
+
+            // string.Concat treats null arguments as empty strings.
+            return string.Concat("{", preBreak, "}{", postBreak, "}{", noBreak, "}");
+        }
+    }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/87c1d606/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/Hyphenation.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/Hyphenation.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/Hyphenation.cs
index bf2a170..fdbac29 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/Hyphenation.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/hyphenation/Hyphenation.cs
@@ -1,55 +1,53 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- * 
- *      http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-namespace Lucene.Net.Analysis.Compound.Hyphenation
+namespace Lucene.Net.Analysis.Compound.Hyphenation
 {
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     * 
+     *      http://www.apache.org/licenses/LICENSE-2.0
+     * 
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
 
-	/// <summary>
-	/// This class represents a hyphenated word.
-	/// 
-	/// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). They have been slightly modified.
-	/// </summary>
-	public class Hyphenation
-	{
-
-	  private readonly int[] hyphenPoints;
+    /// <summary>
+    /// This class represents a hyphenated word.
+    /// 
+    /// This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). They have been slightly modified.
+    /// </summary>
+    public class Hyphenation
+    {
 
-	  /// <summary>
-	  /// rawWord as made of alternating strings and <seealso cref="Hyphen"/> instances
-	  /// </summary>
-	  internal Hyphenation(int[] points)
-	  {
-		hyphenPoints = points;
-	  }
+        private readonly int[] hyphenPoints;
 
-	  /// <returns> the number of hyphenation points in the word </returns>
-	  public virtual int length()
-	  {
-		return hyphenPoints.Length;
-	  }
+        /// <summary>
+        /// Creates a hyphenation result around the given hyphenation points.
+        /// The array is stored as-is, not copied.
+        /// </summary>
+        /// <param name="points">The hyphenation points of the word.</param>
+        internal Hyphenation(int[] points)
+        {
+            this.hyphenPoints = points;
+        }
 
-	  /// <returns> the hyphenation points </returns>
-	  public virtual int[] HyphenationPoints
-	  {
-		  get
-		  {
-			return hyphenPoints;
-		  }
-	  }
-	}
+        /// <summary>
+        /// Gets the number of hyphenation points in the word.
+        /// </summary>
+        public virtual int Length
+        {
+            get
+            {
+                int[] points = hyphenPoints;
+                return points.Length;
+            }
+        }
 
+        /// <summary>
+        /// Gets the hyphenation points. The stored array itself is returned,
+        /// not a copy.
+        /// </summary>
+        public virtual int[] HyphenationPoints
+        {
+            get { return hyphenPoints; }
+        }
+    }
 }
\ No newline at end of file


Mime
View raw message