lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From si...@apache.org
Subject svn commit: r1347076 [1/9] - in /incubator/lucene.net/trunk: src/contrib/Analyzers/ src/contrib/Analyzers/Hunspell/ test/contrib/Analyzers/ test/contrib/Analyzers/Hunspell/ test/contrib/Analyzers/Hunspell/Dictionaries/
Date Wed, 06 Jun 2012 19:46:00 GMT
Author: sisve
Date: Wed Jun  6 19:45:59 2012
New Revision: 1347076

URL: http://svn.apache.org/viewvc?rev=1347076&view=rev
Log:
Support for stemming using hunspell dictionaries.

Added:
    incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/
    incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellAffix.cs   (with props)
    incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellDictionary.cs   (with props)
    incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellStem.cs   (with props)
    incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellStemFilter.cs   (with props)
    incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellStemmer.cs   (with props)
    incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellWord.cs   (with props)
    incubator/lucene.net/trunk/test/contrib/Analyzers/Hunspell/
    incubator/lucene.net/trunk/test/contrib/Analyzers/Hunspell/Dictionaries/
    incubator/lucene.net/trunk/test/contrib/Analyzers/Hunspell/Dictionaries/en_US.aff   (with props)
    incubator/lucene.net/trunk/test/contrib/Analyzers/Hunspell/Dictionaries/en_US.dic   (with props)
    incubator/lucene.net/trunk/test/contrib/Analyzers/Hunspell/Dictionaries/fr-moderne.aff   (with props)
    incubator/lucene.net/trunk/test/contrib/Analyzers/Hunspell/Dictionaries/fr-moderne.dic   (with props)
    incubator/lucene.net/trunk/test/contrib/Analyzers/Hunspell/Dictionaries/nl_NL.aff   (with props)
    incubator/lucene.net/trunk/test/contrib/Analyzers/Hunspell/Dictionaries/nl_NL.dic   (with props)
    incubator/lucene.net/trunk/test/contrib/Analyzers/Hunspell/HunspellDictionaryLoader.cs   (with props)
    incubator/lucene.net/trunk/test/contrib/Analyzers/Hunspell/TestHunspellDictionary.cs   (with props)
    incubator/lucene.net/trunk/test/contrib/Analyzers/Hunspell/TestHunspellStemFilter.cs   (with props)
    incubator/lucene.net/trunk/test/contrib/Analyzers/Hunspell/TestHunspellStemmer.cs   (with props)
Modified:
    incubator/lucene.net/trunk/src/contrib/Analyzers/Contrib.Analyzers.csproj
    incubator/lucene.net/trunk/test/contrib/Analyzers/Contrib.Analyzers.Test.csproj

Modified: incubator/lucene.net/trunk/src/contrib/Analyzers/Contrib.Analyzers.csproj
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Analyzers/Contrib.Analyzers.csproj?rev=1347076&r1=1347075&r2=1347076&view=diff
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Analyzers/Contrib.Analyzers.csproj (original)
+++ incubator/lucene.net/trunk/src/contrib/Analyzers/Contrib.Analyzers.csproj Wed Jun  6 19:45:59 2012
@@ -110,6 +110,12 @@
     <Compile Include="Fr\FrenchAnalyzer.cs" />
     <Compile Include="Fr\FrenchStemFilter.cs" />
     <Compile Include="Fr\FrenchStemmer.cs" />
+    <Compile Include="Hunspell\HunspellAffix.cs" />
+    <Compile Include="Hunspell\HunspellDictionary.cs" />
+    <Compile Include="Hunspell\HunspellStem.cs" />
+    <Compile Include="Hunspell\HunspellStemFilter.cs" />
+    <Compile Include="Hunspell\HunspellStemmer.cs" />
+    <Compile Include="Hunspell\HunspellWord.cs" />
     <Compile Include="Miscellaneous\EmptyTokenStream.cs" />
     <Compile Include="Miscellaneous\InjectablePrefixAwareTokenFilter.cs" />
     <Compile Include="Miscellaneous\PatternAnalyzer.cs" />
@@ -185,4 +191,4 @@
   <Target Name="AfterBuild">
   </Target>
   -->
-</Project>
+</Project>
\ No newline at end of file

Added: incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellAffix.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellAffix.cs?rev=1347076&view=auto
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellAffix.cs (added)
+++ incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellAffix.cs Wed Jun  6 19:45:59 2012
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Diagnostics;
+using System.Text.RegularExpressions;
+
+namespace Lucene.Net.Analysis.Hunspell {
+    /// <summary>
+    ///   Wrapper class representing a hunspell affix.
+    /// </summary>
+    /// <remarks>
+    ///   <see cref="SetCondition" /> must be called before <see cref="CheckCondition" /> is used;
+    ///   until then <see cref="Condition" /> is <c>null</c>.
+    /// </remarks>
+    [DebuggerDisplay("{Condition}")]
+    public class HunspellAffix {
+        private String _condition;
+        private Regex _conditionPattern;
+
+        /// <summary>
+        ///   The append defined for the affix.
+        /// </summary>
+        public String Append { get; set; }
+
+        /// <summary>
+        ///   The flags defined for the affix append.
+        /// </summary>
+        public Char[] AppendFlags { get; set; }
+
+        /// <summary>
+        ///   The condition that must be met before the affix can be applied,
+        ///   as last passed to <see cref="SetCondition" />.
+        /// </summary>
+        public String Condition {
+            get { return _condition; }
+        }
+
+        /// <summary>
+        ///   The affix flag.
+        /// </summary>
+        public Char Flag { get; set; }
+
+        /// <summary>
+        ///   Whether the affix is defined as cross product.
+        /// </summary>
+        public Boolean IsCrossProduct { get; set; }
+
+        /// <summary>
+        ///   The stripping characters defined for the affix.
+        /// </summary>
+        public String Strip { get; set; }
+
+        /// <summary>
+        ///   Checks whether the given text meets the condition of this affix.
+        /// </summary>
+        /// <param name="text">Text to match against the condition pattern.</param>
+        /// <returns>
+        ///   <c>true</c> if the text meets the condition, <c>false</c> otherwise.
+        /// </returns>
+        /// <exception cref="ArgumentNullException"><paramref name="text" /> is <c>null</c>.</exception>
+        /// <exception cref="InvalidOperationException">No condition has been set yet.</exception>
+        public Boolean CheckCondition(String text) {
+            if (text == null)
+                throw new ArgumentNullException("text");
+
+            // Without this guard an affix that never had SetCondition called
+            // would fail with an uninformative NullReferenceException.
+            if (_conditionPattern == null)
+                throw new InvalidOperationException("The affix condition has not been set; call SetCondition first.");
+
+            return _conditionPattern.IsMatch(text);
+        }
+
+        /// <summary>
+        ///   Sets the condition that must be met before the affix can be applied.
+        /// </summary>
+        /// <param name="condition">Condition to be met before affix application.</param>
+        /// <param name="pattern">Condition as a regular expression pattern.</param>
+        /// <exception cref="ArgumentNullException">Either argument is <c>null</c>.</exception>
+        public void SetCondition(String condition, String pattern) {
+            if (condition == null) throw new ArgumentNullException("condition");
+            if (pattern == null) throw new ArgumentNullException("pattern");
+
+            // Build the regex first so a malformed pattern leaves the previous state untouched.
+            var compiled = new Regex(pattern);
+            _condition = condition;
+            _conditionPattern = compiled;
+        }
+    }
+}
\ No newline at end of file

Propchange: incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellAffix.cs
------------------------------------------------------------------------------
    svn:eol-style = native

Added: incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellDictionary.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellDictionary.cs?rev=1347076&view=auto
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellDictionary.cs (added)
+++ incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellDictionary.cs Wed Jun  6 19:45:59 2012
@@ -0,0 +1,428 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using System.Globalization;
+using System.IO;
+using System.Text;
+using System.Text.RegularExpressions;
+
+namespace Lucene.Net.Analysis.Hunspell {
+    public class HunspellDictionary {
+        private static readonly HunspellWord NoFlags = new HunspellWord(); // shared instance used for dictionary entries that have no '/' flag separator
+
+        private static readonly String PREFIX_KEY = "PFX"; // affix-file line prefix that starts a prefix rule block
+        private static readonly String SUFFIX_KEY = "SFX"; // affix-file line prefix that starts a suffix rule block
+        private static readonly String FLAG_KEY = "FLAG"; // affix-file line prefix that selects the flag parsing strategy
+        private static readonly String AF_KEY = "AF"; // affix-file line prefix that starts an alias flag block
+
+        private static readonly String NUM_FLAG_TYPE = "num"; // FLAG value mapped to NumFlagParsingStrategy
+        private static readonly String UTF8_FLAG_TYPE = "UTF-8"; // FLAG value mapped to SimpleFlagParsingStrategy
+        private static readonly String LONG_FLAG_TYPE = "long"; // FLAG value mapped to DoubleASCIIFlagParsingStrategy
+
+        private static readonly String PREFIX_CONDITION_REGEX_PATTERN = @"^{0}"; // format string: anchors a prefix condition at the start of the text
+        private static readonly String SUFFIX_CONDITION_REGEX_PATTERN = @"{0}$"; // format string: anchors a suffix condition at the end of the text
+
+        private readonly Dictionary<String, List<HunspellAffix>> _prefixes = new Dictionary<String, List<HunspellAffix>>(); // prefix rules keyed by their append string
+        private readonly Dictionary<String, List<HunspellAffix>> _suffixes = new Dictionary<String, List<HunspellAffix>>(); // suffix rules keyed by their append string
+        private readonly Dictionary<String, List<HunspellWord>> _words = new Dictionary<String, List<HunspellWord>>(); // dictionary entries keyed by the word text before any '/' separator
+        private readonly Dictionary<String, Char[]> _aliases = new Dictionary<String, Char[]>(); // alias flag sets keyed by their 1-based position in the AF block, as a string
+        private FlagParsingStrategy _flagParsingStrategy = new SimpleFlagParsingStrategy(); // Default flag parsing strategy; replaced when a FLAG line is read
+
+        /// <summary>
+        ///   Builds a HunspellDictionary from one affix stream and a single dictionary stream.
+        ///   Delegates to the multi-dictionary constructor with a one-element array.
+        /// </summary>
+        /// <param name = "affix">Stream for reading the hunspell affix file.</param>
+        /// <param name = "dictionary">Stream for reading the hunspell dictionary file.</param>
+        /// <exception cref = "IOException">Can be thrown while reading from the streams.</exception>
+        /// <exception cref = "InvalidDataException">Can be thrown if the content of the files does not meet expected formats.</exception>
+        public HunspellDictionary(Stream affix, Stream dictionary)
+            : this(affix, new Stream[] { dictionary }) { }
+
+        /// <summary>
+        ///   Builds a HunspellDictionary from one affix stream and any number of dictionary streams.
+        /// </summary>
+        /// <remarks>
+        ///   The encoding name is read from the start of the affix stream and then used to decode
+        ///   the rest of the affix stream as well as every dictionary stream.
+        /// </remarks>
+        /// <param name = "affix">Stream for reading the hunspell affix file.</param>
+        /// <param name = "dictionaries">Streams for reading the hunspell dictionary files.</param>
+        /// <exception cref = "ArgumentNullException">Either argument is <c>null</c>.</exception>
+        /// <exception cref = "IOException">Can be thrown while reading from the streams.</exception>
+        /// <exception cref = "InvalidDataException">Can be thrown if the content of the files does not meet expected formats.</exception>
+        public HunspellDictionary(Stream affix, IEnumerable<Stream> dictionaries) {
+            if (affix == null) {
+                throw new ArgumentNullException("affix");
+            }
+            if (dictionaries == null) {
+                throw new ArgumentNullException("dictionaries");
+            }
+
+            // One charset, taken from the affix file, decodes everything that follows.
+            var charset = Encoding.GetEncoding(ReadDictionaryEncoding(affix));
+
+            ReadAffixFile(affix, charset);
+            foreach (var stream in dictionaries) {
+                ReadDictionaryFile(stream, charset);
+            }
+        }
+
+        /// <summary>
+        ///   Looks up the HunspellWords registered under the given word.
+        /// </summary>
+        /// <param name="word">Word to look up.</param>
+        /// <returns>The words stored for <paramref name="word" />, or <c>null</c> if there are none.</returns>
+        /// <exception cref="ArgumentNullException"><paramref name="word" /> is <c>null</c>.</exception>
+        public IEnumerable<HunspellWord> LookupWord(String word) {
+            if (word == null) {
+                throw new ArgumentNullException("word");
+            }
+
+            List<HunspellWord> matches;
+            return _words.TryGetValue(word, out matches) ? matches : null;
+        }
+
+        /// <summary>
+        ///   Finds the prefix affixes whose append equals the String formed from a slice of the given char array.
+        /// </summary>
+        /// <param name="word">Char array to take the slice from.</param>
+        /// <param name="offset">Index in the char array at which the slice starts.</param>
+        /// <param name="length">Number of chars in the slice.</param>
+        /// <returns>The matching prefix affixes, or <c>null</c> if none are found.</returns>
+        /// <exception cref="ArgumentNullException"><paramref name="word" /> is <c>null</c>.</exception>
+        public IEnumerable<HunspellAffix> LookupPrefix(char[] word, int offset, int length) {
+            if (word == null) {
+                throw new ArgumentNullException("word");
+            }
+
+            List<HunspellAffix> matches;
+            var found = _prefixes.TryGetValue(new String(word, offset, length), out matches);
+            return found ? matches : null;
+        }
+
+        /// <summary>
+        ///   Finds the suffix affixes whose append equals the String formed from a slice of the given char array.
+        /// </summary>
+        /// <param name="word">Char array to take the slice from.</param>
+        /// <param name="offset">Index in the char array at which the slice starts.</param>
+        /// <param name="length">Number of chars in the slice.</param>
+        /// <returns>The matching suffix affixes, or <c>null</c> if none are found.</returns>
+        /// <exception cref="ArgumentNullException"><paramref name="word" /> is <c>null</c>.</exception>
+        public IEnumerable<HunspellAffix> LookupSuffix(char[] word, int offset, int length) {
+            if (word == null) {
+                throw new ArgumentNullException("word");
+            }
+
+            List<HunspellAffix> matches;
+            var found = _suffixes.TryGetValue(new String(word, offset, length), out matches);
+            return found ? matches : null;
+        }
+
+        /// <summary>
+        ///   Reads the affix file through the provided Stream, building up the prefix and suffix maps.
+        /// </summary>
+        /// <remarks>
+        ///   Only lines starting with <c>PFX</c>, <c>SFX</c>, <c>FLAG</c> or <c>AF</c> are interpreted; all other
+        ///   lines are skipped. The stream is wrapped in a <see cref="StreamReader" /> that is disposed when
+        ///   reading finishes.
+        /// </remarks>
+        /// <param name="affixStream">Stream to read the content of the affix file from.</param>
+        /// <param name="encoding">Encoding to decode the content of the file.</param>
+        /// <exception cref="ArgumentNullException">Either argument is <c>null</c>.</exception>
+        /// <exception cref="IOException">Can be thrown while reading from the Stream.</exception>
+        private void ReadAffixFile(Stream affixStream, Encoding encoding) {
+            if (affixStream == null) throw new ArgumentNullException("affixStream");
+            if (encoding == null) throw new ArgumentNullException("encoding");
+
+            using (var reader = new StreamReader(affixStream, encoding)) {
+                String line;
+                while ((line = reader.ReadLine()) != null) {
+                    // Directive keywords are fixed ASCII tokens, so compare ordinally:
+                    // the default StartsWith overload is culture-sensitive.
+                    if (line.StartsWith(PREFIX_KEY, StringComparison.Ordinal)) {
+                        ParseAffix(_prefixes, line, reader, PREFIX_CONDITION_REGEX_PATTERN);
+                    } else if (line.StartsWith(SUFFIX_KEY, StringComparison.Ordinal)) {
+                        ParseAffix(_suffixes, line, reader, SUFFIX_CONDITION_REGEX_PATTERN);
+                    } else if (line.StartsWith(FLAG_KEY, StringComparison.Ordinal)) {
+                        // Assume that the FLAG line comes before any prefix or suffixes
+                        // Store the strategy so it can be used when parsing the dic file
+                        _flagParsingStrategy = GetFlagParsingStrategy(line);
+                    } else if (line.StartsWith(AF_KEY, StringComparison.Ordinal)) {
+                        // Parse Alias Flag
+                        ParseAliasFlag(line, reader);
+                    }
+                }
+            }
+        }
+
+        /// <summary>
+        ///   Parses an alias flag (<c>AF</c>) block and stores each alias in the alias map.
+        /// </summary>
+        /// <remarks>
+        ///   The header line carries the number of alias lines that follow. Each following line is
+        ///   stored under its 1-based position within the block.
+        /// </remarks>
+        /// <param name="line">Header line of the block, i.e. <c>AF</c> followed by the number of aliases.</param>
+        /// <param name="reader">Reader positioned directly after the header line.</param>
+        /// <exception cref="ArgumentNullException">Either argument is <c>null</c>.</exception>
+        /// <exception cref="InvalidDataException">
+        ///   The header has no count, the block ends before the announced number of lines was read,
+        ///   or a line in the block is not a complete <c>AF</c> directive.
+        /// </exception>
+        private void ParseAliasFlag(String line, TextReader reader) {
+            if (line == null) throw new ArgumentNullException("line");
+            if (reader == null) throw new ArgumentNullException("reader");
+
+            var args = Regex.Split(line, "\\s+");
+            if (args.Length < 2)
+                throw new InvalidDataException("File corrupted, AF directive is missing the alias count : " + line);
+
+            var numLines = Int32.Parse(args[1], CultureInfo.InvariantCulture);
+
+            for (var i = 0; i < numLines; i++) {
+                line = reader.ReadLine();
+
+                // ReadLine returns null at end of input; report that as corrupt data
+                // rather than letting Regex.Split fail on a null argument.
+                if (line == null)
+                    throw new InvalidDataException("File corrupted, expected " + numLines + " AF directives but found only " + i);
+
+                var ruleArgs = Regex.Split(line, "\\s+");
+
+                if (ruleArgs.Length < 2 || ruleArgs[0] != "AF")
+                    throw new InvalidDataException("File corrupted, should be AF directive : " + line);
+
+                var appendFlags = _flagParsingStrategy.ParseFlags(ruleArgs[1]);
+                _aliases.Add((i + 1).ToString(CultureInfo.InvariantCulture), appendFlags);
+            }
+        }
+
+        /// <summary>
+        ///   Parses a specific affix rule block, putting the result into the provided affix map.
+        /// </summary>
+        /// <remarks>
+        ///   The fourth field of the header is the number of rule lines to read from <paramref name="reader" />.
+        ///   Each rule is stored in <paramref name="affixes" /> under its append string.
+        /// </remarks>
+        /// <param name="affixes">Map where the result of the parsing will be put.</param>
+        /// <param name="header">Header line of the affix rule.</param>
+        /// <param name="reader">TextReader to read the content of the rule from.</param>
+        /// <param name="conditionPattern">Format string used to generate the condition regex pattern.</param>
+        /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
+        /// <exception cref="InvalidDataException">
+        ///   The header or a rule line has too few fields, or the input ends before all announced rules were read.
+        /// </exception>
+        private void ParseAffix(Dictionary<String, List<HunspellAffix>> affixes, String header, TextReader reader, String conditionPattern) {
+            if (affixes == null) throw new ArgumentNullException("affixes");
+            if (header == null) throw new ArgumentNullException("header");
+            if (reader == null) throw new ArgumentNullException("reader");
+            if (conditionPattern == null) throw new ArgumentNullException("conditionPattern");
+
+            // Header fields used here: [2] = cross product marker, [3] = number of rules.
+            var args = Regex.Split(header, "\\s+");
+            if (args.Length < 4)
+                throw new InvalidDataException("File corrupted, incomplete affix header : " + header);
+
+            var crossProduct = args[2].Equals("Y");
+            var numLines = Int32.Parse(args[3], CultureInfo.InvariantCulture);
+
+            var hasAliases = _aliases.Count > 0;
+            for (var i = 0; i < numLines; i++) {
+                var line = reader.ReadLine();
+
+                // ReadLine returns null at end of input; surface that as corrupt data.
+                if (line == null)
+                    throw new InvalidDataException("File corrupted, affix rule block ended early : " + header);
+
+                // Rule fields: [1] = flag, [2] = strip, [3] = append[/flags], [4] = condition (optional).
+                var ruleArgs = Regex.Split(line, "\\s+");
+                if (ruleArgs.Length < 4)
+                    throw new InvalidDataException("File corrupted, incomplete affix rule : " + line);
+
+                var affix = new HunspellAffix();
+
+                affix.Flag = _flagParsingStrategy.ParseFlag(ruleArgs[1]);
+                // "0" in the strip field means that nothing is stripped.
+                affix.Strip = (ruleArgs[2] == "0") ? "" : ruleArgs[2];
+
+                var affixArg = ruleArgs[3];
+
+                var flagSep = affixArg.LastIndexOf('/');
+                if (flagSep != -1) {
+                    var cflag = affixArg.Substring(flagSep + 1);
+                    // With aliases in use the text after '/' is an alias key, otherwise it is the raw flags.
+                    var appendFlags = hasAliases ? _aliases[cflag] : _flagParsingStrategy.ParseFlags(cflag);
+                    Array.Sort(appendFlags);
+                    affix.AppendFlags = appendFlags;
+                    affix.Append = affixArg.Substring(0, flagSep);
+                } else {
+                    affix.Append = affixArg;
+                }
+
+                // A rule may leave the condition out; "." is the hunspell notation for "no condition".
+                var condition = ruleArgs.Length > 4 ? ruleArgs[4] : ".";
+                affix.SetCondition(condition, String.Format(conditionPattern, condition));
+                affix.IsCrossProduct = crossProduct;
+
+                List<HunspellAffix> list;
+                if (!affixes.TryGetValue(affix.Append, out list))
+                    affixes.Add(affix.Append, list = new List<HunspellAffix>());
+
+                list.Add(affix);
+            }
+        }
+
+        /// <summary>
+        ///   Parses the encoding specified in the affix file readable through the provided Stream.
+        /// </summary>
+        /// <remarks>
+        ///   The stream is read byte by byte, one line at a time, so that nothing beyond the
+        ///   <c>SET</c> line is consumed. Empty lines, whitespace-only lines and lines starting
+        ///   with <c>#</c> are skipped.
+        /// </remarks>
+        /// <param name="affix">Stream for reading the affix file.</param>
+        /// <returns>Encoding name specified in the affix file, with surrounding whitespace removed.</returns>
+        /// <exception cref="ArgumentNullException"><paramref name="affix" /> is <c>null</c>.</exception>
+        /// <exception cref="InvalidDataException">
+        ///   Thrown if the first non-empty non-comment line read from the file does not
+        ///   adhere to the format <c>SET encoding</c>, or if the file ends before such a line is found.
+        /// </exception>
+        private static String ReadDictionaryEncoding(Stream affix) {
+            if (affix == null) throw new ArgumentNullException("affix");
+
+            var builder = new StringBuilder();
+            for (; ; ) {
+                builder.Length = 0;
+                int ch;
+                while ((ch = affix.ReadByte()) >= 0) {
+                    if (ch == '\n') {
+                        break;
+                    }
+                    if (ch != '\r') {
+                        builder.Append((char)ch);
+                    }
+                }
+
+                var text = builder.ToString();
+
+                if (text.Length == 0 ||
+                    text[0] == '#' ||
+                    // checked last: also skips lines that contain nothing but whitespace
+                    text.Trim().Length == 0
+                    ) {
+                    // ch < 0 means the stream ended on this skipped line.
+                    if (ch < 0)
+                        throw new InvalidDataException("Unexpected end of affix file.");
+
+                    continue;
+                }
+
+                // StartsWith is safe for lines shorter than the keyword; taking a fixed
+                // four-character slice would throw ArgumentOutOfRangeException on those.
+                if (text.StartsWith("SET ", StringComparison.Ordinal)) {
+                    // cleanup the encoding string, too (whitespace)
+                    return text.Substring(4).Trim();
+                }
+
+                throw new InvalidDataException("The first non-comment line in the affix file must " +
+                                               "be a 'SET charset', was: '" + text + "'");
+            }
+        }
+
+        /// <summary>
+        ///   Determines the appropriate <see cref="FlagParsingStrategy" /> based on the FLAG definition line taken from the affix file.
+        /// </summary>
+        /// <remarks>
+        ///   Everything after the <c>FLAG</c> keyword is trimmed before it is compared, so extra
+        ///   spaces, tabs or trailing whitespace around the type do not matter.
+        /// </remarks>
+        /// <param name="flagLine">Line containing the flag information</param>
+        /// <returns>FlagParsingStrategy that handles parsing flags in the way specified in the FLAG definition.</returns>
+        /// <exception cref="ArgumentNullException"><paramref name="flagLine" /> is <c>null</c>.</exception>
+        /// <exception cref="ArgumentException">The flag type is missing or not one of the known types.</exception>
+        private static FlagParsingStrategy GetFlagParsingStrategy(String flagLine) {
+            if (flagLine == null) throw new ArgumentNullException("flagLine");
+
+            // A line consisting of just the keyword yields an empty type instead of
+            // an out-of-range Substring call.
+            var flagType = flagLine.Length > FLAG_KEY.Length
+                ? flagLine.Substring(FLAG_KEY.Length).Trim()
+                : String.Empty;
+
+            if (NUM_FLAG_TYPE.Equals(flagType))
+                return new NumFlagParsingStrategy();
+
+            if (UTF8_FLAG_TYPE.Equals(flagType))
+                return new SimpleFlagParsingStrategy();
+
+            if (LONG_FLAG_TYPE.Equals(flagType))
+                return new DoubleASCIIFlagParsingStrategy();
+
+            throw new ArgumentException("Unknown flag type: " + flagType);
+        }
+
+        /// <summary>
+        ///   Reads the dictionary file through the provided Stream, building up the words map.
+        /// </summary>
+        /// <remarks>
+        ///   The first line must hold the number of entries; it is validated but not otherwise used.
+        ///   Every following non-empty line is one entry: the text before the last <c>/</c> is the word,
+        ///   the text after it (up to the first space or tab) is either an alias key or the raw flags.
+        ///   The reader created here is not disposed, so the stream is left open.
+        /// </remarks>
+        /// <param name="dictionary">Stream to read the dictionary file through.</param>
+        /// <param name="encoding">Encoding used to decode the contents of the file.</param>
+        /// <exception cref="ArgumentNullException">Either argument is <c>null</c>.</exception>
+        /// <exception cref="InvalidDataException">The stream contains no entry count line.</exception>
+        /// <exception cref="IOException">Can be thrown while reading from the file.</exception>
+        private void ReadDictionaryFile(Stream dictionary, Encoding encoding) {
+            if (dictionary == null) throw new ArgumentNullException("dictionary");
+            if (encoding == null) throw new ArgumentNullException("encoding");
+            var reader = new StreamReader(dictionary, encoding);
+
+            // nocommit, don't create millions of strings.
+            var line = reader.ReadLine(); // first line is number of entries
+            if (line == null)
+                throw new InvalidDataException("Unexpected end of dictionary file, expected the number of entries.");
+
+            // Parsed only to reject a file whose first line is not a number.
+            Int32.Parse(line, CultureInfo.InvariantCulture);
+
+            var hasAliases = _aliases.Count > 0;
+            var flagTerminators = new[] { ' ', '\t' };
+
+            // nocommit, the flags themselves can be double-chars (long) or also numeric
+            // either way the trick is to encode them as char... but they must be parsed differently
+            while ((line = reader.ReadLine()) != null) {
+                // A blank line carries no word; without this check it would be
+                // registered as an empty-string entry.
+                if (line.Length == 0)
+                    continue;
+
+                String entry;
+                HunspellWord wordForm;
+
+                var flagSep = line.LastIndexOf('/');
+                if (flagSep == -1) {
+                    wordForm = NoFlags;
+                    entry = line;
+                } else {
+                    // note, there can be comments (morph description) after a flag,
+                    // separated from it by a space or a tab.
+                    var end = line.IndexOfAny(flagTerminators, flagSep);
+                    var cflag = end == -1 ? line.Substring(flagSep + 1) : line.Substring(flagSep + 1, end - flagSep - 1);
+
+                    wordForm = new HunspellWord(hasAliases ? _aliases[cflag] : _flagParsingStrategy.ParseFlags(cflag));
+
+                    entry = line.Substring(0, flagSep);
+                }
+
+                List<HunspellWord> entries;
+                if (!_words.TryGetValue(entry, out entries))
+                    _words.Add(entry, entries = new List<HunspellWord>());
+
+                entries.Add(wordForm);
+            }
+        }
+
+        #region Nested type: DoubleASCIIFlagParsingStrategy
+
+        /// <summary>
+        ///   Implementation of <see cref="FlagParsingStrategy" /> that assumes each flag is encoded as
+        ///   two ASCII characters whose codes must be combined into a single character.
+        /// </summary>
+        /// <remarks>
+        ///   The first character of a pair forms the high byte and the second the low byte of the
+        ///   resulting flag, so the order of the two characters is significant.
+        /// </remarks>
+        private class DoubleASCIIFlagParsingStrategy : FlagParsingStrategy {
+            /// <summary>
+            ///   Parses a run of two-character flags into one char per flag.
+            /// </summary>
+            /// <param name="rawFlags">Flags as written in the file; must have an even length.</param>
+            /// <returns>One char per character pair, or an empty array for an empty input.</returns>
+            /// <exception cref="ArgumentException"><paramref name="rawFlags" /> has an odd number of characters.</exception>
+            public override Char[] ParseFlags(String rawFlags) {
+                if (rawFlags.Length == 0)
+                    return new Char[0];
+
+                // An odd length would otherwise read one past the end of the string.
+                if (rawFlags.Length % 2 != 0)
+                    throw new ArgumentException("Invalid flags (should be an even number of characters): " + rawFlags, "rawFlags");
+
+                var flags = new Char[rawFlags.Length / 2];
+                for (var i = 0; i < flags.Length; i++) {
+                    var first = rawFlags[2 * i];
+                    var second = rawFlags[2 * i + 1];
+
+                    // Shift-and-or keeps distinct pairs distinct; adding the two codes
+                    // would map e.g. "ab" and "ba" to the same flag.
+                    flags[i] = unchecked((Char)((first << 8) | second));
+                }
+
+                return flags;
+            }
+        }
+
+        #endregion
+
+        #region Nested type: FlagParsingStrategy
+        /// <summary>
+        ///   Base class for the different ways flags from the affix and dic files can be parsed.
+        /// </summary>
+        private abstract class FlagParsingStrategy {
+            /// <summary>
+            ///   Parses the given String and returns the first flag found in it.
+            /// </summary>
+            /// <param name="rawFlag">String to parse into a flag.</param>
+            /// <returns>First element of the result of <see cref="ParseFlags" />.</returns>
+            /// <exception cref="ArgumentNullException"><paramref name="rawFlag" /> is <c>null</c>.</exception>
+            public Char ParseFlag(String rawFlag) {
+                if (rawFlag == null) {
+                    throw new ArgumentNullException("rawFlag");
+                }
+
+                var parsed = ParseFlags(rawFlag);
+                return parsed[0];
+            }
+
+            /// <summary>
+            ///   Parses the given String into an array of flags.
+            /// </summary>
+            /// <param name="rawFlags">String to parse into flags.</param>
+            /// <returns>Parsed flags.</returns>
+            public abstract Char[] ParseFlags(String rawFlags);
+        }
+
+        #endregion
+
+        #region Nested type: NumFlagParsingStrategy
+
+        /// <summary>
+        ///   Implementation of <see cref="FlagParsingStrategy" /> for flags written as numbers.
+        ///   Multiple flags are separated by commas.
+        /// </summary>
+        private class NumFlagParsingStrategy : FlagParsingStrategy {
+            /// <summary>
+            ///   Splits the trimmed input on commas, drops every non-digit character from each
+            ///   part and converts the remaining number into a char.
+            /// </summary>
+            /// <param name="rawFlags">Comma separated numeric flags.</param>
+            /// <returns>One char per comma separated part.</returns>
+            public override Char[] ParseFlags(String rawFlags) {
+                var parts = rawFlags.Trim().Split(',');
+                var result = new Char[parts.Length];
+
+                var index = 0;
+                foreach (var part in parts) {
+                    // note, removing the trailing X/leading I for nepali... what is the rule here?!
+                    var digitsOnly = Regex.Replace(part, "[^0-9]", "");
+                    var number = Int32.Parse(digitsOnly);
+                    result[index] = (Char)number;
+                    index++;
+                }
+
+                return result;
+            }
+        }
+
+        #endregion
+
+        #region Nested type: SimpleFlagParsingStrategy
+
+        /// <summary>
+        ///   Simple implementation of <see cref="FlagParsingStrategy" /> that treats each char of the
+        ///   String as an individual flag. Can be used with both the ASCII and UTF-8 flag types.
+        /// </summary>
+        private class SimpleFlagParsingStrategy : FlagParsingStrategy {
+            /// <summary>
+            ///   Returns the characters of the given String, one flag per char.
+            /// </summary>
+            /// <param name="rawFlags">String whose chars are the flags.</param>
+            /// <returns>The chars of <paramref name="rawFlags" />.</returns>
+            public override Char[] ParseFlags(String rawFlags) {
+                var flags = rawFlags.ToCharArray();
+                return flags;
+            }
+        }
+
+        #endregion
+    }
+}
\ No newline at end of file

Propchange: incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellDictionary.cs
------------------------------------------------------------------------------
    svn:eol-style = native

Added: incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellStem.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellStem.cs?rev=1347076&view=auto
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellStem.cs (added)
+++ incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellStem.cs Wed Jun  6 19:45:59 2012
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Analysis.Hunspell {
+    /// <summary>
+    ///   A word stem together with the prefixes and suffixes recorded for it.
+    /// </summary>
+    public class HunspellStem {
+        private readonly String _text;
+        private readonly List<HunspellAffix> _recordedPrefixes = new List<HunspellAffix>();
+        private readonly List<HunspellAffix> _recordedSuffixes = new List<HunspellAffix>();
+
+        /// <summary>
+        ///   Creates a new Stem wrapping the given word stem.
+        /// </summary>
+        /// <param name="stem">The word stem to wrap.</param>
+        /// <exception cref="ArgumentNullException"><paramref name="stem" /> is <c>null</c>.</exception>
+        public HunspellStem(String stem) {
+            if (stem == null) {
+                throw new ArgumentNullException("stem");
+            }
+
+            _text = stem;
+        }
+
+        /// <summary>
+        ///   The actual word stem itself.
+        /// </summary>
+        public String Stem {
+            get { return _text; }
+        }
+
+        /// <summary>
+        ///   The number of characters in the stem.
+        /// </summary>
+        public Int32 StemLength {
+            get { return _text.Length; }
+        }
+
+        /// <summary>
+        ///   The prefixes used to generate the stem, most recently added first.
+        /// </summary>
+        public IEnumerable<HunspellAffix> Prefixes {
+            get { return _recordedPrefixes; }
+        }
+
+        /// <summary>
+        ///   The suffixes used to generate the stem, in the order they were added.
+        /// </summary>
+        public IEnumerable<HunspellAffix> Suffixes {
+            get { return _recordedSuffixes; }
+        }
+
+        /// <summary>
+        ///   Records a prefix used to generate this stem. Prefixes are assumed to be added
+        ///   depth first, which is why each new prefix goes to the front of the list.
+        /// </summary>
+        /// <param name="prefix">Prefix to add to the list of prefixes for this stem.</param>
+        public void AddPrefix(HunspellAffix prefix) {
+            const Int32 front = 0;
+            _recordedPrefixes.Insert(front, prefix);
+        }
+
+        /// <summary>
+        ///   Records a suffix used to generate this stem. Suffixes are assumed to be added
+        ///   depth first, which is why each new suffix goes to the end of the list.
+        /// </summary>
+        /// <param name="suffix">Suffix to add to the list of suffixes for this stem.</param>
+        public void AddSuffix(HunspellAffix suffix) {
+            _recordedSuffixes.Add(suffix);
+        }
+    }
+}
\ No newline at end of file

Propchange: incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellStem.cs
------------------------------------------------------------------------------
    svn:eol-style = native

Added: incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellStemFilter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellStemFilter.cs?rev=1347076&view=auto
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellStemFilter.cs (added)
+++ incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellStemFilter.cs Wed Jun  6 19:45:59 2012
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using Lucene.Net.Analysis.Tokenattributes;
+
+namespace Lucene.Net.Analysis.Hunspell {
+    /// <summary>
+    ///   TokenFilter that stems tokens with hunspell affix rules and words.  A single input token
+    ///   can have several stems, so this filter may emit more than one token per consumed token;
+    ///   the additional stems are emitted with a position increment of 0.
+    /// </summary>
+    public class HunspellStemFilter : TokenFilter {
+        private readonly ITermAttribute _termAtt;
+        private readonly IPositionIncrementAttribute _posIncAtt;
+        private readonly HunspellStemmer _stemmer;
+
+        // Stems of the current input token that have not been emitted yet.
+        private readonly Queue<HunspellStem> _buffer = new Queue<HunspellStem>();
+        // State captured after the first stem was written; restored before each further stem.
+        private State _savedState;
+
+        private readonly Boolean _dedup;
+
+        /// <summary>
+        ///   Builds a filter that stems the tokens of <paramref name="input" /> using the affix
+        ///   rules and words of <paramref name="dictionary" />.
+        /// </summary>
+        /// <param name="input">TokenStream whose tokens will be stemmed.</param>
+        /// <param name="dictionary">HunspellDictionary with the affix rules and words used for stemming.</param>
+        /// <param name="dedup">true if only unique terms should be output.</param>
+        public HunspellStemFilter(TokenStream input, HunspellDictionary dictionary, Boolean dedup = true)
+            : base(input) {
+            _posIncAtt = AddAttribute<IPositionIncrementAttribute>();
+            _termAtt = AddAttribute<ITermAttribute>();
+
+            _dedup = dedup;
+            _stemmer = new HunspellStemmer(dictionary);
+        }
+
+        /// <summary>
+        ///   Advances to the next output token: first any stems still buffered for the previous
+        ///   input token, otherwise the first stem of the next input token (or that token
+        ///   unchanged when the stemmer produces no stems for it).
+        /// </summary>
+        /// <returns><c>true</c> if a token was produced, <c>false</c> once the input is exhausted.</returns>
+        public override Boolean IncrementToken() {
+            if (_buffer.Count > 0) {
+                // Emit a further stem of the previous input token at the same position.
+                var pending = _buffer.Dequeue();
+
+                RestoreState(_savedState);
+                _posIncAtt.PositionIncrement = 0;
+                _termAtt.SetTermBuffer(pending.Stem, 0, pending.StemLength);
+                return true;
+            }
+
+            if (!input.IncrementToken()) {
+                return false;
+            }
+
+            IEnumerable<HunspellStem> candidates;
+            if (_dedup) {
+                candidates = _stemmer.UniqueStems(_termAtt.Term());
+            } else {
+                candidates = _stemmer.Stem(_termAtt.Term());
+            }
+
+            foreach (var candidate in candidates) {
+                _buffer.Enqueue(candidate);
+            }
+
+            if (_buffer.Count == 0) {
+                // we do not know this word, return it unchanged
+                return true;
+            }
+
+            var first = _buffer.Dequeue();
+            _termAtt.SetTermBuffer(first.Stem, 0, first.StemLength);
+
+            // Only capture the state when further stems will need it on later calls.
+            if (_buffer.Count > 0) {
+                _savedState = CaptureState();
+            }
+
+            return true;
+        }
+
+        /// <summary>
+        ///   Resets the filter and discards any stems that are still buffered.
+        /// </summary>
+        public override void Reset() {
+            base.Reset();
+
+            _buffer.Clear();
+        }
+    }
+}

Propchange: incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellStemFilter.cs
------------------------------------------------------------------------------
    svn:eol-style = native

Added: incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellStemmer.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellStemmer.cs?rev=1347076&view=auto
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellStemmer.cs (added)
+++ incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellStemmer.cs Wed Jun  6 19:45:59 2012
@@ -0,0 +1,201 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net.Analysis.Hunspell {
+    /// <summary>
+    ///   HunspellStemmer uses the affix rules declared in the HunspellDictionary to generate one or
+    ///   more stems for a word.  It follows the original hunspell algorithm, including recursive
+    ///   affix stripping.
+    /// </summary>
+    /// <author>Chris Male</author>
+    public class HunspellStemmer {
+        /// <summary>
+        ///   Recursion depth from which ApplyAffix no longer starts another stripping pass.
+        /// </summary>
+        private const Int32 RecursionCap = 2;
+        private readonly HunspellDictionary _dictionary;
+
+        /// <summary>
+        ///   Constructs a new HunspellStemmer which will use the provided HunspellDictionary
+        ///   to create its stems.
+        /// </summary>
+        /// <param name="dictionary">HunspellDictionary that will be used to create the stems.</param>
+        /// <exception cref="ArgumentNullException">Thrown when <paramref name="dictionary" /> is <c>null</c>.</exception>
+        public HunspellStemmer(HunspellDictionary dictionary) {
+            if (dictionary == null) throw new ArgumentNullException("dictionary");
+            _dictionary = dictionary;
+        }
+
+        /// <summary>
+        ///   Find the stem(s) of the provided word.
+        /// </summary>
+        /// <param name="word">Word to find the stems for.</param>
+        /// <returns>List of stems for the word.</returns>
+        /// <exception cref="ArgumentNullException">Thrown when <paramref name="word" /> is <c>null</c>.</exception>
+        public IEnumerable<HunspellStem> Stem(String word) {
+            if (word == null) throw new ArgumentNullException("word");
+
+            var stems = new List<HunspellStem>();
+
+            // The word itself is a stem when the dictionary lookup finds it as-is.
+            if (_dictionary.LookupWord(word) != null) {
+                stems.Add(new HunspellStem(word));
+            }
+
+            stems.AddRange(Stem(word, null, 0));
+            return stems;
+        }
+
+        /// <summary>
+        ///   Find the unique stem(s) of the provided word.
+        /// </summary>
+        /// <param name="word">Word to find the stems for.</param>
+        /// <returns>List of stems for the word; only the first stem is kept for each distinct stem text.</returns>
+        /// <exception cref="ArgumentNullException">Thrown when <paramref name="word" /> is <c>null</c>.</exception>
+        public IEnumerable<HunspellStem> UniqueStems(String word) {
+            if (word == null) throw new ArgumentNullException("word");
+
+            var stems = new List<HunspellStem>();
+            // Stem texts already emitted, used to filter out duplicates.
+            var terms = new CharArraySet(8, false);
+            if (_dictionary.LookupWord(word) != null) {
+                stems.Add(new HunspellStem(word));
+                terms.Add(word);
+            }
+
+            foreach (var s in Stem(word, null, 0)) {
+                if (!terms.Contains(s.Stem)) {
+                    stems.Add(s);
+                    terms.Add(s.Stem);
+                }
+            }
+
+            return stems;
+        }
+
+        /// <summary>
+        ///   Generates a list of stems for the provided word by stripping every matching suffix
+        ///   and then every matching prefix.
+        /// </summary>
+        /// <param name="word">Word to generate the stems for.</param>
+        /// <param name="flags">Flags from a previous stemming step that need to be cross-checked with any affixes in this recursive step.  <c>null</c> disables the cross check.</param>
+        /// <param name="recursionDepth">Level of recursion this stemming step is at.</param>
+        /// <returns>List of stems, or an empty list if no stems are found.</returns>
+        private List<HunspellStem> Stem(String word, Char[] flags, Int32 recursionDepth) {
+            if (word == null) throw new ArgumentNullException("word");
+
+            var stems = new List<HunspellStem>();
+            var chars = word.ToCharArray();
+            var length = word.Length;
+
+            // Suffix pass: try every tail of the word, longest tail first.
+            for (var i = 0; i < length; i++) {
+                var suffixes = _dictionary.LookupSuffix(chars, i, length - i);
+                if (suffixes == null) {
+                    continue;
+                }
+
+                foreach (var suffix in suffixes) {
+                    if (!HasCrossCheckedFlag(suffix.Flag, flags)) {
+                        continue;
+                    }
+
+                    var deAffixedLength = length - suffix.Append.Length;
+
+                    // Drop the appended text and put back what the rule stripped.
+                    // TODO: can we do this in-place?
+                    var strippedWord = new StringBuilder()
+                        .Append(word, 0, deAffixedLength)
+                        .Append(suffix.Strip)
+                        .ToString();
+
+                    // Single pass over the result: tag each stem and collect it.
+                    foreach (var stem in ApplyAffix(strippedWord, suffix, recursionDepth)) {
+                        stem.AddSuffix(suffix);
+                        stems.Add(stem);
+                    }
+                }
+            }
+
+            // Prefix pass: try every head of the word, longest head first.
+            for (var i = length - 1; i >= 0; i--) {
+                var prefixes = _dictionary.LookupPrefix(chars, 0, i);
+                if (prefixes == null) {
+                    continue;
+                }
+
+                foreach (var prefix in prefixes) {
+                    if (!HasCrossCheckedFlag(prefix.Flag, flags)) {
+                        continue;
+                    }
+
+                    var deAffixedStart = prefix.Append.Length;
+                    var deAffixedLength = length - deAffixedStart;
+
+                    var strippedWord = new StringBuilder()
+                        .Append(prefix.Strip)
+                        .Append(word, deAffixedStart, deAffixedLength)
+                        .ToString();
+
+                    foreach (var stem in ApplyAffix(strippedWord, prefix, recursionDepth)) {
+                        stem.AddPrefix(prefix);
+                        stems.Add(stem);
+                    }
+                }
+            }
+
+            return stems;
+        }
+
+        /// <summary>
+        ///   Applies the affix rule to the given word, producing a list of stems if any are found.
+        /// </summary>
+        /// <param name="strippedWord">Word from which the affix has been removed and to which the strip has been added.</param>
+        /// <param name="affix">HunspellAffix representing the affix rule itself.</param>
+        /// <param name="recursionDepth">Level of recursion this stemming step is at.</param>
+        /// <returns>List of stems for the word, or an empty list if none are found.</returns>
+        /// <exception cref="ArgumentNullException">Thrown when <paramref name="strippedWord" /> or <paramref name="affix" /> is <c>null</c>.</exception>
+        public IEnumerable<HunspellStem> ApplyAffix(String strippedWord, HunspellAffix affix, Int32 recursionDepth) {
+            if (strippedWord == null) throw new ArgumentNullException("strippedWord");
+            if (affix == null) throw new ArgumentNullException("affix");
+
+            var stems = new List<HunspellStem>();
+
+            if (!affix.CheckCondition(strippedWord)) {
+                return stems;
+            }
+
+            var words = _dictionary.LookupWord(strippedWord);
+            if (words == null) {
+                return stems;
+            }
+
+            foreach (var hunspellWord in words) {
+                if (!hunspellWord.HasFlag(affix.Flag)) {
+                    continue;
+                }
+
+                if (affix.IsCrossProduct && recursionDepth < RecursionCap) {
+                    // Pass depth + 1 instead of incrementing the parameter: an in-place increment
+                    // would leak into later iterations and make further entries for the same word
+                    // appear deeper in the recursion than they really are.
+                    var recursiveStems = Stem(strippedWord, affix.AppendFlags, recursionDepth + 1);
+                    if (recursiveStems.Count > 0) {
+                        stems.AddRange(recursiveStems);
+                        continue;
+                    }
+                }
+
+                // No deeper stem was found (or recursion was not attempted): the stripped word is the stem.
+                stems.Add(new HunspellStem(strippedWord));
+            }
+
+            return stems;
+        }
+
+        /// <summary>
+        ///   Checks if the given flag cross checks with the given array of flags.
+        /// </summary>
+        /// <param name="flag">Flag to cross check with the array of flags.</param>
+        /// <param name="flags">Array of flags to cross check against.  Can be <c>null</c>.</param>
+        /// <returns><c>true</c> if the flag is found in the array or the array is <c>null</c>, <c>false</c> otherwise.</returns>
+        private static Boolean HasCrossCheckedFlag(Char flag, Char[] flags) {
+            // NOTE(review): Array.BinarySearch requires a sorted array; this assumes the flags
+            // (HunspellAffix.AppendFlags) are sorted when the dictionary is parsed -- confirm.
+            return flags == null || Array.BinarySearch(flags, flag) >= 0;
+        }
+    }
+}
\ No newline at end of file

Propchange: incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellStemmer.cs
------------------------------------------------------------------------------
    svn:eol-style = native

Added: incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellWord.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellWord.cs?rev=1347076&view=auto
==============================================================================
--- incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellWord.cs (added)
+++ incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellWord.cs Wed Jun  6 19:45:59 2012
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Linq;
+
+namespace Lucene.Net.Analysis.Hunspell {
+    /// <summary>
+    ///   A dictionary word entry, represented by the flags attached to it.
+    /// </summary>
+    public class HunspellWord {
+        private readonly Char[] _flags;
+
+        /// <summary>
+        ///   Creates a HunspellWord that carries no flags.
+        /// </summary>
+        public HunspellWord() : this(new Char[0]) {
+        }
+
+        /// <summary>
+        ///   Creates a HunspellWord that carries the given flags.
+        /// </summary>
+        /// <param name="flags">Flags to associate with the word; must not be <c>null</c>.</param>
+        /// <exception cref="ArgumentNullException">Thrown when <paramref name="flags" /> is <c>null</c>.</exception>
+        public HunspellWord(Char[] flags) {
+            if (flags == null) {
+                throw new ArgumentNullException("flags");
+            }
+
+            // The array is stored as passed in, not copied.
+            _flags = flags;
+        }
+
+        /// <summary>
+        ///   Tells whether the given flag is one of the flags of this word.
+        /// </summary>
+        /// <param name="flag">Flag to look for.</param>
+        /// <returns><c>true</c> if the flag is associated, <c>false</c> otherwise</returns>
+        public Boolean HasFlag(Char flag) {
+            var position = Array.IndexOf(_flags, flag);
+            return position >= 0;
+        }
+    }
+}

Propchange: incubator/lucene.net/trunk/src/contrib/Analyzers/Hunspell/HunspellWord.cs
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: incubator/lucene.net/trunk/test/contrib/Analyzers/Contrib.Analyzers.Test.csproj
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/test/contrib/Analyzers/Contrib.Analyzers.Test.csproj?rev=1347076&r1=1347075&r2=1347076&view=diff
==============================================================================
--- incubator/lucene.net/trunk/test/contrib/Analyzers/Contrib.Analyzers.Test.csproj (original)
+++ incubator/lucene.net/trunk/test/contrib/Analyzers/Contrib.Analyzers.Test.csproj Wed Jun  6 19:45:59 2012
@@ -98,6 +98,10 @@
     <Compile Include="Fa\TestPersianAnalyzer.cs" />
     <Compile Include="Fr\TestElision.cs" />
     <Compile Include="Fr\TestFrenchAnalyzer.cs" />
+    <Compile Include="Hunspell\HunspellDictionaryLoader.cs" />
+    <Compile Include="Hunspell\TestHunspellDictionary.cs" />
+    <Compile Include="Hunspell\TestHunspellStemFilter.cs" />
+    <Compile Include="Hunspell\TestHunspellStemmer.cs" />
     <Compile Include="NGram\TestEdgeNGramTokenFilter.cs" />
     <Compile Include="NGram\TestEdgeNGramTokenizer.cs" />
     <Compile Include="Miscellaneous\PatternAnalyzerTest.cs" />
@@ -191,6 +195,12 @@
     </Content>
   </ItemGroup>
   <ItemGroup>
+    <EmbeddedResource Include="Hunspell\Dictionaries\en_US.aff" />
+    <EmbeddedResource Include="Hunspell\Dictionaries\en_US.dic" />
+    <EmbeddedResource Include="Hunspell\Dictionaries\fr-moderne.aff" />
+    <EmbeddedResource Include="Hunspell\Dictionaries\fr-moderne.dic" />
+    <EmbeddedResource Include="Hunspell\Dictionaries\nl_NL.aff" />
+    <EmbeddedResource Include="Hunspell\Dictionaries\nl_NL.dic" />
     <None Include="Lucene.Net.snk" />
   </ItemGroup>
   <ItemGroup />
@@ -202,4 +212,4 @@
   <Target Name="AfterBuild">
   </Target>
   -->
-</Project>
+</Project>
\ No newline at end of file

Added: incubator/lucene.net/trunk/test/contrib/Analyzers/Hunspell/Dictionaries/en_US.aff
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/test/contrib/Analyzers/Hunspell/Dictionaries/en_US.aff?rev=1347076&view=auto
==============================================================================
--- incubator/lucene.net/trunk/test/contrib/Analyzers/Hunspell/Dictionaries/en_US.aff (added)
+++ incubator/lucene.net/trunk/test/contrib/Analyzers/Hunspell/Dictionaries/en_US.aff Wed Jun  6 19:45:59 2012
@@ -0,0 +1,207 @@
+# testcomment
+
+   
+# Alles so schön!
+
+SET ISO8859-1
+
+TRY esianrtolcdugmphbyfvkwzESIANRTOLCDUGMPHBYFVKWZ'
+NOSUGGEST !
+
+# ordinal numbers
+COMPOUNDMIN 1
+# only in compounds: 1th, 2th, 3th
+ONLYINCOMPOUND c
+# compound rules:
+# 1. [0-9]*1[0-9]th (10th, 11th, 12th, 56714th, etc.)
+# 2. [0-9]*[02-9](1st|2nd|3rd|[4-9]th) (21st, 22nd, 123rd, 1234th, etc.)
+COMPOUNDRULE 2
+COMPOUNDRULE n*1t
+COMPOUNDRULE n*mp
+WORDCHARS 0123456789
+
+PFX A Y 1
+PFX A   0     re         .
+
+PFX I Y 1
+PFX I   0     in         .
+
+PFX U Y 1
+PFX U   0     un         .
+
+PFX C Y 1
+PFX C   0     de          .
+
+PFX E Y 1
+PFX E   0     dis         .
+
+PFX F Y 1
+PFX F   0     con         .
+
+PFX K Y 1
+PFX K   0     pro         .
+
+SFX V N 2
+SFX V   e     ive        e
+SFX V   0     ive        [^e]
+
+SFX N Y 3
+SFX N   e     ion        e
+SFX N   y     ication    y 
+SFX N   0     en         [^ey] 
+
+SFX X Y 3
+SFX X   e     ions       e
+SFX X   y     ications   y
+SFX X   0     ens        [^ey]
+
+SFX H N 2
+SFX H   y     ieth       y
+SFX H   0     th         [^y] 
+
+SFX Y Y 1
+SFX Y   0     ly         .
+
+SFX G Y 2
+SFX G   e     ing        e
+SFX G   0     ing        [^e] 
+
+SFX J Y 2
+SFX J   e     ings       e
+SFX J   0     ings       [^e]
+
+SFX D Y 4
+SFX D   0     d          e
+SFX D   y     ied        [^aeiou]y
+SFX D   0     ed         [^ey]
+SFX D   0     ed         [aeiou]y
+
+SFX T N 4
+SFX T   0     st         e
+SFX T   y     iest       [^aeiou]y
+SFX T   0     est        [aeiou]y
+SFX T   0     est        [^ey]
+
+SFX R Y 4
+SFX R   0     r          e
+SFX R   y     ier        [^aeiou]y
+SFX R   0     er         [aeiou]y
+SFX R   0     er         [^ey]
+
+SFX Z Y 4
+SFX Z   0     rs         e
+SFX Z   y     iers       [^aeiou]y
+SFX Z   0     ers        [aeiou]y
+SFX Z   0     ers        [^ey]
+
+SFX S Y 4
+SFX S   y     ies        [^aeiou]y
+SFX S   0     s          [aeiou]y
+SFX S   0     es         [sxzh]
+SFX S   0     s          [^sxzhy]
+
+SFX P Y 3
+SFX P   y     iness      [^aeiou]y
+SFX P   0     ness       [aeiou]y
+SFX P   0     ness       [^y]
+
+SFX M Y 1
+SFX M   0     's         .
+
+SFX B Y 3
+SFX B   0     able       [^aeiou]
+SFX B   0     able       ee
+SFX B   e     able       [^aeiou]e
+
+SFX L Y 1
+SFX L   0     ment       .
+
+REP 88
+REP a ei
+REP ei a
+REP a ey
+REP ey a
+REP ai ie
+REP ie ai
+REP are air
+REP are ear
+REP are eir
+REP air are
+REP air ere
+REP ere air
+REP ere ear
+REP ere eir
+REP ear are
+REP ear air
+REP ear ere
+REP eir are
+REP eir ere
+REP ch te
+REP te ch
+REP ch ti
+REP ti ch
+REP ch tu
+REP tu ch
+REP ch s
+REP s ch
+REP ch k
+REP k ch
+REP f ph
+REP ph f
+REP gh f
+REP f gh
+REP i igh
+REP igh i
+REP i uy
+REP uy i
+REP i ee
+REP ee i
+REP j di
+REP di j
+REP j gg
+REP gg j
+REP j ge
+REP ge j
+REP s ti
+REP ti s
+REP s ci
+REP ci s
+REP k cc
+REP cc k
+REP k qu
+REP qu k
+REP kw qu
+REP o eau
+REP eau o
+REP o ew
+REP ew o
+REP oo ew
+REP ew oo
+REP ew ui
+REP ui ew
+REP oo ui
+REP ui oo
+REP ew u
+REP u ew
+REP oo u
+REP u oo
+REP u oe
+REP oe u
+REP u ieu
+REP ieu u
+REP ue ew
+REP ew ue
+REP uff ough
+REP oo ieu
+REP ieu oo
+REP ier ear
+REP ear ier
+REP ear air
+REP air ear
+REP w qu
+REP qu w
+REP z ss
+REP ss z
+REP shun tion
+REP shun sion
+REP shun cion

Propchange: incubator/lucene.net/trunk/test/contrib/Analyzers/Hunspell/Dictionaries/en_US.aff
------------------------------------------------------------------------------
    svn:eol-style = native



Mime
View raw message