lucenenet-commits mailing list archives

From nightowl...@apache.org
Subject [42/62] [abbrv] lucenenet git commit: Lucene.Net.Core.Analysis: Deleted obsolete Analysis files that have mostly been moved to Lucene.Net.Analysis.Common
Date Sat, 01 Apr 2017 01:09:35 GMT
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/8a97bfcf/src/Lucene.Net.Core/Analysis/Standard/StandardTokenizerImpl.jflex
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Analysis/Standard/StandardTokenizerImpl.jflex b/src/Lucene.Net.Core/Analysis/Standard/StandardTokenizerImpl.jflex
deleted file mode 100644
index 9308713..0000000
--- a/src/Lucene.Net.Core/Analysis/Standard/StandardTokenizerImpl.jflex
+++ /dev/null
@@ -1,156 +0,0 @@
-package org.apache.lucene.analysis.standard;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
-
-WARNING: if you change StandardTokenizerImpl.jflex and need to regenerate
-      the tokenizer, use only Java 1.4!
-      This grammar currently uses constructs (eg :digit:, :letter:) whose
-      meaning can vary according to the JRE used to run jflex.  See
-      https://issues.apache.org/jira/browse/LUCENE-1126 for details.
-      For backwards compatibility it is currently necessary to support
-      only Java 1.4; this will change in Lucene 3.1.
-
-*/
-
-import org.apache.lucene.analysis.Token;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
-
-%%
-
-%class StandardTokenizerImpl
-%unicode
-%integer
-%function getNextToken
-%pack
-%char
-
-%{
-
-public static final int ALPHANUM          = StandardTokenizer.ALPHANUM;
-public static final int APOSTROPHE        = StandardTokenizer.APOSTROPHE;
-public static final int ACRONYM           = StandardTokenizer.ACRONYM;
-public static final int COMPANY           = StandardTokenizer.COMPANY;
-public static final int EMAIL             = StandardTokenizer.EMAIL;
-public static final int HOST              = StandardTokenizer.HOST;
-public static final int NUM               = StandardTokenizer.NUM;
-public static final int CJ                = StandardTokenizer.CJ;
-/*
- * @deprecated this solves a bug where HOSTs that end with '.' are identified
- *             as ACRONYMs.
- */
-public static final int ACRONYM_DEP       = StandardTokenizer.ACRONYM_DEP;
-
-public static final String [] TOKEN_TYPES = StandardTokenizer.TOKEN_TYPES;
-
-public final int yychar()
-{
-    return yychar;
-}
-
-/*
- * Resets the Tokenizer to a new Reader.
- */
-final void reset(java.io.Reader r) {
-  // reset to default buffer size, if buffer has grown
-  if (zzBuffer.length > ZZ_BUFFERSIZE) {
-    zzBuffer = new char[ZZ_BUFFERSIZE];
-  }
-  yyreset(r);
-}
-
-/*
- * Fills Lucene token with the current token text.
- */
-final void getText(Token t) {
-  t.setTermBuffer(zzBuffer, zzStartRead, zzMarkedPos-zzStartRead);
-}
-
-/*
- * Fills TermAttribute with the current token text.
- */
-final void getText(TermAttribute t) {
-  t.setTermBuffer(zzBuffer, zzStartRead, zzMarkedPos-zzStartRead);
-}
-
-%}
-
-THAI       = [\u0E00-\u0E59]
-
-// basic word: a sequence of digits & letters (includes Thai to enable ThaiAnalyzer to function)
-ALPHANUM   = ({LETTER}|{THAI}|[:digit:])+
-
-// internal apostrophes: O'Reilly, you're, O'Reilly's
-// use a post-filter to remove possessives
-APOSTROPHE =  {ALPHA} ("'" {ALPHA})+
-
-// acronyms: U.S.A., I.B.M., etc.
-// use a post-filter to remove dots
-ACRONYM    =  {LETTER} "." ({LETTER} ".")+
-
-ACRONYM_DEP	= {ALPHANUM} "." ({ALPHANUM} ".")+
-
-// company names like AT&T and Excite@Home.
-COMPANY    =  {ALPHA} ("&"|"@") {ALPHA}
-
-// email addresses
-EMAIL      =  {ALPHANUM} (("."|"-"|"_") {ALPHANUM})* "@" {ALPHANUM} (("."|"-") {ALPHANUM})+
-
-// hostname
-HOST       =  {ALPHANUM} ((".") {ALPHANUM})+
-
-// floating point, serial, model numbers, ip addresses, etc.
-// every other segment must have at least one digit
-NUM        = ({ALPHANUM} {P} {HAS_DIGIT}
-           | {HAS_DIGIT} {P} {ALPHANUM}
-           | {ALPHANUM} ({P} {HAS_DIGIT} {P} {ALPHANUM})+
-           | {HAS_DIGIT} ({P} {ALPHANUM} {P} {HAS_DIGIT})+
-           | {ALPHANUM} {P} {HAS_DIGIT} ({P} {ALPHANUM} {P} {HAS_DIGIT})+
-           | {HAS_DIGIT} {P} {ALPHANUM} ({P} {HAS_DIGIT} {P} {ALPHANUM})+)
-
-// punctuation
-P	         = ("_"|"-"|"/"|"."|",")
-
-// at least one digit
-HAS_DIGIT  = ({LETTER}|[:digit:])* [:digit:] ({LETTER}|[:digit:])*
-
-ALPHA      = ({LETTER})+
-
-// From the JFlex manual: "the expression that matches everything of <a> not matched by <b> is !(!<a>|<b>)"
-LETTER     = !(![:letter:]|{CJ})
-
-// Chinese and Japanese (but NOT Korean, which is included in [:letter:])
-CJ         = [\u3100-\u312f\u3040-\u309F\u30A0-\u30FF\u31F0-\u31FF\u3300-\u337f\u3400-\u4dbf\u4e00-\u9fff\uf900-\ufaff\uff65-\uff9f]
-
-WHITESPACE = \r\n | [ \r\n\t\f]
-
-%%
-
-{ALPHANUM}                                                     { return ALPHANUM; }
-{APOSTROPHE}                                                   { return APOSTROPHE; }
-{ACRONYM}                                                      { return ACRONYM; }
-{COMPANY}                                                      { return COMPANY; }
-{EMAIL}                                                        { return EMAIL; }
-{HOST}                                                         { return HOST; }
-{NUM}                                                          { return NUM; }
-{CJ}                                                           { return CJ; }
-{ACRONYM_DEP}                                                  { return ACRONYM_DEP; }
-
-/* Ignore the rest */
-. | {WHITESPACE}                                               { /* ignore */ }
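
The grammar above is the classic, pre-3.1 StandardTokenizer grammar; in the 4.8-level code that replaced these files it survives as ClassicTokenizer in Lucene.Net.Analysis.Common. Below is a minimal C# sketch of observing the token types the grammar assigns; the namespaces, LuceneVersion value, and attribute names are assumptions about the 4.8 port, not verified against this commit:

    using System;
    using System.IO;
    using Lucene.Net.Analysis.Standard;          // assumed home of ClassicTokenizer
    using Lucene.Net.Analysis.TokenAttributes;   // assumed attribute namespace
    using Lucene.Net.Util;

    class ClassicTokenTypes
    {
        static void Main()
        {
            // "I.B.M." exercises the ACRONYM rule, "apache.org" HOST, "R2-D2" NUM.
            var tok = new ClassicTokenizer(LuceneVersion.LUCENE_48,
                new StringReader("I.B.M. apache.org R2-D2"));
            var term = tok.AddAttribute<ICharTermAttribute>();
            var type = tok.AddAttribute<ITypeAttribute>();

            tok.Reset();
            while (tok.IncrementToken())
                Console.WriteLine($"{term} -> {type.Type}");  // e.g. "apache.org -> <HOST>"
            tok.End();
            tok.Dispose();
        }
    }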

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/8a97bfcf/src/Lucene.Net.Core/Analysis/StopAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Analysis/StopAnalyzer.cs b/src/Lucene.Net.Core/Analysis/StopAnalyzer.cs
deleted file mode 100644
index 96a673d..0000000
--- a/src/Lucene.Net.Core/Analysis/StopAnalyzer.cs
+++ /dev/null
@@ -1,141 +0,0 @@
-/* 
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- * 
- * http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-using System.Collections.Generic;
-using Version = Lucene.Net.Util.Version;
-
-namespace Lucene.Net.Analysis
-{
-    
-    /// <summary> Filters <see cref="LetterTokenizer" /> with <see cref="LowerCaseFilter" /> and
-    /// <see cref="StopFilter" />.
-    /// 
-    /// <a name="version"/>
-    /// <p/>
-    /// You must specify the required <see cref="Version" /> compatibility when creating
-    /// StopAnalyzer:
-    /// <list type="bullet">
-    /// <item>As of 2.9, position increments are preserved</item>
-    /// </list>
-    /// </summary>
-    
-    public sealed class StopAnalyzer:Analyzer
-    {
-        private readonly ISet<string> stopWords;
-        private readonly bool enablePositionIncrements;
-
-        /// <summary>An unmodifiable set containing some common English words that are not usually useful
-        /// for searching.
-        /// </summary>
-        public static ISet<string> ENGLISH_STOP_WORDS_SET;
-        
-        /// <summary> Builds an analyzer which removes words in ENGLISH_STOP_WORDS_SET.</summary>
-        public StopAnalyzer(Version matchVersion)
-        {
-            stopWords = ENGLISH_STOP_WORDS_SET;
-            enablePositionIncrements = StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion);
-        }
-
-        /// <summary>Builds an analyzer with the stop words from the given set.</summary>
-        public StopAnalyzer(Version matchVersion, ISet<string> stopWords)
-        {
-            this.stopWords = stopWords;
-            enablePositionIncrements = StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion);
-        }
-        
-        /// <summary> Builds an analyzer with the stop words from the given file.
-        /// 
-        /// </summary>
-        /// <seealso cref="WordlistLoader.GetWordSet(System.IO.FileInfo)">
-        /// </seealso>
-        /// <param name="matchVersion">See <a href="#version">above</a>
-        /// </param>
-        /// <param name="stopwordsFile">File to load stop words from
-        /// </param>
-        public StopAnalyzer(Version matchVersion, System.IO.FileInfo stopwordsFile)
-        {
-            stopWords = WordlistLoader.GetWordSet(stopwordsFile);
-            enablePositionIncrements = StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion);
-        }
-
-        /// <summary>Builds an analyzer with the stop words from the given reader.</summary>
-        /// <seealso cref="WordlistLoader.GetWordSet(System.IO.TextReader)">
-        /// </seealso>
-        /// <param name="matchVersion">See <a href="#Version">above</a>
-        /// </param>
-        /// <param name="stopwords">Reader to load stop words from
-        /// </param>
-        public StopAnalyzer(Version matchVersion, System.IO.TextReader stopwords)
-        {
-            stopWords = WordlistLoader.GetWordSet(stopwords);
-            enablePositionIncrements = StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion);
-        }
-
-        /// <summary>Filters LowerCaseTokenizer with StopFilter. </summary>
-        public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
-        {
-            return new StopFilter(enablePositionIncrements, new LowerCaseTokenizer(reader), stopWords);
-        }
-        
-        /// <summary>Caches the tokenizer and filter for reuse by ReusableTokenStream.</summary>
-        private class SavedStreams
-        {
-            public SavedStreams(StopAnalyzer enclosingInstance)
-            {
-                InitBlock(enclosingInstance);
-            }
-            private void  InitBlock(StopAnalyzer enclosingInstance)
-            {
-                this.enclosingInstance = enclosingInstance;
-            }
-            private StopAnalyzer enclosingInstance;
-            public StopAnalyzer Enclosing_Instance
-            {
-                get
-                {
-                    return enclosingInstance;
-                }
-                
-            }
-            internal Tokenizer source;
-            internal TokenStream result;
-        }
-        
-        public override TokenStream ReusableTokenStream(System.String fieldName, System.IO.TextReader reader)
-        {
-            var streams = (SavedStreams) PreviousTokenStream;
-            if (streams == null)
-            {
-                streams = new SavedStreams(this) {source = new LowerCaseTokenizer(reader)};
-                streams.result = new StopFilter(enablePositionIncrements, streams.source, stopWords);
-                PreviousTokenStream = streams;
-            }
-            else
-                streams.source.Reset(reader);
-            return streams.result;
-        }
-        static StopAnalyzer()
-        {
-            {
-                var stopWords = new System.String[]{"a", "an", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with"};
-                var stopSet = new CharArraySet(stopWords.Length, false);
-                stopSet.AddAll(stopWords);
-                ENGLISH_STOP_WORDS_SET = CharArraySet.UnmodifiableSet(stopSet);
-            }
-        }
-    }
-}
\ No newline at end of file
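
StopAnalyzer now lives in Lucene.Net.Analysis.Common with the same contract: letter tokenization, lower-casing, and removal of the default English stop words. A hedged usage sketch follows, assuming the 4.8-style Lucene.Net.Analysis.Core.StopAnalyzer and attribute names (verify against the actual port):

    using System;
    using System.IO;
    using Lucene.Net.Analysis.Core;              // assumed home of StopAnalyzer
    using Lucene.Net.Analysis.TokenAttributes;   // assumed attribute namespace
    using Lucene.Net.Util;

    class StopAnalyzerDemo
    {
        static void Main()
        {
            // Lower-cases on letter boundaries and drops the default English
            // stop words, as the deleted class documented.
            var analyzer = new StopAnalyzer(LuceneVersion.LUCENE_48);
            using (var ts = analyzer.GetTokenStream("body",
                       new StringReader("The quick brown fox and the lazy dog")))
            {
                var term = ts.AddAttribute<ICharTermAttribute>();
                ts.Reset();
                while (ts.IncrementToken())
                    Console.WriteLine(term);     // "the" and "and" are filtered out
                ts.End();
            }
        }
    }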

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/8a97bfcf/src/Lucene.Net.Core/Analysis/StopFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Analysis/StopFilter.cs b/src/Lucene.Net.Core/Analysis/StopFilter.cs
deleted file mode 100644
index 722faaf..0000000
--- a/src/Lucene.Net.Core/Analysis/StopFilter.cs
+++ /dev/null
@@ -1,178 +0,0 @@
-/* 
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- * 
- * http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-using System;
-using System.Collections.Generic;
-using Lucene.Net.Analysis.Tokenattributes;
-using Lucene.Net.Util;
-using QueryParser = Lucene.Net.QueryParsers.QueryParser;
-using Version = Lucene.Net.Util.Version;
-
-namespace Lucene.Net.Analysis
-{
-    
-    /// <summary> Removes stop words from a token stream.</summary>
-    
-    public sealed class StopFilter:TokenFilter
-    {
-        private readonly CharArraySet stopWords;
-        private bool enablePositionIncrements = false;
-        
-        private readonly ITermAttribute termAtt;
-        private readonly IPositionIncrementAttribute posIncrAtt;
-        
-        /// <summary> Construct a token stream filtering the given input.
-        /// If <c>stopWords</c> is an instance of <see cref="CharArraySet" /> (true if
-        /// <c>makeStopSet()</c> was used to construct the set) it will be directly used
-        /// and <c>ignoreCase</c> will be ignored since <c>CharArraySet</c>
-        /// directly controls case sensitivity.
-        /// <p/>
-        /// If <c>stopWords</c> is not an instance of <see cref="CharArraySet" />,
-        /// a new CharArraySet will be constructed and <c>ignoreCase</c> will be
-        /// used to specify the case sensitivity of that set.
-        /// </summary>
-        /// <param name="enablePositionIncrements">true if token positions should record
the removed stop words</param>
-        /// <param name="input">Input TokenStream</param>
-        /// <param name="stopWords">A Set of strings or strings or char[] or any other
ToString()-able set representing the stopwords</param>
-        /// <param name="ignoreCase">if true, all words are lower cased first</param>
-        public StopFilter(bool enablePositionIncrements, TokenStream input, ISet<string> stopWords, bool ignoreCase)
-            : base(input)
-        {
-            if (stopWords is CharArraySet)
-            {
-                this.stopWords = (CharArraySet) stopWords;
-            }
-            else
-            {
-                this.stopWords = new CharArraySet(stopWords.Count, ignoreCase);
-                this.stopWords.AddAll(stopWords);
-            }
-            this.enablePositionIncrements = enablePositionIncrements;
-            termAtt = AddAttribute<ITermAttribute>();
-            posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
-        }
-
-        /// <summary> Constructs a filter which removes words from the input
-        /// TokenStream that are named in the Set.
-        /// </summary>
-        /// <param name="enablePositionIncrements">true if token positions should record
the removed stop words</param>
-        ///  <param name="in">Input stream</param>
-        /// <param name="stopWords">A Set of strings or char[] or any other ToString()-able
set representing the stopwords</param>
-        /// <seealso cref="MakeStopSet(String[])"/>
-        public StopFilter(bool enablePositionIncrements, TokenStream @in, ISet<string> stopWords)
-            : this(enablePositionIncrements, @in, stopWords, false)
-        { }
-        
-        /// <summary> Builds a Set from an array of stop words,
-        /// appropriate for passing into the StopFilter constructor.
-        /// This permits this stopWords construction to be cached once when
-        /// an Analyzer is constructed.
-        /// 
-        /// </summary>
-        /// <seealso cref="MakeStopSet(String[], bool)">passing false to ignoreCase</seealso>
-        public static ISet<string> MakeStopSet(params string[] stopWords)
-        {
-            return MakeStopSet(stopWords, false);
-        }
-        
-        /// <summary> Builds a Set from an array of stop words,
-        /// appropriate for passing into the StopFilter constructor.
-        /// This permits this stopWords construction to be cached once when
-        /// an Analyzer is constructed.
-        /// </summary>
-        /// <param name="stopWords">A list of strings or char[] or any other ToString()-able
list representing the stop words</param>
-        /// <seealso cref="MakeStopSet(String[], bool)">passing false to ignoreCase</seealso>
-        public static ISet<string> MakeStopSet(IList<object> stopWords)
-        {
-            return MakeStopSet(stopWords, false);
-        }
-        
-        /// <summary>Builds a Set from an array of stop words.</summary>
-        /// <param name="stopWords">An array of stopwords</param>
-        /// <param name="ignoreCase">If true, all words are lower cased first.</param>
-        /// <returns> a Set containing the words</returns>
-        public static ISet<string> MakeStopSet(string[] stopWords, bool ignoreCase)
-        {
-            var stopSet = new CharArraySet(stopWords.Length, ignoreCase);
-            stopSet.AddAll(stopWords);
-            return stopSet;
-        }
-        
-        /// <summary>Builds a Set from a list of stop words.</summary>
-        /// <param name="stopWords">A List of Strings or char[] or any other toString()-able
list representing the stopwords </param>
-        /// <param name="ignoreCase">if true, all words are lower cased first</param>
-        /// <returns>A Set (<see cref="CharArraySet"/>) containing the words</returns>
-        public static ISet<string> MakeStopSet(IList<object> stopWords, bool ignoreCase)
-        {
-            var stopSet = new CharArraySet(stopWords.Count, ignoreCase);
-            foreach(var word in stopWords)
-                stopSet.Add(word.ToString());
-            return stopSet;
-        }
-        
-        /// <summary> Returns the next input Token whose term() is not a stop word.</summary>
-        public override bool IncrementToken()
-        {
-            // return the first non-stop word found
-            int skippedPositions = 0;
-            while (input.IncrementToken())
-            {
-                if (!stopWords.Contains(termAtt.TermBuffer(), 0, termAtt.TermLength()))
-                {
-                    if (enablePositionIncrements)
-                    {
-                        posIncrAtt.PositionIncrement = posIncrAtt.PositionIncrement + skippedPositions;
-                    }
-                    return true;
-                }
-                skippedPositions += posIncrAtt.PositionIncrement;
-            }
-            // reached EOS -- return false
-            return false;
-        }
-        
-        /// <summary> Returns version-dependent default for enablePositionIncrements. Analyzers
-        /// that embed StopFilter use this method when creating the StopFilter. Prior
-        /// to 2.9, this returns false. On 2.9 or later, it returns true.
-        /// </summary>
-        public static bool GetEnablePositionIncrementsVersionDefault(Version matchVersion)
-        {
-            return matchVersion.OnOrAfter(Version.LUCENE_29);
-        }
-
-        /// <summary> If <c>true</c>, this StopFilter will preserve
-        /// positions of the incoming tokens (ie, accumulate and
-        /// set position increments of the removed stop tokens).
-        /// Generally, <c>true</c> is best as it does not
-        /// lose information (positions of the original tokens)
-        /// during indexing.
-        /// 
-        /// <p/> When set, when a token is stopped
-        /// (omitted), the position increment of the following
-        /// token is incremented.
-        /// 
-        /// <p/> <b>NOTE</b>: be sure to also
-        /// set <see cref="QueryParser.EnablePositionIncrements" /> if
-        /// you use QueryParser to create queries.
-        /// </summary>
-        public bool EnablePositionIncrements
-        {
-            get { return enablePositionIncrements; }
-            set { enablePositionIncrements = value; }
-        }
-    }
-}
\ No newline at end of file
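
The MakeStopSet/StopFilter pairing shown above carries over to the 4.8 API, except that the match version is passed up front and position increments are always preserved for 4.4+ indexes. A sketch under those assumptions (type and namespace names are guesses at the port, not verified here):

    using System;
    using System.IO;
    using Lucene.Net.Analysis.Core;              // assumed home of StopFilter/LowerCaseTokenizer
    using Lucene.Net.Analysis.TokenAttributes;   // assumed attribute namespace
    using Lucene.Net.Util;

    class StopFilterDemo
    {
        static void Main()
        {
            // MakeStopSet builds the CharArraySet the filter consumes directly,
            // so the set is constructed once and reused across token streams.
            var stopSet = StopFilter.MakeStopSet(LuceneVersion.LUCENE_48, "and", "the");

            var source = new LowerCaseTokenizer(LuceneVersion.LUCENE_48,
                new StringReader("Tee AND the sink"));
            var filtered = new StopFilter(LuceneVersion.LUCENE_48, source, stopSet);

            var term = filtered.AddAttribute<ICharTermAttribute>();
            filtered.Reset();
            while (filtered.IncrementToken())
                Console.WriteLine(term);         // prints "tee" then "sink"
            filtered.End();
            filtered.Dispose();
        }
    }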

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/8a97bfcf/src/Lucene.Net.Core/Analysis/TeeSinkTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Analysis/TeeSinkTokenFilter.cs b/src/Lucene.Net.Core/Analysis/TeeSinkTokenFilter.cs
deleted file mode 100644
index 6eb217f..0000000
--- a/src/Lucene.Net.Core/Analysis/TeeSinkTokenFilter.cs
+++ /dev/null
@@ -1,266 +0,0 @@
-/* 
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- * 
- * http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-using System;
-using System.Collections.Generic;
-using Attribute = Lucene.Net.Util.Attribute;
-using AttributeSource = Lucene.Net.Util.AttributeSource;
-
-namespace Lucene.Net.Analysis
-{
-    
-    /// <summary> This TokenFilter provides the ability to set aside attribute states
-    /// that have already been analyzed.  This is useful in situations where multiple fields share
-    /// many common analysis steps and then go their separate ways.
-    /// <p/>
-    /// It is also useful for doing things like entity extraction or proper noun analysis as
-    /// part of the analysis workflow and saving off those tokens for use in another field.
-    /// 
-    /// <code>
-    /// TeeSinkTokenFilter source1 = new TeeSinkTokenFilter(new WhitespaceTokenizer(reader1));
-    /// TeeSinkTokenFilter.SinkTokenStream sink1 = source1.newSinkTokenStream();
-    /// TeeSinkTokenFilter.SinkTokenStream sink2 = source1.newSinkTokenStream();
-    /// TeeSinkTokenFilter source2 = new TeeSinkTokenFilter(new WhitespaceTokenizer(reader2));
-    /// source2.addSinkTokenStream(sink1);
-    /// source2.addSinkTokenStream(sink2);
-    /// TokenStream final1 = new LowerCaseFilter(source1);
-    /// TokenStream final2 = source2;
-    /// TokenStream final3 = new EntityDetect(sink1);
-    /// TokenStream final4 = new URLDetect(sink2);
-    /// d.add(new Field("f1", final1));
-    /// d.add(new Field("f2", final2));
-    /// d.add(new Field("f3", final3));
-    /// d.add(new Field("f4", final4));
-    /// </code>
-    /// In this example, <c>sink1</c> and <c>sink2</c> will both get tokens from both
-    /// <c>reader1</c> and <c>reader2</c> after whitespace tokenizer
-    /// and now we can further wrap any of these in extra analysis, and more "sources" can be inserted if desired.
-    /// It is important that tees are consumed before sinks (in the above example, the
-    /// field names must be less than the sink's field names). If you are not sure which
-    /// stream is consumed first, you can simply add another sink and then pass all tokens
-    /// to the sinks at once using <see cref="ConsumeAllTokens" />.
-    /// This TokenFilter is exhausted after that. In that case, change the example above to:
-    /// <code>
-    /// ...
-    /// TokenStream final1 = new LowerCaseFilter(source1.newSinkTokenStream());
-    /// TokenStream final2 = source2.newSinkTokenStream();
-    /// sink1.consumeAllTokens();
-    /// sink2.consumeAllTokens();
-    /// ...
-    /// </code>
-    /// In this case, the fields can be added in any order, because the sources are not used anymore and all sinks are ready.
-    /// <p/>Note, the EntityDetect and URLDetect TokenStreams are for the example and do not currently exist in Lucene.
-    /// </summary>
-    public sealed class TeeSinkTokenFilter:TokenFilter
-    {
-        public class AnonymousClassSinkFilter:SinkFilter
-        {
-            public override bool Accept(AttributeSource source)
-            {
-                return true;
-            }
-        }
-        private readonly LinkedList<WeakReference> sinks = new LinkedList<WeakReference>();
-        
-        /// <summary> Instantiates a new TeeSinkTokenFilter.</summary>
-        public TeeSinkTokenFilter(TokenStream input):base(input)
-        {
-        }
-        
-        /// <summary> Returns a new <see cref="SinkTokenStream" /> that receives
all tokens consumed by this stream.</summary>
-        public SinkTokenStream NewSinkTokenStream()
-        {
-            return NewSinkTokenStream(ACCEPT_ALL_FILTER);
-        }
-        
-        /// <summary> Returns a new <see cref="SinkTokenStream" /> that receives
all tokens consumed by this stream
-        /// that pass the supplied filter.
-        /// </summary>
-        /// <seealso cref="SinkFilter">
-        /// </seealso>
-        public SinkTokenStream NewSinkTokenStream(SinkFilter filter)
-        {
-            var sink = new SinkTokenStream(this.CloneAttributes(), filter);
-            sinks.AddLast(new WeakReference(sink));
-            return sink;
-        }
-        
-        /// <summary> Adds a <see cref="SinkTokenStream" /> created by another <c>TeeSinkTokenFilter</c>
-        /// to this one. The supplied stream will also receive all consumed tokens.
-        /// This method can be used to pass tokens from two different tees to one sink.
-        /// </summary>
-        public void  AddSinkTokenStream(SinkTokenStream sink)
-        {
-            // check that sink has correct factory
-            if (!this.Factory.Equals(sink.Factory))
-            {
-                throw new System.ArgumentException("The supplied sink is not compatible to this tee");
-            }
-            // add any missing attribute impls to the existing sink
-            foreach (var impl in this.CloneAttributes().GetAttributeImplsIterator())
-            {
-                sink.AddAttributeImpl(impl);
-            }
-            sinks.AddLast(new WeakReference(sink));
-        }
-        
-        /// <summary> <c>TeeSinkTokenFilter</c> passes all tokens to the added sinks
-        /// when itself is consumed. To be sure that all tokens from the input
-        /// stream are passed to the sinks, you can call this method.
-        /// This instance is exhausted after this, but all sinks are instantly available.
-        /// </summary>
-        public void  ConsumeAllTokens()
-        {
-            while (IncrementToken())
-            {
-            }
-        }
-        
-        public override bool IncrementToken()
-        {
-            if (input.IncrementToken())
-            {
-                // capture state lazily - maybe no SinkFilter accepts this state
-                State state = null;
-                foreach(WeakReference wr in sinks)
-                {
-                    var sink = (SinkTokenStream)wr.Target;
-                    if (sink != null)
-                    {
-                        if (sink.Accept(this))
-                        {
-                            if (state == null)
-                            {
-                                state = this.CaptureState();
-                            }
-                            sink.AddState(state);
-                        }
-                    }
-                }
-                return true;
-            }
-            
-            return false;
-        }
-        
-        public override void  End()
-        {
-            base.End();
-            State finalState = CaptureState();
-            foreach(WeakReference wr in sinks)
-            {
-                var sink = (SinkTokenStream)wr.Target;
-                if (sink != null)
-                {
-                    sink.SetFinalState(finalState);
-                }
-            }
-        }
-        
-        /// <summary> A filter that decides which <see cref="AttributeSource" /> states to store in the sink.</summary>
-        public abstract class SinkFilter
-        {
-            /// <summary> Returns true, iff the current state of the passed-in <see cref="AttributeSource" /> shall be stored
-            /// in the sink. 
-            /// </summary>
-            public abstract bool Accept(AttributeSource source);
-            
-            /// <summary> Called by <see cref="SinkTokenStream.Reset()" />. This method does nothing by default
-            /// and can optionally be overridden.
-            /// </summary>
-            public virtual void Reset()
-            {
-                // nothing to do; can be overridden
-            }
-        }
-        
-        public sealed class SinkTokenStream : TokenStream
-        {
-            private readonly LinkedList<State> cachedStates = new LinkedList<State>();
-            private State finalState;
-            private IEnumerator<AttributeSource.State> it = null;
-            private readonly SinkFilter filter;
-
-            internal SinkTokenStream(AttributeSource source, SinkFilter filter)
-                : base(source)
-            {
-                this.filter = filter;
-            }
-            
-            internal /*private*/ bool Accept(AttributeSource source)
-            {
-                return filter.Accept(source);
-            }
-            
-            internal /*private*/ void  AddState(AttributeSource.State state)
-            {
-                if (it != null)
-                {
-                    throw new System.SystemException("The tee must be consumed before sinks are consumed.");
-                }
-                cachedStates.AddLast(state);
-            }
-            
-            internal /*private*/ void  SetFinalState(AttributeSource.State finalState)
-            {
-                this.finalState = finalState;
-            }
-            
-            public override bool IncrementToken()
-            {
-                // lazy init the iterator
-                if (it == null)
-                {
-                    it = cachedStates.GetEnumerator();
-                }
-                
-                if (!it.MoveNext())
-                {
-                    return false;
-                }
-                
-                State state = it.Current;
-                RestoreState(state);
-                return true;
-            }
-            
-            public override void  End()
-            {
-                if (finalState != null)
-                {
-                    RestoreState(finalState);
-                }
-            }
-            
-            public override void  Reset()
-            {
-                it = cachedStates.GetEnumerator();
-            }
-
-            protected override void Dispose(bool disposing)
-            {
-                // Do nothing.
-            }
-        }
-        
-        private static readonly SinkFilter ACCEPT_ALL_FILTER;
-        static TeeSinkTokenFilter()
-        {
-            ACCEPT_ALL_FILTER = new AnonymousClassSinkFilter();
-        }
-    }
-}
\ No newline at end of file
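
The tee/sink pattern documented above is unchanged in the Lucene.Net.Analysis.Common replacement (Lucene.Net.Analysis.Sinks in the 4.8 layout, assuming the port mirrors the Java package). A sketch of the safe ordering, consuming the tee before reading any sink:

    using System;
    using System.IO;
    using Lucene.Net.Analysis.Core;              // assumed home of WhitespaceTokenizer
    using Lucene.Net.Analysis.Sinks;             // assumed home of TeeSinkTokenFilter
    using Lucene.Net.Analysis.TokenAttributes;   // assumed attribute namespace
    using Lucene.Net.Util;

    class TeeSinkDemo
    {
        static void Main()
        {
            var source = new TeeSinkTokenFilter(
                new WhitespaceTokenizer(LuceneVersion.LUCENE_48,
                    new StringReader("tokens for two fields")));
            var sink1 = source.NewSinkTokenStream();
            var sink2 = source.NewSinkTokenStream();

            // Exhaust the tee first; every accepted state is now cached in
            // both sinks, so the sinks can be consumed in any order.
            source.Reset();
            source.ConsumeAllTokens();

            var term = sink1.AddAttribute<ICharTermAttribute>();
            sink1.Reset();
            while (sink1.IncrementToken())
                Console.WriteLine(term);         // sink2 would replay the same tokens
        }
    }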

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/8a97bfcf/src/Lucene.Net.Core/Analysis/WhitespaceAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Analysis/WhitespaceAnalyzer.cs b/src/Lucene.Net.Core/Analysis/WhitespaceAnalyzer.cs
deleted file mode 100644
index ae94c44..0000000
--- a/src/Lucene.Net.Core/Analysis/WhitespaceAnalyzer.cs
+++ /dev/null
@@ -1,43 +0,0 @@
-/* 
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- * 
- * http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-namespace Lucene.Net.Analysis
-{
-    
-    /// <summary>An Analyzer that uses <see cref="WhitespaceTokenizer" />. </summary>
-    
-    public sealed class WhitespaceAnalyzer:Analyzer
-    {
-        public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
-        {
-            return new WhitespaceTokenizer(reader);
-        }
-        
-        public override TokenStream ReusableTokenStream(System.String fieldName, System.IO.TextReader reader)
-        {
-            var tokenizer = (Tokenizer) PreviousTokenStream;
-            if (tokenizer == null)
-            {
-                tokenizer = new WhitespaceTokenizer(reader);
-                PreviousTokenStream = tokenizer;
-            }
-            else
-                tokenizer.Reset(reader);
-            return tokenizer;
-        }
-    }
-}
\ No newline at end of file
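
The hand-rolled ReusableTokenStream/PreviousTokenStream caching above is what the 4.8 Analyzer base class now does for you: CreateComponents is called once per thread and the components are reused thereafter. A sketch of the modern equivalent, assuming the port keeps the Java 4.8 shape (the exact accessibility modifiers on CreateComponents may differ):

    using System.IO;
    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Core;              // assumed home of WhitespaceTokenizer
    using Lucene.Net.Util;

    // Equivalent of the deleted WhitespaceAnalyzer: the framework caches and
    // resets these components per thread, replacing PreviousTokenStream.
    sealed class ReusableWhitespaceAnalyzer : Analyzer
    {
        protected internal override TokenStreamComponents CreateComponents(
            string fieldName, TextReader reader)
        {
            var tokenizer = new WhitespaceTokenizer(LuceneVersion.LUCENE_48, reader);
            return new TokenStreamComponents(tokenizer);
        }
    }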

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/8a97bfcf/src/Lucene.Net.Core/Analysis/WhitespaceTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Analysis/WhitespaceTokenizer.cs b/src/Lucene.Net.Core/Analysis/WhitespaceTokenizer.cs
deleted file mode 100644
index ba19da9..0000000
--- a/src/Lucene.Net.Core/Analysis/WhitespaceTokenizer.cs
+++ /dev/null
@@ -1,55 +0,0 @@
-/* 
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- * 
- * http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-using AttributeSource = Lucene.Net.Util.AttributeSource;
-
-namespace Lucene.Net.Analysis
-{
-    
-    /// <summary>A WhitespaceTokenizer is a tokenizer that divides text at whitespace.
-    /// Adjacent sequences of non-whitespace characters form tokens.
-    /// </summary>
-    
-    public class WhitespaceTokenizer:CharTokenizer
-    {
-        /// <summary>Construct a new WhitespaceTokenizer. </summary>
-        public WhitespaceTokenizer(System.IO.TextReader @in)
-            : base(@in)
-        {
-        }
-        
-        /// <summary>Construct a new WhitespaceTokenizer using a given <see cref="AttributeSource"
/>. </summary>
-        public WhitespaceTokenizer(AttributeSource source, System.IO.TextReader @in)
-            : base(source, @in)
-        {
-        }
-        
-        /// <summary>Construct a new WhitespaceTokenizer using a given <see cref="Lucene.Net.Util.AttributeSource.AttributeFactory"
/>. </summary>
-        public WhitespaceTokenizer(AttributeFactory factory, System.IO.TextReader @in)
-            : base(factory, @in)
-        {
-        }
-        
-        /// <summary>Collects only characters which do not satisfy
-        /// <see cref="char.IsWhiteSpace(char)" />.
-        /// </summary>
-        protected internal override bool IsTokenChar(char c)
-        {
-            return !System.Char.IsWhiteSpace(c);
-        }
-    }
-}
\ No newline at end of file
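
Since the whole class is CharTokenizer plus one predicate, custom tokenizers follow the same one-method recipe. A sketch against the 4.8-style CharTokenizer, whose IsTokenChar takes an int code point rather than the char shown above (namespaces assumed, not verified):

    using System.IO;
    using Lucene.Net.Analysis.Util;              // assumed home of CharTokenizer in 4.8
    using Lucene.Net.Util;

    // Splits on whitespace and punctuation instead of whitespace alone.
    sealed class WordCharTokenizer : CharTokenizer
    {
        public WordCharTokenizer(LuceneVersion matchVersion, TextReader input)
            : base(matchVersion, input)
        {
        }

        protected override bool IsTokenChar(int c)
        {
            // BMP-only check for brevity; supplementary code points would
            // need char.ConvertFromUtf32 handling.
            return !char.IsWhiteSpace((char)c) && !char.IsPunctuation((char)c);
        }
    }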

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/8a97bfcf/src/Lucene.Net.Core/Analysis/WordlistLoader.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Analysis/WordlistLoader.cs b/src/Lucene.Net.Core/Analysis/WordlistLoader.cs
deleted file mode 100644
index d3abfe6..0000000
--- a/src/Lucene.Net.Core/Analysis/WordlistLoader.cs
+++ /dev/null
@@ -1,146 +0,0 @@
-/* 
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- * 
- * http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-using System.Collections.Generic;
-
-namespace Lucene.Net.Analysis
-{
-    
-    /// <summary> Loader for text files that represent a list of stopwords.</summary>
-    public class WordlistLoader
-    {
-        
-        /// <summary> Loads a text file and adds every line as an entry to a HashSet (omitting
-        /// leading and trailing whitespace). Every line of the file should contain only
-        /// one word. The words need to be in lowercase if you make use of an
-        /// Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
-        /// </summary>
-        /// <param name="wordfile">File containing the wordlist</param>
-        /// <returns> A HashSet with the file's words</returns>
-        public static ISet<string> GetWordSet(System.IO.FileInfo wordfile)
-        {
-            using (var reader = new System.IO.StreamReader(wordfile.FullName, System.Text.Encoding.Default))
-            {
-                return GetWordSet(reader);
-            }
-        }
-        
-        /// <summary> Loads a text file and adds every non-comment line as an entry to a HashSet (omitting
-        /// leading and trailing whitespace). Every line of the file should contain only
-        /// one word. The words need to be in lowercase if you make use of an
-        /// Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
-        /// </summary>
-        /// <param name="wordfile">File containing the wordlist</param>
-        /// <param name="comment">The comment string to ignore</param>
-        /// <returns> A HashSet with the file's words</returns>
-        public static ISet<string> GetWordSet(System.IO.FileInfo wordfile, System.String comment)
-        {
-            using (var reader = new System.IO.StreamReader(wordfile.FullName, System.Text.Encoding.Default))
-            {
-                return GetWordSet(reader, comment);
-            }
-        }
-        
-        
-        /// <summary> Reads lines from a Reader and adds every line as an entry to a HashSet (omitting
-        /// leading and trailing whitespace). Every line of the Reader should contain only
-        /// one word. The words need to be in lowercase if you make use of an
-        /// Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
-        /// </summary>
-        /// <param name="reader">Reader containing the wordlist</param>
-        /// <returns>A HashSet with the reader's words</returns>
-        public static ISet<string> GetWordSet(System.IO.TextReader reader)
-        {
-            var result = Support.Compatibility.SetFactory.CreateHashSet<string>();
-
-            System.String word;
-            while ((word = reader.ReadLine()) != null)
-            {
-                result.Add(word.Trim());
-            }
-
-            return result;
-        }
-
-        /// <summary> Reads lines from a Reader and adds every non-comment line as an entry to a HashSet (omitting
-        /// leading and trailing whitespace). Every line of the Reader should contain only
-        /// one word. The words need to be in lowercase if you make use of an
-        /// Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
-        /// 
-        /// </summary>
-        /// <param name="reader">Reader containing the wordlist
-        /// </param>
-        /// <param name="comment">The string representing a comment.
-        /// </param>
-        /// <returns> A HashSet with the reader's words
-        /// </returns>
-        public static ISet<string> GetWordSet(System.IO.TextReader reader, System.String comment)
-        {
-            var result = Support.Compatibility.SetFactory.CreateHashSet<string>();
-
-            System.String word = null;
-            while ((word = reader.ReadLine()) != null)
-            {
-                if (word.StartsWith(comment) == false)
-                {
-                    result.Add(word.Trim());
-                }
-            }
-
-            return result;
-        }
-
-
-
-        /// <summary> Reads a stem dictionary. Each line contains:
-        /// <c>word<b>\t</b>stem</c>
-        /// (i.e. two tab-separated words)
-        /// 
-        /// </summary>
-        /// <returns> stem dictionary that overrules the stemming algorithm
-        /// </returns>
-        /// <throws>  IOException  </throws>
-        public static Dictionary<string, string> GetStemDict(System.IO.FileInfo wordstemfile)
-        {
-            if (wordstemfile == null)
-                throw new System.ArgumentNullException("wordstemfile", "wordstemfile may not be null");
-            var result = new Dictionary<string, string>();
-            System.IO.StreamReader br = null;
-            System.IO.StreamReader fr = null;
-            try
-            {
-                fr = new System.IO.StreamReader(wordstemfile.FullName, System.Text.Encoding.Default);
-                br = new System.IO.StreamReader(fr.BaseStream, fr.CurrentEncoding);
-                System.String line;
-                char[] tab = {'\t'};
-                while ((line = br.ReadLine()) != null)
-                {
-                    System.String[] wordstem = line.Split(tab, 2);
-                    result[wordstem[0]] = wordstem[1];
-                }
-            }
-            finally
-            {
-                if (fr != null)
-                    fr.Close();
-                if (br != null)
-                    br.Close();
-            }
-            return result;
-        }
-    }
-}
\ No newline at end of file
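
In the 4.8 replacement the loader returns a CharArraySet and takes the match version, but the file format is the same: one word per line, whitespace trimmed, comment lines skipped. A sketch assuming the signature GetWordSet(TextReader, string, LuceneVersion) in Lucene.Net.Analysis.Util (unverified against this commit):

    using System;
    using System.IO;
    using Lucene.Net.Analysis.Util;              // assumed home of WordlistLoader in 4.8
    using Lucene.Net.Util;

    class WordlistDemo
    {
        static void Main()
        {
            // One word per line; lines starting with "#" are ignored and
            // surrounding whitespace is trimmed, as documented above.
            using (var reader = new StringReader("# stop words\nthe\nand\n  or  \n"))
            {
                var words = WordlistLoader.GetWordSet(reader, "#", LuceneVersion.LUCENE_48);
                Console.WriteLine(words.Count);  // 3
            }
        }
    }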

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/8a97bfcf/src/Lucene.Net.Core/project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/project.json b/src/Lucene.Net.Core/project.json
index ddbdf76..6c8e54a 100644
--- a/src/Lucene.Net.Core/project.json
+++ b/src/Lucene.Net.Core/project.json
@@ -14,12 +14,6 @@
         "define": [ "FEATURE_TASKMERGESCHEDULER", "NETSTANDARD" ],
         "compile": {
           "exclude": [
-            "Analysis/Standard/*",
-            "Analysis/Keyword*.cs",
-            "Analysis/LowerCase*.cs",
-            "Analysis/Porter*.cs",
-            "Analysis/Stop*.cs",
-            "Analysis/Whitespace*.cs",
             "Document/FieldSelector*.cs",
             "Index/CompoundFile*.cs",
             "Index/DefaultSkipList*.cs",
@@ -39,22 +33,6 @@
           ],
           "excludeFiles": [
             "RectangularArrays.cs",
-            "Analysis/ASCIIFoldingFilter.cs",
-            "Analysis/BaseCharFilter.cs",
-            "Analysis/CharArraySet.cs",
-            "Analysis/CharReader.cs",
-            "Analysis/CharStream.cs",
-            "Analysis/CharTokenizer.cs",
-            "Analysis/ISOLatin1AccentFilter.cs",
-            "Analysis/LengthFilter.cs",
-            "Analysis/LetterTokenizer.cs",
-            "Analysis/MappingCharFilter.cs",
-            "Analysis/NormalizeCharMap.cs",
-            "Analysis/PerFieldAnalyzerWrapper.cs",
-            "Analysis/SimpleAnalyzer.cs",
-            "Analysis/TeeSinkTokenFilter.cs",
-            "Analysis/Tokenattributes/TermAttribute.cs",
-            "Analysis/WordlistLoader.cs",
             "Document/AbstractField.cs",
             "Document/DateField.cs",
             "Document/Fieldable.cs",
@@ -178,12 +156,6 @@
         ],
         "compile": {
           "exclude": [
-            "Analysis/Standard/*",
-            "Analysis/Keyword*.cs",
-            "Analysis/LowerCase*.cs",
-            "Analysis/Porter*.cs",
-            "Analysis/Stop*.cs",
-            "Analysis/Whitespace*.cs",
             "Document/FieldSelector*.cs",
             "Index/CompoundFile*.cs",
             "Index/DefaultSkipList*.cs",
@@ -203,22 +175,6 @@
           ],
           "excludeFiles": [
             "RectangularArrays.cs",
-            "Analysis/ASCIIFoldingFilter.cs",
-            "Analysis/BaseCharFilter.cs",
-            "Analysis/CharArraySet.cs",
-            "Analysis/CharReader.cs",
-            "Analysis/CharStream.cs",
-            "Analysis/CharTokenizer.cs",
-            "Analysis/ISOLatin1AccentFilter.cs",
-            "Analysis/LengthFilter.cs",
-            "Analysis/LetterTokenizer.cs",
-            "Analysis/MappingCharFilter.cs",
-            "Analysis/NormalizeCharMap.cs",
-            "Analysis/PerFieldAnalyzerWrapper.cs",
-            "Analysis/SimpleAnalyzer.cs",
-            "Analysis/TeeSinkTokenFilter.cs",
-            "Analysis/Tokenattributes/TermAttribute.cs",
-            "Analysis/WordlistLoader.cs",
             "Document/AbstractField.cs",
             "Document/DateField.cs",
             "Document/Fieldable.cs",

