lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From paulir...@apache.org
Subject [11/53] [abbrv] Port Highlighter namespace
Date Thu, 07 Nov 2013 13:53:26 GMT
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/45ba8d83/src/contrib/Highlighter/VectorHighlight/StringUtils.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Highlighter/VectorHighlight/StringUtils.cs b/src/contrib/Highlighter/VectorHighlight/StringUtils.cs
new file mode 100644
index 0000000..7b27259
--- /dev/null
+++ b/src/contrib/Highlighter/VectorHighlight/StringUtils.cs
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Search.Vectorhighlight
+{
+    /// <summary>
+    /// String helpers used to compare a query term, which may end in a <c>*</c>
+    /// wildcard, against a candidate term.
+    /// </summary>
+    public static class StringUtils
+    {
+        /// <summary>
+        /// Checks whether <paramref name="termToMatch"/> matches <paramref name="term"/>.
+        /// A trailing <c>*</c> in <paramref name="term"/> turns the comparison into an
+        /// ordinal prefix match on everything before the <c>*</c>; otherwise the two
+        /// strings must be equal. Wildcard characters in any other position are compared
+        /// literally, except that a leading <c>*</c> or <c>?</c> is rejected.
+        /// </summary>
+        /// <param name="term">The pattern term; may end in <c>*</c>.</param>
+        /// <param name="termToMatch">The candidate term; <c>null</c> never matches.</param>
+        /// <returns><c>true</c> when the candidate matches the pattern.</returns>
+        /// <exception cref="ArgumentNullException"><paramref name="term"/> is <c>null</c>.</exception>
+        /// <exception cref="NotSupportedException"><paramref name="term"/> starts with <c>*</c> or <c>?</c>.</exception>
+        public static Boolean TermStringMatch(String term, String termToMatch)
+        {
+            if (term == null)
+                throw new ArgumentNullException("term");
+
+            // An empty pattern has no first or last character to inspect (indexing it
+            // used to throw IndexOutOfRangeException); it only matches an empty candidate.
+            if (term.Length == 0)
+                return termToMatch != null && termToMatch.Length == 0;
+
+            if (term[0] == '*' || term[0] == '?')
+                throw new NotSupportedException("Unable to do matching with wildcard at the beginning");
+
+            // A null candidate never matches. The exact-match path already returned
+            // false for it; the wildcard path used to fail with a NullReferenceException.
+            if (termToMatch == null)
+                return false;
+
+            if (term[term.Length - 1] == '*')
+            {
+                // Wildcard at the end: everything before the '*' must be a prefix
+                // of the candidate.
+                int prefixLength = term.Length - 1;
+                return termToMatch.Length >= prefixLength
+                    && String.CompareOrdinal(term, 0, termToMatch, 0, prefixLength) == 0;
+            }
+
+            return term.Equals(termToMatch);
+        }
+
+        /// <summary>
+        /// Returns <c>true</c> when <paramref name="term"/> matches at least one of the
+        /// patterns in <paramref name="terms"/> according to <see cref="TermStringMatch"/>.
+        /// </summary>
+        /// <param name="terms">The pattern terms to test against.</param>
+        /// <param name="term">The candidate term.</param>
+        /// <exception cref="ArgumentNullException"><paramref name="terms"/> is <c>null</c>.</exception>
+        public static Boolean AnyTermMatch(IList<String> terms, String term)
+        {
+            if (terms == null)
+                throw new ArgumentNullException("terms");
+
+            foreach (String pattern in terms)
+            {
+                if (TermStringMatch(pattern, term))
+                    return true;
+            }
+            return false;
+        }
+     }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/45ba8d83/src/contrib/Highlighter/VectorHighlight/Support.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Highlighter/VectorHighlight/Support.cs b/src/contrib/Highlighter/VectorHighlight/Support.cs
new file mode 100644
index 0000000..0dfbf43
--- /dev/null
+++ b/src/contrib/Highlighter/VectorHighlight/Support.cs
@@ -0,0 +1,73 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+*/
+
+using System;
+using System.Collections.Generic;
+using System.Text;
+
+namespace Lucene.Net.Search.Vectorhighlight
+{
+    /// <summary>
+    /// A <see cref="Dictionary{K,V}"/> that additionally accepts a <c>null</c> key and
+    /// returns <c>default(V)</c> instead of throwing when a key is missing.
+    /// </summary>
+    /// <remarks>
+    /// The members below hide the base members with <c>new</c> rather than overriding
+    /// them, so the null-key handling only applies when the instance is accessed through
+    /// a <c>HashMap</c>-typed reference. Base members that are not hidden here (for
+    /// example <c>ContainsKey</c>, <c>Remove</c>, <c>Clear</c>) are unaware of the null key.
+    /// </remarks>
+    public class HashMap<K, V> : Dictionary<K, V>
+    {
+        // Value stored under the null key; only meaningful when _HasNullKey is true.
+        V _NullKeyValue = default(V);
+
+        // Records whether a null key has been stored. Comparing _NullKeyValue against
+        // null cannot answer this: it is always "non-null" for value-type V and misses
+        // a null key that was stored with a null value.
+        bool _HasNullKey;
+
+        /// <summary>
+        /// Adds a new entry. A <c>null</c> key is stored separately and may be set
+        /// repeatedly; for any other key the base dictionary throws
+        /// <see cref="ArgumentException"/> when the key already exists.
+        /// </summary>
+        public new void Add(K key,V value)
+        {
+            if (key == null)
+            {
+                _NullKeyValue = value;
+                _HasNullKey = true;
+            }
+            else
+                base.Add(key,value);
+        }
+
+        /// <summary>
+        /// Number of entries, counting the null key once it has been stored.
+        /// </summary>
+        public new int Count
+        {
+            get
+            {
+                return base.Count + (_HasNullKey ? 1 : 0);
+            }
+        }
+
+        /// <summary>
+        /// Gets the value for <paramref name="key"/> (or <c>default(V)</c> when absent),
+        /// or sets it, replacing any existing value.
+        /// </summary>
+        public new V this[K key]
+        {
+            get{
+                return Get(key);
+            }
+            set{
+                Put(key,value);
+            }
+        }
+
+        /// <summary>
+        /// Returns the value stored for <paramref name="key"/>, or <c>default(V)</c>
+        /// when the key is not present.
+        /// </summary>
+        public V Get(K key)
+        {
+            if (key == null) return _NullKeyValue;
+
+            V v = default(V);
+            base.TryGetValue(key, out v);
+            return v;
+        }
+
+        /// <summary>
+        /// Stores <paramref name="val"/> under <paramref name="key"/>, overwriting any
+        /// value already present. This previously went through <c>Add</c> and therefore
+        /// threw on an existing key.
+        /// </summary>
+        public void Put(K key, V val) 
+        {
+            if (key == null)
+            {
+                _NullKeyValue = val;
+                _HasNullKey = true;
+            }
+            else
+                base[key] = val;
+        }
+    }
+}
+

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/45ba8d83/src/contrib/Highlighter/VectorHighlight/VectorHighlightMapper.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Highlighter/VectorHighlight/VectorHighlightMapper.cs b/src/contrib/Highlighter/VectorHighlight/VectorHighlightMapper.cs
new file mode 100644
index 0000000..da2966c
--- /dev/null
+++ b/src/contrib/Highlighter/VectorHighlight/VectorHighlightMapper.cs
@@ -0,0 +1,134 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#if LUCENENET_350 //Lucene.Net specific code. See https://issues.apache.org/jira/browse/LUCENENET-350
+
+using System;
+using System.Collections.Generic;
+using Lucene.Net.Index;
+
+namespace Lucene.Net.Search.Vectorhighlight
+{
+    /// <summary>
+    /// A <see cref="TermVectorMapper"/> that records only the terms matching a supplied
+    /// list of (optionally wildcard-terminated) terms, and exposes what it recorded
+    /// through the term-vector interfaces it implements.
+    /// </summary>
+    /// <remarks>
+    /// Term indexes handed out by <see cref="IndexOf"/> refer to the order in which
+    /// matching terms were passed to <see cref="Map"/>; <see cref="GetTerms"/>,
+    /// <see cref="GetTermFrequencies"/>, <see cref="GetTermPositions"/> and
+    /// <see cref="GetOffsets"/> all use that same order.
+    /// </remarks>
+    public class VectorHighlightMapper : TermVectorMapper, ITermFreqVector, TermPositionVector
+    {
+        // Patterns a mapped term must match (see StringUtils.AnyTermMatch) to be recorded.
+        private readonly List<string> _terms;
+        private Dictionary<string, TermVectorOffsetInfo[]> _tvoi;
+        private Dictionary<string, int[]> _positions;
+        private Dictionary<string, int> _frequency;
+        // Recorded terms in mapping order; defines the term index used by this class.
+        private List<string> _indexMap;
+        private string _field;
+        private bool _storeOffsets;
+        private bool _storePositions;
+
+        /// <summary>
+        /// Creates a mapper that records only terms matching one of <paramref name="terms"/>.
+        /// </summary>
+        public VectorHighlightMapper(List<string> terms)
+        {
+            _terms = terms;
+            _tvoi = new Dictionary<string, TermVectorOffsetInfo[]>();
+            _positions = new Dictionary<string, int[]>();
+            _frequency = new Dictionary<string, int>();
+            _indexMap = new List<string>();
+        }
+
+        /// <summary>
+        /// Records the field being mapped and resets all collected data.
+        /// </summary>
+        public override void SetExpectations(string field, int numTerms, bool storeOffsets, bool storePositions)
+        {
+            _field = field;
+            _storeOffsets = storeOffsets;
+            _storePositions = storePositions;
+            // Every collection is replaced, including the ones that will stay empty,
+            // so offsets/positions recorded before this call cannot be returned for
+            // the terms recorded after it.
+            _tvoi = new Dictionary<string, TermVectorOffsetInfo[]>(storeOffsets ? numTerms : 0);
+            _positions = new Dictionary<string, int[]>(storePositions ? numTerms : 0);
+            _frequency = new Dictionary<string, int>(numTerms);
+            _indexMap = new List<string>(numTerms);
+        }
+
+        /// <summary>
+        /// Records <paramref name="term"/> and its data when it matches one of the
+        /// terms supplied to the constructor; other terms are ignored.
+        /// </summary>
+        public override void Map(string term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions)
+        {
+            if (StringUtils.AnyTermMatch(_terms, term))
+            {
+                _indexMap.Add(term);
+                if (_storeOffsets)
+                    _tvoi.Add(term, offsets);
+                if (_storePositions)
+                    _positions.Add(term, positions);
+                _frequency.Add(term,frequency);
+            }
+        }
+
+        /// <summary>The field passed to the last <see cref="SetExpectations"/> call.</summary>
+        public string Field
+        {
+            get { return _field; }
+        }
+
+        /// <summary>
+        /// Number of recorded terms. Counted from the index list rather than the
+        /// offsets dictionary, which stays empty when offsets are not stored.
+        /// </summary>
+        public int Size
+        {
+            get { return _indexMap.Count; }
+        }
+
+        /// <summary>Recorded terms, in term-index order.</summary>
+        public string[] GetTerms()
+        {
+            return _indexMap.ToArray();
+        }
+
+        /// <summary>Frequencies of the recorded terms, in term-index order.</summary>
+        public int[] GetTermFrequencies()
+        {
+            int[] result = new int[_indexMap.Count];
+            for (int i = 0; i < result.Length; i++)
+            {
+                result[i] = _frequency[_indexMap[i]];
+            }
+            return result;
+        }
+
+        /// <summary>Index of <paramref name="term"/> among the recorded terms, or -1.</summary>
+        public int IndexOf(string term)
+        {
+            return _indexMap.IndexOf(term);
+        }
+
+        /// <summary>
+        /// Looks up <paramref name="len"/> terms of <paramref name="terms"/>, starting at
+        /// <paramref name="start"/>, and returns one index (or -1) per looked-up term.
+        /// </summary>
+        /// <remarks>
+        /// <paramref name="start"/> and <paramref name="len"/> select a slice of
+        /// <paramref name="terms"/>; they were previously applied to the internal index
+        /// list, which limited the search range instead of the set of terms searched.
+        /// </remarks>
+        public int[] IndexesOf(string[] terms, int start, int len)
+        {
+            int[] result = new int[len];
+            for (int i = 0; i < len; i++)
+            {
+                result[i] = _indexMap.IndexOf(terms[start + i]);
+            }
+            return result;
+        }
+
+        /// <summary>
+        /// Positions recorded for the term at <paramref name="index"/>; an empty array
+        /// when the index is out of range (including -1) or positions were not stored.
+        /// </summary>
+        public int[] GetTermPositions(int index)
+        {
+            int[] positions;
+            if (index >= 0 && index < _indexMap.Count
+                && _positions.TryGetValue(_indexMap[index], out positions))
+            {
+                return positions;
+            }
+            return new int[0];
+        }
+
+        /// <summary>
+        /// Offsets recorded for the term at <paramref name="index"/>; an empty array
+        /// when the index is out of range (including -1) or offsets were not stored.
+        /// </summary>
+        public TermVectorOffsetInfo[] GetOffsets(int index)
+        {
+            TermVectorOffsetInfo[] offsets;
+            if (index >= 0 && index < _indexMap.Count
+                && _tvoi.TryGetValue(_indexMap[index], out offsets))
+            {
+                return offsets;
+            }
+            return new TermVectorOffsetInfo[0];
+        }
+    }
+}
+
+#endif
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/45ba8d83/src/contrib/Highlighter/WeightedSpanTerm.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Highlighter/WeightedSpanTerm.cs b/src/contrib/Highlighter/WeightedSpanTerm.cs
deleted file mode 100644
index 7d94383..0000000
--- a/src/contrib/Highlighter/WeightedSpanTerm.cs
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-
-namespace Lucene.Net.Search.Highlight
-{
-    /// <summary>
-    /// Lightweight class to hold term, Weight, and positions used for scoring this term.
-    /// </summary>
-    public class WeightedSpanTerm : WeightedTerm
-    {
-        private bool _positionSensitive;
-        private readonly List<PositionSpan> _positionSpans = new List<PositionSpan>();
-
-        public WeightedSpanTerm(float weight, String term)
-            : base(weight, term)
-        {
-
-            this._positionSpans = new List<PositionSpan>();
-        }
-
-        public WeightedSpanTerm(float weight, String term, bool positionSensitive)
-            : base(weight, term)
-        {
-
-            this._positionSensitive = positionSensitive;
-        }
-
-        /// <summary>
-        /// Checks to see if this term is valid at <c>position</c>.
-        /// </summary>
-        /// <param name="position">to check against valid term postions</param>
-        /// <returns>true iff this term is a hit at this position</returns>
-        public bool CheckPosition(int position)
-        {
-            // There would probably be a slight speed improvement if PositionSpans
-            // where kept in some sort of priority queue - that way this method
-            // could
-            // bail early without checking each PositionSpan.
-
-            foreach (var positionSpan in _positionSpans)
-            {
-                if (((position >= positionSpan.Start) && (position <= positionSpan.End)))
-                {
-                    return true;
-                }
-            }
-
-            return false;
-        }
-
-        public void AddPositionSpans(List<PositionSpan> positionSpans)
-        {
-            this._positionSpans.AddRange(positionSpans);
-        }
-
-        public bool IsPositionSensitive()
-        {
-            return _positionSensitive;
-        }
-
-        public void SetPositionSensitive(bool positionSensitive)
-        {
-            this._positionSensitive = positionSensitive;
-        }
-
-        public List<PositionSpan> GetPositionSpans()
-        {
-            return _positionSpans;
-        }
-    }
-
-
-    // Utility class to store a Span
-    public class PositionSpan
-    {
-        public int Start { get; private set; }
-        public int End { get; private set; }
-
-        public PositionSpan(int start, int end)
-        {
-            this.Start = start;
-            this.End = end;
-        }
-    }
-}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/45ba8d83/src/contrib/Highlighter/WeightedSpanTermExtractor.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Highlighter/WeightedSpanTermExtractor.cs b/src/contrib/Highlighter/WeightedSpanTermExtractor.cs
deleted file mode 100644
index df7a90f..0000000
--- a/src/contrib/Highlighter/WeightedSpanTermExtractor.cs
+++ /dev/null
@@ -1,667 +0,0 @@
-/*
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- *
-*/
-
-using System;
-using System.Collections.Generic;
-using System.IO;
-using System.Linq;
-using System.Text;
-using Lucene.Net.Analysis;
-using Lucene.Net.Index;
-using Lucene.Net.Index.Memory;
-using Lucene.Net.Search.Spans;
-using Lucene.Net.Store;
-using Lucene.Net.Support;
-using Lucene.Net.Util;
-
-namespace Lucene.Net.Search.Highlight
-{
-    /// <summary>
-    /// Class used to extract <see cref="WeightedSpanTerm"/>s from a <see cref="Query"/>
based on whether 
-    /// <see cref="Term"/>s from the <see cref="Query"/> are contained in a supplied
<see cref="Analysis.TokenStream"/>.
-    /// </summary>
-    public class WeightedSpanTermExtractor
-    {
-        private String fieldName;
-        private TokenStream tokenStream;
-        private IDictionary<String, IndexReader> readers = new HashMap<String, IndexReader>(10);
-        private String defaultField;
-        private bool expandMultiTermQuery;
-        private bool cachedTokenStream;
-        private bool wrapToCaching = true;
-
-        public WeightedSpanTermExtractor()
-        {
-        }
-
-        public WeightedSpanTermExtractor(String defaultField)
-        {
-            if (defaultField != null)
-            {
-                this.defaultField = StringHelper.Intern(defaultField);
-            }
-        }
-
-        private void CloseReaders()
-        {
-            ICollection<IndexReader> readerSet = readers.Values;
-
-            foreach (IndexReader reader in readerSet)
-            {
-                try
-                {
-                    reader.Close();
-                }
-                catch (IOException e)
-                {
-                    // alert?
-                }
-            }
-        }
-
-        /// <summary>
-        /// Fills a <c>Map</c> with <see cref="WeightedSpanTerm"/>s using
the terms from the supplied <c>Query</c>.
-        /// </summary>
-        /// <param name="query">Query to extract Terms from</param>
-        /// <param name="terms">Map to place created WeightedSpanTerms in</param>
-        private void Extract(Query query, IDictionary<String, WeightedSpanTerm> terms)
-        {
-            if (query is BooleanQuery)
-            {
-                BooleanClause[] queryClauses = ((BooleanQuery) query).GetClauses();
-
-                for (int i = 0; i < queryClauses.Length; i++)
-                {
-                    if (!queryClauses[i].IsProhibited)
-                    {
-                        Extract(queryClauses[i].Query, terms);
-                    }
-                }
-            }
-            else if (query is PhraseQuery)
-            {
-                PhraseQuery phraseQuery = ((PhraseQuery) query);
-                Term[] phraseQueryTerms = phraseQuery.GetTerms();
-                SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.Length];
-                for (int i = 0; i < phraseQueryTerms.Length; i++)
-                {
-                    clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
-                }
-                int slop = phraseQuery.Slop;
-                int[] positions = phraseQuery.GetPositions();
-                // add largest position increment to slop
-                if (positions.Length > 0)
-                {
-                    int lastPos = positions[0];
-                    int largestInc = 0;
-                    int sz = positions.Length;
-                    for (int i = 1; i < sz; i++)
-                    {
-                        int pos = positions[i];
-                        int inc = pos - lastPos;
-                        if (inc > largestInc)
-                        {
-                            largestInc = inc;
-                        }
-                        lastPos = pos;
-                    }
-                    if (largestInc > 1)
-                    {
-                        slop += largestInc;
-                    }
-                }
-
-                bool inorder = slop == 0;
-
-                SpanNearQuery sp = new SpanNearQuery(clauses, slop, inorder);
-                sp.Boost = query.Boost;
-                ExtractWeightedSpanTerms(terms, sp);
-            }
-            else if (query is TermQuery)
-            {
-                ExtractWeightedTerms(terms, query);
-            }
-            else if (query is SpanQuery)
-            {
-                ExtractWeightedSpanTerms(terms, (SpanQuery) query);
-            }
-            else if (query is FilteredQuery)
-            {
-                Extract(((FilteredQuery) query).Query, terms);
-            }
-            else if (query is DisjunctionMaxQuery)
-            {
-                foreach (var q in ((DisjunctionMaxQuery) query))
-                {
-                    Extract(q, terms);
-                }
-            }
-            else if (query is MultiTermQuery && expandMultiTermQuery)
-            {
-                MultiTermQuery mtq = ((MultiTermQuery) query);
-                if (mtq.RewriteMethod != MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE)
-                {
-                    mtq = (MultiTermQuery) mtq.Clone();
-                    mtq.RewriteMethod = MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE;
-                    query = mtq;
-                }
-                FakeReader fReader = new FakeReader();
-                MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE.Rewrite(fReader, mtq);
-                if (fReader.Field != null)
-                {
-                    IndexReader ir = GetReaderForField(fReader.Field);
-                    Extract(query.Rewrite(ir), terms);
-                }
-            }
-            else if (query is MultiPhraseQuery)
-            {
-                MultiPhraseQuery mpq = (MultiPhraseQuery) query;
-                IList<Term[]> termArrays = mpq.GetTermArrays();
-                int[] positions = mpq.GetPositions();
-                if (positions.Length > 0)
-                {
-
-                    int maxPosition = positions[positions.Length - 1];
-                    for (int i = 0; i < positions.Length - 1; ++i)
-                    {
-                        if (positions[i] > maxPosition)
-                        {
-                            maxPosition = positions[i];
-                        }
-                    }
-
-                    var disjunctLists = new List<SpanQuery>[maxPosition + 1];
-                    int distinctPositions = 0;
-
-                    for (int i = 0; i < termArrays.Count; ++i)
-                    {
-                        Term[] termArray = termArrays[i];
-                        List<SpanQuery> disjuncts = disjunctLists[positions[i]];
-                        if (disjuncts == null)
-                        {
-                            disjuncts = (disjunctLists[positions[i]] = new List<SpanQuery>(termArray.Length));
-                            ++distinctPositions;
-                        }
-                        for (int j = 0; j < termArray.Length; ++j)
-                        {
-                            disjuncts.Add(new SpanTermQuery(termArray[j]));
-                        }
-                    }
-
-                    int positionGaps = 0;
-                    int position = 0;
-                    SpanQuery[] clauses = new SpanQuery[distinctPositions];
-                    for (int i = 0; i < disjunctLists.Length; ++i)
-                    {
-                        List<SpanQuery> disjuncts = disjunctLists[i];
-                        if (disjuncts != null)
-                        {
-                            clauses[position++] = new SpanOrQuery(disjuncts.ToArray());
-                        }
-                        else
-                        {
-                            ++positionGaps;
-                        }
-                    }
-
-                    int slop = mpq.Slop;
-                    bool inorder = (slop == 0);
-
-                    SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder);
-                    sp.Boost = query.Boost;
-                    ExtractWeightedSpanTerms(terms, sp);
-                }
-            }
-        }
-
-        /// <summary>
-        /// Fills a <c>Map</c> with <see cref="WeightedSpanTerm"/>s using
the terms from the supplied <c>SpanQuery</c>.
-        /// </summary>
-        /// <param name="terms">Map to place created WeightedSpanTerms in</param>
-        /// <param name="spanQuery">SpanQuery to extract Terms from</param>
-        private void ExtractWeightedSpanTerms(IDictionary<String, WeightedSpanTerm>
terms, SpanQuery spanQuery)
-        {
-            HashSet<String> fieldNames;
-
-            if (fieldName == null)
-            {
-                fieldNames = new HashSet<String>();
-                CollectSpanQueryFields(spanQuery, fieldNames);
-            }
-            else
-            {
-                fieldNames = new HashSet<String>();
-                fieldNames.Add(fieldName);
-            }
-            // To support the use of the default field name
-            if (defaultField != null)
-            {
-                fieldNames.Add(defaultField);
-            }
-
-            IDictionary<String, SpanQuery> queries = new HashMap<String, SpanQuery>();
-
-            var nonWeightedTerms = Support.Compatibility.SetFactory.CreateHashSet<Term>();
-            bool mustRewriteQuery = MustRewriteQuery(spanQuery);
-            if (mustRewriteQuery)
-            {
-                foreach (String field in fieldNames)
-                {
-                    SpanQuery rewrittenQuery = (SpanQuery) spanQuery.Rewrite(GetReaderForField(field));
-                    queries[field] = rewrittenQuery;
-                    rewrittenQuery.ExtractTerms(nonWeightedTerms);
-                }
-            }
-            else
-            {
-                spanQuery.ExtractTerms(nonWeightedTerms);
-            }
-
-            List<PositionSpan> spanPositions = new List<PositionSpan>();
-
-            foreach (String field in fieldNames)
-            {
-
-                IndexReader reader = GetReaderForField(field);
-                Spans.Spans spans;
-                if (mustRewriteQuery)
-                {
-                    spans = queries[field].GetSpans(reader);
-                }
-                else
-                {
-                    spans = spanQuery.GetSpans(reader);
-                }
-
-
-                // collect span positions
-                while (spans.Next())
-                {
-                    spanPositions.Add(new PositionSpan(spans.Start(), spans.End() - 1));
-                }
-
-            }
-
-            if (spanPositions.Count == 0)
-            {
-                // no spans found
-                return;
-            }
-
-            foreach (Term queryTerm in nonWeightedTerms)
-            {
-
-                if (FieldNameComparator(queryTerm.Field))
-                {
-                    WeightedSpanTerm weightedSpanTerm = terms[queryTerm.Text];
-
-                    if (weightedSpanTerm == null)
-                    {
-                        weightedSpanTerm = new WeightedSpanTerm(spanQuery.Boost, queryTerm.Text);
-                        weightedSpanTerm.AddPositionSpans(spanPositions);
-                        weightedSpanTerm.SetPositionSensitive(true);
-                        terms[queryTerm.Text] = weightedSpanTerm;
-                    }
-                    else
-                    {
-                        if (spanPositions.Count > 0)
-                        {
-                            weightedSpanTerm.AddPositionSpans(spanPositions);
-                        }
-                    }
-                }
-            }
-        }
-
-        /// <summary>
-        /// Fills a <c>Map</c> with <see cref="WeightedSpanTerm"/>s using
the terms from the supplied <c>Query</c>.
-        /// </summary>
-        /// <param name="terms"></param>
-        /// <param name="query"></param>
-        private void ExtractWeightedTerms(IDictionary<String, WeightedSpanTerm> terms,
Query query)
-        {
-            var nonWeightedTerms = Support.Compatibility.SetFactory.CreateHashSet<Term>();
-            query.ExtractTerms(nonWeightedTerms);
-
-            foreach (Term queryTerm in nonWeightedTerms)
-            {
-
-                if (FieldNameComparator(queryTerm.Field))
-                {
-                    WeightedSpanTerm weightedSpanTerm = new WeightedSpanTerm(query.Boost,
queryTerm.Text);
-                    terms[queryTerm.Text] = weightedSpanTerm;
-                }
-            }
-        }
-
-        /// <summary>
-        /// Necessary to implement matches for queries against <c>defaultField</c>
-        /// </summary>
-        private bool FieldNameComparator(String fieldNameToCheck)
-        {
-            bool rv = fieldName == null || fieldNameToCheck == fieldName
-                      || fieldNameToCheck == defaultField;
-            return rv;
-        }
-
-        private IndexReader GetReaderForField(String field)
-        {
-            if (wrapToCaching && !cachedTokenStream && !(tokenStream is CachingTokenFilter))
-            {
-                tokenStream = new CachingTokenFilter(tokenStream);
-                cachedTokenStream = true;
-            }
-            IndexReader reader = readers[field];
-            if (reader == null)
-            {
-                MemoryIndex indexer = new MemoryIndex();
-                indexer.AddField(field, tokenStream);
-                tokenStream.Reset();
-                IndexSearcher searcher = indexer.CreateSearcher();
-                reader = searcher.IndexReader;
-                readers[field] = reader;
-            }
-
-            return reader;
-        }
-
-        /// <summary>
-        /// Creates a Map of <c>WeightedSpanTerms</c> from the given <c>Query</c>
and <c>TokenStream</c>.
-        /// </summary>
-        /// <param name="query">query that caused hit</param>
-        /// <param name="tokenStream">TokenStream of text to be highlighted</param>
-        /// <returns>Map containing WeightedSpanTerms</returns>
-        public IDictionary<String, WeightedSpanTerm> GetWeightedSpanTerms(Query query,
TokenStream tokenStream)
-        {
-            return GetWeightedSpanTerms(query, tokenStream, null);
-        }
-
-
-        /// <summary>
-        /// Creates a Map of <c>WeightedSpanTerms</c> from the given <c>Query</c>
and <c>TokenStream</c>.
-        /// </summary>
-        /// <param name="query">query that caused hit</param>
-        /// <param name="tokenStream">tokenStream of text to be highlighted</param>
-        /// <param name="fieldName">restricts Term's used based on field name</param>
-        /// <returns>Map containing WeightedSpanTerms</returns>
-        public IDictionary<String, WeightedSpanTerm> GetWeightedSpanTerms(Query query,
TokenStream tokenStream,
-                                                                          String fieldName)
-        {
-            if (fieldName != null)
-            {
-                this.fieldName = StringHelper.Intern(fieldName);
-            }
-            else
-            {
-                this.fieldName = null;
-            }
-
-            IDictionary<String, WeightedSpanTerm> terms = new PositionCheckingMap<String>();
-            this.tokenStream = tokenStream;
-            try
-            {
-                Extract(query, terms);
-            }
-            finally
-            {
-                CloseReaders();
-            }
-
-            return terms;
-        }
-
-        /// <summary>
-        /// Creates a Map of <c>WeightedSpanTerms</c> from the given <c>Query</c>
and <c>TokenStream</c>. Uses a supplied
-        /// <c>IndexReader</c> to properly Weight terms (for gradient highlighting).
-        /// </summary>
-        /// <param name="query">Query that caused hit</param>
-        /// <param name="tokenStream">Tokenstream of text to be highlighted</param>
-        /// <param name="fieldName">restricts Term's used based on field name</param>
-        /// <param name="reader">to use for scoring</param>
-        /// <returns>Map of WeightedSpanTerms with quasi tf/idf scores</returns>
-        public IDictionary<String, WeightedSpanTerm> GetWeightedSpanTermsWithScores(Query
query, TokenStream tokenStream,
-                                                                                    String
fieldName, IndexReader reader)
-        {
-            if (fieldName != null)
-            {
-                this.fieldName = StringHelper.Intern(fieldName);
-            }
-            else
-            {
-                this.fieldName = null;
-            }
-            this.tokenStream = tokenStream;
-
-            IDictionary<String, WeightedSpanTerm> terms = new PositionCheckingMap<String>();
-            Extract(query, terms);
-
-            int totalNumDocs = reader.NumDocs();
-            var weightedTerms = terms.Keys;
-
-            try
-            {
-                foreach (var wt in weightedTerms)
-                {
-                    WeightedSpanTerm weightedSpanTerm = terms[wt];
-                    int docFreq = reader.DocFreq(new Term(fieldName, weightedSpanTerm.Term));
-                    // docFreq counts deletes
-                    if (totalNumDocs < docFreq)
-                    {
-                        docFreq = totalNumDocs;
-                    }
-                    // IDF algorithm taken from DefaultSimilarity class
-                    float idf = (float) (Math.Log((float) totalNumDocs/(double) (docFreq
+ 1)) + 1.0);
-                    weightedSpanTerm.Weight *= idf;
-                }
-            }
-            finally
-            {
-
-                CloseReaders();
-            }
-
-            return terms;
-        }
-
-        private void CollectSpanQueryFields(SpanQuery spanQuery, HashSet<String> fieldNames)
-        {
-            if (spanQuery is FieldMaskingSpanQuery)
-            {
-                CollectSpanQueryFields(((FieldMaskingSpanQuery) spanQuery).MaskedQuery, fieldNames);
-            }
-            else if (spanQuery is SpanFirstQuery)
-            {
-                CollectSpanQueryFields(((SpanFirstQuery) spanQuery).Match, fieldNames);
-            }
-            else if (spanQuery is SpanNearQuery)
-            {
-                foreach (SpanQuery clause in ((SpanNearQuery) spanQuery).GetClauses())
-                {
-                    CollectSpanQueryFields(clause, fieldNames);
-                }
-            }
-            else if (spanQuery is SpanNotQuery)
-            {
-                CollectSpanQueryFields(((SpanNotQuery) spanQuery).Include, fieldNames);
-            }
-            else if (spanQuery is SpanOrQuery)
-            {
-                foreach (SpanQuery clause in ((SpanOrQuery) spanQuery).GetClauses())
-                {
-                    CollectSpanQueryFields(clause, fieldNames);
-                }
-            }
-            else
-            {
-                fieldNames.Add(spanQuery.Field);
-            }
-        }
-
-        private bool MustRewriteQuery(SpanQuery spanQuery)
-        {
-            if (!expandMultiTermQuery)
-            {
-                return false; // Will throw UnsupportedOperationException in case of a SpanRegexQuery.
-            }
-            else if (spanQuery is FieldMaskingSpanQuery)
-            {
-                return MustRewriteQuery(((FieldMaskingSpanQuery)spanQuery).MaskedQuery);
-            }
-            else if (spanQuery is SpanFirstQuery)
-            {
-                return MustRewriteQuery(((SpanFirstQuery)spanQuery).Match);
-            }
-            else if (spanQuery is SpanNearQuery)
-            {
-                foreach (SpanQuery clause in ((SpanNearQuery) spanQuery).GetClauses())
-                {
-                    if (MustRewriteQuery(clause))
-                    {
-                        return true;
-                    }
-                }
-                return false;
-            }
-            else if (spanQuery is SpanNotQuery)
-            {
-                SpanNotQuery spanNotQuery = (SpanNotQuery) spanQuery;
-                return MustRewriteQuery(spanNotQuery.Include) || MustRewriteQuery(spanNotQuery.Exclude);
-            }
-            else if (spanQuery is SpanOrQuery)
-            {
-                foreach (SpanQuery clause in ((SpanOrQuery) spanQuery).GetClauses())
-                {
-                    if (MustRewriteQuery(clause))
-                    {
-                        return true;
-                    }
-                }
-                return false;
-            }
-            else if (spanQuery is SpanTermQuery)
-            {
-                return false;
-            }
-            else
-            {
-                return true;
-            }
-        }
-
-        
-        /// <summary>
-        /// This class makes sure that if both position sensitive and insensitive
-        /// versions of the same term are added, the position insensitive one wins.
-        /// </summary>
-        /// <typeparam name="K"></typeparam>
-        private class PositionCheckingMap<K> : HashMap<K, WeightedSpanTerm>
-        {
-            public PositionCheckingMap()
-            {
-
-            }
-
-            public PositionCheckingMap(IEnumerable<KeyValuePair<K, WeightedSpanTerm>>
m)
-            {
-                PutAll(m);
-            }
-
-            public void PutAll(IEnumerable<KeyValuePair<K, WeightedSpanTerm>>
m)
-            {
-                foreach (var entry in m)
-                {
-                    Add(entry.Key, entry.Value);
-                }
-            }
-
-            public override void Add(K key, WeightedSpanTerm value)
-            {
-                base.Add(key, value);
-                WeightedSpanTerm prev = this[key];
-
-                if (prev == null) return;
-
-                WeightedSpanTerm prevTerm = prev;
-                WeightedSpanTerm newTerm = value;
-                if (!prevTerm.IsPositionSensitive())
-                {
-                    newTerm.SetPositionSensitive(false);
-                }
-            }
-
-        }
-
-        public bool ExpandMultiTermQuery
-        {
-            set { this.expandMultiTermQuery = value; }
-            get { return expandMultiTermQuery; }
-        }
-
-        public bool IsCachedTokenStream
-        {
-            get { return cachedTokenStream; }
-        }
-
-        public TokenStream TokenStream
-        {
-            get { return tokenStream; }
-        }
-
-
-        /// <summary>
-        /// By default, <see cref="Analysis.TokenStream"/>s that are not of the type
-        /// <see cref="CachingTokenFilter"/> are wrapped in a <see cref="CachingTokenFilter"/>
to
-        /// <see cref="Analysis.TokenStream"/> impl and you don't want it to be wrapped,
set this to
-        /// false.
-        /// </summary>
-        public void SetWrapIfNotCachingTokenFilter(bool wrap)
-        {
-            this.wrapToCaching = wrap;
-        }
-
-        /// <summary>
-        /// A fake IndexReader class to extract the field from a MultiTermQuery
-        /// </summary>
-        protected internal sealed class FakeReader : FilterIndexReader
-        {
-
-            private static IndexReader EMPTY_MEMORY_INDEX_READER = new MemoryIndex().CreateSearcher().IndexReader;
-
-            public String Field { get; private set; }
-
-            protected internal FakeReader()
-                : base(EMPTY_MEMORY_INDEX_READER)
-            {
-
-            }
-
-            public override TermEnum Terms(Term t)
-            {
-                // only set first fieldname, maybe use a Set?
-                if (t != null && Field == null)
-                    Field = t.Field;
-                return base.Terms(t);
-            }
-
-
-        }
-    }
-}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/45ba8d83/src/contrib/Highlighter/WeightedTerm.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Highlighter/WeightedTerm.cs b/src/contrib/Highlighter/WeightedTerm.cs
deleted file mode 100644
index cfe3e12..0000000
--- a/src/contrib/Highlighter/WeightedTerm.cs
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- * 
- * http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-using System;
-
-namespace Lucene.Net.Search.Highlight
-{
-    /// <summary>
-    /// Lightweight class to hold term and a Weight value used for scoring this term
-    /// </summary>
-    public class WeightedTerm
-    {
-        public WeightedTerm(float weight, String term)
-        {
-            this.Weight = weight;
-            this.Term = term;
-        }
-
-        /// <summary>
-        /// the term value (stemmed)
-        /// </summary>
-        public string Term { get; set; }
-
-        /// <summary>
-        /// the Weight associated with this term
-        /// </summary>
-        /// <value> </value>
-        public float Weight { get; set; }
-    }
-}
\ No newline at end of file


Mime
View raw message