lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From paulir...@apache.org
Subject [17/53] [abbrv] Finish Memory and VectorHighlighter
Date Thu, 07 Nov 2013 13:53:32 GMT
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/86d417ed/src/contrib/Highlighter/VectorHighlight/FieldTermStack.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Highlighter/VectorHighlight/FieldTermStack.cs b/src/contrib/Highlighter/VectorHighlight/FieldTermStack.cs
index d3c8220..7362eef 100644
--- a/src/contrib/Highlighter/VectorHighlight/FieldTermStack.cs
+++ b/src/contrib/Highlighter/VectorHighlight/FieldTermStack.cs
@@ -23,41 +23,42 @@ using Lucene.Net.Analysis;
 using Lucene.Net.Documents;
 using Lucene.Net.Search;
 using Lucene.Net.Index;
-using Lucene.Net.QueryParsers;
 using Lucene.Net.Store;
+using Lucene.Net.Util;
+using Lucene.Net.Support;
 
 
-namespace Lucene.Net.Search.Vectorhighlight
+namespace Lucene.Net.Search.VectorHighlight
 {
-   
-   /// <summary>
-   /// <c>FieldTermStack</c> is a stack that keeps query terms in the specified field
-   /// of the document to be highlighted.
-   /// </summary>
+
+    /// <summary>
+    /// <c>FieldTermStack</c> is a stack that keeps query terms in the specified field
+    /// of the document to be highlighted.
+    /// </summary>
     public class FieldTermStack
     {
-        private String fieldName;
-        public LinkedList<TermInfo> termList = new LinkedList<TermInfo>();
+        private readonly String fieldName;
+        internal List<TermInfo> termList = new List<TermInfo>();
 
-        public static void Main(String[] args)
-        {
-            Analyzer analyzer = new WhitespaceAnalyzer();
-            QueryParser parser = new QueryParser(Util.Version.LUCENE_CURRENT, "f", analyzer);
-            Query query = parser.Parse("a x:b");
-            FieldQuery fieldQuery = new FieldQuery(query, true, false);
-
-            Directory dir = new RAMDirectory();
-            IndexWriter writer = new IndexWriter(dir, analyzer, IndexWriter.MaxFieldLength.LIMITED);
-            Document doc = new Document();
-            doc.Add(new Field("f", "a a a b b c a b b c d e f", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
-            doc.Add(new Field("f", "b a b a f", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
-            writer.AddDocument(doc);
-            writer.Close();
-
-            IndexReader reader = IndexReader.Open(dir,true);
-            FieldTermStack ftl = new FieldTermStack(reader, 0, "f", fieldQuery);
-            reader.Close();
-        }
+        //public static void Main(String[] args)
+        //{
+        //    Analyzer analyzer = new WhitespaceAnalyzer();
+        //    QueryParser parser = new QueryParser(Util.Version.LUCENE_CURRENT, "f", analyzer);
+        //    Query query = parser.Parse("a x:b");
+        //    FieldQuery fieldQuery = new FieldQuery(query, true, false);
+
+        //    Directory dir = new RAMDirectory();
+        //    IndexWriter writer = new IndexWriter(dir, analyzer, IndexWriter.MaxFieldLength.LIMITED);
+        //    Document doc = new Document();
+        //    doc.Add(new Field("f", "a a a b b c a b b c d e f", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
+        //    doc.Add(new Field("f", "b a b a f", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
+        //    writer.AddDocument(doc);
+        //    writer.Close();
+
+        //    IndexReader reader = IndexReader.Open(dir,true);
+        //    FieldTermStack ftl = new FieldTermStack(reader, 0, "f", fieldQuery);
+        //    reader.Close();
+        //}
 
         /// <summary>
         /// a constructor. 
@@ -103,65 +104,79 @@ namespace Lucene.Net.Search.Vectorhighlight
         {
             this.fieldName = fieldName;
 
-            TermFreqVector tfv = reader.GetTermFreqVector(docId, fieldName);
-            if (tfv == null) return; // just return to make null snippets
-            TermPositionVector tpv = null;
-            try
+            ISet<String> termSet = fieldQuery.GetTermSet(fieldName);
+            // just return to make null snippet if un-matched fieldName specified when fieldMatch == true
+            if (termSet == null) return;
+
+            Fields vectors = reader.GetTermVectors(docId);
+            if (vectors == null)
             {
-                tpv = (TermPositionVector)tfv;
+                // null snippet
+                return;
             }
-            catch (InvalidCastException e)
+
+            Terms vector = vectors.Terms(fieldName);
+            if (vector == null)
             {
-                return; // just return to make null snippets
+                // null snippet
+                return;
             }
 
-            List<String> termSet = fieldQuery.getTermSet(fieldName);
-            // just return to make null snippet if un-matched fieldName specified when fieldMatch == true
-            if (termSet == null) return;
+            CharsRef spare = new CharsRef();
+            TermsEnum termsEnum = vector.Iterator(null);
+            DocsAndPositionsEnum dpEnum = null;
+            BytesRef text;
+
+            int numDocs = reader.MaxDoc;
 
-            foreach (String term in tpv.GetTerms())
+            while ((text = termsEnum.Next()) != null)
             {
-                if (!termSet.Contains(term)) continue;
-                int index = tpv.IndexOf(term);
-                TermVectorOffsetInfo[] tvois = tpv.GetOffsets(index);
-                if (tvois == null) return; // just return to make null snippets
-                int[] poss = tpv.GetTermPositions(index);
-                if (poss == null) return; // just return to make null snippets
-                for (int i = 0; i < tvois.Length; i++)
-                    termList.AddLast(new TermInfo(term, tvois[i].GetStartOffset(), tvois[i].GetEndOffset(), poss[i]));
+                UnicodeUtil.UTF8toUTF16(text, spare);
+                String term = spare.ToString();
+                if (!termSet.Contains(term))
+                {
+                    continue;
+                }
+                dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
+                if (dpEnum == null)
+                {
+                    // null snippet
+                    return;
+                }
+
+                dpEnum.NextDoc();
+
+                // For weight look here: http://lucene.apache.org/core/3_6_0/api/core/org/apache/lucene/search/DefaultSimilarity.html
+                float weight = (float)(Math.Log(numDocs / (double)(reader.DocFreq(new Term(fieldName, text)) + 1)) + 1.0);
+
+                int freq = dpEnum.Freq;
+
+                for (int i = 0; i < freq; i++)
+                {
+                    int pos = dpEnum.NextPosition();
+                    if (dpEnum.StartOffset < 0)
+                    {
+                        return; // no offsets, null snippet
+                    }
+                    termList.Add(new TermInfo(term, dpEnum.StartOffset, dpEnum.EndOffset, pos, weight));
+                }
             }
 
             // sort by position
-            //Collections.sort(termList);
-            Sort(termList);
+            termList.Sort();
         }
 #endif
 
-        void Sort(LinkedList<TermInfo> linkList)
-        {
-            TermInfo[] arr = new TermInfo[linkList.Count];
-            linkList.CopyTo(arr, 0);
-            Array.Sort(arr, new Comparison<TermInfo>(PosComparer));
-
-            linkList.Clear();
-            foreach (TermInfo t in arr) linkList.AddLast(t);
-        }
-
-        int PosComparer(TermInfo t1,TermInfo t2)
+        /// <summary>
+        /// Gets the name of the field whose query terms this stack holds.
+        /// </summary>
+        /// <value> field name </value>
+        public string FieldName
         {
-            return t1.Position - t2.Position;
+            get { return fieldName; }
         }
 
-       /// <summary>
-       /// 
-       /// </summary>
-       /// <value> field name </value>
-       public string FieldName
-       {
-           get { return fieldName; }
-       }
-
-       /// <summary>
+        /// <summary>
         /// 
         /// </summary>
         /// <returns>the top TermInfo object of the stack</returns>
@@ -169,19 +184,18 @@ namespace Lucene.Net.Search.Vectorhighlight
         {
             if (termList.Count == 0) return null;
 
-            LinkedListNode<TermInfo> top =  termList.First;
-            termList.RemoveFirst();
-            return top.Value;
+            TermInfo last = termList[termList.Count - 1];
+            termList.RemoveAt(termList.Count - 1);
+            return last;
         }
-                
+
         /// <summary>
         /// 
         /// </summary>
         /// <param name="termInfo">the TermInfo object to be put on the top of the stack</param>
         public void Push(TermInfo termInfo)
         {
-            // termList.push( termInfo );  // avoid Java 1.6 feature
-            termList.AddFirst(termInfo);
+            termList.Add(termInfo);
         }
 
         /// <summary>
@@ -195,18 +209,21 @@ namespace Lucene.Net.Search.Vectorhighlight
 
         public class TermInfo : IComparable<TermInfo>
         {
+            private readonly String text;
+            private readonly int startOffset;
+            private readonly int endOffset;
+            private readonly int position;
 
-            String text;
-            int startOffset;
-            int endOffset;
-            int position;
-
-            public TermInfo(String text, int startOffset, int endOffset, int position)
+            // IDF-weight of this term
+            private readonly float weight;
+            
+            public TermInfo(String text, int startOffset, int endOffset, int position, float weight)
             {
                 this.text = text;
                 this.startOffset = startOffset;
                 this.endOffset = endOffset;
                 this.position = position;
+                this.weight = weight;
             }
 
             public string Text
@@ -229,6 +246,11 @@ namespace Lucene.Net.Search.Vectorhighlight
                 get { return position; }
             }
 
+            public float Weight
+            {
+                get { return weight; }
+            }
+
             public override string ToString()
             {
                 StringBuilder sb = new StringBuilder();

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/86d417ed/src/contrib/Highlighter/VectorHighlight/FragListBuilder.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Highlighter/VectorHighlight/FragListBuilder.cs b/src/contrib/Highlighter/VectorHighlight/FragListBuilder.cs
deleted file mode 100644
index d8f18df..0000000
--- a/src/contrib/Highlighter/VectorHighlight/FragListBuilder.cs
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-using System;
-using System.Collections.Generic;
-using System.Text;
-
-namespace Lucene.Net.Search.Vectorhighlight
-{
-    /// <summary>
-    /// 
-    /// FragListBuilder is an interface for FieldFragList builder classes.
-    /// A FragListBuilder class can be plugged in to Highlighter.
-     /// </summary>
-    public interface FragListBuilder
-    {
-        /// <summary>
-        /// create a FieldFragList. 
-        /// </summary>
-        /// <param name="fieldPhraseList">FieldPhraseList object</param>
-        /// <param name="fragCharSize">the length (number of chars) of a fragment</param>
-        /// <returns>the created FieldFragList object</returns>
-        FieldFragList CreateFieldFragList(FieldPhraseList fieldPhraseList, int fragCharSize);
-    }
-}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/86d417ed/src/contrib/Highlighter/VectorHighlight/FragmentsBuilder.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Highlighter/VectorHighlight/FragmentsBuilder.cs b/src/contrib/Highlighter/VectorHighlight/FragmentsBuilder.cs
deleted file mode 100644
index b9df295..0000000
--- a/src/contrib/Highlighter/VectorHighlight/FragmentsBuilder.cs
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-using System;
-using System.Collections.Generic;
-using System.Text;
-
-using Lucene.Net.Documents;
-using Lucene.Net.Search;
-using Lucene.Net.Index;
-
-namespace Lucene.Net.Search.Vectorhighlight
-{
-    /// <summary>
-    /// FragmentsBuilder is an interface for fragments (snippets) builder classes.
-    /// A FragmentsBuilder class can be plugged in to Highlighter.
-    /// </summary>
-    public interface FragmentsBuilder
-    {
-        /// <summary>
-        /// create a fragment.
-        /// </summary>
-        /// <param name="reader">IndexReader of the index</param>
-        /// <param name="docId">document id to be highlighted</param>
-        /// <param name="fieldName">field of the document to be highlighted</param>
-        /// <param name="fieldFragList">FieldFragList object</param>
-        /// <returns>a created fragment or null when no fragment created</returns>
-        String CreateFragment( IndexReader reader, int docId, String fieldName, FieldFragList fieldFragList ) ;
-
-        
-        /// <summary>
-        /// create multiple fragments.
-        /// </summary>
-        /// <param name="reader">IndexReader of the index</param>
-        /// <param name="docId">document id to be highlighted</param>
-        /// <param name="fieldName">field of the document to be highlighted</param>
-        /// <param name="fieldFragList">ieldFragList object</param>
-        /// <param name="maxNumFragments">maximum number of fragments</param>
-        /// <returns>created fragments or null when no fragments created. Size of the array can be less than maxNumFragments</returns>
-        String[] CreateFragments( IndexReader reader, int docId, String fieldName, FieldFragList fieldFragList, int maxNumFragments ) ;
-    }
-}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/86d417ed/src/contrib/Highlighter/VectorHighlight/IBoundaryScanner.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Highlighter/VectorHighlight/IBoundaryScanner.cs b/src/contrib/Highlighter/VectorHighlight/IBoundaryScanner.cs
new file mode 100644
index 0000000..6d5c605
--- /dev/null
+++ b/src/contrib/Highlighter/VectorHighlight/IBoundaryScanner.cs
@@ -0,0 +1,13 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net.Search.VectorHighlight
+{
+    public interface IBoundaryScanner
+    {
+        int FindStartOffset(StringBuilder buffer, int start);
+        int FindEndOffset(StringBuilder buffer, int start);
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/86d417ed/src/contrib/Highlighter/VectorHighlight/IFragListBuilder.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Highlighter/VectorHighlight/IFragListBuilder.cs b/src/contrib/Highlighter/VectorHighlight/IFragListBuilder.cs
new file mode 100644
index 0000000..494fbf3
--- /dev/null
+++ b/src/contrib/Highlighter/VectorHighlight/IFragListBuilder.cs
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using System.Text;
+
+namespace Lucene.Net.Search.VectorHighlight
+{
+    /// <summary>
+    /// 
+    /// IFragListBuilder is an interface for FieldFragList builder classes.
+    /// An IFragListBuilder class can be plugged in to Highlighter.
+    /// </summary>
+    public interface IFragListBuilder
+    {
+        /// <summary>
+        /// create a FieldFragList. 
+        /// </summary>
+        /// <param name="fieldPhraseList">FieldPhraseList object</param>
+        /// <param name="fragCharSize">the length (number of chars) of a fragment</param>
+        /// <returns>the created FieldFragList object</returns>
+        FieldFragList CreateFieldFragList(FieldPhraseList fieldPhraseList, int fragCharSize);
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/86d417ed/src/contrib/Highlighter/VectorHighlight/IFragmentsBuilder.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Highlighter/VectorHighlight/IFragmentsBuilder.cs b/src/contrib/Highlighter/VectorHighlight/IFragmentsBuilder.cs
new file mode 100644
index 0000000..f4e7d3d
--- /dev/null
+++ b/src/contrib/Highlighter/VectorHighlight/IFragmentsBuilder.cs
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using System.Text;
+
+using Lucene.Net.Documents;
+using Lucene.Net.Search;
+using Lucene.Net.Index;
+using Lucene.Net.Search.Highlight;
+
+namespace Lucene.Net.Search.VectorHighlight
+{
+    /// <summary>
+    /// IFragmentsBuilder is an interface for fragments (snippets) builder classes.
+    /// An IFragmentsBuilder class can be plugged in to Highlighter.
+    /// </summary>
+    public interface IFragmentsBuilder
+    {
+        /// <summary>
+        /// create a fragment.
+        /// </summary>
+        /// <param name="reader">IndexReader of the index</param>
+        /// <param name="docId">document id to be highlighted</param>
+        /// <param name="fieldName">field of the document to be highlighted</param>
+        /// <param name="fieldFragList">FieldFragList object</param>
+        /// <returns>a created fragment or null when no fragment created</returns>
+        String CreateFragment(IndexReader reader, int docId, String fieldName, FieldFragList fieldFragList);
+
+
+        /// <summary>
+        /// create multiple fragments.
+        /// </summary>
+        /// <param name="reader">IndexReader of the index</param>
+        /// <param name="docId">document id to be highlighted</param>
+        /// <param name="fieldName">field of the document to be highlighted</param>
+        /// <param name="fieldFragList">FieldFragList object</param>
+        /// <param name="maxNumFragments">maximum number of fragments</param>
+        /// <returns>created fragments or null when no fragments created. Size of the array can be less than maxNumFragments</returns>
+        String[] CreateFragments(IndexReader reader, int docId, String fieldName, FieldFragList fieldFragList, int maxNumFragments);
+
+        string CreateFragment(IndexReader reader, int docId, string fieldName, FieldFragList fieldFragList, String[] preTags, String[] postTags, IEncoder encoder);
+        
+        String[] CreateFragments(IndexReader reader, int docId, string fieldName, FieldFragList fieldFragList, int maxNumFragments, String[] preTags, String[] postTags, IEncoder encoder);
+
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/86d417ed/src/contrib/Highlighter/VectorHighlight/ScoreOrderFragmentsBuilder.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Highlighter/VectorHighlight/ScoreOrderFragmentsBuilder.cs b/src/contrib/Highlighter/VectorHighlight/ScoreOrderFragmentsBuilder.cs
index 4fda622..0aec3d0 100644
--- a/src/contrib/Highlighter/VectorHighlight/ScoreOrderFragmentsBuilder.cs
+++ b/src/contrib/Highlighter/VectorHighlight/ScoreOrderFragmentsBuilder.cs
@@ -15,58 +15,76 @@
  * limitations under the License.
  */
 
+using Lucene.Net.Support;
 using System;
 using System.Collections.Generic;
 using System.Text;
+using WeightedFragInfo = Lucene.Net.Search.VectorHighlight.FieldFragList.WeightedFragInfo;
 
-using WeightedFragInfo = Lucene.Net.Search.Vectorhighlight.FieldFragList.WeightedFragInfo;
-
-namespace Lucene.Net.Search.Vectorhighlight
+namespace Lucene.Net.Search.VectorHighlight
 {
     /*
- * An implementation of FragmentsBuilder that outputs score-order fragments.
- */
+    * An implementation of FragmentsBuilder that outputs score-order fragments.
+    */
     public class ScoreOrderFragmentsBuilder : BaseFragmentsBuilder
     {
-
         /// <summary>
         /// a constructor.
         /// </summary>
-        public ScoreOrderFragmentsBuilder():base()
+        public ScoreOrderFragmentsBuilder()
+            : base()
         {
         }
 
-
         /// <summary>
         /// a constructor.
         /// </summary>
         /// <param name="preTags">array of pre-tags for markup terms</param>
         /// <param name="postTags">array of post-tags for markup terms</param>
-        public ScoreOrderFragmentsBuilder(String[] preTags, String[] postTags):  base(preTags, postTags)
+        public ScoreOrderFragmentsBuilder(String[] preTags, String[] postTags)
+            : base(preTags, postTags)
         {
         }
 
-        /// <summary>
-        /// Sort by score the list of WeightedFragInfo
-        /// </summary>
-        public override List<WeightedFragInfo> GetWeightedFragInfoList(List<WeightedFragInfo> src)
+        public ScoreOrderFragmentsBuilder(IBoundaryScanner bs)
+            : base(bs)
         {
-            src.Sort(new ScoreComparator());
-            return src;
         }
 
-        public class ScoreComparator : IComparer<WeightedFragInfo>
-        {  // Comparator<WeightedFragInfo> {
+        public ScoreOrderFragmentsBuilder(String[] preTags, String[] postTags, IBoundaryScanner bs)
+            : base(preTags, postTags, bs)
+        {
+        }
 
+        public override IList<WeightedFragInfo> GetWeightedFragInfoList(IList<WeightedFragInfo> src)
+        {
+            // .NET port: IList<T> has no Sort method, so sort through List<T> (copying src first when it is not already a List<T>)
+            List<WeightedFragInfo> asList = src as List<WeightedFragInfo>;
+
+            if (asList != null)
+            {
+                asList.Sort(new ScoreComparator());
+                return asList;
+            }
+            else
+            {
+                asList = new List<WeightedFragInfo>(src);
+                asList.Sort(new ScoreComparator());
+                return asList;
+            }
+        }
+
+        public class ScoreComparator : IComparer<WeightedFragInfo>
+        {  
             public int Compare(WeightedFragInfo o1, WeightedFragInfo o2)
             {
-                if (o1.totalBoost > o2.totalBoost) return -1;
-                else if (o1.totalBoost < o2.totalBoost) return 1;
+                if (o1.TotalBoost > o2.TotalBoost) return -1;
+                else if (o1.TotalBoost < o2.TotalBoost) return 1;
                 // if same score then check startOffset
                 else
                 {
-                    if (o1.startOffset < o2.startOffset) return -1;
-                    else if (o1.startOffset > o2.startOffset) return 1;
+                    if (o1.StartOffset < o2.StartOffset) return -1;
+                    else if (o1.StartOffset > o2.StartOffset) return 1;
                 }
                 return 0;
             }

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/86d417ed/src/contrib/Highlighter/VectorHighlight/SimpleBoundaryScanner.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Highlighter/VectorHighlight/SimpleBoundaryScanner.cs b/src/contrib/Highlighter/VectorHighlight/SimpleBoundaryScanner.cs
new file mode 100644
index 0000000..b11ff3f
--- /dev/null
+++ b/src/contrib/Highlighter/VectorHighlight/SimpleBoundaryScanner.cs
@@ -0,0 +1,78 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net.Search.VectorHighlight
+{
+    public class SimpleBoundaryScanner : IBoundaryScanner
+    {
+        public static readonly int DEFAULT_MAX_SCAN = 20;
+        public static readonly char[] DEFAULT_BOUNDARY_CHARS = { '.', ',', '!', '?', ' ', '\t', '\n' };
+        protected int maxScan;
+        protected ISet<char> boundaryChars;
+
+        public SimpleBoundaryScanner()
+            : this(DEFAULT_MAX_SCAN, DEFAULT_BOUNDARY_CHARS)
+        {
+        }
+
+        public SimpleBoundaryScanner(int maxScan)
+            : this(maxScan, DEFAULT_BOUNDARY_CHARS)
+        {
+        }
+
+        public SimpleBoundaryScanner(char[] boundaryChars)
+            : this(DEFAULT_MAX_SCAN, boundaryChars)
+        {
+        }
+
+        public SimpleBoundaryScanner(int maxScan, char[] boundaryChars)
+        {
+            this.maxScan = maxScan;
+            this.boundaryChars = new HashSet<char>();
+            this.boundaryChars.UnionWith(boundaryChars);
+        }
+
+        public SimpleBoundaryScanner(int maxScan, ISet<char> boundaryChars)
+        {
+            this.maxScan = maxScan;
+            this.boundaryChars = boundaryChars;
+        }
+
+        public int FindStartOffset(StringBuilder buffer, int start)
+        {
+            if (start > buffer.Length || start < 1)
+                return start;
+            int offset, count = maxScan;
+            for (offset = start; offset > 0 && count > 0; count--)
+            {
+                if (boundaryChars.Contains(buffer[offset - 1]))
+                    return offset;
+                offset--;
+            }
+
+            if (offset == 0)
+            {
+                return 0;
+            }
+
+            return start;
+        }
+
+        public int FindEndOffset(StringBuilder buffer, int start)
+        {
+            if (start > buffer.Length || start < 0)
+                return start;
+            int offset, count = maxScan;
+            for (offset = start; offset < buffer.Length && count > 0; count--)
+            {
+                if (boundaryChars.Contains(buffer[offset]))
+                    return offset;
+                offset++;
+            }
+
+            return start;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/86d417ed/src/contrib/Highlighter/VectorHighlight/SimpleFieldFragList.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Highlighter/VectorHighlight/SimpleFieldFragList.cs b/src/contrib/Highlighter/VectorHighlight/SimpleFieldFragList.cs
new file mode 100644
index 0000000..4c5a421
--- /dev/null
+++ b/src/contrib/Highlighter/VectorHighlight/SimpleFieldFragList.cs
@@ -0,0 +1,28 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net.Search.VectorHighlight
+{
+    public class SimpleFieldFragList : FieldFragList
+    {
+        public SimpleFieldFragList(int fragCharSize)
+            : base(fragCharSize)
+        {
+        }
+
+        public override void Add(int startOffset, int endOffset, IList<FieldPhraseList.WeightedPhraseInfo> phraseInfoList)
+        {
+            float totalBoost = 0;
+            var subInfos = new List<FieldFragList.WeightedFragInfo.SubInfo>();
+            foreach (FieldPhraseList.WeightedPhraseInfo phraseInfo in phraseInfoList)
+            {
+                subInfos.Add(new FieldFragList.WeightedFragInfo.SubInfo(phraseInfo.Text, phraseInfo.TermsOffsets, phraseInfo.Seqnum));
+                totalBoost += phraseInfo.Boost;
+            }
+
+            FragInfos.Add(new WeightedFragInfo(startOffset, endOffset, subInfos, totalBoost));
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/86d417ed/src/contrib/Highlighter/VectorHighlight/SimpleFragListBuilder.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Highlighter/VectorHighlight/SimpleFragListBuilder.cs b/src/contrib/Highlighter/VectorHighlight/SimpleFragListBuilder.cs
index e090240..6b7038a 100644
--- a/src/contrib/Highlighter/VectorHighlight/SimpleFragListBuilder.cs
+++ b/src/contrib/Highlighter/VectorHighlight/SimpleFragListBuilder.cs
@@ -19,74 +19,28 @@ using System;
 using System.Collections.Generic;
 using System.Text;
 
-using WeightedPhraseInfo = Lucene.Net.Search.Vectorhighlight.FieldPhraseList.WeightedPhraseInfo;
+using WeightedPhraseInfo = Lucene.Net.Search.VectorHighlight.FieldPhraseList.WeightedPhraseInfo;
 
-namespace Lucene.Net.Search.Vectorhighlight
+namespace Lucene.Net.Search.VectorHighlight
 {
     /// <summary>
     /// A simple implementation of FragListBuilder.
     /// </summary>
-    public class SimpleFragListBuilder : FragListBuilder
+    public class SimpleFragListBuilder : BaseFragListBuilder
     {
-
-        public static int MARGIN = 6;
-        public static int MIN_FRAG_CHAR_SIZE = MARGIN * 3;
-
-        public FieldFragList CreateFieldFragList(FieldPhraseList fieldPhraseList, int fragCharSize)
+        /// <summary>
+        /// Creates a builder through the parameterless base class constructor.
+        /// </summary>
+        public SimpleFragListBuilder()
+            : base()
         {
-            if (fragCharSize < MIN_FRAG_CHAR_SIZE)
-                throw new ArgumentException("fragCharSize(" + fragCharSize + ") is too small. It must be " +
-                    MIN_FRAG_CHAR_SIZE + " or higher.");
-
-            FieldFragList ffl = new FieldFragList(fragCharSize);
-
-            List<WeightedPhraseInfo> wpil = new List<WeightedPhraseInfo>();
-            LinkedList<WeightedPhraseInfo>.Enumerator ite = fieldPhraseList.phraseList.GetEnumerator();
-
-            WeightedPhraseInfo phraseInfo = null;
-            int startOffset = 0;
-            bool taken = false;
-            while (true)
-            {
-                if (!taken)
-                {
-                    if (!ite.MoveNext()) break;
-                    phraseInfo = ite.Current;
-                }
-                taken = false;
-                if (phraseInfo == null) break;
-
-                // if the phrase violates the border of previous fragment, discard it and try next phrase
-                if (phraseInfo.StartOffset < startOffset) continue;
-
-                wpil.Clear();
-                wpil.Add(phraseInfo);
-                int st = phraseInfo.StartOffset - MARGIN < startOffset ?
-                    startOffset : phraseInfo.StartOffset - MARGIN;
-                int en = st + fragCharSize;
-                if (phraseInfo.EndOffset > en)
-                    en = phraseInfo.EndOffset;
-                startOffset = en;
+        }
 
-                while (true)
-                {
-                    if (ite.MoveNext())
-                    {
-                        phraseInfo = ite.Current;
-                        taken = true;
-                        if (phraseInfo == null) break;
-                    }
-                    else
-                        break;
-                    if (phraseInfo.EndOffset <= en)
-                        wpil.Add(phraseInfo);
-                    else
-                        break;
-                }
-                ffl.Add(st, en, wpil);
-            }
-            return ffl;
+        /// <summary>
+        /// Creates a builder, forwarding <paramref name="margin"/> to the base class constructor.
+        /// </summary>
+        /// <param name="margin">value handed unchanged to the base class constructor</param>
+        public SimpleFragListBuilder(int margin)
+            : base(margin)
+        {
         }
 
+        /// <summary>
+        /// Builds the fragment list by delegating to the three-argument
+        /// <c>CreateFieldFragList</c> overload (presumably inherited from
+        /// BaseFragListBuilder - confirm) with a new <c>SimpleFieldFragList</c>
+        /// as the list to fill.
+        /// </summary>
+        /// <param name="fieldPhraseList">passed through to the three-argument overload</param>
+        /// <param name="fragCharSize">used for the new SimpleFieldFragList and passed through to the overload</param>
+        /// <returns>whatever the three-argument overload returns</returns>
+        public override FieldFragList CreateFieldFragList(FieldPhraseList fieldPhraseList, int fragCharSize)
+        {
+            return CreateFieldFragList(fieldPhraseList, new SimpleFieldFragList(fragCharSize), fragCharSize);
+        }
     }
 }

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/86d417ed/src/contrib/Highlighter/VectorHighlight/SimpleFragmentsBuilder.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Highlighter/VectorHighlight/SimpleFragmentsBuilder.cs b/src/contrib/Highlighter/VectorHighlight/SimpleFragmentsBuilder.cs
index bfc2478..a70b33f 100644
--- a/src/contrib/Highlighter/VectorHighlight/SimpleFragmentsBuilder.cs
+++ b/src/contrib/Highlighter/VectorHighlight/SimpleFragmentsBuilder.cs
@@ -19,11 +19,11 @@ using System;
 using System.Collections.Generic;
 using System.Text;
 
-using WeightedFragInfo = Lucene.Net.Search.Vectorhighlight.FieldFragList.WeightedFragInfo;
+using WeightedFragInfo = Lucene.Net.Search.VectorHighlight.FieldFragList.WeightedFragInfo;
 
-namespace Lucene.Net.Search.Vectorhighlight
+namespace Lucene.Net.Search.VectorHighlight
 {
-   
+
     /// <summary>
     /// A simple implementation of FragmentsBuilder.
     /// </summary>
@@ -32,10 +32,11 @@ namespace Lucene.Net.Search.Vectorhighlight
         /// <summary>
         /// a constructor.
         /// </summary>
-        public SimpleFragmentsBuilder() : base()
+        public SimpleFragmentsBuilder()
+            : base()
         {
         }
-                
+
 
         /// <summary>
         /// a constructor.
@@ -45,13 +46,22 @@ namespace Lucene.Net.Search.Vectorhighlight
         public SimpleFragmentsBuilder(String[] preTags, String[] postTags)
             : base(preTags, postTags)
         {
+        }
 
+        /// <summary>
+        /// a constructor.
+        /// </summary>
+        /// <param name="bs">boundary scanner handed unchanged to the base class constructor</param>
+        public SimpleFragmentsBuilder(IBoundaryScanner bs)
+            : base(bs)
+        {
+        }
+
+        /// <summary>
+        /// a constructor.
+        /// </summary>
+        /// <param name="preTags">pre-tags forwarded to the base class constructor</param>
+        /// <param name="postTags">post-tags forwarded to the base class constructor</param>
+        /// <param name="bs">boundary scanner forwarded to the base class constructor</param>
+        // The tags must be forwarded together with the scanner; chaining to base(bs)
+        // alone would silently discard the caller's preTags/postTags.
+        // NOTE(review): relies on a BaseFragmentsBuilder(String[], String[], IBoundaryScanner)
+        // constructor, which is not visible in this diff - confirm it exists.
+        public SimpleFragmentsBuilder(String[] preTags, String[] postTags, IBoundaryScanner bs)
+            : base(preTags, postTags, bs)
+        {
         }
 
         /// <summary>
         /// do nothing. return the source list.
         /// </summary>
-        public override List<WeightedFragInfo> GetWeightedFragInfoList(List<WeightedFragInfo> src)
+        public override IList<WeightedFragInfo> GetWeightedFragInfoList(IList<WeightedFragInfo> src)
         {
             return src;
         }

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/86d417ed/src/contrib/Highlighter/VectorHighlight/SingleFragListBuilder.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Highlighter/VectorHighlight/SingleFragListBuilder.cs b/src/contrib/Highlighter/VectorHighlight/SingleFragListBuilder.cs
new file mode 100644
index 0000000..037ce59
--- /dev/null
+++ b/src/contrib/Highlighter/VectorHighlight/SingleFragListBuilder.cs
@@ -0,0 +1,31 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net.Search.VectorHighlight
+{
+    /// <summary>
+    /// An <see cref="IFragListBuilder"/> that puts all phrases of a field into
+    /// one single fragment.
+    /// </summary>
+    public class SingleFragListBuilder : IFragListBuilder
+    {
+        /// <summary>
+        /// Collects the phrases of <paramref name="fieldPhraseList"/>, stopping at the
+        /// first null entry, and - if at least one phrase was collected - adds them
+        /// as one fragment spanning offsets 0 to <c>int.MaxValue</c>.
+        /// </summary>
+        /// <param name="fieldPhraseList">source of the phrases</param>
+        /// <param name="fragCharSize">passed to the SimpleFieldFragList constructor</param>
+        /// <returns>the fragment list; no fragment is added when no phrase was collected</returns>
+        public FieldFragList CreateFieldFragList(FieldPhraseList fieldPhraseList, int fragCharSize)
+        {
+            FieldFragList ffl = new SimpleFieldFragList(fragCharSize);
+            List<FieldPhraseList.WeightedPhraseInfo> wpil = new List<FieldPhraseList.WeightedPhraseInfo>();
+
+            // foreach guarantees the enumerator is disposed, including on early exit.
+            foreach (FieldPhraseList.WeightedPhraseInfo phraseInfo in fieldPhraseList.phraseList)
+            {
+                // a null entry terminates collection
+                if (phraseInfo == null)
+                    break;
+                wpil.Add(phraseInfo);
+            }
+
+            if (wpil.Count > 0)
+                ffl.Add(0, int.MaxValue, wpil);
+            return ffl;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/86d417ed/src/contrib/Highlighter/VectorHighlight/StringUtils.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Highlighter/VectorHighlight/StringUtils.cs b/src/contrib/Highlighter/VectorHighlight/StringUtils.cs
deleted file mode 100644
index 7b27259..0000000
--- a/src/contrib/Highlighter/VectorHighlight/StringUtils.cs
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-using System;
-using System.Collections.Generic;
-
-namespace Lucene.Net.Search.Vectorhighlight
-{
-    public static class StringUtils 
-    {
-        /// <summary>
-        /// Check if the termToMatch is a match for the term, considering the use of a wildcards.
-        /// </summary>
-        public static Boolean TermStringMatch(String term, String termToMatch) 
-        {
-            if (term[0] == '*' || term[0] == '?')
-                throw new NotSupportedException("Unable to do matching with wildcard at the beginning");
-            
-            if (term[term.Length - 1] == '*') 
-            { 
-                //Wildcard at the end
-                if (termToMatch.Length < term.Length - 1) return false;
-                for (int i = 0; i < term.Length - 1; i++)
-                {
-                    if (termToMatch[i] != term[i]) return false;
-                }
-                return true;
-            }
-            return term.Equals(termToMatch);
-        }
-
-        public static Boolean AnyTermMatch(IList<String> terms, String term)
-        {
-            for (int i = 0; i < terms.Count; i++)
-            {
-                if (StringUtils.TermStringMatch(terms[i], term))
-                    return true;
-            }
-            return false;
-        }
-     }
-}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/86d417ed/src/contrib/Highlighter/VectorHighlight/Support.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Highlighter/VectorHighlight/Support.cs b/src/contrib/Highlighter/VectorHighlight/Support.cs
deleted file mode 100644
index 0dfbf43..0000000
--- a/src/contrib/Highlighter/VectorHighlight/Support.cs
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- *
-*/
-
-using System;
-using System.Collections.Generic;
-using System.Text;
-
-namespace Lucene.Net.Search.Vectorhighlight
-{
-    public class HashMap<K, V> : Dictionary<K, V>
-    {
-        V _NullKeyValue = default(V);
-
-        public new void Add(K key,V value)
-        {
-            if (key == null)
-                _NullKeyValue = value;
-            else
-                base.Add(key,value);
-        }
-
-        public new int Count
-        {
-            get
-            {
-                return base.Count + (_NullKeyValue!= null ? 1 : 0);
-            }
-        }
-
-        public new V this[K key]
-        {
-            get{
-                return Get(key);
-            }
-            set{
-                Add(key,value);
-            }
-        }
-
-        public V Get(K key)
-        {
-            if (key == null) return _NullKeyValue;
-
-            V v = default(V);
-            base.TryGetValue(key, out v);
-            return v;
-        }
-
-        public void Put(K key, V val) 
-        {
-            Add(key,val);
-        }
-    }
-}
-

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/86d417ed/src/contrib/Highlighter/VectorHighlight/VectorHighlightMapper.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Highlighter/VectorHighlight/VectorHighlightMapper.cs b/src/contrib/Highlighter/VectorHighlight/VectorHighlightMapper.cs
deleted file mode 100644
index da2966c..0000000
--- a/src/contrib/Highlighter/VectorHighlight/VectorHighlightMapper.cs
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#if LUCENENET_350 //Lucene.Net specific code. See https://issues.apache.org/jira/browse/LUCENENET-350
-
-using System;
-using System.Collections.Generic;
-using Lucene.Net.Index;
-
-namespace Lucene.Net.Search.Vectorhighlight
-{
-    public class VectorHighlightMapper : TermVectorMapper, ITermFreqVector, TermPositionVector
-    {
-        private readonly List<string> _terms;
-        private Dictionary<string, TermVectorOffsetInfo[]> _tvoi;
-        private Dictionary<string, int[]> _positions;
-        private Dictionary<string, int> _frequency;
-        private List<string> _indexMap;
-        private string _field;
-        private bool _storeOffsets;
-        private bool _storePositions;
-
-        public VectorHighlightMapper(List<string> terms)
-        {
-            _terms = terms;
-            _tvoi = new Dictionary<string, TermVectorOffsetInfo[]>();
-            _positions = new Dictionary<string, int[]>();
-            _frequency = new Dictionary<string, int>();
-            _indexMap = new List<string>();
-        }
-
-        public override void SetExpectations(string field, int numTerms, bool storeOffsets, bool storePositions)
-        {
-            _field = field;
-            _storeOffsets = storeOffsets;
-            _storePositions = storePositions;
-            if (_storeOffsets)
-                _tvoi = new Dictionary<string, TermVectorOffsetInfo[]>(numTerms);
-            if (_storePositions)
-                _positions = new Dictionary<string, int[]>(numTerms);
-            _frequency = new Dictionary<string, int>(numTerms);
-            _indexMap = new List<string>(numTerms);
-        }
-
-        public override void Map(string term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions)
-        {
-            if (StringUtils.AnyTermMatch(_terms, term))
-            {
-                _indexMap.Add(term);
-                if (_storeOffsets)
-                    _tvoi.Add(term, offsets);
-                if (_storePositions)
-                    _positions.Add(term, positions);
-                _frequency.Add(term,frequency);
-            }
-        }
-
-        public string Field
-        {
-            get { return _field; }
-        }
-
-        public int Size
-        {
-            get { return _tvoi.Count; }
-        }
-
-        public string[] GetTerms()
-        {
-            string[] result = new string[_tvoi.Count];
-            _tvoi.Keys.CopyTo(result,0);
-            return result;
-        }
-
-        public int[] GetTermFrequencies()
-        {
-            int[] result = new int[_frequency.Count];
-            _frequency.Values.CopyTo(result,0);
-            return result;
-        }
-
-        public int IndexOf(string term)
-        {
-            return _indexMap.IndexOf(term);
-        }
-
-        public int[] IndexesOf(string[] terms, int start, int len)
-        {
-            int[] result = new int[terms.Length];
-            for (int i = 0; i < terms.Length; i++)
-            {
-                string term = terms[i];
-                result[i] = _indexMap.IndexOf(term, start, len);
-            }
-            return result;
-        }
-
-        public int[] GetTermPositions(int index)
-        {
-            if (index<_positions.Count)
-            {
-                string key = _indexMap[index];
-                return _positions[key];
-            }
-            return new int[0];
-        }
-
-        public TermVectorOffsetInfo[] GetOffsets(int index)
-        {
-            if (index < _tvoi.Count)
-            {
-                string key = _indexMap[index];
-                return _tvoi[key];
-            }
-            return new TermVectorOffsetInfo[0];
-        }
-    }
-}
-
-#endif
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/86d417ed/src/contrib/Highlighter/VectorHighlight/WeightedFieldFragList.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Highlighter/VectorHighlight/WeightedFieldFragList.cs b/src/contrib/Highlighter/VectorHighlight/WeightedFieldFragList.cs
new file mode 100644
index 0000000..fec7d9d
--- /dev/null
+++ b/src/contrib/Highlighter/VectorHighlight/WeightedFieldFragList.cs
@@ -0,0 +1,36 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net.Search.VectorHighlight
+{
+    /// <summary>
+    /// A <see cref="FieldFragList"/> that weights a fragment by the distinct terms
+    /// it contains and by the total number of term occurrences.
+    /// </summary>
+    public class WeightedFieldFragList : FieldFragList
+    {
+        /// <summary>
+        /// a constructor.
+        /// </summary>
+        /// <param name="fragCharSize">value handed unchanged to the base class constructor</param>
+        public WeightedFieldFragList(int fragCharSize)
+            : base(fragCharSize)
+        {
+        }
+
+        /// <summary>
+        /// Appends one fragment to <c>FragInfos</c>. Each term text contributes
+        /// <c>ti.Weight * phraseInfo.Boost</c> the first time it is seen in the
+        /// fragment; the sum is then multiplied by <c>length * (1 / sqrt(length))</c>,
+        /// where <c>length</c> counts every term info, duplicates included.
+        /// A fragment without any term info keeps a boost of 0.
+        /// </summary>
+        /// <param name="startOffset">start offset stored in the new fragment info</param>
+        /// <param name="endOffset">end offset stored in the new fragment info</param>
+        /// <param name="phraseInfoList">phrases that make up the fragment</param>
+        public override void Add(int startOffset, int endOffset, IList<FieldPhraseList.WeightedPhraseInfo> phraseInfoList)
+        {
+            float totalBoost = 0;
+            List<FieldFragList.WeightedFragInfo.SubInfo> subInfos = new List<FieldFragList.WeightedFragInfo.SubInfo>();
+            HashSet<String> distinctTerms = new HashSet<String>();
+            int length = 0;
+            foreach (FieldPhraseList.WeightedPhraseInfo phraseInfo in phraseInfoList)
+            {
+                subInfos.Add(new FieldFragList.WeightedFragInfo.SubInfo(phraseInfo.Text, phraseInfo.TermsOffsets, phraseInfo.Seqnum));
+                foreach (FieldTermStack.TermInfo ti in phraseInfo.TermsInfos)
+                {
+                    // HashSet.Add returns false for a repeated term, so each term is weighted once
+                    if (distinctTerms.Add(ti.Text))
+                        totalBoost += ti.Weight * phraseInfo.Boost;
+                    length++;
+                }
+            }
+
+            // With length == 0 the factor is 0 * (1 / 0) = NaN, which would store a
+            // NaN boost; skip the scaling so an empty fragment keeps a boost of 0.
+            if (length > 0)
+                totalBoost *= length * (1 / (float)Math.Sqrt(length));
+            FragInfos.Add(new WeightedFragInfo(startOffset, endOffset, subInfos, totalBoost));
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/86d417ed/src/contrib/Highlighter/VectorHighlight/WeightedFragListBuilder.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Highlighter/VectorHighlight/WeightedFragListBuilder.cs b/src/contrib/Highlighter/VectorHighlight/WeightedFragListBuilder.cs
new file mode 100644
index 0000000..ed70472
--- /dev/null
+++ b/src/contrib/Highlighter/VectorHighlight/WeightedFragListBuilder.cs
@@ -0,0 +1,11 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net.Search.VectorHighlight
+{
+    /// <summary>
+    /// A weighted implementation of FragListBuilder: same construction as
+    /// SimpleFragListBuilder, but fragments are collected in a
+    /// <see cref="WeightedFieldFragList"/>.
+    /// </summary>
+    public class WeightedFragListBuilder : BaseFragListBuilder
+    {
+        /// <summary>
+        /// Creates a builder through the parameterless base class constructor.
+        /// </summary>
+        public WeightedFragListBuilder()
+            : base()
+        {
+        }
+
+        /// <summary>
+        /// Creates a builder, forwarding <paramref name="margin"/> to the base class constructor.
+        /// </summary>
+        /// <param name="margin">value handed unchanged to the base class constructor</param>
+        public WeightedFragListBuilder(int margin)
+            : base(margin)
+        {
+        }
+
+        /// <summary>
+        /// Builds the fragment list by delegating to the three-argument
+        /// <c>CreateFieldFragList</c> overload with a new <c>WeightedFieldFragList</c>
+        /// as the list to fill.
+        /// </summary>
+        /// <param name="fieldPhraseList">passed through to the three-argument overload</param>
+        /// <param name="fragCharSize">used for the new WeightedFieldFragList and passed through to the overload</param>
+        /// <returns>whatever the three-argument overload returns</returns>
+        public override FieldFragList CreateFieldFragList(FieldPhraseList fieldPhraseList, int fragCharSize)
+        {
+            return CreateFieldFragList(fieldPhraseList, new WeightedFieldFragList(fragCharSize), fragCharSize);
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/86d417ed/src/contrib/Memory/CollectionsHelper.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Memory/CollectionsHelper.cs b/src/contrib/Memory/CollectionsHelper.cs
deleted file mode 100644
index 4fdcd98..0000000
--- a/src/contrib/Memory/CollectionsHelper.cs
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- *
-*/
-
-using System;
-using System.Collections;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-
-namespace Lucene.Net.Index.Memory
-{
-    internal static class CollectionsHelper<T>
-    {
-        private static readonly T[] EmptyArray = new T[0];
-
-        /// <summary>
-        /// Returns an empty list of type T
-        /// </summary>
-        public static IList<T> EmptyList()
-        {
-            return EmptyArray;
-        }
-    }
-
-    public static class CollectionsExtensions
-    {
-        public static ICollection<T> AsReadOnly<T>(this ICollection<T> collection)
-        {
-            return new ReadOnlyCollection<T>(collection);
-        }
-
-        private sealed class ReadOnlyCollection<T> : ICollection<T>
-        {
-            private readonly ICollection<T> _other;
-
-            public ReadOnlyCollection(ICollection<T> other)
-            {
-                _other = other;
-            }
-
-            public IEnumerator<T> GetEnumerator()
-            {
-                return _other.GetEnumerator();
-            }
-
-            IEnumerator IEnumerable.GetEnumerator()
-            {
-                return GetEnumerator();
-            }
-
-            public void Add(T item)
-            {
-                throw new NotSupportedException("Collection is read only!");
-            }
-
-            public void Clear()
-            {
-                throw new NotSupportedException("Collection is read only!");
-            }
-
-            public bool Contains(T item)
-            {
-                return _other.Contains(item);
-            }
-
-            public void CopyTo(T[] array, int arrayIndex)
-            {
-                _other.CopyTo(array, arrayIndex);
-            }
-
-            public bool Remove(T item)
-            {
-                throw new NotSupportedException("Collection is read only!");
-            }
-
-            public int Count
-            {
-                get { return _other.Count; }
-            }
-
-            public bool IsReadOnly
-            {
-                get { return true; }
-            }
-        }
-    }
-}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/86d417ed/src/contrib/Memory/Contrib.Memory.csproj
----------------------------------------------------------------------
diff --git a/src/contrib/Memory/Contrib.Memory.csproj b/src/contrib/Memory/Contrib.Memory.csproj
index 030890a..67d18bf 100644
--- a/src/contrib/Memory/Contrib.Memory.csproj
+++ b/src/contrib/Memory/Contrib.Memory.csproj
@@ -98,15 +98,16 @@
     <Reference Include="System.Xml" />
   </ItemGroup>
   <ItemGroup>
-    <Compile Include="CollectionsHelper.cs" />
-    <Compile Include="EmptyCollector.cs" />
+    <Compile Include="FillingCollector.cs" />
     <Compile Include="KeywordTokenStream.cs" />
+    <Compile Include="MemoryDocsAndPositionsEnum.cs" />
+    <Compile Include="MemoryDocsEnum.cs" />
     <Compile Include="MemoryIndex.cs" />
-    <Compile Include="MemoryTermPositionVector.cs" />
+    <Compile Include="MemoryIndexNormDocValues.cs" />
     <Compile Include="Properties\AssemblyInfo.cs" />
+    <Compile Include="SliceByteStartArray.cs" />
     <Compile Include="TermComparer.cs" />
-    <Compile Include="MemoryTermEnum.cs" />
-    <Compile Include="MemoryTermPositions.cs" />
+    <Compile Include="MemoryTermsEnum.cs" />
   </ItemGroup>
   <ItemGroup>
     <ProjectReference Include="..\..\core\Lucene.Net.csproj">

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/86d417ed/src/contrib/Memory/EmptyCollector.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Memory/EmptyCollector.cs b/src/contrib/Memory/EmptyCollector.cs
deleted file mode 100644
index 022b3fe..0000000
--- a/src/contrib/Memory/EmptyCollector.cs
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- *
-*/
-
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using Lucene.Net.Search;
-
-namespace Lucene.Net.Index.Memory
-{
-    public partial class MemoryIndex
-    {
-        /// <summary>
-        /// Fills the given float array with the values
-        /// as the collector scores the search
-        /// </summary>
-        private sealed class FillingCollector : Collector
-        {
-            private readonly float[] _scores;
-            private Scorer _scorer;
-
-            public FillingCollector(float[] scores)
-            {
-                _scores = scores;
-            }
-
-            public override void SetScorer(Scorer scorer)
-            {
-                _scorer = scorer;
-            }
-
-            public override void Collect(int doc)
-            {
-                _scores[0] = _scorer.Score();
-            }
-
-            public override void SetNextReader(IndexReader reader, int docBase)
-            { }
-
-            public override bool AcceptsDocsOutOfOrder
-            {
-                get { return true; }
-            }
-        }
-    }
-}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/86d417ed/src/contrib/Memory/FillingCollector.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Memory/FillingCollector.cs b/src/contrib/Memory/FillingCollector.cs
new file mode 100644
index 0000000..b4393f1
--- /dev/null
+++ b/src/contrib/Memory/FillingCollector.cs
@@ -0,0 +1,65 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+*/
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using Lucene.Net.Search;
+
+namespace Lucene.Net.Index.Memory
+{
+    public partial class MemoryIndex
+    {
+        /// <summary>
+        /// Collector that stores the score of every collected document
+        /// in slot 0 of the float array supplied at construction.
+        /// </summary>
+        private sealed class FillingCollector : Collector
+        {
+            private readonly float[] _target;
+            private Scorer _activeScorer;
+
+            /// <summary>
+            /// a constructor.
+            /// </summary>
+            /// <param name="scores">array whose element 0 receives the scores</param>
+            public FillingCollector(float[] scores)
+            {
+                this._target = scores;
+            }
+
+            /// <summary>
+            /// Always true.
+            /// </summary>
+            public override bool AcceptsDocsOutOfOrder
+            {
+                get { return true; }
+            }
+
+            /// <summary>
+            /// Remembers the scorer that <c>Collect</c> reads scores from.
+            /// </summary>
+            public override void SetScorer(Scorer scorer)
+            {
+                this._activeScorer = scorer;
+            }
+
+            /// <summary>
+            /// Does nothing; the reader context is not used.
+            /// </summary>
+            public override void SetNextReader(AtomicReaderContext reader)
+            {
+            }
+
+            /// <summary>
+            /// Writes the current scorer's score into element 0 of the target array;
+            /// <paramref name="doc"/> itself is not used.
+            /// </summary>
+            public override void Collect(int doc)
+            {
+                float currentScore = this._activeScorer.Score();
+                this._target[0] = currentScore;
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/86d417ed/src/contrib/Memory/KeywordTokenStream.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Memory/KeywordTokenStream.cs b/src/contrib/Memory/KeywordTokenStream.cs
index 5e9a8e7..1f9484c 100644
--- a/src/contrib/Memory/KeywordTokenStream.cs
+++ b/src/contrib/Memory/KeywordTokenStream.cs
@@ -34,13 +34,13 @@ namespace Lucene.Net.Index.Memory
         {
             private IEnumerator<T> iter;
             private int start = 0;
-            private ITermAttribute termAtt;
+            private ICharTermAttribute termAtt;
             private IOffsetAttribute offsetAtt;
 
             public KeywordTokenStream(IEnumerable<T> keywords)
             {
                 iter = keywords.GetEnumerator();
-                termAtt = AddAttribute<ITermAttribute>();
+                termAtt = AddAttribute<ICharTermAttribute>();
                 offsetAtt = AddAttribute<IOffsetAttribute>();
             }
 
@@ -54,8 +54,8 @@ namespace Lucene.Net.Index.Memory
 
                 String term = obj.ToString();
                 ClearAttributes();
-                termAtt.SetTermBuffer(term);
-                offsetAtt.SetOffset(start, start + termAtt.TermLength());
+                termAtt.SetEmpty().Append(term);
+                offsetAtt.SetOffset(start, start + termAtt.Length);
                 start += term.Length + 1; // separate words by 1 (blank) character
                 return true;
             }

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/86d417ed/src/contrib/Memory/MemoryDocsAndPositionsEnum.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Memory/MemoryDocsAndPositionsEnum.cs b/src/contrib/Memory/MemoryDocsAndPositionsEnum.cs
new file mode 100644
index 0000000..c16ff10
--- /dev/null
+++ b/src/contrib/Memory/MemoryDocsAndPositionsEnum.cs
@@ -0,0 +1,128 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+*/
+
+using Lucene.Net.Util;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net.Index.Memory
+{
+    public partial class MemoryIndex
+    {
+        private class MemoryDocsAndPositionsEnum : DocsAndPositionsEnum // only ever yields doc id 0 (see NextDoc)
+        {
+            private int posUpto; // for assert; zeroed by Reset, otherwise only referenced by the disabled assert in NextPosition
+            private bool hasNext; // set by Reset, cleared once NextDoc has returned doc 0
+            private IBits liveDocs; // null means "no filter"; otherwise only bit 0 is consulted
+            private int doc = -1; // -1 until NextDoc runs, then 0 or NO_MORE_DOCS
+            private IntBlockPool.SliceReader sliceReader; // source of the ints read by NextPosition
+            private int freq; // value passed to Reset, exposed through Freq
+            private int startOffset; // written by NextPosition only when index.storeOffsets is true
+            private int endOffset; // written by NextPosition only when index.storeOffsets is true
+            // Owning index: supplies intBlockPool for the slice reader and the storeOffsets flag.
+            private readonly MemoryIndex index;
+            /// <summary>Binds the slice reader to the index's intBlockPool. hasNext starts false, so NextDoc returns NO_MORE_DOCS until Reset is called.</summary>
+            public MemoryDocsAndPositionsEnum(MemoryIndex index)
+            {
+                this.index = index; // .NET: needed for storeOffsets access
+                this.sliceReader = new IntBlockPool.SliceReader(index.intBlockPool);
+            }
+            /// <summary>Resets the slice reader with start/end, rewinds doc, hasNext and posUpto, stores liveDocs and freq, and returns this instance. startOffset/endOffset are left untouched.</summary>
+            public DocsAndPositionsEnum Reset(IBits liveDocs, int start, int end, int freq)
+            {
+                this.liveDocs = liveDocs;
+                this.sliceReader.Reset(start, end);
+                posUpto = 0; // for assert
+                hasNext = true;
+                doc = -1;
+                this.freq = freq;
+                return this;
+            }
+            /// <summary>-1 after construction or Reset; otherwise the value last assigned by NextDoc.</summary>
+            public override int DocID
+            {
+                get { return doc; }
+            }
+            /// <summary>Returns 0 at most once per Reset, and only if liveDocs is null or liveDocs[0] is true; every other call returns NO_MORE_DOCS.</summary>
+            public override int NextDoc()
+            {
+                if (hasNext && (liveDocs == null || liveDocs[0]))
+                {
+                    hasNext = false;
+                    return doc = 0;
+                }
+                else
+                {
+                    return doc = NO_MORE_DOCS;
+                }
+            }
+            /// <summary>Forwards to SlowAdvance (not defined in this class; presumably inherited from the base enum - confirm).</summary>
+            public override int Advance(int target)
+            {
+                return SlowAdvance(target);
+            }
+            /// <summary>The freq value most recently supplied to Reset.</summary>
+            public override int Freq
+            {
+                get { return freq; }
+            }
+            /// <summary>Reads the next position from the slice reader; when index.storeOffsets is set it also reads the start and end offsets that follow it.</summary>
+            public override int NextPosition()
+            {
+                //assert posUpto++ < freq;
+                //assert !sliceReader.endOfSlice() : " stores offsets : " + startOffset;
+                if (index.storeOffsets)
+                {
+                    int pos = sliceReader.ReadInt(); // read order: position, start offset, end offset
+                    startOffset = sliceReader.ReadInt();
+                    endOffset = sliceReader.ReadInt();
+                    return pos;
+                }
+                else
+                {
+                    return sliceReader.ReadInt(); // a single int is read; offsets keep their previous values
+                }
+            }
+            /// <summary>Start offset captured by the last NextPosition call that ran with index.storeOffsets set.</summary>
+            public override int StartOffset
+            {
+                get { return startOffset; }
+            }
+            /// <summary>End offset captured by the last NextPosition call that ran with index.storeOffsets set.</summary>
+            public override int EndOffset
+            {
+                get { return endOffset; }
+            }
+            /// <summary>Always null: this enum never returns a payload.</summary>
+            public override BytesRef Payload
+            {
+                get { return null; }
+            }
+            /// <summary>Always 1 (NextDoc yields at most one document).</summary>
+            public override long Cost
+            {
+                get { return 1; }
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/86d417ed/src/contrib/Memory/MemoryDocsEnum.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Memory/MemoryDocsEnum.cs b/src/contrib/Memory/MemoryDocsEnum.cs
new file mode 100644
index 0000000..e3ed27d
--- /dev/null
+++ b/src/contrib/Memory/MemoryDocsEnum.cs
@@ -0,0 +1,82 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+*/
+
+using Lucene.Net.Util;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net.Index.Memory
+{
+    public partial class MemoryIndex
+    {
+        private class MemoryDocsEnum : DocsEnum // only ever yields doc id 0 (see NextDoc)
+        {
+            private bool hasNext; // set by Reset, cleared once NextDoc has returned doc 0
+            private IBits liveDocs; // null means "no filter"; otherwise only bit 0 is consulted
+            private int doc = -1; // -1 until NextDoc runs, then 0 or NO_MORE_DOCS
+            private int freq; // value passed to Reset, exposed through Freq
+            /// <summary>Stores liveDocs and freq, rewinds doc and hasNext, and returns this instance. Until Reset is called hasNext is false, so NextDoc returns NO_MORE_DOCS.</summary>
+            public DocsEnum Reset(IBits liveDocs, int freq)
+            {
+                this.liveDocs = liveDocs;
+                hasNext = true;
+                doc = -1;
+                this.freq = freq;
+                return this;
+            }
+            /// <summary>-1 after construction or Reset; otherwise the value last assigned by NextDoc.</summary>
+            public override int DocID
+            {
+                get { return doc; }
+            }
+            /// <summary>Returns 0 at most once per Reset, and only if liveDocs is null or liveDocs[0] is true; every other call returns NO_MORE_DOCS.</summary>
+            public override int NextDoc()
+            {
+                if (hasNext && (liveDocs == null || liveDocs[0]))
+                {
+                    hasNext = false;
+                    return doc = 0;
+                }
+                else
+                {
+                    return doc = NO_MORE_DOCS;
+                }
+            }
+            /// <summary>Forwards to SlowAdvance (not defined in this class; presumably inherited from the base enum - confirm).</summary>
+            public override int Advance(int target)
+            {
+                return SlowAdvance(target);
+            }
+            /// <summary>The freq value most recently supplied to Reset.</summary>
+            public override int Freq
+            {
+                get { return freq; }
+            }
+            /// <summary>Always 1 (NextDoc yields at most one document).</summary>
+            public override long Cost
+            {
+                get { return 1; }
+            }
+        }
+    }
+}


Mime
View raw message