lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From paulir...@apache.org
Subject [18/53] [abbrv] git commit: Finish Memory and VectorHighlighter
Date Thu, 07 Nov 2013 13:53:33 GMT
Finish Memory and VectorHighlighter


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/86d417ed
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/86d417ed
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/86d417ed

Branch: refs/heads/branch_4x
Commit: 86d417ed4a456a1381edc50478d9b33b5dc6e72c
Parents: 45ba8d8
Author: Paul Irwin <paulirwin@gmail.com>
Authored: Mon Oct 28 21:42:44 2013 -0400
Committer: Paul Irwin <paulirwin@gmail.com>
Committed: Mon Oct 28 21:42:44 2013 -0400

----------------------------------------------------------------------
 build/vs2012/Lucene.Net.All/Lucene.Net.All.sln  |  11 -
 .../Highlighter/Contrib.Highlighter.csproj      |  76 +-
 .../Highlighter/Highlight/Highlighter.cs        |  16 +-
 .../Highlight/SimpleSpanFragmenter.cs           |   4 +-
 .../Highlight/SpanGradientFormatter.cs          |   2 +-
 .../Highlighter/Highlight/TextFragment.cs       |   2 +-
 src/contrib/Highlighter/Highlight/TokenGroup.cs |   8 +-
 .../Highlight/WeightedSpanTermExtractor.cs      |   7 +-
 .../VectorHighlight/BaseFragListBuilder.cs      | 131 +++
 .../VectorHighlight/BaseFragmentsBuilder.cs     | 323 +++++--
 .../BreakIteratorBoundaryScanner.cs             |  35 +
 .../VectorHighlight/FastVectorHighlighter.cs    |  79 +-
 .../VectorHighlight/FieldFragList.cs            |  87 +-
 .../VectorHighlight/FieldPhraseList.cs          |  76 +-
 .../Highlighter/VectorHighlight/FieldQuery.cs   | 220 ++---
 .../VectorHighlight/FieldTermStack.cs           | 188 ++--
 .../VectorHighlight/FragListBuilder.cs          |  39 -
 .../VectorHighlight/FragmentsBuilder.cs         |  56 --
 .../VectorHighlight/IBoundaryScanner.cs         |  13 +
 .../VectorHighlight/IFragListBuilder.cs         |  39 +
 .../VectorHighlight/IFragmentsBuilder.cs        |  62 ++
 .../ScoreOrderFragmentsBuilder.cs               |  60 +-
 .../VectorHighlight/SimpleBoundaryScanner.cs    |  78 ++
 .../VectorHighlight/SimpleFieldFragList.cs      |  28 +
 .../VectorHighlight/SimpleFragListBuilder.cs    |  72 +-
 .../VectorHighlight/SimpleFragmentsBuilder.cs   |  22 +-
 .../VectorHighlight/SingleFragListBuilder.cs    |  31 +
 .../Highlighter/VectorHighlight/StringUtils.cs  |  56 --
 .../Highlighter/VectorHighlight/Support.cs      |  73 --
 .../VectorHighlight/VectorHighlightMapper.cs    | 134 ---
 .../VectorHighlight/WeightedFieldFragList.cs    |  36 +
 .../VectorHighlight/WeightedFragListBuilder.cs  |  11 +
 src/contrib/Memory/CollectionsHelper.cs         | 105 ---
 src/contrib/Memory/Contrib.Memory.csproj        |  11 +-
 src/contrib/Memory/EmptyCollector.cs            |  65 --
 src/contrib/Memory/FillingCollector.cs          |  65 ++
 src/contrib/Memory/KeywordTokenStream.cs        |   8 +-
 .../Memory/MemoryDocsAndPositionsEnum.cs        | 128 +++
 src/contrib/Memory/MemoryDocsEnum.cs            |  82 ++
 src/contrib/Memory/MemoryIndex.cs               | 866 +++++++------------
 src/contrib/Memory/MemoryIndexNormDocValues.cs  |  45 +
 src/contrib/Memory/MemoryTermEnum.cs            | 105 ---
 src/contrib/Memory/MemoryTermPositionVector.cs  | 116 ---
 src/contrib/Memory/MemoryTermPositions.cs       | 151 ----
 src/contrib/Memory/MemoryTermsEnum.cs           | 189 ++++
 src/contrib/Memory/SliceByteStartArray.cs       |  77 ++
 src/contrib/Memory/TermComparer.cs              |  10 +-
 src/core/Support/Collections.cs                 |   2 +-
 48 files changed, 2193 insertions(+), 1907 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/86d417ed/build/vs2012/Lucene.Net.All/Lucene.Net.All.sln
----------------------------------------------------------------------
diff --git a/build/vs2012/Lucene.Net.All/Lucene.Net.All.sln b/build/vs2012/Lucene.Net.All/Lucene.Net.All.sln
index dfa5f05..c413839 100644
--- a/build/vs2012/Lucene.Net.All/Lucene.Net.All.sln
+++ b/build/vs2012/Lucene.Net.All/Lucene.Net.All.sln
@@ -11,8 +11,6 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Contrib.Analyzers", "..\..\
 EndProject
 Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Contrib.Core", "..\..\..\src\contrib\Core\Contrib.Core.csproj", "{FEF899EB-610C-4D3C-A556-A01F56F4AFE0}"
 EndProject
-Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Contrib.FastVectorHighlighter", "..\..\..\src\contrib\FastVectorHighlighter\Contrib.FastVectorHighlighter.csproj", "{9D2E3153-076F-49C5-B83D-FB2573536B5F}"
-EndProject
 Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Contrib.Highlighter", "..\..\..\src\contrib\Highlighter\Contrib.Highlighter.csproj", "{901D5415-383C-4AA6-A256-879558841BEA}"
 EndProject
 Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Contrib.Memory", "..\..\..\src\contrib\Memory\Contrib.Memory.csproj", "{112B9A7C-29CC-4539-8F5A-45669C07CD4D}"
@@ -73,14 +71,6 @@ Global
 		{FEF899EB-610C-4D3C-A556-A01F56F4AFE0}.Release|Any CPU.Build.0 = Release|Any CPU
 		{FEF899EB-610C-4D3C-A556-A01F56F4AFE0}.Release35|Any CPU.ActiveCfg = Release35|Any CPU
 		{FEF899EB-610C-4D3C-A556-A01F56F4AFE0}.Release35|Any CPU.Build.0 = Release35|Any CPU
-		{9D2E3153-076F-49C5-B83D-FB2573536B5F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
-		{9D2E3153-076F-49C5-B83D-FB2573536B5F}.Debug|Any CPU.Build.0 = Debug|Any CPU
-		{9D2E3153-076F-49C5-B83D-FB2573536B5F}.Debug35|Any CPU.ActiveCfg = Debug35|Any CPU
-		{9D2E3153-076F-49C5-B83D-FB2573536B5F}.Debug35|Any CPU.Build.0 = Debug35|Any CPU
-		{9D2E3153-076F-49C5-B83D-FB2573536B5F}.Release|Any CPU.ActiveCfg = Release|Any CPU
-		{9D2E3153-076F-49C5-B83D-FB2573536B5F}.Release|Any CPU.Build.0 = Release|Any CPU
-		{9D2E3153-076F-49C5-B83D-FB2573536B5F}.Release35|Any CPU.ActiveCfg = Release35|Any CPU
-		{9D2E3153-076F-49C5-B83D-FB2573536B5F}.Release35|Any CPU.Build.0 = Release35|Any CPU
 		{901D5415-383C-4AA6-A256-879558841BEA}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{901D5415-383C-4AA6-A256-879558841BEA}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{901D5415-383C-4AA6-A256-879558841BEA}.Debug35|Any CPU.ActiveCfg = Debug35|Any CPU
@@ -201,7 +191,6 @@ Global
 		{5D4AD9BE-1FFB-41AB-9943-25737971BF57} = {6CCE76E6-1EAC-4E67-9CD3-0ACA3197327E}
 		{4286E961-9143-4821-B46D-3D39D3736386} = {7E19085A-545B-4DE8-BBF5-B1DBC370FD37}
 		{FEF899EB-610C-4D3C-A556-A01F56F4AFE0} = {7E19085A-545B-4DE8-BBF5-B1DBC370FD37}
-		{9D2E3153-076F-49C5-B83D-FB2573536B5F} = {7E19085A-545B-4DE8-BBF5-B1DBC370FD37}
 		{901D5415-383C-4AA6-A256-879558841BEA} = {7E19085A-545B-4DE8-BBF5-B1DBC370FD37}
 		{112B9A7C-29CC-4539-8F5A-45669C07CD4D} = {7E19085A-545B-4DE8-BBF5-B1DBC370FD37}
 		{481CF6E3-52AF-4621-9DEB-022122079AF6} = {7E19085A-545B-4DE8-BBF5-B1DBC370FD37}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/86d417ed/src/contrib/Highlighter/Contrib.Highlighter.csproj
----------------------------------------------------------------------
diff --git a/src/contrib/Highlighter/Contrib.Highlighter.csproj b/src/contrib/Highlighter/Contrib.Highlighter.csproj
index 9e18743..51861c1 100644
--- a/src/contrib/Highlighter/Contrib.Highlighter.csproj
+++ b/src/contrib/Highlighter/Contrib.Highlighter.csproj
@@ -173,66 +173,87 @@
     <Compile Include="AssemblyInfo.cs">
       <SubType>Code</SubType>
     </Compile>
-    <Compile Include="DefaultEncoder.cs">
+    <Compile Include="Highlight\DefaultEncoder.cs">
       <SubType>Code</SubType>
     </Compile>
-    <Compile Include="IEncoder.cs">
+    <Compile Include="Highlight\IEncoder.cs">
       <SubType>Code</SubType>
     </Compile>
-    <Compile Include="IFormatter.cs">
+    <Compile Include="Highlight\IFormatter.cs">
       <SubType>Code</SubType>
     </Compile>
-    <Compile Include="IFragmenter.cs">
+    <Compile Include="Highlight\IFragmenter.cs">
       <SubType>Code</SubType>
     </Compile>
-    <Compile Include="GradientFormatter.cs">
+    <Compile Include="Highlight\GradientFormatter.cs">
       <SubType>Code</SubType>
     </Compile>
-    <Compile Include="Highlighter.cs">
+    <Compile Include="Highlight\Highlighter.cs">
       <SubType>Code</SubType>
     </Compile>
-    <Compile Include="InvalidTokenOffsetsException.cs" />
-    <Compile Include="NullFragmenter.cs">
+    <Compile Include="Highlight\InvalidTokenOffsetsException.cs" />
+    <Compile Include="Highlight\NullFragmenter.cs">
       <SubType>Code</SubType>
     </Compile>
-    <Compile Include="QueryScorer.cs">
+    <Compile Include="Highlight\OffsetLimitTokenFilter.cs" />
+    <Compile Include="Highlight\PositionSpan.cs" />
+    <Compile Include="Highlight\QueryScorer.cs">
       <SubType>Code</SubType>
     </Compile>
-    <Compile Include="QueryTermExtractor.cs">
+    <Compile Include="Highlight\QueryTermExtractor.cs">
       <SubType>Code</SubType>
     </Compile>
-    <Compile Include="QueryTermScorer.cs" />
-    <Compile Include="IScorer.cs">
+    <Compile Include="Highlight\QueryTermScorer.cs" />
+    <Compile Include="Highlight\IScorer.cs">
       <SubType>Code</SubType>
     </Compile>
-    <Compile Include="SimpleFragmenter.cs">
+    <Compile Include="Highlight\SimpleFragmenter.cs">
       <SubType>Code</SubType>
     </Compile>
-    <Compile Include="SimpleHTMLEncoder.cs">
+    <Compile Include="Highlight\SimpleHTMLEncoder.cs">
       <SubType>Code</SubType>
     </Compile>
-    <Compile Include="SimpleHTMLFormatter.cs">
+    <Compile Include="Highlight\SimpleHTMLFormatter.cs">
       <SubType>Code</SubType>
     </Compile>
-    <Compile Include="SimpleSpanFragmenter.cs" />
-    <Compile Include="SpanGradientFormatter.cs">
+    <Compile Include="Highlight\SimpleSpanFragmenter.cs" />
+    <Compile Include="Highlight\SpanGradientFormatter.cs">
       <SubType>Code</SubType>
     </Compile>
-    <Compile Include="TextFragment.cs">
+    <Compile Include="Highlight\TextFragment.cs">
       <SubType>Code</SubType>
     </Compile>
-    <Compile Include="TokenGroup.cs">
+    <Compile Include="Highlight\TokenGroup.cs">
       <SubType>Code</SubType>
     </Compile>
-    <Compile Include="TokenSources.cs">
+    <Compile Include="Highlight\TokenSources.cs">
       <SubType>Code</SubType>
     </Compile>
-    <Compile Include="WeightedSpanTerm.cs" />
-    <Compile Include="WeightedSpanTermExtractor.cs" />
-    <Compile Include="WeightedTerm.cs">
+    <Compile Include="Highlight\TokenStreamFromTermPositionVector.cs" />
+    <Compile Include="Highlight\WeightedSpanTerm.cs" />
+    <Compile Include="Highlight\WeightedSpanTermExtractor.cs" />
+    <Compile Include="Highlight\WeightedTerm.cs">
       <SubType>Code</SubType>
     </Compile>
-    <Content Include="Package.html" />
+    <Compile Include="VectorHighlight\BaseFragListBuilder.cs" />
+    <Compile Include="VectorHighlight\BaseFragmentsBuilder.cs" />
+    <Compile Include="VectorHighlight\BreakIteratorBoundaryScanner.cs" />
+    <Compile Include="VectorHighlight\FastVectorHighlighter.cs" />
+    <Compile Include="VectorHighlight\FieldFragList.cs" />
+    <Compile Include="VectorHighlight\FieldPhraseList.cs" />
+    <Compile Include="VectorHighlight\FieldQuery.cs" />
+    <Compile Include="VectorHighlight\FieldTermStack.cs" />
+    <Compile Include="VectorHighlight\IBoundaryScanner.cs" />
+    <Compile Include="VectorHighlight\IFragListBuilder.cs" />
+    <Compile Include="VectorHighlight\IFragmentsBuilder.cs" />
+    <Compile Include="VectorHighlight\ScoreOrderFragmentsBuilder.cs" />
+    <Compile Include="VectorHighlight\SimpleBoundaryScanner.cs" />
+    <Compile Include="VectorHighlight\SimpleFieldFragList.cs" />
+    <Compile Include="VectorHighlight\SimpleFragListBuilder.cs" />
+    <Compile Include="VectorHighlight\SimpleFragmentsBuilder.cs" />
+    <Compile Include="VectorHighlight\SingleFragListBuilder.cs" />
+    <Compile Include="VectorHighlight\WeightedFieldFragList.cs" />
+    <Compile Include="VectorHighlight\WeightedFragListBuilder.cs" />
   </ItemGroup>
   <ItemGroup>
     <ProjectReference Include="..\Memory\Contrib.Memory.csproj">
@@ -243,6 +264,10 @@
       <Project>{5D4AD9BE-1FFB-41AB-9943-25737971BF57}</Project>
       <Name>Lucene.Net</Name>
     </ProjectReference>
+    <ProjectReference Include="..\Queries\Contrib.Queries.csproj">
+      <Project>{481cf6e3-52af-4621-9deb-022122079af6}</Project>
+      <Name>Contrib.Queries</Name>
+    </ProjectReference>
   </ItemGroup>
   <ItemGroup>
     <BootstrapperPackage Include=".NETFramework,Version=v4.0">
@@ -269,6 +294,9 @@
   <ItemGroup>
     <None Include="Lucene.Net.snk" />
   </ItemGroup>
+  <ItemGroup>
+    <Folder Include="PostingsHighlight\" />
+  </ItemGroup>
   <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" />
   <PropertyGroup>
     <PreBuildEvent>

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/86d417ed/src/contrib/Highlighter/Highlight/Highlighter.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Highlighter/Highlight/Highlighter.cs b/src/contrib/Highlighter/Highlight/Highlighter.cs
index b026671..d865f13 100644
--- a/src/contrib/Highlighter/Highlight/Highlighter.cs
+++ b/src/contrib/Highlighter/Highlight/Highlighter.cs
@@ -208,12 +208,12 @@ namespace Lucene.Net.Search.Highlight
                         throw new InvalidTokenOffsetsException("Token " + termAtt.ToString()
                                                                + " exceeds length of provided text sized " + text.Length);
                     }
-                    if ((tokenGroup.NumTokens > 0) && (tokenGroup.IsDistinct()))
+                    if ((tokenGroup.numTokens > 0) && (tokenGroup.IsDistinct()))
                     {
                         //the current token is distinct from previous tokens -
                         // markup the cached token group info
-                        startOffset = tokenGroup.MatchStartOffset;
-                        endOffset = tokenGroup.MatchEndOffset;
+                        startOffset = tokenGroup.matchStartOffset;
+                        endOffset = tokenGroup.matchEndOffset;
                         tokenText = text.Substring(startOffset, endOffset - startOffset);
                         String markedUpText = _formatter.HighlightTerm(_encoder.EncodeText(tokenText), tokenGroup);
                         //store any whitespace etc from between this and last group
@@ -228,7 +228,7 @@ namespace Lucene.Net.Search.Highlight
                         {
                             currentFrag.Score = _fragmentScorer.FragmentScore;
                             //record stats for a new fragment
-                            currentFrag.TextEndPos = newText.Length;
+                            currentFrag.textEndPos = newText.Length;
                             currentFrag = new TextFragment(newText, newText.Length, docFrags.Count);
                             _fragmentScorer.StartFragment(currentFrag);
                             docFrags.Add(currentFrag);
@@ -244,11 +244,11 @@ namespace Lucene.Net.Search.Highlight
                 }
                 currentFrag.Score = _fragmentScorer.FragmentScore;
 
-                if (tokenGroup.NumTokens > 0)
+                if (tokenGroup.numTokens > 0)
                 {
                     //flush the accumulated text (same code as in above loop)
-                    startOffset = tokenGroup.MatchStartOffset;
-                    endOffset = tokenGroup.MatchEndOffset;
+                    startOffset = tokenGroup.matchStartOffset;
+                    endOffset = tokenGroup.matchEndOffset;
                     tokenText = text.Substring(startOffset, endOffset - startOffset);
                     var markedUpText = _formatter.HighlightTerm(_encoder.EncodeText(tokenText), tokenGroup);
                     //store any whitespace etc from between this and last group
@@ -271,7 +271,7 @@ namespace Lucene.Net.Search.Highlight
                     newText.Append(_encoder.EncodeText(text.Substring(lastEndOffset)));
                 }
 
-                currentFrag.TextEndPos = newText.Length;
+                currentFrag.textEndPos = newText.Length;
 
                 //sort the most relevant sections of the text
                 foreach (var f in docFrags)

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/86d417ed/src/contrib/Highlighter/Highlight/SimpleSpanFragmenter.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Highlighter/Highlight/SimpleSpanFragmenter.cs b/src/contrib/Highlighter/Highlight/SimpleSpanFragmenter.cs
index f07a850..7430484 100644
--- a/src/contrib/Highlighter/Highlight/SimpleSpanFragmenter.cs
+++ b/src/contrib/Highlighter/Highlight/SimpleSpanFragmenter.cs
@@ -78,9 +78,9 @@ namespace Lucene.Net.Search.Highlight
 
                 for (int i = 0; i < positionSpans.Count; i++)
                 {
-                    if (positionSpans[i].Start == position)
+                    if (positionSpans[i].start == position)
                     {
-                        waitForPos = positionSpans[i].End + 1;
+                        waitForPos = positionSpans[i].end + 1;
                         break;
                     }
                 }

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/86d417ed/src/contrib/Highlighter/Highlight/SpanGradientFormatter.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Highlighter/Highlight/SpanGradientFormatter.cs b/src/contrib/Highlighter/Highlight/SpanGradientFormatter.cs
index d0a3c3d..c716469 100644
--- a/src/contrib/Highlighter/Highlight/SpanGradientFormatter.cs
+++ b/src/contrib/Highlighter/Highlight/SpanGradientFormatter.cs
@@ -30,7 +30,7 @@ namespace Lucene.Net.Search.Highlight
     {
         // guess how much extra text we'll add to the text we're highlighting to try to avoid a  StringBuilder resize
         private const string TEMPLATE = "<span style=\"background: #EEEEEE; color: #000000;\">...</span>";
-        private const int EXTRA = TEMPLATE.Length;
+        private static readonly int EXTRA = TEMPLATE.Length;
 
         public SpanGradientFormatter(float maxScore, string minForegroundColor,
                                      string maxForegroundColor, string minBackgroundColor,

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/86d417ed/src/contrib/Highlighter/Highlight/TextFragment.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Highlighter/Highlight/TextFragment.cs b/src/contrib/Highlighter/Highlight/TextFragment.cs
index 7915497..99ae611 100644
--- a/src/contrib/Highlighter/Highlight/TextFragment.cs
+++ b/src/contrib/Highlighter/Highlight/TextFragment.cs
@@ -28,7 +28,7 @@ namespace Lucene.Net.Search.Highlight
         private StringBuilder markedUpText;
         private int fragNum;
         private int textStartPos;
-        private int textEndPos;
+        internal int textEndPos;
         private float score;
 
         public TextFragment(StringBuilder markedUpText, int textStartPos, int fragNum)

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/86d417ed/src/contrib/Highlighter/Highlight/TokenGroup.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Highlighter/Highlight/TokenGroup.cs b/src/contrib/Highlighter/Highlight/TokenGroup.cs
index 2c2a57c..26c2896 100644
--- a/src/contrib/Highlighter/Highlight/TokenGroup.cs
+++ b/src/contrib/Highlighter/Highlight/TokenGroup.cs
@@ -30,11 +30,11 @@ namespace Lucene.Net.Search.Highlight
 
         private Token[] tokens = new Token[MAX_NUM_TOKENS_PER_GROUP];
         private float[] scores = new float[MAX_NUM_TOKENS_PER_GROUP];
-        private int numTokens = 0;
-        private int startOffset = 0;
-        private int endOffset = 0;
+        internal int numTokens = 0;
+        internal int startOffset = 0;
+        internal int endOffset = 0;
         private float tot;
-        private int matchStartOffset, matchEndOffset;
+        internal int matchStartOffset, matchEndOffset;
 
         private IOffsetAttribute offsetAtt;
         private ICharTermAttribute termAtt;

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/86d417ed/src/contrib/Highlighter/Highlight/WeightedSpanTermExtractor.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Highlighter/Highlight/WeightedSpanTermExtractor.cs b/src/contrib/Highlighter/Highlight/WeightedSpanTermExtractor.cs
index ff22481..93cd51e 100644
--- a/src/contrib/Highlighter/Highlight/WeightedSpanTermExtractor.cs
+++ b/src/contrib/Highlighter/Highlight/WeightedSpanTermExtractor.cs
@@ -422,7 +422,7 @@ namespace Lucene.Net.Search.Highlight
             {
                 get
                 {
-                    return new AnonymousFilterFields();
+                    return new AnonymousFilterFields(base.Fields);
                 }
             }
 
@@ -449,6 +449,11 @@ namespace Lucene.Net.Search.Highlight
 
         private sealed class AnonymousFilterFields : FilterAtomicReader.FilterFields
         {
+            public AnonymousFilterFields(Fields instance)
+                : base(instance)
+            {
+            }
+
             public override Terms Terms(string field)
             {
                 return base.Terms(DelegatingAtomicReader.FIELD_NAME);

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/86d417ed/src/contrib/Highlighter/VectorHighlight/BaseFragListBuilder.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Highlighter/VectorHighlight/BaseFragListBuilder.cs b/src/contrib/Highlighter/VectorHighlight/BaseFragListBuilder.cs
new file mode 100644
index 0000000..7de4e85
--- /dev/null
+++ b/src/contrib/Highlighter/VectorHighlight/BaseFragListBuilder.cs
@@ -0,0 +1,131 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using WeightedPhraseInfo = Lucene.Net.Search.VectorHighlight.FieldPhraseList.WeightedPhraseInfo;
+
+namespace Lucene.Net.Search.VectorHighlight
+{
+    public abstract class BaseFragListBuilder : IFragListBuilder
+    {
+        public static readonly int MARGIN_DEFAULT = 6;
+        public static readonly int MIN_FRAG_CHAR_SIZE_FACTOR = 3;
+        readonly int margin;
+        readonly int minFragCharSize;
+
+        public BaseFragListBuilder(int margin)
+        {
+            if (margin < 0)
+                throw new ArgumentException(@"margin(" + margin + @") is too small. It must be 0 or higher.");
+            this.margin = margin;
+            this.minFragCharSize = Math.Max(1, margin * MIN_FRAG_CHAR_SIZE_FACTOR);
+        }
+
+        public BaseFragListBuilder()
+            : this(MARGIN_DEFAULT)
+        {
+        }
+
+        protected virtual FieldFragList CreateFieldFragList(FieldPhraseList fieldPhraseList, FieldFragList fieldFragList, int fragCharSize)
+        {
+            if (fragCharSize < minFragCharSize)
+                throw new ArgumentException(@"fragCharSize(" + fragCharSize + @") is too small. It must be " + minFragCharSize + @" or higher.");
+            List<WeightedPhraseInfo> wpil = new List<WeightedPhraseInfo>();
+            IteratorQueue<WeightedPhraseInfo> queue = new IteratorQueue<WeightedPhraseInfo>(fieldPhraseList.PhraseList.GetEnumerator());
+            WeightedPhraseInfo phraseInfo = null;
+            int startOffset = 0;
+            while ((phraseInfo = queue.Top()) != null)
+            {
+                if (phraseInfo.StartOffset < startOffset)
+                {
+                    queue.RemoveTop();
+                    continue;
+                }
+
+                wpil.Clear();
+                int currentPhraseStartOffset = phraseInfo.StartOffset;
+                int currentPhraseEndOffset = phraseInfo.EndOffset;
+                int spanStart = Math.Max(currentPhraseStartOffset - margin, startOffset);
+                int spanEnd = Math.Max(currentPhraseEndOffset, spanStart + fragCharSize);
+                if (AcceptPhrase(queue.RemoveTop(), currentPhraseEndOffset - currentPhraseStartOffset, fragCharSize))
+                {
+                    wpil.Add(phraseInfo);
+                }
+
+                while ((phraseInfo = queue.Top()) != null)
+                {
+                    if (phraseInfo.EndOffset <= spanEnd)
+                    {
+                        currentPhraseEndOffset = phraseInfo.EndOffset;
+                        if (AcceptPhrase(queue.RemoveTop(), currentPhraseEndOffset - currentPhraseStartOffset, fragCharSize))
+                        {
+                            wpil.Add(phraseInfo);
+                        }
+                    }
+                    else
+                    {
+                        break;
+                    }
+                }
+
+                if (wpil.Count == 0)
+                {
+                    continue;
+                }
+
+                int matchLen = currentPhraseEndOffset - currentPhraseStartOffset;
+                int newMargin = Math.Max(0, (fragCharSize - matchLen) / 2);
+                spanStart = currentPhraseStartOffset - newMargin;
+                if (spanStart < startOffset)
+                {
+                    spanStart = startOffset;
+                }
+
+                spanEnd = spanStart + Math.Max(matchLen, fragCharSize);
+                startOffset = spanEnd;
+                fieldFragList.Add(spanStart, spanEnd, wpil);
+            }
+
+            return fieldFragList;
+        }
+
+        protected virtual bool AcceptPhrase(WeightedPhraseInfo info, int matchLength, int fragCharSize)
+        {
+            return info.TermsOffsets.Count <= 1 || matchLength <= fragCharSize;
+        }
+
+        private sealed class IteratorQueue<T>
+            where T : class
+        {
+            private readonly IEnumerator<T> iter;
+            private T top;
+            public IteratorQueue(IEnumerator<T> iter)
+            {
+                this.iter = iter;
+                T removeTop = RemoveTop();
+            }
+
+            public T Top()
+            {
+                return top;
+            }
+
+            public T RemoveTop()
+            {
+                T currentTop = top;
+                if (iter.MoveNext())
+                {
+                    top = iter.Current;
+                }
+                else
+                {
+                    top = null;
+                }
+
+                return currentTop;
+            }
+        }
+
+        public abstract FieldFragList CreateFieldFragList(FieldPhraseList fieldPhraseList, int fragCharSize);
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/86d417ed/src/contrib/Highlighter/VectorHighlight/BaseFragmentsBuilder.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Highlighter/VectorHighlight/BaseFragmentsBuilder.cs b/src/contrib/Highlighter/VectorHighlight/BaseFragmentsBuilder.cs
index f7b2f80..200dbdf 100644
--- a/src/contrib/Highlighter/VectorHighlight/BaseFragmentsBuilder.cs
+++ b/src/contrib/Highlighter/VectorHighlight/BaseFragmentsBuilder.cs
@@ -17,19 +17,22 @@
 
 using System;
 using System.Collections.Generic;
+using System.Linq;
 using System.Text;
 
 using Lucene.Net.Documents;
 using Lucene.Net.Search;
 using Lucene.Net.Index;
 
-using WeightedFragInfo = Lucene.Net.Search.Vectorhighlight.FieldFragList.WeightedFragInfo;
-using SubInfo = Lucene.Net.Search.Vectorhighlight.FieldFragList.WeightedFragInfo.SubInfo;
-using Toffs = Lucene.Net.Search.Vectorhighlight.FieldPhraseList.WeightedPhraseInfo.Toffs;
+using WeightedFragInfo = Lucene.Net.Search.VectorHighlight.FieldFragList.WeightedFragInfo;
+using SubInfo = Lucene.Net.Search.VectorHighlight.FieldFragList.WeightedFragInfo.SubInfo;
+using Toffs = Lucene.Net.Search.VectorHighlight.FieldPhraseList.WeightedPhraseInfo.Toffs;
+using Lucene.Net.Search.Highlight;
+using Lucene.Net.Support;
 
-namespace Lucene.Net.Search.Vectorhighlight
+namespace Lucene.Net.Search.VectorHighlight
 {
-    public abstract class BaseFragmentsBuilder : FragmentsBuilder
+    public abstract class BaseFragmentsBuilder : IFragmentsBuilder
     {
         protected String[] preTags, postTags;
         public static String[] COLORED_PRE_TAGS = {
@@ -43,6 +46,9 @@ namespace Lucene.Net.Search.Vectorhighlight
         };
 
         public static String[] COLORED_POST_TAGS = { "</b>" };
+        private char multiValuedSeparator = ' ';
+        private readonly IBoundaryScanner boundaryScanner;
+        private bool discreteMultiValueHighlighting = false;
 
         protected BaseFragmentsBuilder()
             : this(new String[] { "<b>" }, new String[] { "</b>" })
@@ -51,9 +57,20 @@ namespace Lucene.Net.Search.Vectorhighlight
         }
 
         protected BaseFragmentsBuilder(String[] preTags, String[] postTags)
+            : this(preTags, postTags, new SimpleBoundaryScanner())
+        {
+        }
+
+        protected BaseFragmentsBuilder(IBoundaryScanner boundaryScanner)
+            : this(new String[] { "<b>" }, new String[] { "</b>" }, boundaryScanner)
+        {
+        }
+
+        protected BaseFragmentsBuilder(String[] preTags, String[] postTags, IBoundaryScanner boundaryScanner)
         {
             this.preTags = preTags;
             this.postTags = postTags;
+            this.boundaryScanner = boundaryScanner;
         }
 
         static Object CheckTagsArgument(Object tags)
@@ -63,114 +80,139 @@ namespace Lucene.Net.Search.Vectorhighlight
             throw new ArgumentException("type of preTags/postTags must be a String or String[]");
         }
 
-        public abstract List<WeightedFragInfo> GetWeightedFragInfoList(List<WeightedFragInfo> src);
+        public abstract IList<WeightedFragInfo> GetWeightedFragInfoList(IList<WeightedFragInfo> src);
+
+        private static readonly IEncoder NULL_ENCODER = new DefaultEncoder();
 
         public virtual String CreateFragment(IndexReader reader, int docId, String fieldName, FieldFragList fieldFragList)
         {
-            String[] fragments = CreateFragments(reader, docId, fieldName, fieldFragList, 1);
+            return CreateFragment(reader, docId, fieldName, fieldFragList, preTags, postTags, NULL_ENCODER);
+        }
+
+        public virtual String[] CreateFragments(IndexReader reader, int docId, String fieldName, FieldFragList fieldFragList, int maxNumFragments)
+        {
+            return CreateFragments(reader, docId, fieldName, fieldFragList, maxNumFragments, preTags, postTags, NULL_ENCODER);
+        }
+
+        public String CreateFragment(IndexReader reader, int docId,
+            String fieldName, FieldFragList fieldFragList, String[] preTags, String[] postTags,
+            IEncoder encoder)
+        {
+            String[] fragments = CreateFragments(reader, docId, fieldName, fieldFragList, 1,
+                preTags, postTags, encoder);
             if (fragments == null || fragments.Length == 0) return null;
             return fragments[0];
         }
 
-        public virtual String[] CreateFragments(IndexReader reader, int docId, String fieldName, FieldFragList fieldFragList, int maxNumFragments)
+        public String[] CreateFragments(IndexReader reader, int docId,
+            String fieldName, FieldFragList fieldFragList, int maxNumFragments,
+            String[] preTags, String[] postTags, IEncoder encoder)
         {
+
             if (maxNumFragments < 0)
+            {
                 throw new ArgumentException("maxNumFragments(" + maxNumFragments + ") must be positive number.");
+            }
 
-            List<WeightedFragInfo> fragInfos = GetWeightedFragInfoList(fieldFragList.fragInfos);
-
-            List<String> fragments = new List<String>(maxNumFragments);
+            IList<WeightedFragInfo> fragInfos = fieldFragList.FragInfos;
             Field[] values = GetFields(reader, docId, fieldName);
-            if (values.Length == 0) return null;
+            if (values.Length == 0)
+            {
+                return null;
+            }
+
+            if (discreteMultiValueHighlighting && values.Length > 1)
+            {
+                fragInfos = DiscreteMultiValueHighlighting(fragInfos, values);
+            }
+
+            fragInfos = GetWeightedFragInfoList(fragInfos);
+            int limitFragments = maxNumFragments < fragInfos.Count ? maxNumFragments : fragInfos.Count;
+            List<String> fragments = new List<String>(limitFragments);
+
             StringBuilder buffer = new StringBuilder();
             int[] nextValueIndex = { 0 };
-            for (int n = 0; n < maxNumFragments && n < fragInfos.Count; n++)
+            for (int n = 0; n < limitFragments; n++)
             {
                 WeightedFragInfo fragInfo = fragInfos[n];
-                fragments.Add(MakeFragment(buffer, nextValueIndex, values, fragInfo));
+                fragments.Add(MakeFragment(buffer, nextValueIndex, values, fragInfo, preTags, postTags, encoder));
             }
             return fragments.ToArray();
         }
 
-        [Obsolete]
-        protected virtual String[] GetFieldValues(IndexReader reader, int docId, String fieldName)
-        {
-            Document doc = reader.Document(docId, new MapFieldSelector(new String[] { fieldName }));
-            return doc.GetValues(fieldName); // according to Document class javadoc, this never returns null
-        }
-
         protected virtual Field[] GetFields(IndexReader reader, int docId, String fieldName)
         {
             // according to javadoc, doc.getFields(fieldName) cannot be used with lazy loaded field???
-            Document doc = reader.Document(docId, new MapFieldSelector(new String[] { fieldName }));
-            return doc.GetFields(fieldName); // according to Document class javadoc, this never returns null
+            IList<Field> fields = new List<Field>();
+            reader.Document(docId, new AnonymousGetFieldsStoredFieldVisitor(fields, fieldName));
+            return fields.ToArray();
         }
 
-        [Obsolete]
-        protected virtual String MakeFragment(StringBuilder buffer, int[] index, String[] values, WeightedFragInfo fragInfo)
+        private sealed class AnonymousGetFieldsStoredFieldVisitor : StoredFieldVisitor
         {
-            int s = fragInfo.startOffset;
-            return MakeFragment(fragInfo, GetFragmentSource(buffer, index, values, s, fragInfo.endOffset), s);
-        }
+            private readonly IList<Field> fields;
+            private readonly string fieldName;
 
-        protected virtual String MakeFragment(StringBuilder buffer, int[] index, Field[] values, WeightedFragInfo fragInfo)
-        {
-            int s = fragInfo.startOffset;
-            return MakeFragment(fragInfo, GetFragmentSource(buffer, index, values, s, fragInfo.endOffset), s);
-        }
+            public AnonymousGetFieldsStoredFieldVisitor(IList<Field> fields, string fieldName)
+            {
+                this.fields = fields;
+                this.fieldName = fieldName;
+            }
 
-        private String MakeFragment(WeightedFragInfo fragInfo, String src, int s)
-        {
-            StringBuilder fragment = new StringBuilder();
-            int srcIndex = 0;
-            foreach (SubInfo subInfo in fragInfo.subInfos)
+            public override void StringField(FieldInfo fieldInfo, string value)
             {
-                foreach (Toffs to in subInfo.termsOffsets)
-                {
-                    fragment.Append(src.Substring(srcIndex, to.startOffset - s - srcIndex)).Append(GetPreTag(subInfo.seqnum))
-                      .Append(src.Substring(to.startOffset - s, to.endOffset - s - (to.startOffset - s))).Append(GetPostTag(subInfo.seqnum));
-                    srcIndex = to.endOffset - s;
-                }
+                FieldType ft = new FieldType(TextField.TYPE_STORED);
+                ft.StoreTermVectors = fieldInfo.HasVectors;
+                fields.Add(new Field(fieldInfo.name, value, ft));
+            }
+
+            public override Status NeedsField(FieldInfo fieldInfo)
+            {
+                return fieldInfo.name.Equals(fieldName) ? Status.YES : Status.NO;
             }
-            fragment.Append(src.Substring(srcIndex));
-            return fragment.ToString();
         }
 
-        /*
-        [Obsolete]
-        protected String MakeFragment(StringBuilder buffer, int[] index, String[] values, WeightedFragInfo fragInfo)
+        protected String MakeFragment(StringBuilder buffer, int[] index, Field[] values, WeightedFragInfo fragInfo,
+            String[] preTags, String[] postTags, IEncoder encoder)
         {
             StringBuilder fragment = new StringBuilder();
-            int s = fragInfo.startOffset;
-            String src = GetFragmentSource(buffer, index, values, s, fragInfo.endOffset);
+            int s = fragInfo.StartOffset;
+            int[] modifiedStartOffset = { s };
+            String src = GetFragmentSourceMSO(buffer, index, values, s, fragInfo.EndOffset, modifiedStartOffset);
             int srcIndex = 0;
-            foreach (SubInfo subInfo in fragInfo.subInfos)
+            foreach (SubInfo subInfo in fragInfo.SubInfos)
             {
-                foreach (Toffs to in subInfo.termsOffsets)
+                foreach (Toffs to in subInfo.TermsOffsets)
                 {
-                    fragment.Append(src.Substring(srcIndex, to.startOffset - s - srcIndex)).Append(GetPreTag(subInfo.seqnum))
-                      .Append(src.Substring(to.startOffset - s, to.endOffset - s - (to.startOffset - s))).Append(GetPostTag(subInfo.seqnum));
-                    srcIndex = to.endOffset - s;
+                    fragment
+                      .Append(encoder.EncodeText(src.Substring(srcIndex, to.StartOffset - modifiedStartOffset[0] - srcIndex)))
+                      .Append(GetPreTag(preTags, subInfo.Seqnum))
+                      .Append(encoder.EncodeText(src.Substring(to.StartOffset - modifiedStartOffset[0], to.EndOffset - modifiedStartOffset[0] - to.StartOffset)))
+                      .Append(GetPostTag(postTags, subInfo.Seqnum));
+                    srcIndex = to.EndOffset - modifiedStartOffset[0];
                 }
             }
-            fragment.Append(src.Substring(srcIndex));
+            fragment.Append(encoder.EncodeText(src.Substring(srcIndex)));
             return fragment.ToString();
         }
-        */
-
 
-        [Obsolete]
-        protected virtual String GetFragmentSource(StringBuilder buffer, int[] index, String[] values, int startOffset, int endOffset)
+        protected string GetFragmentSourceMSO(StringBuilder buffer, int[] index, Field[] values,
+            int startOffset, int endOffset, int[] modifiedStartOffset)
         {
             while (buffer.Length < endOffset && index[0] < values.Length)
             {
-                buffer.Append(values[index[0]]);
-                if (values[index[0]].Length > 0 && index[0] + 1 < values.Length)
-                    buffer.Append(' ');
-                index[0]++;
+                buffer.Append(values[index[0]++].StringValue);
+                buffer.Append(MultiValuedSeparator);
             }
-            int eo = buffer.Length < endOffset ? buffer.Length : endOffset;
-            return buffer.ToString().Substring(startOffset, eo - startOffset);
+            int bufferLength = buffer.Length;
+            // we added the multi value char to the last buffer, ignore it
+            if (values[index[0] - 1].FieldTypeValue.Tokenized)
+            {
+                bufferLength--;
+            }
+            int eo = bufferLength < endOffset ? bufferLength : boundaryScanner.FindEndOffset(buffer, endOffset);
+            modifiedStartOffset[0] = boundaryScanner.FindStartOffset(buffer, startOffset);
+            return buffer.ToString().Substring(modifiedStartOffset[0], eo - modifiedStartOffset[0]);
         }
 
         protected virtual String GetFragmentSource(StringBuilder buffer, int[] index, Field[] values, int startOffset, int endOffset)
@@ -178,21 +220,162 @@ namespace Lucene.Net.Search.Vectorhighlight
             while (buffer.Length < endOffset && index[0] < values.Length)
             {
                 buffer.Append(values[index[0]].StringValue);
-                if (values[index[0]].IsTokenized && values[index[0]].StringValue.Length > 0 && index[0] + 1 < values.Length)
-                    buffer.Append(' ');
+                buffer.Append(multiValuedSeparator);
                 index[0]++;
             }
             int eo = buffer.Length < endOffset ? buffer.Length : endOffset;
             return buffer.ToString().Substring(startOffset, eo - startOffset);
         }
 
+        protected virtual List<WeightedFragInfo> DiscreteMultiValueHighlighting(IList<WeightedFragInfo> fragInfos, Field[] fields)
+        {
+            IDictionary<String, List<WeightedFragInfo>> fieldNameToFragInfos = new HashMap<String, List<WeightedFragInfo>>();
+            foreach (Field field in fields)
+            {
+                fieldNameToFragInfos[field.Name] = new List<WeightedFragInfo>();
+            }
+
+            foreach (WeightedFragInfo fragInfo in fragInfos)
+            {
+                int fieldStart;
+                int fieldEnd = 0;
+                bool shouldContinueOuter = false; // .NET port: using in place of continue-to-label
+
+                foreach (Field field in fields)
+                {
+                    if (string.IsNullOrEmpty(field.StringValue))
+                    {
+                        fieldEnd++;
+                        continue;
+                    }
+
+                    fieldStart = fieldEnd;
+                    fieldEnd += field.StringValue.Length + 1;
+                    if (fragInfo.StartOffset >= fieldStart && fragInfo.EndOffset >= fieldStart && fragInfo.StartOffset <= fieldEnd && fragInfo.EndOffset <= fieldEnd)
+                    {
+                        fieldNameToFragInfos[field.Name].Add(fragInfo);
+                        shouldContinueOuter = true;
+                        //continue;
+                        break;
+                    }
+
+                    if (fragInfo.SubInfos.Count == 0)
+                    {
+                        shouldContinueOuter = true;
+                        //continue;
+                        break;
+                    }
+
+                    Toffs firstToffs = fragInfo.SubInfos[0].TermsOffsets[0];
+                    if (fragInfo.StartOffset >= fieldEnd || firstToffs.StartOffset >= fieldEnd)
+                    {
+                        continue;
+                    }
+
+                    int fragStart = fieldStart;
+                    if (fragInfo.StartOffset > fieldStart && fragInfo.StartOffset < fieldEnd)
+                    {
+                        fragStart = fragInfo.StartOffset;
+                    }
+
+                    int fragEnd = fieldEnd;
+                    if (fragInfo.EndOffset > fieldStart && fragInfo.EndOffset < fieldEnd)
+                    {
+                        fragEnd = fragInfo.EndOffset;
+                    }
+
+                    List<SubInfo> subInfos = new List<SubInfo>();
+                    WeightedFragInfo weightedFragInfo = new WeightedFragInfo(fragStart, fragEnd, subInfos, fragInfo.TotalBoost);
+                    //IEnumerator<SubInfo> subInfoIterator = fragInfo.SubInfos.GetEnumerator();
+
+                    for (int i = 0; i < fragInfo.SubInfos.Count; i++)
+                    //while (subInfoIterator.MoveNext())
+                    {
+                        //SubInfo subInfo = subInfoIterator.Current;
+                        SubInfo subInfo = fragInfo.SubInfos[i];
+                        List<Toffs> toffsList = new List<Toffs>();
+                        //IEnumerator<Toffs> toffsIterator = subInfo.TermsOffsets.GetEnumerator();
+
+                        for (int j = 0; j < subInfo.TermsOffsets.Count; j++)
+                        //while (toffsIterator.MoveNext())
+                        {
+                            //Toffs toffs = toffsIterator.Current;
+                            Toffs toffs = subInfo.TermsOffsets[j];
+
+                            if (toffs.StartOffset >= fieldStart && toffs.EndOffset <= fieldEnd)
+                            {
+                                toffsList.Add(toffs);
+                                //toffsIterator.Remove();
+                                subInfo.TermsOffsets.RemoveAt(j--);
+                            }
+                        }
+
+                        if (toffsList.Count > 0)
+                        {
+                            subInfos.Add(new SubInfo(subInfo.Text, toffsList, subInfo.Seqnum));
+                        }
+
+                        if (subInfo.TermsOffsets.Count == 0)
+                        {
+                            fragInfo.SubInfos.RemoveAt(i--);
+                        }
+                    }
+
+                    fieldNameToFragInfos[field.Name].Add(weightedFragInfo);
+                }
+
+                // not really needed right now, but can't hurt
+                if (shouldContinueOuter)
+                    continue;
+            }
+
+            List<WeightedFragInfo> result = new List<WeightedFragInfo>();
+            foreach (List<WeightedFragInfo> weightedFragInfos in fieldNameToFragInfos.Values)
+            {
+                result.AddRange(weightedFragInfos);
+            }
+
+            result.Sort(new AnonymousComparator());
+            return result;
+        }
+
+        private sealed class AnonymousComparator : IComparer<WeightedFragInfo>
+        {
+            public int Compare(FieldFragList.WeightedFragInfo info1, FieldFragList.WeightedFragInfo info2)
+            {
+                return info1.StartOffset - info2.StartOffset;
+            }
+        }
+
+        public char MultiValuedSeparator
+        {
+            get { return multiValuedSeparator; }
+            set { multiValuedSeparator = value; }
+        }
+
+        public bool IsDiscreteMultiValueHighlighting
+        {
+            get { return discreteMultiValueHighlighting; }
+            set { discreteMultiValueHighlighting = value; }
+        }
+
         protected virtual String GetPreTag(int num)
         {
+            return GetPreTag(preTags, num);
+        }
+
+        protected virtual String GetPostTag(int num)
+        {
+            return GetPostTag(postTags, num);
+        }
+
+        protected virtual String GetPreTag(String[] preTags, int num)
+        {
             int n = num % preTags.Length;
             return preTags[n];
         }
 
-        protected virtual String GetPostTag(int num)
+        protected virtual String GetPostTag(String[] postTags, int num)
         {
             int n = num % postTags.Length;
             return postTags[n];

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/86d417ed/src/contrib/Highlighter/VectorHighlight/BreakIteratorBoundaryScanner.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Highlighter/VectorHighlight/BreakIteratorBoundaryScanner.cs b/src/contrib/Highlighter/VectorHighlight/BreakIteratorBoundaryScanner.cs
new file mode 100644
index 0000000..b023452
--- /dev/null
+++ b/src/contrib/Highlighter/VectorHighlight/BreakIteratorBoundaryScanner.cs
@@ -0,0 +1,35 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net.Search.VectorHighlight
+{
+    // .NET: without re-implementing BreakIterator from scratch, we can't use this type.
+    //public class BreakIteratorBoundaryScanner : IBoundaryScanner
+    //{
+    //    readonly BreakIterator bi;
+
+    //    public BreakIteratorBoundaryScanner(BreakIterator bi)
+    //    {
+    //        this.bi = bi;
+    //    }
+
+    //    public override int FindStartOffset(StringBuilder buffer, int start)
+    //    {
+    //        if (start > buffer.Length || start < 1)
+    //            return start;
+    //        bi.SetText(buffer.ToString().Substring(0, start));
+    //        bi.Last();
+    //        return bi.Previous();
+    //    }
+
+    //    public override int FindEndOffset(StringBuilder buffer, int start)
+    //    {
+    //        if (start > buffer.Length || start < 0)
+    //            return start;
+    //        bi.SetText(buffer.ToString().Substring(start));
+    //        return bi.Next() + start;
+    //    }
+    //}
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/86d417ed/src/contrib/Highlighter/VectorHighlight/FastVectorHighlighter.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Highlighter/VectorHighlight/FastVectorHighlighter.cs b/src/contrib/Highlighter/VectorHighlight/FastVectorHighlighter.cs
index 3f599f5..d051907 100644
--- a/src/contrib/Highlighter/VectorHighlight/FastVectorHighlighter.cs
+++ b/src/contrib/Highlighter/VectorHighlight/FastVectorHighlighter.cs
@@ -20,24 +20,25 @@ using System.Collections.Generic;
 using System.Text;
 
 using Lucene.Net.Index;
+using Lucene.Net.Search.Highlight;
 
-namespace Lucene.Net.Search.Vectorhighlight
+namespace Lucene.Net.Search.VectorHighlight
 {
     public class FastVectorHighlighter
     {
-
-        public static bool DEFAULT_PHRASE_HIGHLIGHT = true;
-        public static bool DEFAULT_FIELD_MATCH = true;
-        private bool phraseHighlight;
-        private bool fieldMatch;
-        private FragListBuilder fragListBuilder;
-        private FragmentsBuilder fragmentsBuilder;
+        public const bool DEFAULT_PHRASE_HIGHLIGHT = true;
+        public const bool DEFAULT_FIELD_MATCH = true;
+        private readonly bool phraseHighlight;
+        private readonly bool fieldMatch;
+        private readonly IFragListBuilder fragListBuilder;
+        private readonly IFragmentsBuilder fragmentsBuilder;
         private int phraseLimit = Int32.MaxValue;
 
         /// <summary>
         /// the default constructor.
         /// </summary>
-        public FastVectorHighlighter():this(DEFAULT_PHRASE_HIGHLIGHT, DEFAULT_FIELD_MATCH)
+        public FastVectorHighlighter()
+            : this(DEFAULT_PHRASE_HIGHLIGHT, DEFAULT_FIELD_MATCH)
         {
         }
 
@@ -46,7 +47,8 @@ namespace Lucene.Net.Search.Vectorhighlight
         /// </summary>
         /// <param name="phraseHighlight">true or false for phrase highlighting</param>
         /// <param name="fieldMatch">true of false for field matching</param>
-        public FastVectorHighlighter(bool phraseHighlight, bool fieldMatch):this(phraseHighlight, fieldMatch, new SimpleFragListBuilder(), new ScoreOrderFragmentsBuilder())
+        public FastVectorHighlighter(bool phraseHighlight, bool fieldMatch)
+            : this(phraseHighlight, fieldMatch, new SimpleFragListBuilder(), new ScoreOrderFragmentsBuilder())
         {
         }
 
@@ -58,7 +60,7 @@ namespace Lucene.Net.Search.Vectorhighlight
         /// <param name="fragListBuilder">an instance of FragListBuilder</param>
         /// <param name="fragmentsBuilder">an instance of FragmentsBuilder</param>
         public FastVectorHighlighter(bool phraseHighlight, bool fieldMatch,
-            FragListBuilder fragListBuilder, FragmentsBuilder fragmentsBuilder)
+            IFragListBuilder fragListBuilder, IFragmentsBuilder fragmentsBuilder)
         {
             this.phraseHighlight = phraseHighlight;
             this.fieldMatch = fieldMatch;
@@ -73,9 +75,13 @@ namespace Lucene.Net.Search.Vectorhighlight
         /// <returns>the created FieldQuery object</returns>
         public FieldQuery GetFieldQuery(Query query)
         {
-            return new FieldQuery(query, phraseHighlight, fieldMatch);
+            return new FieldQuery(query, null, phraseHighlight, fieldMatch);
         }
 
+        public FieldQuery GetFieldQuery(Query query, IndexReader reader)
+        {
+            return new FieldQuery(query, reader, phraseHighlight, fieldMatch);
+        }
 
         /// <summary>
         /// return the best fragment.
@@ -89,7 +95,7 @@ namespace Lucene.Net.Search.Vectorhighlight
         public String GetBestFragment(FieldQuery fieldQuery, IndexReader reader, int docId,
             String fieldName, int fragCharSize)
         {
-            FieldFragList fieldFragList = GetFieldFragList(fieldQuery, reader, docId, fieldName, fragCharSize);
+            FieldFragList fieldFragList = GetFieldFragList(fragListBuilder, fieldQuery, reader, docId, fieldName, fragCharSize);
             return fragmentsBuilder.CreateFragment(reader, docId, fieldName, fieldFragList);
         }
 
@@ -106,11 +112,32 @@ namespace Lucene.Net.Search.Vectorhighlight
         public String[] GetBestFragments(FieldQuery fieldQuery, IndexReader reader, int docId,
             String fieldName, int fragCharSize, int maxNumFragments)
         {
-            FieldFragList fieldFragList = GetFieldFragList(fieldQuery, reader, docId, fieldName, fragCharSize);
+            FieldFragList fieldFragList = GetFieldFragList(fragListBuilder, fieldQuery, reader, docId, fieldName, fragCharSize);
             return fragmentsBuilder.CreateFragments(reader, docId, fieldName, fieldFragList, maxNumFragments);
         }
 
-        private FieldFragList GetFieldFragList(FieldQuery fieldQuery, IndexReader reader, int docId,
+        public String GetBestFragment(FieldQuery fieldQuery, IndexReader reader, int docId,
+            String fieldName, int fragCharSize,
+            IFragListBuilder fragListBuilder, IFragmentsBuilder fragmentsBuilder,
+            String[] preTags, String[] postTags, IEncoder encoder)
+        {
+            FieldFragList fieldFragList = GetFieldFragList(fragListBuilder, fieldQuery, reader, docId, fieldName, fragCharSize);
+            return fragmentsBuilder.CreateFragment(reader, docId, fieldName, fieldFragList, preTags, postTags, encoder);
+        }
+
+        public String[] GetBestFragments(FieldQuery fieldQuery, IndexReader reader, int docId,
+            String fieldName, int fragCharSize, int maxNumFragments,
+            IFragListBuilder fragListBuilder, IFragmentsBuilder fragmentsBuilder,
+            String[] preTags, String[] postTags, IEncoder encoder)
+        {
+            FieldFragList fieldFragList =
+              GetFieldFragList(fragListBuilder, fieldQuery, reader, docId, fieldName, fragCharSize);
+            return fragmentsBuilder.CreateFragments(reader, docId, fieldName, fieldFragList, maxNumFragments,
+                preTags, postTags, encoder);
+        }
+
+        private FieldFragList GetFieldFragList(IFragListBuilder fragListBuilder, 
+            FieldQuery fieldQuery, IndexReader reader, int docId,
             String fieldName, int fragCharSize)
         {
             FieldTermStack fieldTermStack = new FieldTermStack(reader, docId, fieldName, fieldQuery);
@@ -122,29 +149,35 @@ namespace Lucene.Net.Search.Vectorhighlight
         /// return whether phraseHighlight or not.
         /// </summary>
         /// <returns>return whether phraseHighlight or not.</returns>
-        public bool IsPhraseHighlight()
+        public bool IsPhraseHighlight
         {
-            return phraseHighlight;
+            get
+            {
+                return phraseHighlight;
+            }
         }
 
         /// <summary>
         /// return whether fieldMatch or not.
         /// </summary>
         /// <returns>return whether fieldMatch or not.</returns>
-        public bool IsFieldMatch()
+        public bool IsFieldMatch
         {
-            return fieldMatch;
+            get
+            {
+                return fieldMatch;
+            }
         }
-                                
+
         /// <summary>
         /// The maximum number of phrases to analyze when searching for the highest-scoring phrase.
         /// The default is 5000.  To ensure that all phrases are analyzed, use a negative number or Integer.MAX_VALUE.
         /// </summary>
-        
+
         public int PhraseLimit
         {
-            get{ return phraseLimit; }
-            set{ this.phraseLimit = value; }
+            get { return phraseLimit; }
+            set { this.phraseLimit = value; }
         }
     }
 }

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/86d417ed/src/contrib/Highlighter/VectorHighlight/FieldFragList.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Highlighter/VectorHighlight/FieldFragList.cs b/src/contrib/Highlighter/VectorHighlight/FieldFragList.cs
index 8f9a566..48c0f76 100644
--- a/src/contrib/Highlighter/VectorHighlight/FieldFragList.cs
+++ b/src/contrib/Highlighter/VectorHighlight/FieldFragList.cs
@@ -23,29 +23,26 @@ using Lucene.Net.Documents;
 using Lucene.Net.Search;
 using Lucene.Net.Index;
 
-using Toffs = Lucene.Net.Search.Vectorhighlight.FieldPhraseList.WeightedPhraseInfo.Toffs;
-using WeightedPhraseInfo = Lucene.Net.Search.Vectorhighlight.FieldPhraseList.WeightedPhraseInfo;
+using Toffs = Lucene.Net.Search.VectorHighlight.FieldPhraseList.WeightedPhraseInfo.Toffs;
+using WeightedPhraseInfo = Lucene.Net.Search.VectorHighlight.FieldPhraseList.WeightedPhraseInfo;
 
 
-namespace Lucene.Net.Search.Vectorhighlight
+namespace Lucene.Net.Search.VectorHighlight
 {
     ///<summary>
     /// FieldFragList has a list of "frag info" that is used by FragmentsBuilder class
     /// to create fragments (snippets).
     ///</summary>
-    public class FieldFragList
+    public abstract class FieldFragList
     {
-        private int fragCharSize;
-        public List<WeightedFragInfo> fragInfos = new List<WeightedFragInfo>();
-
-        
+        private IList<WeightedFragInfo> fragInfos = new List<WeightedFragInfo>();
+                
         /// <summary>
         /// a constructor.
         /// </summary>
         /// <param name="fragCharSize">the length (number of chars) of a fragment</param>
         public FieldFragList(int fragCharSize)
         {
-            this.fragCharSize = fragCharSize;
         }
                 
         /// <summary>
@@ -54,30 +51,46 @@ namespace Lucene.Net.Search.Vectorhighlight
         /// <param name="startOffset">start offset of the fragment</param>
         /// <param name="endOffset">end offset of the fragment</param>
         /// <param name="phraseInfoList">list of WeightedPhraseInfo objects</param>
-        public void Add(int startOffset, int endOffset, List<WeightedPhraseInfo> phraseInfoList)
+        public abstract void Add(int startOffset, int endOffset, IList<WeightedPhraseInfo> phraseInfoList);
+
+        public IList<WeightedFragInfo> FragInfos
         {
-            fragInfos.Add(new WeightedFragInfo(startOffset, endOffset, phraseInfoList));
+            get { return fragInfos; }
         }
 
         public class WeightedFragInfo
         {
+            private IList<SubInfo> subInfos;
+            private float totalBoost;
+            private int startOffset;
+            private int endOffset;
 
-            internal List<SubInfo> subInfos;
-            internal float totalBoost;
-            internal int startOffset;
-            internal int endOffset;
-
-            public WeightedFragInfo(int startOffset, int endOffset, List<WeightedPhraseInfo> phraseInfoList)
+            public WeightedFragInfo(int startOffset, int endOffset, IList<SubInfo> subInfos, float totalBoost)
             {
                 this.startOffset = startOffset;
                 this.endOffset = endOffset;
-                subInfos = new List<SubInfo>();
-                foreach (WeightedPhraseInfo phraseInfo in phraseInfoList)
-                {
-                    SubInfo subInfo = new SubInfo(phraseInfo.text, phraseInfo.termsOffsets, phraseInfo.seqnum);
-                    subInfos.Add(subInfo);
-                    totalBoost += phraseInfo.boost;
-                }
+                this.totalBoost = totalBoost;
+                this.subInfos = subInfos;
+            }
+
+            public IList<SubInfo> SubInfos
+            {
+                get { return subInfos; }
+            }
+
+            public float TotalBoost
+            {
+                get { return totalBoost; }
+            }
+
+            public int StartOffset
+            {
+                get { return startOffset; }
+            }
+
+            public int EndOffset
+            {
+                get { return endOffset; }
             }
 
             public override string ToString()
@@ -90,19 +103,35 @@ namespace Lucene.Net.Search.Vectorhighlight
                 return sb.ToString();
             }
 
-            internal class SubInfo
+            public class SubInfo
             {
-                internal String text;  // unnecessary member, just exists for debugging purpose
-                internal List<Toffs> termsOffsets;   // usually termsOffsets.size() == 1,
+                private readonly String text;  // unnecessary member, just exists for debugging purpose
+                private readonly IList<Toffs> termsOffsets;   // usually termsOffsets.size() == 1,
                 // but if position-gap > 1 and slop > 0 then size() could be greater than 1
-                internal int seqnum;
-                internal SubInfo(String text, List<Toffs> termsOffsets, int seqnum)
+                private int seqnum;
+
+                public SubInfo(String text, IList<Toffs> termsOffsets, int seqnum)
                 {
                     this.text = text;
                     this.termsOffsets = termsOffsets;
                     this.seqnum = seqnum;
                 }
 
+                public IList<Toffs> TermsOffsets
+                {
+                    get { return termsOffsets; }
+                }
+
+                public int Seqnum
+                {
+                    get { return seqnum; }
+                }
+
+                public string Text
+                {
+                    get { return text; }
+                }
+
                 public override string ToString()
                 {
                     StringBuilder sb = new StringBuilder();

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/86d417ed/src/contrib/Highlighter/VectorHighlight/FieldPhraseList.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Highlighter/VectorHighlight/FieldPhraseList.cs b/src/contrib/Highlighter/VectorHighlight/FieldPhraseList.cs
index 2ec3a60..07b7be8 100644
--- a/src/contrib/Highlighter/VectorHighlight/FieldPhraseList.cs
+++ b/src/contrib/Highlighter/VectorHighlight/FieldPhraseList.cs
@@ -23,10 +23,10 @@ using Lucene.Net.Documents;
 using Lucene.Net.Search;
 using Lucene.Net.Index;
 
-using TermInfo = Lucene.Net.Search.Vectorhighlight.FieldTermStack.TermInfo;
-using QueryPhraseMap = Lucene.Net.Search.Vectorhighlight.FieldQuery.QueryPhraseMap;
+using TermInfo = Lucene.Net.Search.VectorHighlight.FieldTermStack.TermInfo;
+using QueryPhraseMap = Lucene.Net.Search.VectorHighlight.FieldQuery.QueryPhraseMap;
 
-namespace Lucene.Net.Search.Vectorhighlight
+namespace Lucene.Net.Search.VectorHighlight
 {
     /// <summary>
     /// FieldPhraseList has a list of WeightedPhraseInfo that is used by FragListBuilder
@@ -41,10 +41,15 @@ namespace Lucene.Net.Search.Vectorhighlight
         /// <param name="fieldQuery">FieldQuery object</param>
         /// <param name="fieldTermStack">FieldTermStack object</param>
         /// </summary>
-        public FieldPhraseList(FieldTermStack fieldTermStack, FieldQuery fieldQuery) : this(fieldTermStack, fieldQuery, Int32.MaxValue)
+        public FieldPhraseList(FieldTermStack fieldTermStack, FieldQuery fieldQuery) 
+            : this(fieldTermStack, fieldQuery, Int32.MaxValue)
         {
         }
-  
+
+        public LinkedList<WeightedPhraseInfo> PhraseList
+        {
+            get { return phraseList; }
+        }
 
         /// <summary>
         /// a constructor. 
@@ -116,7 +121,13 @@ namespace Lucene.Net.Search.Vectorhighlight
         {
             foreach (WeightedPhraseInfo existWpi in phraseList)
             {
-                if (existWpi.IsOffsetOverlap(wpi)) return;
+                if (existWpi.IsOffsetOverlap(wpi))
+                {
+                    // WeightedPhraseInfo.addIfNoOverlap() dumps the second part of, for example, hyphenated words (social-economics). 
+                    // The result is that all information in TermInfo is lost and not available for further operations.
+                    existWpi.TermsInfos.AddRange(wpi.TermsInfos);
+                    return;
+                }
             }
             phraseList.AddLast(wpi);
         }
@@ -129,15 +140,42 @@ namespace Lucene.Net.Search.Vectorhighlight
             // but if position-gap > 1 and slop > 0 then size() could be greater than 1
             internal float boost;  // query boost
             internal int seqnum;
+            
+            private List<TermInfo> termsInfos;
+
+            public string Text
+            {
+                get { return text; }
+            }
+
+            public List<Toffs> TermsOffsets
+            {
+                get { return termsOffsets; }
+            }
+
+            public float Boost
+            {
+                get { return boost; }
+            }
 
-            public WeightedPhraseInfo(LinkedList<TermInfo> terms, float boost):  this(terms, boost, 0)
+            public List<TermInfo> TermsInfos
             {
+                get { return termsInfos; }
             }
 
-            public WeightedPhraseInfo(LinkedList<TermInfo> terms, float boost, int number)
+            public WeightedPhraseInfo(LinkedList<TermInfo> terms, float boost)
+                : this(terms, boost, 0)
+            {
+            }
+
+            public WeightedPhraseInfo(LinkedList<TermInfo> terms, float boost, int seqnum)
             {
                 this.boost = boost;
-                this.seqnum = number;
+                this.seqnum = seqnum;
+
+                // We keep TermInfos for further operations
+                termsInfos = new List<TermInfo>(terms);
+
                 termsOffsets = new List<Toffs>(terms.Count);
                 TermInfo ti = terms.First.Value;
                 termsOffsets.Add(new Toffs(ti.StartOffset, ti.EndOffset));
@@ -161,7 +199,7 @@ namespace Lucene.Net.Search.Vectorhighlight
                     if (ti.Position - pos == 1)
                     {
                         Toffs to = termsOffsets[termsOffsets.Count - 1];
-                        to.SetEndOffset(ti.EndOffset);
+                        to.EndOffset = ti.EndOffset;
                     }
                     else
                     {
@@ -208,19 +246,33 @@ namespace Lucene.Net.Search.Vectorhighlight
                 return sb.ToString();
             }
 
+            public int Seqnum
+            {
+                get { return seqnum; }
+            }
+
             public class Toffs
             {
                 internal int startOffset;
                 internal int endOffset;
+
                 public Toffs(int startOffset, int endOffset)
                 {
                     this.startOffset = startOffset;
                     this.endOffset = endOffset;
                 }
-                internal void SetEndOffset(int endOffset)
+
+                public int StartOffset
                 {
-                    this.endOffset = endOffset;
+                    get { return startOffset; }
                 }
+
+                public int EndOffset
+                {
+                    get { return endOffset; }
+                    set { endOffset = value; }
+                }
+
                 public override string ToString()
                 {
                     StringBuilder sb = new StringBuilder();

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/86d417ed/src/contrib/Highlighter/VectorHighlight/FieldQuery.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Highlighter/VectorHighlight/FieldQuery.cs b/src/contrib/Highlighter/VectorHighlight/FieldQuery.cs
index 9d5d198..09017b8 100644
--- a/src/contrib/Highlighter/VectorHighlight/FieldQuery.cs
+++ b/src/contrib/Highlighter/VectorHighlight/FieldQuery.cs
@@ -22,9 +22,10 @@ using System.Text;
 using Lucene.Net.Search;
 using Lucene.Net.Index;
 using Lucene.Net.Support.Compatibility;
-using TermInfo = Lucene.Net.Search.Vectorhighlight.FieldTermStack.TermInfo;
+using TermInfo = Lucene.Net.Search.VectorHighlight.FieldTermStack.TermInfo;
+using Lucene.Net.Support;
 
-namespace Lucene.Net.Search.Vectorhighlight
+namespace Lucene.Net.Search.VectorHighlight
 {
     public class FieldQuery
     {
@@ -32,99 +33,123 @@ namespace Lucene.Net.Search.Vectorhighlight
 
         // fieldMatch==true,  Map<fieldName,QueryPhraseMap>
         // fieldMatch==false, Map<null,QueryPhraseMap>
-        public HashMap<String, QueryPhraseMap> rootMaps = new HashMap<String, QueryPhraseMap>();
+        IDictionary<String, QueryPhraseMap> rootMaps = new HashMap<String, QueryPhraseMap>();
 
         // fieldMatch==true,  Map<fieldName,setOfTermsInQueries>
         // fieldMatch==false, Map<null,setOfTermsInQueries>
-        public HashMap<String, List<String>> termSetMap = new HashMap<String, List<String>>();
+        IDictionary<String, ISet<String>> termSetMap = new HashMap<String, ISet<String>>();
 
         int termOrPhraseNumber; // used for colored tag support
 
-        public FieldQuery(Query query, bool phraseHighlight, bool fieldMatch)
+        // The maximum number of different matching terms accumulated from any one MultiTermQuery
+        private const int MAX_MTQ_TERMS = 1024;
+
+        public FieldQuery(Query query, IndexReader reader, bool phraseHighlight, bool fieldMatch)
         {
             this.fieldMatch = fieldMatch;
-            Dictionary<Query, Query> flatQueries = new Dictionary<Query, Query>();
-            flatten(query, flatQueries);
-            SaveTerms(flatQueries);
-            Dictionary<Query, Query> expandQueries = expand(flatQueries);
+            ISet<Query> flatQueries = new HashSet<Query>();
+            Flatten(query, reader, flatQueries);
+            SaveTerms(flatQueries, reader);
+            ISet<Query> expandQueries = Expand(flatQueries);
 
-            foreach (Query flatQuery in expandQueries.Keys)
+            foreach (Query flatQuery in expandQueries)
             {
-                QueryPhraseMap rootMap = getRootMap(flatQuery);
-                rootMap.Add(flatQuery);
+                QueryPhraseMap rootMap = GetRootMap(flatQuery);
+                rootMap.Add(flatQuery, reader);
                 if (!phraseHighlight && flatQuery is PhraseQuery)
                 {
                     PhraseQuery pq = (PhraseQuery)flatQuery;
                     if (pq.GetTerms().Length > 1)
                     {
                         foreach (Term term in pq.GetTerms())
-                            rootMap.AddTerm(term.Text, flatQuery.Boost);
+                            rootMap.AddTerm(term, flatQuery.Boost);
                     }
                 }
             }
         }
 
-        public void flatten(Query sourceQuery, Dictionary<Query, Query> flatQueries)
+        /** For backwards compatibility you can initialize FieldQuery without
+        * an IndexReader, which is only required to support MultiTermQuery
+        */
+        internal FieldQuery(Query query, bool phraseHighlight, bool fieldMatch)
+            : this(query, null, phraseHighlight, fieldMatch)
+        {
+        }
+
+        public void Flatten(Query sourceQuery, IndexReader reader, ISet<Query> flatQueries)
         {
             if (sourceQuery is BooleanQuery)
             {
                 BooleanQuery bq = (BooleanQuery)sourceQuery;
-                foreach (BooleanClause clause in bq.GetClauses())
+                foreach (BooleanClause clause in bq.Clauses)
                 {
                     if (!clause.IsProhibited)
-                        flatten(clause.Query, flatQueries);
+                        Flatten(clause.Query, reader, flatQueries);
                 }
             }
-            else if (sourceQuery is PrefixQuery)
-            {
-                if (!flatQueries.ContainsKey(sourceQuery))
-                    flatQueries.Add(sourceQuery, sourceQuery);
-            }
             else if (sourceQuery is DisjunctionMaxQuery)
             {
                 DisjunctionMaxQuery dmq = (DisjunctionMaxQuery)sourceQuery;
                 foreach (Query query in dmq)
                 {
-                    flatten(query, flatQueries);
+                    Flatten(query, reader, flatQueries);
                 }
             }
             else if (sourceQuery is TermQuery)
             {
-                if (!flatQueries.ContainsKey(sourceQuery))
-                    flatQueries.Add(sourceQuery, sourceQuery);
+                if (!flatQueries.Contains(sourceQuery))
+                    flatQueries.Add(sourceQuery);
             }
             else if (sourceQuery is PhraseQuery)
             {
-                if (!flatQueries.ContainsKey(sourceQuery))
+                if (!flatQueries.Contains(sourceQuery))
                 {
                     PhraseQuery pq = (PhraseQuery)sourceQuery;
                     if (pq.GetTerms().Length > 1)
-                        flatQueries.Add(pq, pq);
+                        flatQueries.Add(pq);
                     else if (pq.GetTerms().Length == 1)
                     {
                         Query q = new TermQuery(pq.GetTerms()[0]);
-                        flatQueries.Add(q, q);
+                        flatQueries.Add(q);
                     }
                 }
             }
-            else
+            else if (sourceQuery is ConstantScoreQuery)
+            {
+                Query q = ((ConstantScoreQuery)sourceQuery).Query;
+                if (q != null)
+                {
+                    Flatten(q, reader, flatQueries);
+                }
+            }
+            else if (sourceQuery is FilteredQuery)
             {
-                // Fallback to using extracted terms
-                ISet<Term> terms = SetFactory.CreateHashSet<Term>();
-                try
+                Query q = ((FilteredQuery)sourceQuery).Query;
+                if (q != null)
                 {
-                    sourceQuery.ExtractTerms(terms);
+                    Flatten(q, reader, flatQueries);
                 }
-                catch (NotSupportedException)
-                { // thrown by default impl
-                    return; // ignore error and discard query
+            }
+            else if (reader != null)
+            {
+                Query query = sourceQuery;
+                if (sourceQuery is MultiTermQuery)
+                {
+                    MultiTermQuery copy = (MultiTermQuery)sourceQuery.Clone();
+                    copy.SetRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(MAX_MTQ_TERMS));
+                    query = copy;
                 }
-
-                foreach (var term in terms)
+                Query rewritten = query.Rewrite(reader);
+                if (rewritten != query)
                 {
-                    flatten(new TermQuery(term), flatQueries);
+                    // only rewrite once and then flatten again - the rewritten query could get special treatment
+                    // if this method is overridden in a subclass.
+                    Flatten(rewritten, reader, flatQueries);
+
                 }
+                // if the query is already rewritten we discard it
             }
+            // else discard queries
         }
 
         /*
@@ -137,16 +162,16 @@ namespace Lucene.Net.Search.Vectorhighlight
          * ex2) flatQueries={a,"b c","c d"}
          *      => expandQueries={a,"b c","c d","b c d"}
          */
-        public Dictionary<Query, Query> expand(Dictionary<Query, Query> flatQueries)
+        public ISet<Query> Expand(ISet<Query> flatQueries)
         {
-            Dictionary<Query, Query> expandQueries = new Dictionary<Query, Query>();
-            foreach (Query query in new Dictionary<Query, Query>(flatQueries).Keys)
+            ISet<Query> expandQueries = new HashSet<Query>();
+            foreach (Query query in new HashSet<Query>(flatQueries))
             {
                 //Query query = i.next();
                 flatQueries.Remove(query);
-                expandQueries.Add(query, query);
+                expandQueries.Add(query);
                 if (!(query is PhraseQuery)) continue;
-                foreach (Query qj in flatQueries.Keys)
+                foreach (Query qj in flatQueries)
                 {
                     if (!(qj is PhraseQuery)) continue;
                     CheckOverlap(expandQueries, (PhraseQuery)query, (PhraseQuery)qj);
@@ -162,7 +187,7 @@ namespace Lucene.Net.Search.Vectorhighlight
          * ex2) A="b c", B="a b" => overlap; expandQueries={"a b c"}
          * ex3) A="a b", B="c d" => no overlap; expandQueries={}
          */
-        private void CheckOverlap(Dictionary<Query, Query> expandQueries, PhraseQuery a, PhraseQuery b)
+        private void CheckOverlap(ISet<Query> expandQueries, PhraseQuery a, PhraseQuery b)
         {
             if (a.Slop != b.Slop) return;
             Term[] ats = a.GetTerms();
@@ -185,7 +210,7 @@ namespace Lucene.Net.Search.Vectorhighlight
          *                                     expandQueries={"a a a a a","a a a a a a"}
          * ex8) src="a b c d", dest="b c"   => no overlap
          */
-        private void CheckOverlap(Dictionary<Query, Query> expandQueries, Term[] src, Term[] dest, int slop, float boost)
+        private void CheckOverlap(ISet<Query> expandQueries, Term[] src, Term[] dest, int slop, float boost)
         {
             // beginning from 1 (not 0) is safe because that the PhraseQuery has multiple terms
             // is guaranteed in flatten() method (if PhraseQuery has only one term, flatten()
@@ -212,20 +237,20 @@ namespace Lucene.Net.Search.Vectorhighlight
                     }
                     pq.Slop = slop;
                     pq.Boost = boost;
-                    if (!expandQueries.ContainsKey(pq))
-                        expandQueries.Add(pq, pq);
+                    if (!expandQueries.Contains(pq))
+                        expandQueries.Add(pq);
                 }
             }
         }
 
-        public QueryPhraseMap getRootMap(Query query)
+        internal QueryPhraseMap GetRootMap(Query query)
         {
             String key = GetKey(query);
-            QueryPhraseMap map = rootMaps.Get(key);
+            QueryPhraseMap map = rootMaps[key];
             if (map == null)
             {
                 map = new QueryPhraseMap(this);
-                rootMaps.Put(key, map);
+                rootMaps[key] = map;
             }
             return map;
         }
@@ -239,18 +264,18 @@ namespace Lucene.Net.Search.Vectorhighlight
             if (!fieldMatch) return null;
             if (query is TermQuery)
                 return ((TermQuery)query).Term.Field;
-
-            if (query is PrefixQuery)
-                return ((PrefixQuery)query).Prefix.Field;
-
-            if (query is PhraseQuery)
+            else if (query is PhraseQuery)
             {
                 PhraseQuery pq = (PhraseQuery)query;
                 Term[] terms = pq.GetTerms();
                 return terms[0].Field;
             }
-
-            throw new ApplicationException("query \"" + query + "\" must be flatten first.");
+            else if (query is MultiTermQuery)
+            {
+                return ((MultiTermQuery)query).Field;
+            }
+            else
+                throw new ApplicationException("query \"" + query + "\" must be flatten first.");
         }
 
         /*
@@ -275,40 +300,46 @@ namespace Lucene.Net.Search.Vectorhighlight
          *      - fieldMatch==false
          *          termSetMap=Map<null,Set<"john","lennon">>
          */
-        void SaveTerms(Dictionary<Query, Query> flatQueries)
+        void SaveTerms(ISet<Query> flatQueries, IndexReader reader)
         {
-            foreach (Query query in flatQueries.Keys)
+            foreach (Query query in flatQueries)
             {
-                List<String> termSet = GetTermSet(query);
+                ISet<String> termSet = GetTermSet(query);
                 if (query is TermQuery)
                     termSet.Add(((TermQuery)query).Term.Text);
-                else if (query is PrefixQuery)
-                    termSet.Add(((PrefixQuery)query).Prefix.Text + "*");
                 else if (query is PhraseQuery)
                 {
                     foreach (Term term in ((PhraseQuery)query).GetTerms())
                         termSet.Add(term.Text);
                 }
+                else if (query is MultiTermQuery && reader != null)
+                {
+                    BooleanQuery mtqTerms = (BooleanQuery)query.Rewrite(reader);
+                    foreach (BooleanClause clause in mtqTerms.Clauses)
+                    {
+                        termSet.Add(((TermQuery)clause.Query).Term.Text);
+                    }
+                }
                 else
-                    throw new System.ApplicationException("query \"" + query.ToString() + "\" must be flatten first.");
+                    throw new ApplicationException("query \"" + query.ToString() + "\" must be flatten first.");
             }
         }
 
-        private List<String> GetTermSet(Query query)
+        private ISet<String> GetTermSet(Query query)
         {
             String key = GetKey(query);
-            List<String> set = termSetMap.Get(key);
+            ISet<String> set = termSetMap[key];
             if (set == null)
             {
-                set = new List<String>();
-                termSetMap.Put(key, set);
+                set = new HashSet<String>();
+                termSetMap[key] = set;
             }
             return set;
         }
 
-        public List<String> getTermSet(String field)
+        public ISet<String> GetTermSet(String field)
         {
-            return termSetMap.Get(fieldMatch ? field : null);
+            return termSetMap[fieldMatch ? field : null];
         }
 
         /*
@@ -317,20 +348,10 @@ namespace Lucene.Net.Search.Vectorhighlight
          * <param name="term"></param>
          * <returns>QueryPhraseMap</returns>
          */
-        public QueryPhraseMap   GetFieldTermMap(String fieldName, String term)
+        public QueryPhraseMap GetFieldTermMap(String fieldName, String term)
         {
             QueryPhraseMap rootMap = GetRootMap(fieldName);
-            return rootMap == null ? null : RetrieveQueryFromSubMap(rootMap, term);
-        }
-
-        public QueryPhraseMap RetrieveQueryFromSubMap(QueryPhraseMap rootMap, String term)
-        {
-            foreach (var kvp in rootMap.subMap)
-            {
-                if (StringUtils.TermStringMatch(kvp.Key, term))
-                    return kvp.Value;
-            }
-            return null;
+            return rootMap == null ? null : rootMap.subMap[term];
         }
 
         /*
@@ -339,7 +360,7 @@ namespace Lucene.Net.Search.Vectorhighlight
          * <param name="phraseCandidate"></param>
          * <returns>QueryPhraseMap</returns>
          */
-        public QueryPhraseMap SearchPhrase(String fieldName, List<TermInfo> phraseCandidate)
+        public QueryPhraseMap SearchPhrase(String fieldName, IList<TermInfo> phraseCandidate)
         {
             QueryPhraseMap root = GetRootMap(fieldName);
             if (root == null) return null;
@@ -348,7 +369,7 @@ namespace Lucene.Net.Search.Vectorhighlight
 
         private QueryPhraseMap GetRootMap(String fieldName)
         {
-            return rootMaps.Get(fieldMatch ? fieldName : null);
+            return rootMaps[fieldMatch ? fieldName : null];
         }
 
         int NextTermOrPhraseNumber()
@@ -358,51 +379,46 @@ namespace Lucene.Net.Search.Vectorhighlight
 
         public class QueryPhraseMap
         {
-
-            public bool terminal;
+            bool terminal;
             int slop;   // valid if terminal == true and phraseHighlight == true
-            public float boost;  // valid if terminal == true
+            float boost;  // valid if terminal == true
             int termOrPhraseNumber;   // valid if terminal == true
             FieldQuery fieldQuery;
-            public HashMap<String, QueryPhraseMap> subMap = new HashMap<String, QueryPhraseMap>();
+            internal IDictionary<String, QueryPhraseMap> subMap = new HashMap<String, QueryPhraseMap>();
 
             public QueryPhraseMap(FieldQuery fieldQuery)
             {
                 this.fieldQuery = fieldQuery;
             }
 
-            public void AddTerm(String termText, float boost)
+            public void AddTerm(Term term, float boost)
             {
-                QueryPhraseMap map = GetOrNewMap(subMap, termText);
+                QueryPhraseMap map = GetOrNewMap(subMap, term.Text);
                 map.MarkTerminal(boost);
             }
 
-            private QueryPhraseMap GetOrNewMap(HashMap<String, QueryPhraseMap> subMap, String term)
+            private QueryPhraseMap GetOrNewMap(IDictionary<String, QueryPhraseMap> subMap, String term)
             {
-                QueryPhraseMap map = subMap.Get(term);
+                QueryPhraseMap map = subMap[term];
                 if (map == null)
                 {
                     map = new QueryPhraseMap(fieldQuery);
-                    subMap.Put(term, map);
+                    subMap[term] = map;
                 }
                 return map;
             }
 
-            public void Add(Query query)
+            public void Add(Query query, IndexReader reader)
             {
                 if (query is TermQuery)
                 {
-                    AddTerm(((TermQuery)query).Term.Text, query.Boost);
-                }
-                else if (query is PrefixQuery)
-                {
-                    AddTerm(((PrefixQuery)query).Prefix.Text + "*", query.Boost);
+                    AddTerm(((TermQuery)query).Term, query.Boost);
                 }
                 else if (query is PhraseQuery)
                 {
                     PhraseQuery pq = (PhraseQuery)query;
                     Term[] terms = pq.GetTerms();
-                    HashMap<String, QueryPhraseMap> map = subMap;
+                    IDictionary<String, QueryPhraseMap> map = subMap;
                     QueryPhraseMap qpm = null;
                     foreach (Term term in terms)
                     {
@@ -417,7 +433,7 @@ namespace Lucene.Net.Search.Vectorhighlight
 
             public QueryPhraseMap GetTermMap(String term)
             {
-                return subMap.Get(term);
+                return subMap[term];
             }
 
             private void MarkTerminal(float boost)
@@ -453,7 +469,7 @@ namespace Lucene.Net.Search.Vectorhighlight
                 get { return termOrPhraseNumber; }
             }
 
-            public QueryPhraseMap SearchPhrase(List<TermInfo> phraseCandidate)
+            public QueryPhraseMap SearchPhrase(IList<TermInfo> phraseCandidate)
             {
                 QueryPhraseMap currMap = this;
                 foreach (TermInfo ti in phraseCandidate)
@@ -464,7 +480,7 @@ namespace Lucene.Net.Search.Vectorhighlight
                 return currMap.IsValidTermOrPhrase(phraseCandidate) ? currMap : null;
             }
 
-            public bool IsValidTermOrPhrase(List<TermInfo> phraseCandidate)
+            public bool IsValidTermOrPhrase(IList<TermInfo> phraseCandidate)
             {
                 // check terminal
                 if (!terminal) return false;


Mime
View raw message