lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From d...@apache.org
Subject svn commit: r925400 - in /lucene/lucene.net/trunk/C#/contrib/FastVectorHighlighter.Net/FastVectorHighlighter.Net: FastVectorHighlighter.Net.csproj FieldTermStack.cs VectorHighlightMapper.cs
Date Fri, 19 Mar 2010 20:01:07 GMT
Author: digy
Date: Fri Mar 19 20:01:07 2010
New Revision: 925400

URL: http://svn.apache.org/viewvc?rev=925400&view=rev
Log:
LUCENENET-350 Performance enhancement in FastVectorHighlighter (contrib)

Added:
    lucene/lucene.net/trunk/C#/contrib/FastVectorHighlighter.Net/FastVectorHighlighter.Net/VectorHighlightMapper.cs
Modified:
    lucene/lucene.net/trunk/C#/contrib/FastVectorHighlighter.Net/FastVectorHighlighter.Net/FastVectorHighlighter.Net.csproj
    lucene/lucene.net/trunk/C#/contrib/FastVectorHighlighter.Net/FastVectorHighlighter.Net/FieldTermStack.cs

Modified: lucene/lucene.net/trunk/C#/contrib/FastVectorHighlighter.Net/FastVectorHighlighter.Net/FastVectorHighlighter.Net.csproj
URL: http://svn.apache.org/viewvc/lucene/lucene.net/trunk/C%23/contrib/FastVectorHighlighter.Net/FastVectorHighlighter.Net/FastVectorHighlighter.Net.csproj?rev=925400&r1=925399&r2=925400&view=diff
==============================================================================
--- lucene/lucene.net/trunk/C#/contrib/FastVectorHighlighter.Net/FastVectorHighlighter.Net/FastVectorHighlighter.Net.csproj
(original)
+++ lucene/lucene.net/trunk/C#/contrib/FastVectorHighlighter.Net/FastVectorHighlighter.Net/FastVectorHighlighter.Net.csproj
Fri Mar 19 20:01:07 2010
@@ -36,7 +36,7 @@
     <DebugType>full</DebugType>
     <Optimize>false</Optimize>
     <OutputPath>bin\Debug\</OutputPath>
-    <DefineConstants>DEBUG;TRACE</DefineConstants>
+    <DefineConstants>TRACE;DEBUG;LUCENENET_350</DefineConstants>
     <ErrorReport>prompt</ErrorReport>
     <WarningLevel>4</WarningLevel>
   </PropertyGroup>
@@ -44,7 +44,7 @@
     <DebugType>pdbonly</DebugType>
     <Optimize>true</Optimize>
     <OutputPath>bin\Release\</OutputPath>
-    <DefineConstants>TRACE</DefineConstants>
+    <DefineConstants>TRACE;LUCENENET_350</DefineConstants>
     <ErrorReport>prompt</ErrorReport>
     <WarningLevel>4</WarningLevel>
   </PropertyGroup>
@@ -71,6 +71,7 @@
     <Compile Include="SimpleFragListBuilder.cs" />
     <Compile Include="SimpleFragmentsBuilder.cs" />
     <Compile Include="Support.cs" />
+    <Compile Include="VectorHighlightMapper.cs" />
   </ItemGroup>
   <ItemGroup>
     <None Include="package.html" />

Modified: lucene/lucene.net/trunk/C#/contrib/FastVectorHighlighter.Net/FastVectorHighlighter.Net/FieldTermStack.cs
URL: http://svn.apache.org/viewvc/lucene/lucene.net/trunk/C%23/contrib/FastVectorHighlighter.Net/FastVectorHighlighter.Net/FieldTermStack.cs?rev=925400&r1=925399&r2=925400&view=diff
==============================================================================
--- lucene/lucene.net/trunk/C#/contrib/FastVectorHighlighter.Net/FastVectorHighlighter.Net/FieldTermStack.cs
(original)
+++ lucene/lucene.net/trunk/C#/contrib/FastVectorHighlighter.Net/FastVectorHighlighter.Net/FieldTermStack.cs
Fri Mar 19 20:01:07 2010
@@ -58,7 +58,7 @@ namespace Lucene.Net.Search.Vectorhighli
             FieldTermStack ftl = new FieldTermStack(reader, 0, "f", fieldQuery);
             reader.Close();
         }
-                
+
         /// <summary>
         /// a constructor. 
         /// </summary>
@@ -66,6 +66,39 @@ namespace Lucene.Net.Search.Vectorhighli
         /// <param name="docId">document id to be highlighted</param>
         /// <param name="fieldName">field of the document to be highlighted</param>
         /// <param name="fieldQuery">FieldQuery object</param>
+#if LUCENENET_350 //Lucene.Net specific code. See https://issues.apache.org/jira/browse/LUCENENET-350
+        public FieldTermStack(IndexReader reader, int docId, String fieldName, FieldQuery fieldQuery)
+        {
+            // LUCENENET-350 variant: the term vector is read through a VectorHighlightMapper
+            // built from the query's term set instead of a plain TermFreqVector.
+            this.fieldName = fieldName;
+
+            List<string> termSet = fieldQuery.getTermSet(fieldName);
+
+            // No term set for this field (un-matched fieldName specified while
+            // fieldMatch == true): leave the stack empty so a null snippet is produced.
+            if (termSet == null)
+            {
+                return;
+            }
+
+            VectorHighlightMapper tfv = new VectorHighlightMapper(termSet);
+            reader.GetTermFreqVector(docId, fieldName, tfv);
+
+            // Nothing was collected: leave the stack empty to make null snippets.
+            if (tfv.Size() == 0)
+            {
+                return;
+            }
+
+            string[] terms = tfv.GetTerms();
+            foreach (String term in terms)
+            {
+                if (!termSet.Contains(term))
+                {
+                    continue;
+                }
+
+                int index = tfv.IndexOf(term);
+
+                TermVectorOffsetInfo[] tvois = tfv.GetOffsets(index);
+                if (tvois == null)
+                {
+                    return; // just return to make null snippets
+                }
+
+                int[] poss = tfv.GetTermPositions(index);
+                // A positions array shorter than the offsets array (for example an empty
+                // array handed back when positions were not stored) would make poss[i]
+                // below throw IndexOutOfRangeException, so treat it like a missing array.
+                if (poss == null || poss.Length < tvois.Length)
+                {
+                    return; // just return to make null snippets
+                }
+
+                for (int i = 0; i < tvois.Length; i++)
+                {
+                    termList.AddLast(new TermInfo(term, tvois[i].GetStartOffset(), tvois[i].GetEndOffset(), poss[i]));
+                }
+            }
+
+            // sort by position
+            Sort(termList);
+        }
+#else   //Original Port
         public FieldTermStack(IndexReader reader, int docId, String fieldName, FieldQuery
fieldQuery)
         {
             this.fieldName = fieldName;
@@ -102,6 +135,7 @@ namespace Lucene.Net.Search.Vectorhighli
             //Collections.sort(termList);
             Sort(termList);
         }
+#endif
 
         void Sort(LinkedList<TermInfo> linkList)
         {

Added: lucene/lucene.net/trunk/C#/contrib/FastVectorHighlighter.Net/FastVectorHighlighter.Net/VectorHighlightMapper.cs
URL: http://svn.apache.org/viewvc/lucene/lucene.net/trunk/C%23/contrib/FastVectorHighlighter.Net/FastVectorHighlighter.Net/VectorHighlightMapper.cs?rev=925400&view=auto
==============================================================================
--- lucene/lucene.net/trunk/C#/contrib/FastVectorHighlighter.Net/FastVectorHighlighter.Net/VectorHighlightMapper.cs
(added)
+++ lucene/lucene.net/trunk/C#/contrib/FastVectorHighlighter.Net/FastVectorHighlighter.Net/VectorHighlightMapper.cs
Fri Mar 19 20:01:07 2010
@@ -0,0 +1,134 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#if LUCENENET_350 //Lucene.Net specific code. See https://issues.apache.org/jira/browse/LUCENENET-350
+
+using System;
+using System.Collections.Generic;
+using Lucene.Net.Index;
+
+namespace Lucene.Net.Search.Vectorhighlight
+{
+    /// <summary>
+    /// A <see cref="TermVectorMapper"/> that keeps only the term vector entries whose term
+    /// is contained in the list given to the constructor, and exposes what it collected
+    /// through the <see cref="TermFreqVector"/> and <see cref="TermPositionVector"/> members.
+    /// </summary>
+    /// <remarks>
+    /// The index of a term is the order in which it was accepted by <see cref="Map"/>.
+    /// <see cref="GetTerms"/>, <see cref="GetTermFrequencies"/>, <see cref="IndexOf"/>,
+    /// <see cref="GetTermPositions"/> and <see cref="GetOffsets"/> all use that same index.
+    /// </remarks>
+    public class VectorHighlightMapper : TermVectorMapper, TermFreqVector, TermPositionVector
+    {
+        // Terms to keep; everything else passed to Map is dropped.
+        private readonly List<string> _terms;
+        // Per-term data, keyed by term text.
+        private Dictionary<string, TermVectorOffsetInfo[]> _tvoi;
+        private Dictionary<string, int[]> _positions;
+        private Dictionary<string, int> _frequency;
+        // Accepted terms in mapping order; a term's position in this list is its index.
+        private List<string> _indexMap;
+        private string _field;
+        private bool _storeOffsets;
+        private bool _storePositions;
+
+        /// <summary>
+        /// Creates a mapper that collects only the given terms.
+        /// </summary>
+        /// <param name="terms">terms whose term vector entries should be kept</param>
+        /// <exception cref="ArgumentNullException"><paramref name="terms"/> is null</exception>
+        public VectorHighlightMapper(List<string> terms)
+        {
+            // Fail here rather than with a NullReferenceException on the first Map call.
+            if (terms == null)
+            {
+                throw new ArgumentNullException("terms");
+            }
+
+            _terms = terms;
+            _tvoi = new Dictionary<string, TermVectorOffsetInfo[]>();
+            _positions = new Dictionary<string, int[]>();
+            _frequency = new Dictionary<string, int>();
+            _indexMap = new List<string>();
+        }
+
+        /// <summary>
+        /// Records the field name and storage flags and resets everything collected so far.
+        /// </summary>
+        /// <param name="field">name of the field being mapped</param>
+        /// <param name="numTerms">number of terms to expect; used as initial capacity</param>
+        /// <param name="storeOffsets">true if offsets will be passed to <see cref="Map"/></param>
+        /// <param name="storePositions">true if positions will be passed to <see cref="Map"/></param>
+        public override void SetExpectations(string field, int numTerms, bool storeOffsets, bool storePositions)
+        {
+            _field = field;
+            _storeOffsets = storeOffsets;
+            _storePositions = storePositions;
+
+            // Always start from empty containers so that data from an earlier use of this
+            // instance cannot leak through when a flag is false this time.
+            _tvoi = new Dictionary<string, TermVectorOffsetInfo[]>(numTerms);
+            _positions = new Dictionary<string, int[]>(numTerms);
+            _frequency = new Dictionary<string, int>(numTerms);
+            _indexMap = new List<string>(numTerms);
+        }
+
+        /// <summary>
+        /// Stores the entry for <paramref name="term"/> if it is one of the wanted terms;
+        /// ignores it otherwise.
+        /// </summary>
+        /// <param name="term">term text</param>
+        /// <param name="frequency">frequency of the term</param>
+        /// <param name="offsets">offsets of the term; kept only when offsets are stored</param>
+        /// <param name="positions">positions of the term; kept only when positions are stored</param>
+        public override void Map(string term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions)
+        {
+            if (!_terms.Contains(term))
+            {
+                return;
+            }
+
+            _indexMap.Add(term);
+            if (_storeOffsets)
+            {
+                _tvoi.Add(term, offsets);
+            }
+            if (_storePositions)
+            {
+                _positions.Add(term, positions);
+            }
+            _frequency.Add(term, frequency);
+        }
+
+        /// <summary>
+        /// Returns the field name received by the last <see cref="SetExpectations"/> call,
+        /// or null if it has not been called.
+        /// </summary>
+        public string GetField()
+        {
+            return _field;
+        }
+
+        /// <summary>
+        /// Returns the number of terms collected, whether or not offsets were stored.
+        /// </summary>
+        public int Size()
+        {
+            return _indexMap.Count;
+        }
+
+        /// <summary>
+        /// Returns the collected terms; the array position of a term equals
+        /// <see cref="IndexOf"/> for that term.
+        /// </summary>
+        public string[] GetTerms()
+        {
+            // Built from the index list, not from dictionary keys, so that the order is
+            // guaranteed to agree with IndexOf.
+            return _indexMap.ToArray();
+        }
+
+        /// <summary>
+        /// Returns the term frequencies, in the same order as <see cref="GetTerms"/>.
+        /// </summary>
+        public int[] GetTermFrequencies()
+        {
+            int[] result = new int[_indexMap.Count];
+            for (int i = 0; i < result.Length; i++)
+            {
+                result[i] = _frequency[_indexMap[i]];
+            }
+            return result;
+        }
+
+        /// <summary>
+        /// Returns the index of <paramref name="term"/>, or -1 if it was not collected.
+        /// </summary>
+        public int IndexOf(string term)
+        {
+            return _indexMap.IndexOf(term);
+        }
+
+        /// <summary>
+        /// Looks up <paramref name="len"/> terms of <paramref name="terms"/>, beginning at
+        /// <paramref name="start"/>.
+        /// </summary>
+        /// <param name="terms">array containing the terms to look for</param>
+        /// <param name="start">index in <paramref name="terms"/> of the first term to look up</param>
+        /// <param name="len">number of terms to look up</param>
+        /// <returns>
+        /// an array of length <paramref name="len"/>; slot i holds
+        /// <see cref="IndexOf"/> of <c>terms[start + i]</c>
+        /// </returns>
+        public int[] IndexesOf(string[] terms, int start, int len)
+        {
+            // start/len select a slice of the terms argument; they are not a search window
+            // into the collected terms.
+            int[] result = new int[len];
+            for (int i = 0; i < len; i++)
+            {
+                result[i] = IndexOf(terms[start + i]);
+            }
+            return result;
+        }
+
+        /// <summary>
+        /// Returns the positions of the term at <paramref name="index"/>.
+        /// </summary>
+        /// <returns>
+        /// null if positions were not stored, an empty array if <paramref name="index"/>
+        /// is out of range (including the -1 returned by <see cref="IndexOf"/>),
+        /// otherwise the array that was passed to <see cref="Map"/>
+        /// </returns>
+        public int[] GetTermPositions(int index)
+        {
+            if (!_storePositions)
+            {
+                return null;
+            }
+            if (index >= 0 && index < _indexMap.Count)
+            {
+                return _positions[_indexMap[index]];
+            }
+            return new int[0];
+        }
+
+        /// <summary>
+        /// Returns the offsets of the term at <paramref name="index"/>.
+        /// </summary>
+        /// <returns>
+        /// null if offsets were not stored, an empty array if <paramref name="index"/>
+        /// is out of range (including the -1 returned by <see cref="IndexOf"/>),
+        /// otherwise the array that was passed to <see cref="Map"/>
+        /// </returns>
+        public TermVectorOffsetInfo[] GetOffsets(int index)
+        {
+            if (!_storeOffsets)
+            {
+                return null;
+            }
+            if (index >= 0 && index < _indexMap.Count)
+            {
+                return _tvoi[_indexMap[index]];
+            }
+            return new TermVectorOffsetInfo[0];
+        }
+    }
+}
+
+#endif
\ No newline at end of file



Mime
View raw message