lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From d...@apache.org
Subject svn commit: r916415 - in /lucene/lucene.net/trunk/C#/contrib/FastVectorHighlighter.Net: ./ FastVectorHighlighter.Net/ Test/
Date Thu, 25 Feb 2010 19:31:42 GMT
Author: digy
Date: Thu Feb 25 19:31:42 2010
New Revision: 916415

URL: http://svn.apache.org/viewvc?rev=916415&view=rev
Log:
LUCENE-2278 FastVectorHighlighter: highlighted term is out of alignment in multi-valued NOT_ANALYZED field.

Since Lucene Java 2.9.2 is ready for release, this patch is applied only to 3.1.

Added:
    lucene/lucene.net/trunk/C#/contrib/FastVectorHighlighter.Net/Readme.txt
Removed:
    lucene/lucene.net/trunk/C#/contrib/FastVectorHighlighter.Net/FastVectorHighlighter.Net.txt
Modified:
    lucene/lucene.net/trunk/C#/contrib/FastVectorHighlighter.Net/FastVectorHighlighter.Net/BaseFragmentsBuilder.cs
    lucene/lucene.net/trunk/C#/contrib/FastVectorHighlighter.Net/FastVectorHighlighter.Net/FieldQuery.cs
    lucene/lucene.net/trunk/C#/contrib/FastVectorHighlighter.Net/Test/AbstractTestCase.cs
    lucene/lucene.net/trunk/C#/contrib/FastVectorHighlighter.Net/Test/FieldQueryTest.cs
    lucene/lucene.net/trunk/C#/contrib/FastVectorHighlighter.Net/Test/SimpleFragmentsBuilderTest.cs

Modified: lucene/lucene.net/trunk/C#/contrib/FastVectorHighlighter.Net/FastVectorHighlighter.Net/BaseFragmentsBuilder.cs
URL: http://svn.apache.org/viewvc/lucene/lucene.net/trunk/C%23/contrib/FastVectorHighlighter.Net/FastVectorHighlighter.Net/BaseFragmentsBuilder.cs?rev=916415&r1=916414&r2=916415&view=diff
==============================================================================
--- lucene/lucene.net/trunk/C#/contrib/FastVectorHighlighter.Net/FastVectorHighlighter.Net/BaseFragmentsBuilder.cs (original)
+++ lucene/lucene.net/trunk/C#/contrib/FastVectorHighlighter.Net/FastVectorHighlighter.Net/BaseFragmentsBuilder.cs Thu Feb 25 19:31:42 2010
@@ -76,7 +76,7 @@
             List<WeightedFragInfo> fragInfos = GetWeightedFragInfoList(fieldFragList.fragInfos);
 
             List<String> fragments = new List<String>(maxNumFragments);
-            String[] values = GetFieldValues(reader, docId, fieldName);
+            Field[] values = GetFields(reader, docId, fieldName);
             if (values.Length == 0) return null;
             StringBuilder buffer = new StringBuilder();
             int[] nextValueIndex = { 0 };
@@ -88,12 +88,52 @@
             return fragments.ToArray();
         }
 
+        [Obsolete]
         protected String[] GetFieldValues(IndexReader reader, int docId, String fieldName)
         {
             Document doc = reader.Document(docId, new MapFieldSelector(new String[] { fieldName }));
             return doc.GetValues(fieldName); // according to Document class javadoc, this never returns null
         }
 
+        protected Field[] GetFields(IndexReader reader, int docId, String fieldName)
+        {
+            // according to javadoc, doc.getFields(fieldName) cannot be used with lazy loaded field???
+            Document doc = reader.Document(docId, new MapFieldSelector(new String[] { fieldName }));
+            return doc.GetFields(fieldName); // according to Document class javadoc, this never returns null
+        }
+
+        [Obsolete]
+        protected String MakeFragment(StringBuilder buffer, int[] index, String[] values, WeightedFragInfo fragInfo)
+        {
+            int s = fragInfo.startOffset;
+            return MakeFragment(fragInfo, GetFragmentSource(buffer, index, values, s, fragInfo.endOffset), s);
+        }
+
+        protected String MakeFragment(StringBuilder buffer, int[] index, Field[] values, WeightedFragInfo fragInfo)
+        {
+            int s = fragInfo.startOffset;
+            return MakeFragment(fragInfo, GetFragmentSource(buffer, index, values, s, fragInfo.endOffset), s);
+        }
+
+        private String MakeFragment(WeightedFragInfo fragInfo, String src, int s)
+        {
+            StringBuilder fragment = new StringBuilder();
+            int srcIndex = 0;
+            foreach (SubInfo subInfo in fragInfo.subInfos)
+            {
+                foreach (Toffs to in subInfo.termsOffsets)
+                {
+                    fragment.Append(src.Substring(srcIndex, to.startOffset - s - srcIndex)).Append(GetPreTag(subInfo.seqnum))
+                      .Append(src.Substring(to.startOffset - s, to.endOffset - s - (to.startOffset - s))).Append(GetPostTag(subInfo.seqnum));
+                    srcIndex = to.endOffset - s;
+                }
+            }
+            fragment.Append(src.Substring(srcIndex));
+            return fragment.ToString();
+        }
+
+        /*
+        [Obsolete]
         protected String MakeFragment(StringBuilder buffer, int[] index, String[] values, WeightedFragInfo fragInfo)
         {
             StringBuilder fragment = new StringBuilder();
@@ -112,7 +152,10 @@
             fragment.Append(src.Substring(srcIndex));
             return fragment.ToString();
         }
+        */
+
 
+        [Obsolete]
         protected String GetFragmentSource(StringBuilder buffer, int[] index, String[] values, int startOffset, int endOffset)
         {
             while (buffer.Length < endOffset && index[0] < values.Length)
@@ -125,6 +168,18 @@
             return buffer.ToString().Substring(startOffset, eo - startOffset);
         }
 
+        protected String GetFragmentSource(StringBuilder buffer, int[] index, Field[] values, int startOffset, int endOffset)
+        {
+            while (buffer.Length < endOffset && index[0] < values.Length)
+            {
+                if (index[0] > 0 && values[index[0]].IsTokenized() && values[index[0]].StringValue().Length > 0)
+                    buffer.Append(' ');
+                buffer.Append(values[index[0]++].StringValue());
+            }
+            int eo = buffer.Length < endOffset ? buffer.Length: endOffset;
+            return buffer.ToString().Substring(startOffset, eo - startOffset );
+        }
+
         protected String GetPreTag(int num)
         {
             return preTags.Length > num ? preTags[num] : preTags[0];

Modified: lucene/lucene.net/trunk/C#/contrib/FastVectorHighlighter.Net/FastVectorHighlighter.Net/FieldQuery.cs
URL: http://svn.apache.org/viewvc/lucene/lucene.net/trunk/C%23/contrib/FastVectorHighlighter.Net/FastVectorHighlighter.Net/FieldQuery.cs?rev=916415&r1=916414&r2=916415&view=diff
==============================================================================
--- lucene/lucene.net/trunk/C#/contrib/FastVectorHighlighter.Net/FastVectorHighlighter.Net/FieldQuery.cs (original)
+++ lucene/lucene.net/trunk/C#/contrib/FastVectorHighlighter.Net/FastVectorHighlighter.Net/FieldQuery.cs Thu Feb 25 19:31:42 2010
@@ -75,10 +75,20 @@
                         flatten(clause.GetQuery(), flatQueries);
                 }
             }
+            else if (sourceQuery is DisjunctionMaxQuery)
+            {
+                DisjunctionMaxQuery dmq = (DisjunctionMaxQuery)sourceQuery;
+                System.Collections.IEnumerator en = dmq.Iterator();
+                while (en.MoveNext())
+                {
+                    Query query = (Query)en.Current;
+                    flatten(query, flatQueries);
+                }
+            }
             else if (sourceQuery is TermQuery)
             {
                 if (!flatQueries.ContainsKey(sourceQuery))
-                    flatQueries.Add(sourceQuery,sourceQuery);
+                    flatQueries.Add(sourceQuery, sourceQuery);
             }
             else if (sourceQuery is PhraseQuery)
             {
@@ -86,11 +96,11 @@
                 {
                     PhraseQuery pq = (PhraseQuery)sourceQuery;
                     if (pq.GetTerms().Length > 1)
-                        flatQueries.Add(pq,pq);
+                        flatQueries.Add(pq, pq);
                     else if (pq.GetTerms().Length == 1)
                     {
                         Query q = new TermQuery(pq.GetTerms()[0]);
-                        flatQueries.Add(q,q);
+                        flatQueries.Add(q, q);
                     }
                 }
             }

Added: lucene/lucene.net/trunk/C#/contrib/FastVectorHighlighter.Net/Readme.txt
URL: http://svn.apache.org/viewvc/lucene/lucene.net/trunk/C%23/contrib/FastVectorHighlighter.Net/Readme.txt?rev=916415&view=auto
==============================================================================
--- lucene/lucene.net/trunk/C#/contrib/FastVectorHighlighter.Net/Readme.txt (added)
+++ lucene/lucene.net/trunk/C#/contrib/FastVectorHighlighter.Net/Readme.txt Thu Feb 25 19:31:42 2010
@@ -0,0 +1,7 @@
+
+Some internal fields/methods are made "public" to be able to use them in Test project. 
+Those fields/methods (starting with a lowercase char) are not intended to be used in your code.
+
+Rev:916090 25Feb2010
+
+java source: https://svn.apache.org/repos/asf/lucene/java/trunk/contrib/fast-vector-highlighter

Modified: lucene/lucene.net/trunk/C#/contrib/FastVectorHighlighter.Net/Test/AbstractTestCase.cs
URL: http://svn.apache.org/viewvc/lucene/lucene.net/trunk/C%23/contrib/FastVectorHighlighter.Net/Test/AbstractTestCase.cs?rev=916415&r1=916414&r2=916415&view=diff
==============================================================================
--- lucene/lucene.net/trunk/C#/contrib/FastVectorHighlighter.Net/Test/AbstractTestCase.cs (original)
+++ lucene/lucene.net/trunk/C#/contrib/FastVectorHighlighter.Net/Test/AbstractTestCase.cs Thu Feb 25 19:31:42 2010
@@ -43,6 +43,7 @@
         protected Directory dir;
         protected Analyzer analyzerW;
         protected Analyzer analyzerB;
+        protected Analyzer analyzerK;
         protected IndexReader reader;
         protected QueryParser paW;
         protected QueryParser paB;
@@ -64,11 +65,18 @@
             "\nWhen you talk about processing speed, the"
           };
 
+        protected static String[] strMVValues = {
+            "abc",
+            "defg",
+            "hijkl"
+          };
+
         [SetUp]
         public void SetUp()
         {
             analyzerW = new WhitespaceAnalyzer();
             analyzerB = new BigramAnalyzer();
+            analyzerK = new KeywordAnalyzer();
             paW = new QueryParser(F, analyzerW);
             paB = new QueryParser(F, analyzerB);
             dir = new RAMDirectory();
@@ -148,6 +156,21 @@
             return query;
         }
 
+        protected Query Dmq(params Query[] queries)
+        {
+            return Dmq(0.0F, queries);
+        }
+
+        protected Query Dmq(float tieBreakerMultiplier, params Query[] queries)
+        {
+            DisjunctionMaxQuery query = new DisjunctionMaxQuery(tieBreakerMultiplier);
+            foreach (Query q in queries)
+            {
+                query.Add(q);
+            }
+            return query;
+        }
+
         protected void AssertCollectionQueries(Dictionary<Query, Query> actual, params Query[] expected)
         {
 
@@ -320,6 +343,7 @@
             Make1dmfIndex(analyzerB, values);
         }
 
+        // make 1 doc with multi valued field
         protected void Make1dmfIndex(Analyzer analyzer, params String[] values)
         {
             IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
@@ -329,7 +353,20 @@
             writer.AddDocument(doc);
             writer.Close();
 
-            reader = IndexReader.Open(dir);
+            reader = IndexReader.Open(dir,true);
+        }
+
+        // make 1 doc with multi valued & not analyzed field
+        protected void Make1dmfIndexNA(String[] values)
+        {
+            IndexWriter writer = new IndexWriter(dir, analyzerK, true, IndexWriter.MaxFieldLength.LIMITED);
+            Document doc = new Document();
+            foreach (String value in values)
+                doc.Add(new Field(F, value, Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
+            writer.AddDocument(doc);
+            writer.Close();
+
+            reader = IndexReader.Open(dir, true);
         }
 
         protected void MakeIndexShortMV()
@@ -396,5 +433,19 @@
 
             Make1dmfIndexB(biMVValues);
         }
+
+        protected void MakeIndexStrMV()
+        {
+            //  0123
+            // "abc"
+
+            //  34567
+            // "defg"
+
+            //     111
+            //  789012
+            // "hijkl"
+            Make1dmfIndexNA(strMVValues);
+        }
     }
 }

Modified: lucene/lucene.net/trunk/C#/contrib/FastVectorHighlighter.Net/Test/FieldQueryTest.cs
URL: http://svn.apache.org/viewvc/lucene/lucene.net/trunk/C%23/contrib/FastVectorHighlighter.Net/Test/FieldQueryTest.cs?rev=916415&r1=916414&r2=916415&view=diff
==============================================================================
--- lucene/lucene.net/trunk/C#/contrib/FastVectorHighlighter.Net/Test/FieldQueryTest.cs (original)
+++ lucene/lucene.net/trunk/C#/contrib/FastVectorHighlighter.Net/Test/FieldQueryTest.cs Thu Feb 25 19:31:42 2010
@@ -43,6 +43,16 @@
         }
 
         [Test]
+        public void testFlattenDisjunctionMaxQuery()
+        {
+            Query query = Dmq(Tq("A"), Tq("B"), PqF("C", "D"));
+            FieldQuery fq = new FieldQuery(query, true, true);
+            HashSet<Query> flatQueries = new HashSet<Query>();
+            fq.flatten(query, flatQueries);
+            AssertCollectionQueries(flatQueries, Tq("A"), Tq("B"), PqF("C", "D"));
+        }
+
+        [Test]
         public void TestFlattenTermAndPhrase()
         {
             Query query = paW.Parse("A AND \"B C\"");

Modified: lucene/lucene.net/trunk/C#/contrib/FastVectorHighlighter.Net/Test/SimpleFragmentsBuilderTest.cs
URL: http://svn.apache.org/viewvc/lucene/lucene.net/trunk/C%23/contrib/FastVectorHighlighter.Net/Test/SimpleFragmentsBuilderTest.cs?rev=916415&r1=916414&r2=916415&view=diff
==============================================================================
--- lucene/lucene.net/trunk/C#/contrib/FastVectorHighlighter.Net/Test/SimpleFragmentsBuilderTest.cs (original)
+++ lucene/lucene.net/trunk/C#/contrib/FastVectorHighlighter.Net/Test/SimpleFragmentsBuilderTest.cs Thu Feb 25 19:31:42 2010
@@ -71,7 +71,7 @@
             Assert.AreEqual("<b>c</b> <b>a</b> <b>a</b> b b", f[2]);
         }
 
-        
+
         private FieldFragList ffl(String queryValue, String indexValue)
         {
             Make1d1fIndex(indexValue);
@@ -147,7 +147,21 @@
             writer.AddDocument(doc);
             writer.Close();
 
-            reader = IndexReader.Open(dir);
+            reader = IndexReader.Open(dir, true);
+        }
+
+        [Test]
+        public void Test1StrMV()
+        {
+            MakeIndexStrMV();
+
+            FieldQuery fq = new FieldQuery(Tq("defg"), true, true);
+            FieldTermStack stack = new FieldTermStack(reader, 0, F, fq);
+            FieldPhraseList fpl = new FieldPhraseList(stack, fq);
+            SimpleFragListBuilder sflb = new SimpleFragListBuilder();
+            FieldFragList ffl = sflb.CreateFieldFragList(fpl, 100);
+            SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
+            Assert.AreEqual("abc<b>defg</b>hijkl", sfb.CreateFragment(reader, 0, F, ffl));
         }
     }
 }



Mime
View raw message