lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From d...@apache.org
Subject [Lucene.Net] svn commit: r1140026 - in /incubator/lucene.net/branches/Lucene.Net_2_9_4g: src/contrib/FastVectorHighlighter/FastVectorHighlighter.cs src/contrib/FastVectorHighlighter/FieldPhraseList.cs test/contrib/FastVectorHighlighter/FieldPhraseListTest.cs
Date Mon, 27 Jun 2011 06:45:45 GMT
Author: digy
Date: Mon Jun 27 06:45:45 2011
New Revision: 1140026

URL: http://svn.apache.org/viewvc?rev=1140026&view=rev
Log:
[LUCENENET-427] Provide limit on phrase analysis in FastVectorHighlighter

Modified:
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/FastVectorHighlighter/FastVectorHighlighter.cs
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/FastVectorHighlighter/FieldPhraseList.cs
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/test/contrib/FastVectorHighlighter/FieldPhraseListTest.cs

Modified: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/FastVectorHighlighter/FastVectorHighlighter.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/FastVectorHighlighter/FastVectorHighlighter.cs?rev=1140026&r1=1140025&r2=1140026&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/FastVectorHighlighter/FastVectorHighlighter.cs
(original)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/FastVectorHighlighter/FastVectorHighlighter.cs
Mon Jun 27 06:45:45 2011
@@ -32,6 +32,7 @@ namespace Lucene.Net.Search.Vectorhighli
         private bool fieldMatch;
         private FragListBuilder fragListBuilder;
         private FragmentsBuilder fragmentsBuilder;
+        private int phraseLimit = Int32.MaxValue;
 
         /// <summary>
         /// the default constructor.
@@ -39,7 +40,7 @@ namespace Lucene.Net.Search.Vectorhighli
         public FastVectorHighlighter():this(DEFAULT_PHRASE_HIGHLIGHT, DEFAULT_FIELD_MATCH)
         {
         }
-        
+
         /// <summary>
         /// a constructor. Using SimpleFragListBuilder and ScoreOrderFragmentsBuilder. 
         /// </summary>
@@ -48,7 +49,7 @@ namespace Lucene.Net.Search.Vectorhighli
         public FastVectorHighlighter(bool phraseHighlight, bool fieldMatch):this(phraseHighlight,
fieldMatch, new SimpleFragListBuilder(), new ScoreOrderFragmentsBuilder())
         {
         }
-            
+
         /// <summary>
         /// a constructor. A FragListBuilder and a FragmentsBuilder can be specified (plugins).
         /// </summary>
@@ -64,7 +65,7 @@ namespace Lucene.Net.Search.Vectorhighli
             this.fragListBuilder = fragListBuilder;
             this.fragmentsBuilder = fragmentsBuilder;
         }
-                
+
         /// <summary>
         /// create a FieldQuery object. 
         /// </summary>
@@ -75,7 +76,7 @@ namespace Lucene.Net.Search.Vectorhighli
             return new FieldQuery(query, phraseHighlight, fieldMatch);
         }
 
-        
+
         /// <summary>
         /// return the best fragment.
         /// </summary>
@@ -91,7 +92,7 @@ namespace Lucene.Net.Search.Vectorhighli
             FieldFragList fieldFragList = GetFieldFragList(fieldQuery, reader, docId, fieldName,
fragCharSize);
             return fragmentsBuilder.CreateFragment(reader, docId, fieldName, fieldFragList);
         }
-                
+
         /// <summary>
         /// return the best fragments.
         /// </summary>
@@ -113,7 +114,7 @@ namespace Lucene.Net.Search.Vectorhighli
             String fieldName, int fragCharSize)
         {
             FieldTermStack fieldTermStack = new FieldTermStack(reader, docId, fieldName,
fieldQuery);
-            FieldPhraseList fieldPhraseList = new FieldPhraseList(fieldTermStack, fieldQuery);
+            FieldPhraseList fieldPhraseList = new FieldPhraseList(fieldTermStack, fieldQuery,
phraseLimit);
             return fragListBuilder.CreateFieldFragList(fieldPhraseList, fragCharSize);
         }
 
@@ -121,18 +122,29 @@ namespace Lucene.Net.Search.Vectorhighli
         /// return whether phraseHighlight or not.
         /// </summary>
         /// <returns>return whether phraseHighlight or not.</returns>
-        public bool IsPhraseHighlight() 
-        { 
-            return phraseHighlight; 
+        public bool IsPhraseHighlight()
+        {
+            return phraseHighlight;
         }
 
         /// <summary>
         /// return whether fieldMatch or not.
         /// </summary>
         /// <returns>return whether fieldMatch or not.</returns>
-        public bool IsFieldMatch() 
-        { 
-            return fieldMatch; 
+        public bool IsFieldMatch()
+        {
+            return fieldMatch;
+        }
+                                
+        /// <summary>
+        /// The maximum number of phrases to analyze when searching for the highest-scoring phrase.
+        /// The default is Int32.MaxValue (no limit); a zero or negative value results in no phrases being analyzed.
+        /// </summary>
+        
+        public int PhraseLimit
+        {
+            get{ return phraseLimit; }
+            set{ this.phraseLimit = value; }
         }
     }
 }

Modified: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/FastVectorHighlighter/FieldPhraseList.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/FastVectorHighlighter/FieldPhraseList.cs?rev=1140026&r1=1140025&r2=1140026&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/FastVectorHighlighter/FieldPhraseList.cs
(original)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/FastVectorHighlighter/FieldPhraseList.cs
Mon Jun 27 06:45:45 2011
@@ -37,18 +37,29 @@ namespace Lucene.Net.Search.Vectorhighli
         public LinkedList<WeightedPhraseInfo> phraseList = new LinkedList<WeightedPhraseInfo>();
         
         /// <summary>
+        /// create a FieldPhraseList that has no limit on the number of phrases to analyze
+        /// </summary>
+        /// <param name="fieldTermStack">FieldTermStack object</param>
+        /// <param name="fieldQuery">FieldQuery object</param>
+        public FieldPhraseList(FieldTermStack fieldTermStack, FieldQuery fieldQuery) : this(fieldTermStack,
fieldQuery, Int32.MaxValue)
+        {
+        }
+  
+
+        /// <summary>
         /// a constructor. 
         /// </summary>
         /// <param name="fieldTermStack">FieldTermStack object</param>
         /// <param name="fieldQuery">FieldQuery object</param>
-        public FieldPhraseList(FieldTermStack fieldTermStack, FieldQuery fieldQuery)
+        /// <param name="phraseLimit">maximum size of phraseList</param>
+        public FieldPhraseList(FieldTermStack fieldTermStack, FieldQuery fieldQuery, int
phraseLimit)
         {
             String field = fieldTermStack.GetFieldName();
 
             LinkedList<TermInfo> phraseCandidate = new LinkedList<TermInfo>();
             QueryPhraseMap currMap = null;
             QueryPhraseMap nextMap = null;
-            while (!fieldTermStack.IsEmpty())
+            while (!fieldTermStack.IsEmpty() && (phraseList.Count < phraseLimit)
)
             {
 
                 phraseCandidate.Clear();

Modified: incubator/lucene.net/branches/Lucene.Net_2_9_4g/test/contrib/FastVectorHighlighter/FieldPhraseListTest.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/test/contrib/FastVectorHighlighter/FieldPhraseListTest.cs?rev=1140026&r1=1140025&r2=1140026&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/test/contrib/FastVectorHighlighter/FieldPhraseListTest.cs
(original)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/test/contrib/FastVectorHighlighter/FieldPhraseListTest.cs
Mon Jun 27 06:45:45 2011
@@ -222,5 +222,35 @@ namespace Lucene.Net.Search.Vectorhighli
             Assert.AreEqual(1, fpl.phraseList.Count);
             Assert.AreEqual("sppeeeed(1.0)((88,93))", fpl.phraseList.First.Value.ToString());
         }
+        
+      /* This test shows a big speedup from limiting the number of analyzed phrases in 
+       * this bad case for FieldPhraseList */
+      /* But it is not reliable as a unit test since it is timing-dependent
+      public void testManyRepeatedTerms() throws Exception {
+          long t = System.currentTimeMillis();
+          testManyTermsWithLimit (-1);
+          long t1 = System.currentTimeMillis();
+          testManyTermsWithLimit (1);
+          long t2 = System.currentTimeMillis();
+          assertTrue (t2-t1 * 1000 < t1-t);
+      }
+      private void testManyTermsWithLimit (int limit) throws Exception {
+          StringBuilder buf = new StringBuilder ();
+          for (int i = 0; i < 16000; i++) {
+              buf.append("a b c ");
+          }
+          make1d1fIndex( buf.toString());
+
+          Query query = tq("a");
+          FieldQuery fq = new FieldQuery( query, true, true );
+          FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
+          FieldPhraseList fpl = new FieldPhraseList( stack, fq, limit);
+          if (limit < 0 || limit > 16000)
+              assertEquals( 16000, fpl.phraseList.size() );
+          else
+              assertEquals( limit, fpl.phraseList.size() );
+          assertEquals( "a(1.0)((0,1))", fpl.phraseList.get( 0 ).toString() );      
+      }
+      */
     }
 }



Mime
View raw message