lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mikemcc...@apache.org
Subject svn commit: r928229 - in /lucene/java/branches/flex_1458/src: java/org/apache/lucene/index/ java/org/apache/lucene/index/codecs/preflex/ java/org/apache/lucene/index/codecs/standard/ java/org/apache/lucene/search/ test/org/apache/lucene/
Date Sat, 27 Mar 2010 16:51:12 GMT
Author: mikemccand
Date: Sat Mar 27 16:51:11 2010
New Revision: 928229

URL: http://svn.apache.org/viewvc?rev=928229&view=rev
Log:
LUCENE-2351: some optimizations for MTQs

Modified:
    lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/LegacyFieldsEnum.java
    lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/MultiTermsEnum.java
    lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermsEnum.java
    lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java
    lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java
    lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/AutomatonTermsEnum.java
    lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/FilteredTermsEnum.java
    lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
    lucene/java/branches/flex_1458/src/test/org/apache/lucene/TestExternalCodecs.java

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/LegacyFieldsEnum.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/LegacyFieldsEnum.java?rev=928229&r1=928228&r2=928229&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/LegacyFieldsEnum.java
(original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/LegacyFieldsEnum.java
Sat Mar 27 16:51:11 2010
@@ -89,7 +89,7 @@ class LegacyFieldsEnum extends FieldsEnu
     }
 
     @Override
-    public SeekStatus seek(BytesRef text) throws IOException {
+    public SeekStatus seek(BytesRef text, boolean useCache) throws IOException {
       if (terms != null) {
         terms.close();
       }

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/MultiTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/MultiTermsEnum.java?rev=928229&r1=928228&r2=928229&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/MultiTermsEnum.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/MultiTermsEnum.java Sat
Mar 27 16:51:11 2010
@@ -136,11 +136,11 @@ public final class MultiTermsEnum extend
   }
 
   @Override
-  public SeekStatus seek(BytesRef term) throws IOException {
+  public SeekStatus seek(BytesRef term, boolean useCache) throws IOException {
     queue.clear();
     numTop = 0;
     for(int i=0;i<numSubs;i++) {
-      final SeekStatus status = currentSubs[i].terms.seek(term);
+      final SeekStatus status = currentSubs[i].terms.seek(term, useCache);
       if (status == SeekStatus.FOUND) {
         top[numTop++] = currentSubs[i];
         currentSubs[i].current = term;

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermsEnum.java?rev=928229&r1=928228&r2=928229&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermsEnum.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermsEnum.java Sat Mar
27 16:51:11 2010
@@ -54,11 +54,18 @@ public abstract class TermsEnum {
    *  was hit. */
   public static enum SeekStatus {END, FOUND, NOT_FOUND};
 
+  /** Expert: just like {@link #seek(BytesRef)} but allows
+   *  you to control whether the implementation should
+   *  attempt to use its term cache (if it uses one). */
+  public abstract SeekStatus seek(BytesRef text, boolean useCache) throws IOException;
+
   /** Seeks to the specified term.  Returns SeekStatus to
    *  indicate whether exact term was found, a different
    *  term was found, or EOF was hit.  The target term may
    *  be before or after the current term. */
-  public abstract SeekStatus seek(BytesRef text) throws IOException;
+  public final SeekStatus seek(BytesRef text) throws IOException {
+    return seek(text, true);
+  }
 
   /** Seeks to the specified term by ordinal (position) as
    *  previously returned by {@link #ord}.  The target ord
@@ -124,7 +131,7 @@ public abstract class TermsEnum {
    */
   public static final TermsEnum EMPTY = new TermsEnum() {    
     @Override
-    public SeekStatus seek(BytesRef term) { return SeekStatus.END; }
+    public SeekStatus seek(BytesRef term, boolean useCache) { return SeekStatus.END; }
     
     @Override
     public SeekStatus seek(long ord) { return SeekStatus.END; }

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java?rev=928229&r1=928228&r2=928229&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java
(original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java
Sat Mar 27 16:51:11 2010
@@ -271,7 +271,7 @@ public class PreFlexFields extends Field
     }
 
     @Override
-    public SeekStatus seek(BytesRef term) throws IOException {
+    public SeekStatus seek(BytesRef term, boolean useCache) throws IOException {
       skipNext = false;
       final TermInfosReader tis = getTermsDict();
       final Term t0 = new Term(fieldInfo.name, term.utf8ToString());

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java?rev=928229&r1=928228&r2=928229&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java
(original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java
Sat Mar 27 16:51:11 2010
@@ -283,19 +283,20 @@ public class StandardTermsDictReader ext
        *  is found, SeekStatus.NOT_FOUND if a different term
        *  was found, SeekStatus.END if we hit EOF */
       @Override
-      public SeekStatus seek(BytesRef term) throws IOException {
+      public SeekStatus seek(BytesRef term, boolean useCache) throws IOException {
         // Check cache
         fieldTerm.term = term;
-        TermState cachedState = termsCache.get(fieldTerm);
-        
-        if (cachedState != null) {
-
-          state.copy(cachedState);
-
-          seekPending = true;
-          bytesReader.term.copy(term);
-
-          return SeekStatus.FOUND;
+        TermState cachedState;
+        if (useCache) {
+          cachedState = termsCache.get(fieldTerm);
+          if (cachedState != null) {
+            state.copy(cachedState);
+            seekPending = true;
+            bytesReader.term.copy(term);
+            return SeekStatus.FOUND;
+          }
+        } else {
+          cachedState = null;
         }
 
         boolean doSeek = true;
@@ -353,7 +354,7 @@ public class StandardTermsDictReader ext
           final int cmp = termComp.compare(bytesReader.term, term);
           if (cmp == 0) {
 
-            if (doSeek) {
+            if (doSeek && useCache) {
               // Store in cache
               FieldAndTerm entryKey = new FieldAndTerm(fieldTerm);
               cachedState = (TermState) state.clone();

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/AutomatonTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/AutomatonTermsEnum.java?rev=928229&r1=928228&r2=928229&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/AutomatonTermsEnum.java
(original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/AutomatonTermsEnum.java
Sat Mar 27 16:51:11 2010
@@ -18,7 +18,6 @@ package org.apache.lucene.search;
  */
 
 import java.io.IOException;
-import java.util.BitSet;
 
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
@@ -69,8 +68,10 @@ public class AutomatonTermsEnum extends 
   private final boolean finite;
   // array of sorted transitions for each state, indexed by state number
   private final Transition[][] allTransitions;
-  // for path tracking: each bit is a numbered state
-  private final BitSet visited;
+  // for path tracking: each long records gen when we last
+  // visited the state; we use gens to avoid having to clear
+  private final long[] visited;
+  private long curGen;
   // used for unicode conversion from BytesRef byte[] to char[]
   private final UnicodeUtil.UTF16Result utf16 = new UnicodeUtil.UTF16Result();
   // the reference used for seeking forwards through the term dictionary
@@ -126,16 +127,18 @@ public class AutomatonTermsEnum extends 
       // we will seek each time anyway (and take the unicode conversion hit).
       // its also currently expensive to calculate, because getCommonSuffix is 
       // a bit expensive.
-      commonSuffixRef = new BytesRef("");
+      commonSuffixRef = null;
       // build a cache of sorted transitions for every state
       allTransitions = new Transition[runAutomaton.getSize()][];
       for (State state : this.automaton.getStates())
         allTransitions[state.getNumber()] = state.getSortedTransitionArray(false);
       // used for path tracking: one generation stamp per numbered state.
-      visited = new BitSet(runAutomaton.getSize());
+      visited = new long[runAutomaton.getSize()];
       NO_MATCH = AcceptStatus.NO_AND_SEEK;
       YES_MATCH = finite ? AcceptStatus.YES_AND_SEEK : AcceptStatus.YES;
     }
+
+    setUseTermsCache(finite);
   }
   
   /**
@@ -196,7 +199,7 @@ public class AutomatonTermsEnum extends 
    */
   @Override
   protected AcceptStatus accept(final BytesRef term) {
-    if (term.endsWith(commonSuffixRef)) {
+    if (commonSuffixRef == null || term.endsWith(commonSuffixRef)) {
       UnicodeUtil.UTF8toUTF16(term.bytes, term.offset, term.length, utf16);
       return runAutomaton.run(utf16.result, 0, utf16.length) ? YES_MATCH : NO_MATCH;
     } else {
@@ -307,9 +310,10 @@ public class AutomatonTermsEnum extends 
         c++;
     }
 
+    curGen++;
+
     utf16.setLength(position);
-    visited.clear();
-    visited.set(state);
+    visited[state] = curGen;
 
     Transition transitions[] = allTransitions[state];
 
@@ -327,8 +331,8 @@ public class AutomatonTermsEnum extends 
          * as long as is possible, continue down the minimal path in
          * lexicographic order. if a loop or accept state is encountered, stop.
          */
-        while (!visited.get(state) && !runAutomaton.isAccept(state)) {
-          visited.set(state);
+        while (visited[state] != curGen && !runAutomaton.isAccept(state)) {
+          visited[state] = curGen;
           /* 
            * Note: we work with a DFA with no transitions to dead states.
            * so the below is ok, if it is not an accept state,

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/FilteredTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/FilteredTermsEnum.java?rev=928229&r1=928228&r2=928229&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/FilteredTermsEnum.java
(original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/FilteredTermsEnum.java
Sat Mar 27 16:51:11 2010
@@ -46,6 +46,7 @@ public abstract class FilteredTermsEnum 
   private BytesRef initialSeekTerm = null;
   private boolean doSeek = true;        
   private BytesRef actualTerm = null;
+  private boolean useTermsCache = false;
 
   private final TermsEnum tenum;
 
@@ -115,6 +116,16 @@ public abstract class FilteredTermsEnum 
     return t;
   }
 
+  /** Expert: enable or disable the terms cache when seeking. */
+  protected final void setUseTermsCache(boolean useTermsCache) {
+    this.useTermsCache = useTermsCache;
+  }
+
+  /** Expert: returns whether the terms cache is used when seeking. */
+  protected final boolean getUseTermsCache() {
+    return useTermsCache;
+  }
+
   /**
    * Returns the related attributes, the returned {@link AttributeSource}
    * is shared with the delegate {@code TermsEnum}.
@@ -148,7 +159,7 @@ public abstract class FilteredTermsEnum 
    * @throws UnsupportedOperationException
    */
   @Override
-  public SeekStatus seek(BytesRef term) throws IOException {
+  public SeekStatus seek(BytesRef term, boolean useCache) throws IOException {
     throw new UnsupportedOperationException(getClass().getName()+" does not support seeking");
   }
 
@@ -187,7 +198,7 @@ public abstract class FilteredTermsEnum 
       if (doSeek) {
         doSeek = false;
         final BytesRef t = nextSeekTerm(actualTerm);
-        if (t == null || tenum.seek(t) == SeekStatus.END) {
+        if (t == null || tenum.seek(t, useTermsCache) == SeekStatus.END) {
           // no more terms to seek to or enum exhausted
           return null;
         }

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/FuzzyTermsEnum.java?rev=928229&r1=928228&r2=928229&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/FuzzyTermsEnum.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/FuzzyTermsEnum.java Sat
Mar 27 16:51:11 2010
@@ -229,8 +229,8 @@ public final class FuzzyTermsEnum extend
   }
   
   @Override
-  public SeekStatus seek(BytesRef text) throws IOException {
-    return actualEnum.seek(text);
+  public SeekStatus seek(BytesRef text, boolean useCache) throws IOException {
+    return actualEnum.seek(text, useCache);
   }
   
   @Override

Modified: lucene/java/branches/flex_1458/src/test/org/apache/lucene/TestExternalCodecs.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/test/org/apache/lucene/TestExternalCodecs.java?rev=928229&r1=928228&r2=928229&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/test/org/apache/lucene/TestExternalCodecs.java (original)
+++ lucene/java/branches/flex_1458/src/test/org/apache/lucene/TestExternalCodecs.java Sat
Mar 27 16:51:11 2010
@@ -284,7 +284,7 @@ public class TestExternalCodecs extends 
       }
 
       @Override
-      public SeekStatus seek(BytesRef term) {
+      public SeekStatus seek(BytesRef term, boolean useCache) {
         current = term.utf8ToString();
         it = null;
         if (ramField.termToDocs.containsKey(current)) {



Mime
View raw message