lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mikemcc...@apache.org
Subject svn commit: r887693 - in /lucene/java/branches/lucene_2_9: ./ contrib/ contrib/highlighter/src/test/ contrib/spellchecker/src/java/org/apache/lucene/search/spell/ contrib/spellchecker/src/test/org/apache/lucene/search/spell/ src/java/org/apache/lucene/...
Date Sun, 06 Dec 2009 15:24:48 GMT
Author: mikemccand
Date: Sun Dec  6 15:24:48 2009
New Revision: 887693

URL: http://svn.apache.org/viewvc?rev=887693&view=rev
Log:
LUCENE-2108: backport to 2.9 thread safety of spellchecker

Modified:
    lucene/java/branches/lucene_2_9/   (props changed)
    lucene/java/branches/lucene_2_9/CHANGES.txt   (props changed)
    lucene/java/branches/lucene_2_9/contrib/   (props changed)
    lucene/java/branches/lucene_2_9/contrib/CHANGES.txt   (contents, props changed)
    lucene/java/branches/lucene_2_9/contrib/highlighter/src/test/   (props changed)
    lucene/java/branches/lucene_2_9/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java
    lucene/java/branches/lucene_2_9/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestSpellChecker.java
    lucene/java/branches/lucene_2_9/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java
  (props changed)
    lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
  (props changed)
    lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/analysis/TestISOLatin1AccentFilter.java
  (props changed)
    lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/document/TestDateTools.java
  (props changed)
    lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/document/TestNumberTools.java
  (props changed)
    lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
  (props changed)
    lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/util/TestAttributeSource.java
  (props changed)

Propchange: lucene/java/branches/lucene_2_9/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Dec  6 15:24:48 2009
@@ -1,3 +1,3 @@
 /lucene/java/branches/lucene_2_4:748824
 /lucene/java/branches/lucene_3_0:886275
-/lucene/java/trunk:824125,826029,826385,830871,833095,833297,833886,882672,883554,884870,886257
+/lucene/java/trunk:824125,826029,826385,830871,833095,833297,833886,882672,883554,884870,886257,887532

Propchange: lucene/java/branches/lucene_2_9/CHANGES.txt
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Dec  6 15:24:48 2009
@@ -1 +1 @@
-/lucene/java/trunk/CHANGES.txt:886257
+/lucene/java/trunk/CHANGES.txt:886257,887532

Propchange: lucene/java/branches/lucene_2_9/contrib/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Dec  6 15:24:48 2009
@@ -1 +1 @@
-/lucene/java/trunk/contrib:886257
+/lucene/java/trunk/contrib:886257,887532

Modified: lucene/java/branches/lucene_2_9/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_9/contrib/CHANGES.txt?rev=887693&r1=887692&r2=887693&view=diff
==============================================================================
--- lucene/java/branches/lucene_2_9/contrib/CHANGES.txt (original)
+++ lucene/java/branches/lucene_2_9/contrib/CHANGES.txt Sun Dec  6 15:24:48 2009
@@ -2,6 +2,19 @@
 
 ======================= 2.9 branch (not yet released) =======================
 
+API Changes
+
+ * LUCENE-2108: Add SpellChecker.close, to close the underlying
+   reader.  (Eirik Bjørsnøs via Mike McCandless)
+
+New features
+
+ * LUCENE-2108: Spellchecker now safely supports concurrent modifications to
+   the spell-index. Threads can safely obtain term suggestions while the spell-
+   index is rebuild, cleared or reset. Internal IndexSearcher instances remain
+   open until the last thread accessing them releases the reference.
+   (Simon Willnauer)
+
 ======================= Release 2.9.1 2009-11-06 =======================
 
 Changes in backwards compatibility policy

Propchange: lucene/java/branches/lucene_2_9/contrib/CHANGES.txt
            ('svn:mergeinfo' removed)

Propchange: lucene/java/branches/lucene_2_9/contrib/highlighter/src/test/
            ('svn:mergeinfo' removed)

Modified: lucene/java/branches/lucene_2_9/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_9/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java?rev=887693&r1=887692&r2=887693&view=diff
==============================================================================
--- lucene/java/branches/lucene_2_9/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java
(original)
+++ lucene/java/branches/lucene_2_9/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java
Sun Dec  6 15:24:48 2009
@@ -17,6 +17,9 @@
  * limitations under the License.
  */
 
+import java.io.IOException;
+import java.util.Iterator;
+
 import org.apache.lucene.analysis.WhitespaceAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
@@ -25,15 +28,13 @@
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.Hits;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.store.AlreadyClosedException;
 import org.apache.lucene.store.Directory;
 
-import java.io.IOException;
-import java.util.Iterator;
-
 /**
  * <p>
  *   Spell Checker class  (Main class) <br/>
@@ -60,10 +61,14 @@
    * Field name for each word in the ngram index.
    */
   public static final String F_WORD = "word";
+  
+  private static final Term F_WORD_TERM = new Term(F_WORD);
 
   /**
    * the spell index
    */
+  // don't modify the directory directly - see #swapSearcher()
+  // TODO: why is this package private?
   Directory spellIndex;
 
   /**
@@ -72,7 +77,22 @@
   private float bStart = 2.0f;
   private float bEnd = 1.0f;
 
+  // don't use this searcher directly - see #swapSearcher()
   private IndexSearcher searcher;
+  
+  /*
+   * this locks all modifications to the current searcher. 
+   */
+  private final Object searcherLock = new Object();
+  
+  /*
+   * this lock synchronizes all possible modifications to the 
+   * current index directory. It should not be possible to try modifying
+   * the same index concurrently. Note: Do not acquire the searcher lock
+   * before acquiring this lock! 
+   */
+  private final Object modifyCurrentIndexLock = new Object();
+  private volatile boolean closed = false;
 
   // minimum score for hits generated by the spell checker query
   private float minScore = 0.5f;
@@ -82,15 +102,24 @@
   /**
    * Use the given directory as a spell checker index. The directory
    * is created if it doesn't exist yet.
+   * @param spellIndex the spell index directory
+   * @param sd the {@link StringDistance} measurement to use 
+   * @throws IOException if Spellchecker can not open the directory
+   */
+  public SpellChecker(Directory spellIndex, StringDistance sd) throws IOException {
+    setSpellIndex(spellIndex);
+    setStringDistance(sd);
+  }
+  /**
+   * Use the given directory as a spell checker index with a
+   * {@link LevensteinDistance} as the default {@link StringDistance}. The
+   * directory is created if it doesn't exist yet.
    * 
    * @param spellIndex
+   *          the spell index directory
    * @throws IOException
+   *           if spellchecker can not open the directory
    */
-  public SpellChecker(Directory spellIndex,StringDistance sd) throws IOException {
-    this.setSpellIndex(spellIndex);
-    this.setStringDistance(sd);
-  }
-
   public SpellChecker(Directory spellIndex) throws IOException {
     this(spellIndex, new LevensteinDistance());
   }
@@ -99,27 +128,41 @@
    * Use a different index as the spell checker index or re-open
    * the existing index if <code>spellIndex</code> is the same value
    * as given in the constructor.
-   * 
-   * @param spellIndex
-   * @throws IOException
-   */
-  public void setSpellIndex(Directory spellIndex) throws IOException {
-    this.spellIndex = spellIndex;
-    if (!IndexReader.indexExists(spellIndex)) {
-        IndexWriter writer = new IndexWriter(spellIndex, null, true);
-        writer.close();
-    }
-    // close the old searcher, if there was one
-    if (searcher != null) {
-      searcher.close();
+   * @param spellIndexDir the spell directory to use
+   * @throws AlreadyClosedException if the Spellchecker is already closed
+   * @throws  IOException if spellchecker can not open the directory
+   */
+  // TODO: we should make this final as it is called in the constructor
+  public void setSpellIndex(Directory spellIndexDir) throws IOException {
+    // this could be the same directory as the current spellIndex
+    // modifications to the directory should be synchronized 
+    synchronized (modifyCurrentIndexLock) {
+      ensureOpen();
+      if (!IndexReader.indexExists(spellIndexDir)) {
+          IndexWriter writer = new IndexWriter(spellIndexDir, null, true,
+              IndexWriter.MaxFieldLength.UNLIMITED);
+          writer.close();
+      }
+      swapSearcher(spellIndexDir);
     }
-    searcher = new IndexSearcher(this.spellIndex);
   }
-  
+  /**
+   * Sets the {@link StringDistance} implementation for this
+   * {@link SpellChecker} instance.
+   * 
+   * @param sd the {@link StringDistance} implementation for this
+   * {@link SpellChecker} instance
+   */
   public void setStringDistance(StringDistance sd) {
     this.sd = sd;
   }
-
+  /**
+   * Returns the {@link StringDistance} instance used by this
+   * {@link SpellChecker} instance.
+   * 
+   * @return the {@link StringDistance} instance used by this
+   *         {@link SpellChecker} instance.
+   */
   public StringDistance getStringDistance() {
     return sd;
   }
@@ -144,7 +187,8 @@
    *
    * @param word the word you want a spell check done on
    * @param numSug the number of suggested words
-   * @throws IOException
+   * @throws IOException if the underlying index throws an {@link IOException}
+   * @throws AlreadyClosedException if the Spellchecker is already closed
    * @return String[]
    */
   public String[] suggestSimilar(String word, int numSug) throws IOException {
@@ -169,96 +213,104 @@
    * words are restricted to the words present in this field.
    * @param morePopular return only the suggest words that are as frequent or more frequent
than the searched word
    * (only if restricted mode = (indexReader!=null and field!=null)
-   * @throws IOException
+   * @throws IOException if the underlying index throws an {@link IOException}
+   * @throws AlreadyClosedException if the Spellchecker is already closed
    * @return String[] the sorted list of the suggest words with these 2 criteria:
    * first criteria: the edit distance, second criteria (only if restricted mode): the popularity
    * of the suggest words in the field of the user index
    */
   public String[] suggestSimilar(String word, int numSug, IndexReader ir,
       String field, boolean morePopular) throws IOException {
-
-    float min = this.minScore;
-    final int lengthWord = word.length();
-
-    final int freq = (ir != null && field != null) ? ir.docFreq(new Term(field, word))
: 0;
-    final int goalFreq = (morePopular && ir != null && field != null) ? freq
: 0;
-    // if the word exists in the real index and we don't care for word frequency, return
the word itself
-    if (!morePopular && freq > 0) {
-      return new String[] { word };
-    }
-
-    BooleanQuery query = new BooleanQuery();
-    String[] grams;
-    String key;
-
-    for (int ng = getMin(lengthWord); ng <= getMax(lengthWord); ng++) {
-
-      key = "gram" + ng; // form key
-
-      grams = formGrams(word, ng); // form word into ngrams (allow dups too)
-
-      if (grams.length == 0) {
-        continue; // hmm
-      }
-
-      if (bStart > 0) { // should we boost prefixes?
-        add(query, "start" + ng, grams[0], bStart); // matches start of word
-
-      }
-      if (bEnd > 0) { // should we boost suffixes
-        add(query, "end" + ng, grams[grams.length - 1], bEnd); // matches end of word
-
-      }
-      for (int i = 0; i < grams.length; i++) {
-        add(query, key, grams[i]);
-      }
-    }
-
-//    System.out.println("Q: " + query);
-    Hits hits = searcher.search(query);
-//    System.out.println("HITS: " + hits.length());
-    SuggestWordQueue sugQueue = new SuggestWordQueue(numSug);
-
-    // go thru more than 'maxr' matches in case the distance filter triggers
-    int stop = Math.min(hits.length(), 10 * numSug);
-    SuggestWord sugWord = new SuggestWord();
-    for (int i = 0; i < stop; i++) {
-
-      sugWord.string = hits.doc(i).get(F_WORD); // get orig word
-
-      // don't suggest a word for itself, that would be silly
-      if (sugWord.string.equals(word)) {
-        continue;
+    // obtainSearcher calls ensureOpen
+    final IndexSearcher indexSearcher = obtainSearcher();
+    try{
+      float min = this.minScore;
+      final int lengthWord = word.length();
+  
+      final int freq = (ir != null && field != null) ? ir.docFreq(new Term(field,
word)) : 0;
+      final int goalFreq = (morePopular && ir != null && field != null) ?
freq : 0;
+      // if the word exists in the real index and we don't care for word frequency, return
the word itself
+      if (!morePopular && freq > 0) {
+        return new String[] { word };
       }
-
-      // edit distance
-      sugWord.score = sd.getDistance(word,sugWord.string);
-      if (sugWord.score < min) {
-        continue;
+  
+      BooleanQuery query = new BooleanQuery();
+      String[] grams;
+      String key;
+  
+      for (int ng = getMin(lengthWord); ng <= getMax(lengthWord); ng++) {
+  
+        key = "gram" + ng; // form key
+  
+        grams = formGrams(word, ng); // form word into ngrams (allow dups too)
+  
+        if (grams.length == 0) {
+          continue; // hmm
+        }
+  
+        if (bStart > 0) { // should we boost prefixes?
+          add(query, "start" + ng, grams[0], bStart); // matches start of word
+  
+        }
+        if (bEnd > 0) { // should we boost suffixes
+          add(query, "end" + ng, grams[grams.length - 1], bEnd); // matches end of word
+  
+        }
+        for (int i = 0; i < grams.length; i++) {
+          add(query, key, grams[i]);
+        }
       }
-
-      if (ir != null && field != null) { // use the user index
-        sugWord.freq = ir.docFreq(new Term(field, sugWord.string)); // freq in the index
-        // don't suggest a word that is not present in the field
-        if ((morePopular && goalFreq > sugWord.freq) || sugWord.freq < 1) {
+  
+      int maxHits = 10 * numSug;
+      
+  //    System.out.println("Q: " + query);
+      ScoreDoc[] hits = indexSearcher.search(query, null, maxHits).scoreDocs;
+  //    System.out.println("HITS: " + hits.length());
+      SuggestWordQueue sugQueue = new SuggestWordQueue(numSug);
+  
+      // go thru more than 'maxr' matches in case the distance filter triggers
+      int stop = Math.min(hits.length, maxHits);
+      SuggestWord sugWord = new SuggestWord();
+      for (int i = 0; i < stop; i++) {
+  
+        sugWord.string = indexSearcher.doc(hits[i].doc).get(F_WORD); // get orig word
+  
+        // don't suggest a word for itself, that would be silly
+        if (sugWord.string.equals(word)) {
           continue;
         }
+  
+        // edit distance
+        sugWord.score = sd.getDistance(word,sugWord.string);
+        if (sugWord.score < min) {
+          continue;
+        }
+  
+        if (ir != null && field != null) { // use the user index
+          sugWord.freq = ir.docFreq(new Term(field, sugWord.string)); // freq in the index
+          // don't suggest a word that is not present in the field
+          if ((morePopular && goalFreq > sugWord.freq) || sugWord.freq < 1)
{
+            continue;
+          }
+        }
+        sugQueue.insertWithOverflow(sugWord);
+        if (sugQueue.size() == numSug) {
+          // if queue full, maintain the minScore score
+          min = ((SuggestWord)sugQueue.top()).score;
+        }
+        sugWord = new SuggestWord();
       }
-      sugQueue.insert(sugWord);
-      if (sugQueue.size() == numSug) {
-        // if queue full, maintain the minScore score
-        min = ((SuggestWord) sugQueue.top()).score;
+  
+      // convert to array string
+      String[] list = new String[sugQueue.size()];
+      for (int i = sugQueue.size() - 1; i >= 0; i--) {
+        list[i] = ((SuggestWord)sugQueue.pop()).string;
       }
-      sugWord = new SuggestWord();
-    }
-
-    // convert to array string
-    String[] list = new String[sugQueue.size()];
-    for (int i = sugQueue.size() - 1; i >= 0; i--) {
-      list[i] = ((SuggestWord) sugQueue.pop()).string;
+  
+      return list;
+    } finally {
+      releaseSearcher(indexSearcher);
     }
-
-    return list;
   }
 
   /**
@@ -295,24 +347,33 @@
   /**
    * Removes all terms from the spell check index.
    * @throws IOException
+   * @throws AlreadyClosedException if the Spellchecker is already closed
    */
   public void clearIndex() throws IOException {
-    IndexWriter writer = new IndexWriter(spellIndex, null, true);
-    writer.close();
-    
-    //close the old searcher
-    searcher.close();
-    searcher = new IndexSearcher(this.spellIndex);
+    synchronized (modifyCurrentIndexLock) {
+      ensureOpen();
+      final Directory dir = this.spellIndex;
+      final IndexWriter writer = new IndexWriter(dir, null, true, IndexWriter.MaxFieldLength.UNLIMITED);
+      writer.close();
+      swapSearcher(dir);
+    }
   }
 
   /**
    * Check whether the word exists in the index.
    * @param word
    * @throws IOException
-   * @return true iff the word exists in the index
+   * @throws AlreadyClosedException if the Spellchecker is already closed
+   * @return true if the word exists in the index
    */
   public boolean exist(String word) throws IOException {
-    return searcher.docFreq(new Term(F_WORD, word)) > 0;
+    // obtainSearcher calls ensureOpen
+    final IndexSearcher indexSearcher = obtainSearcher();
+    try{
+      return indexSearcher.docFreq(F_WORD_TERM.createTerm(word)) > 0;
+    } finally {
+      releaseSearcher(indexSearcher);
+    }
   }
 
   /**
@@ -320,37 +381,42 @@
    * @param dict Dictionary to index
    * @param mergeFactor mergeFactor to use when indexing
    * @param ramMB the max amount or memory in MB to use
+   * @throws AlreadyClosedException if the Spellchecker is already closed
    * @throws IOException
    */
   public void indexDictionary(Dictionary dict, int mergeFactor, int ramMB) throws IOException
{
-    IndexWriter writer = new IndexWriter(spellIndex, true, new WhitespaceAnalyzer());
-    writer.setMergeFactor(mergeFactor);
-    writer.setRAMBufferSizeMB(ramMB);
-
-    Iterator iter = dict.getWordsIterator();
-    while (iter.hasNext()) {
-      String word = (String) iter.next();
-
-      int len = word.length();
-      if (len < 3) {
-        continue; // too short we bail but "too long" is fine...
-      }
-
-      if (this.exist(word)) { // if the word already exist in the gramindex
-        continue;
-      }
-
-      // ok index the word
-      Document doc = createDocument(word, getMin(len), getMax(len));
-      writer.addDocument(doc);
-    }
-    // close writer
-    writer.optimize();
-    writer.close();
-    // also re-open the spell index to see our own changes when the next suggestion
-    // is fetched:
-    searcher.close();
-    searcher = new IndexSearcher(this.spellIndex);
+    synchronized (modifyCurrentIndexLock) {
+      ensureOpen();
+      final Directory dir = this.spellIndex;
+      final IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(),
+          IndexWriter.MaxFieldLength.UNLIMITED);
+      writer.setMergeFactor(mergeFactor);
+      writer.setRAMBufferSizeMB(ramMB);
+  
+      Iterator iter = dict.getWordsIterator();
+      while (iter.hasNext()) {
+        String word = (String)iter.next();
+  
+        int len = word.length();
+        if (len < 3) {
+          continue; // too short we bail but "too long" is fine...
+        }
+  
+        if (this.exist(word)) { // if the word already exist in the gramindex
+          continue;
+        }
+  
+        // ok index the word
+        Document doc = createDocument(word, getMin(len), getMax(len));
+        writer.addDocument(doc);
+      }
+      // close writer
+      writer.optimize();
+      writer.close();
+      // also re-open the spell index to see our own changes when the next suggestion
+      // is fetched:
+      swapSearcher(dir);
+    }
   }
 
   /**
@@ -362,7 +428,7 @@
     indexDictionary(dict, 300, 10);
   }
 
-  private int getMin(int l) {
+  private static int getMin(int l) {
     if (l > 5) {
       return 3;
     }
@@ -372,7 +438,7 @@
     return 1;
   }
 
-  private int getMax(int l) {
+  private static int getMax(int l) {
     if (l > 5) {
       return 4;
     }
@@ -407,4 +473,84 @@
       }
     }
   }
+  
+  private IndexSearcher obtainSearcher() {
+    synchronized (searcherLock) {
+      ensureOpen();
+      searcher.getIndexReader().incRef();
+      return searcher;
+    }
+  }
+  
+  private void releaseSearcher(final IndexSearcher aSearcher) throws IOException{
+      // don't check if open - always decRef 
+      // don't decrement the private searcher - could have been swapped
+      aSearcher.getIndexReader().decRef();      
+  }
+  
+  private void ensureOpen() {
+    if (closed) {
+      throw new AlreadyClosedException("Spellchecker has been closed");
+    }
+  }
+  
+  /**
+   * Close the IndexSearcher used by this SpellChecker
+   * @throws IOException if the close operation causes an {@link IOException}
+   * @throws AlreadyClosedException if the {@link SpellChecker} is already closed
+   */
+  public void close() throws IOException {
+    synchronized (searcherLock) {
+      ensureOpen();
+      closed = true;
+      if (searcher != null) {
+        searcher.close();
+      }
+      searcher = null;
+    }
+  }
+  
+  private void swapSearcher(final Directory dir) throws IOException {
+    /*
+     * opening a searcher is possibly very expensive.
+     * We rather close it again if the Spellchecker was closed during
+     * this operation than block access to the current searcher while opening.
+     */
+    final IndexSearcher indexSearcher = createSearcher(dir);
+    synchronized (searcherLock) {
+      if(closed){
+        indexSearcher.close();
+        throw new AlreadyClosedException("Spellchecker has been closed");
+      }
+      if (searcher != null) {
+        searcher.close();
+      }
+      // set the spellindex in the sync block - ensure consistency.
+      searcher = indexSearcher;
+      this.spellIndex = dir;
+    }
+  }
+  
+  /**
+   * Creates a new read-only IndexSearcher 
+   * @param dir the directory used to open the searcher
+   * @return a new read-only IndexSearcher
+   * @throws IOException f there is a low-level IO error
+   */
+  // for testing purposes
+  IndexSearcher createSearcher(final Directory dir) throws IOException{
+    return new IndexSearcher(dir, true);
+  }
+  
+  /**
+   * Returns <code>true</code> if and only if the {@link SpellChecker} is
+   * closed, otherwise <code>false</code>.
+   * 
+   * @return <code>true</code> if and only if the {@link SpellChecker} is
+   *         closed, otherwise <code>false</code>.
+   */
+  boolean isClosed(){
+    return closed;
+  }
+  
 }

Modified: lucene/java/branches/lucene_2_9/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestSpellChecker.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_9/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestSpellChecker.java?rev=887693&r1=887692&r2=887693&view=diff
==============================================================================
--- lucene/java/branches/lucene_2_9/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestSpellChecker.java
(original)
+++ lucene/java/branches/lucene_2_9/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestSpellChecker.java
Sun Dec  6 15:24:48 2009
@@ -18,8 +18,10 @@
  */
 
 import java.io.IOException;
-
-import junit.framework.TestCase;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Random;
 
 import org.apache.lucene.analysis.SimpleAnalyzer;
 import org.apache.lucene.document.Document;
@@ -27,9 +29,12 @@
 import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.store.AlreadyClosedException;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.util.English;
+import org.apache.lucene.util.LuceneTestCase;
 
 
 /**
@@ -37,16 +42,18 @@
  *
  *
  */
-public class TestSpellChecker extends TestCase {
-  private SpellChecker spellChecker;
+public class TestSpellChecker extends LuceneTestCase {
+  private SpellCheckerMock spellChecker;
   private Directory userindex, spellindex;
+  private final Random random = newRandom();
+  private List searchers;
 
   protected void setUp() throws Exception {
     super.setUp();
     
     //create a user index
     userindex = new RAMDirectory();
-    IndexWriter writer = new IndexWriter(userindex, new SimpleAnalyzer(), true);
+    IndexWriter writer = new IndexWriter(userindex, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
 
     for (int i = 0; i < 1000; i++) {
       Document doc = new Document();
@@ -55,15 +62,15 @@
       writer.addDocument(doc);
     }
     writer.close();
-
+    searchers = Collections.synchronizedList(new ArrayList());
     // create the spellChecker
     spellindex = new RAMDirectory();
-    spellChecker = new SpellChecker(spellindex);
+    spellChecker = new SpellCheckerMock(spellindex);
   }
 
 
   public void testBuild() throws CorruptIndexException, IOException {
-    IndexReader r = IndexReader.open(userindex);
+    IndexReader r = IndexReader.open(userindex, true);
 
     spellChecker.clearIndex();
 
@@ -74,7 +81,9 @@
     int num_field2 = this.numdoc();
 
     assertEquals(num_field2, num_field1 + 1);
-
+    
+    assertLastSearcherOpen(4);
+    
     checkCommonSuggestions(r);
     checkLevenshteinSuggestions(r);
     
@@ -192,7 +201,7 @@
   }
 
   private int numdoc() throws IOException {
-    IndexReader rs = IndexReader.open(spellindex);
+    IndexReader rs = IndexReader.open(spellindex, true);
     int num = rs.numDocs();
     assertTrue(num != 0);
     //System.out.println("num docs: " + num);
@@ -200,4 +209,198 @@
     return num;
   }
   
+  public void testClose() throws IOException {
+    IndexReader r = IndexReader.open(userindex, true);
+    spellChecker.clearIndex();
+    String field = "field1";
+    addwords(r, "field1");
+    int num_field1 = this.numdoc();
+    addwords(r, "field2");
+    int num_field2 = this.numdoc();
+    assertEquals(num_field2, num_field1 + 1);
+    checkCommonSuggestions(r);
+    assertLastSearcherOpen(4);
+    spellChecker.close();
+    assertSearchersClosed();
+    try {
+      spellChecker.close();
+      fail("spellchecker was already closed");
+    } catch (AlreadyClosedException e) {
+      // expected
+    }
+    try {
+      checkCommonSuggestions(r);
+      fail("spellchecker was already closed");
+    } catch (AlreadyClosedException e) {
+      // expected
+    }
+    
+    try {
+      spellChecker.clearIndex();
+      fail("spellchecker was already closed");
+    } catch (AlreadyClosedException e) {
+      // expected
+    }
+    
+    try {
+      spellChecker.indexDictionary(new LuceneDictionary(r, field));
+      fail("spellchecker was already closed");
+    } catch (AlreadyClosedException e) {
+      // expected
+    }
+    
+    try {
+      spellChecker.setSpellIndex(spellindex);
+      fail("spellchecker was already closed");
+    } catch (AlreadyClosedException e) {
+      // expected
+    }
+    assertEquals(4, searchers.size());
+    assertSearchersClosed();
+  }
+  
+  /*
+   * tests if the internally shared indexsearcher is correctly closed 
+   * when the spellchecker is concurrently accessed and closed.
+   */
+  public void testConcurrentAccess() throws IOException, InterruptedException {
+    assertEquals(1, searchers.size());
+    final IndexReader r = IndexReader.open(userindex, true);
+    spellChecker.clearIndex();
+    assertEquals(2, searchers.size());
+    addwords(r, "field1");
+    assertEquals(3, searchers.size());
+    int num_field1 = this.numdoc();
+    addwords(r, "field2");
+    assertEquals(4, searchers.size());
+    int num_field2 = this.numdoc();
+    assertEquals(num_field2, num_field1 + 1);
+    int numThreads = 5 + this.random.nextInt(5);
+    SpellCheckWorker[] workers = new SpellCheckWorker[numThreads];
+    for (int i = 0; i < numThreads; i++) {
+      SpellCheckWorker spellCheckWorker = new SpellCheckWorker(r);
+      spellCheckWorker.start();
+      workers[i] = spellCheckWorker;
+      
+    }
+    int iterations = 5 + random.nextInt(5);
+    for (int i = 0; i < iterations; i++) {
+      Thread.sleep(100);
+      // concurrently reset the spell index
+      spellChecker.setSpellIndex(this.spellindex);
+      // for debug - prints the internal open searchers 
+      // showSearchersOpen();
+    }
+    
+    spellChecker.close();
+    joinAll(workers, 5000);
+    
+    for (int i = 0; i < workers.length; i++) {
+      assertFalse(workers[i].failed);
+      assertTrue(workers[i].terminated);
+    }
+    // 4 searchers more than iterations
+    // 1. at creation
+    // 2. clearIndex()
+    // 2. and 3. during addwords
+    assertEquals(iterations + 4, searchers.size());
+    assertSearchersClosed();
+    
+  }
+  private void joinAll(SpellCheckWorker[] workers, long timeout)
+      throws InterruptedException {
+    for (int j = 0; j < workers.length; j++) {
+      final long time = System.currentTimeMillis();
+      if (timeout < 0) {
+        // this could be helpful if it fails one day
+        System.err.println("Warning: " + (workers.length - j)
+            + " threads have not joined but joinall timed out");
+        break;
+      }
+      workers[j].join(timeout);
+      timeout -= System.currentTimeMillis() - time;
+    }
+  }
+  
+  private void assertLastSearcherOpen(int numSearchers) {
+    assertEquals(numSearchers, searchers.size());
+    Object[] searcherArray = searchers.toArray();
+    for (int i = 0; i < searcherArray.length; i++) {
+      if (i == searcherArray.length - 1) {
+        assertTrue("expected last searcher open but was closed",
+            ((IndexSearcher)searcherArray[i]).getIndexReader().getRefCount() > 0);
+      } else {
+        assertFalse("expected closed searcher but was open - Index: " + i,
+            ((IndexSearcher)searcherArray[i]).getIndexReader().getRefCount() > 0);
+      }
+    }
+  }
+  
+  private void assertSearchersClosed() {
+    Object[] searcherArray =  searchers.toArray();
+    for (int i = 0; i < searcherArray.length; i++) {
+      assertEquals(0, ((IndexSearcher)searcherArray[i]).getIndexReader().getRefCount());

+    }
+  }
+  
+  private void showSearchersOpen() {
+    int count = 0;
+    Object[] searcherArray = searchers.toArray();
+    for (int i = 0; i < searcherArray.length; i++) {
+      if(((IndexSearcher)searcherArray[i]).getIndexReader().getRefCount() > 0)
+        ++count;
+    } 
+    System.out.println(count);
+  }
+
+  
+  private class SpellCheckWorker extends Thread {
+    private final IndexReader reader;
+    boolean terminated = false;
+    boolean failed = false;
+    
+    SpellCheckWorker(IndexReader reader) {
+      super();
+      this.reader = reader;
+    }
+    
+    public void run() {
+      try {
+        while (true) {
+          try {
+            checkCommonSuggestions(reader);
+          } catch (AlreadyClosedException e) {
+            
+            return;
+          } catch (Throwable e) {
+            
+            e.printStackTrace();
+            failed = true;
+            return;
+          }
+        }
+      } finally {
+        terminated = true;
+      }
+    }
+    
+  }
+  
+  class SpellCheckerMock extends SpellChecker {
+    public SpellCheckerMock(Directory spellIndex) throws IOException {
+      super(spellIndex);
+    }
+
+    public SpellCheckerMock(Directory spellIndex, StringDistance sd)
+        throws IOException {
+      super(spellIndex, sd);
+    }
+
+    IndexSearcher createSearcher(Directory dir) throws IOException {
+      IndexSearcher searcher = super.createSearcher(dir);
+      TestSpellChecker.this.searchers.add(searcher);
+      return searcher;
+    }
+  }
+  
 }

Propchange: lucene/java/branches/lucene_2_9/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Dec  6 15:24:48 2009
@@ -1 +1 @@
-/lucene/java/trunk/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java:886257
+/lucene/java/trunk/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java:886257,887532

Propchange: lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Dec  6 15:24:48 2009
@@ -1,3 +1,3 @@
 /lucene/java/branches/lucene_2_4/src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java:748824
 /lucene/java/branches/lucene_3_0/src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java:886275
-/lucene/java/trunk/src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java:818920,824125,826029,826385,830871,833095,833297,833886,882672,883554,884870
+/lucene/java/trunk/src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java:818920,824125,826029,826385,830871,833095,833297,833886,882672,883554,884870,887532

Propchange: lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/analysis/TestISOLatin1AccentFilter.java
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Dec  6 15:24:48 2009
@@ -1 +1 @@
-/lucene/java/trunk/src/test/org/apache/lucene/analysis/TestISOLatin1AccentFilter.java:886257
+/lucene/java/trunk/src/test/org/apache/lucene/analysis/TestISOLatin1AccentFilter.java:886257,887532

Propchange: lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/document/TestDateTools.java
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Dec  6 15:24:48 2009
@@ -1 +1 @@
-/lucene/java/trunk/src/test/org/apache/lucene/document/TestDateTools.java:886257
+/lucene/java/trunk/src/test/org/apache/lucene/document/TestDateTools.java:886257,887532

Propchange: lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/document/TestNumberTools.java
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Dec  6 15:24:48 2009
@@ -1 +1 @@
-/lucene/java/trunk/src/test/org/apache/lucene/document/TestNumberTools.java:886257
+/lucene/java/trunk/src/test/org/apache/lucene/document/TestNumberTools.java:886257,887532

Propchange: lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Dec  6 15:24:48 2009
@@ -1 +1 @@
-/lucene/java/trunk/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java:886257
+/lucene/java/trunk/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java:886257,887532

Propchange: lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/util/TestAttributeSource.java
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Dec  6 15:24:48 2009
@@ -1 +1 @@
-/lucene/java/trunk/src/test/org/apache/lucene/util/TestAttributeSource.java:886257
+/lucene/java/trunk/src/test/org/apache/lucene/util/TestAttributeSource.java:886257,887532



Mime
View raw message