lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dor...@apache.org
Subject svn commit: r627101 - in /lucene/java/trunk: CHANGES.txt src/java/org/apache/lucene/search/TimeLimitedCollector.java src/test/org/apache/lucene/search/TestTimeLimitedCollector.java
Date Tue, 12 Feb 2008 20:55:34 GMT
Author: doronc
Date: Tue Feb 12 12:55:32 2008
New Revision: 627101

URL: http://svn.apache.org/viewvc?rev=627101&view=rev
Log:
LUCENE-997: Add search timeout (partial) support.

Added:
    lucene/java/trunk/src/java/org/apache/lucene/search/TimeLimitedCollector.java   (with
props)
    lucene/java/trunk/src/test/org/apache/lucene/search/TestTimeLimitedCollector.java   (with
props)
Modified:
    lucene/java/trunk/CHANGES.txt

Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?rev=627101&r1=627100&r2=627101&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Tue Feb 12 12:55:32 2008
@@ -99,6 +99,13 @@
     with cached writes, the index remains consistent.  Also added
     explicit commit() method to IndexWriter to force a commit without
     having to close.  (Mike McCandless)
+    
+ 7. LUCENE-997: Add search timeout (partial) support.
+    A TimeLimitedCollector was added to allow limiting search time.
+    It is a partial solution since timeout is checked only when 
+    collecting a hit, and therefore a search for rare words in a 
+    huge index might not stop within the specified time.
+    (Sean Timm via Doron Cohen) 
 
 Optimizations
 

Added: lucene/java/trunk/src/java/org/apache/lucene/search/TimeLimitedCollector.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/search/TimeLimitedCollector.java?rev=627101&view=auto
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/TimeLimitedCollector.java (added)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/TimeLimitedCollector.java Tue Feb
12 12:55:32 2008
@@ -0,0 +1,181 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * <p>The TimeLimitedCollector is used to timeout search requests that
+ * take longer than the maximum allowed search time limit.  After this
+ * time is exceeded, the search thread is stopped by throwing a
+ * TimeExceeded Exception.</p>
+ */
+public class TimeLimitedCollector extends HitCollector {
+  
+  /** 
+   * Default timer resolution.
+   * @see #setResolution(long) 
+   */
+  public static final int DEFAULT_RESOLUTION = 20;
+
+  private static long resolution = DEFAULT_RESOLUTION; 
+
+  private static class TimerThread extends Thread  {
+
+    // NOTE: we can avoid explicit synchronization here for several reasons:
+    // * updates to volatile long variables are atomic
+    // * only single thread modifies this value
+    // * use of volatile keyword ensures that it does not reside in
+    //   a register, but in main memory (so that changes are visible to
+    //   other threads).
+    // * visibility of changes does not need to be instantanous, we can
+    //   afford losing a tick or two.
+    //
+    // See section 17 of the Java Language Specification for details.
+    private volatile long time = 0;
+
+    /**
+     * TimerThread provides a pseudo-clock service to all searching
+     * threads, so that they can count elapsed time with less overhead
+     * than repeatedly calling System.currentTimeMillis.  A single
+     * thread should be created to be used for all searches.
+     */
+    private TimerThread() {
+      super("TimeLimitedCollector timer thread");
+      this.setDaemon( true );
+    }
+
+    public void run() {
+      boolean interrupted = false;
+      try {
+        while( true ) {
+          // TODO: Use System.nanoTime() when Lucene moves to Java SE 5.
+          time += resolution;
+          try {
+            Thread.sleep( resolution );
+          } catch( final InterruptedException e ) {
+            interrupted = true;
+          }
+        }
+      }
+      finally {
+        if( interrupted ) {
+          Thread.currentThread().interrupt();
+        }
+      }
+    }
+
+    /**
+     * Get the timer value in milliseconds.
+     */
+    public long getMilliseconds() {
+      return time;
+    }
+  }
+
+  /**
+   * Thrown when elapsed search time exceeds allowed search time. 
+   */
+  public static class TimeExceededException extends RuntimeException {
+    private long timeAllowed;
+    private long timeElapsed;
+    private int lastDocCollected;
+    private TimeExceededException(long timeAllowed, long timeElapsed, int lastDocCollected)
{
+      super("Elapsed time: " + timeElapsed + "Exceeded allowed search time: " + timeAllowed
+ " ms.");
+      this.timeAllowed = timeAllowed;
+      this.timeElapsed = timeElapsed;
+      this.lastDocCollected = lastDocCollected;
+    }
+    /**
+     * Returns allowed time (milliseconds).
+     */
+    public long getTimeAllowed() {
+      return timeAllowed;
+    }
+    /**
+     * Returns elapsed time (milliseconds).
+     */
+    public long getTimeElapsed() {
+      return timeElapsed;
+    }
+    /**
+     * Returns last doc that was collected when the search time exceeded.  
+     */
+    public int getLastDocCollected() {
+      return lastDocCollected;
+    }
+  }
+
+  // Declare and initialize a single static timer thread to be used by
+  // all TimeLimitedCollector instances.  The JVM assures that
+  // this only happens once.
+  private final static TimerThread TIMER_THREAD = new TimerThread();
+  
+  static  {
+    TIMER_THREAD.start();
+  }
+
+  private final long t0;
+  private final long timeout;
+  private final HitCollector hc;
+
+  public TimeLimitedCollector( final HitCollector hc, final long timeAllowed ) {
+    this.hc = hc;
+    t0 = TIMER_THREAD.getMilliseconds();
+    this.timeout = t0 + timeAllowed;
+  }
+
+  /**
+   * Calls collect() on the decorated HitCollector.
+   * 
+   * @throws TimeExceededException if the time allowed has been exceeded.
+   */
+  public void collect( final int doc, final float score ) {
+    long time = TIMER_THREAD.getMilliseconds();
+    if( timeout < time) {
+      //System.out.println(this+"  failing on:  "+doc+"  "+(time-t0));
+      throw new TimeExceededException( timeout-t0, time-t0, doc );
+    }
+    //System.out.println(this+"  collecting: "+doc+"  "+(time-t0));
+    hc.collect( doc, score );
+  }
+
+  /** 
+   * Return the timer resolution.
+   * @see #setResolution(long)
+   */
+  public static long getResolution() {
+    return resolution;
+  }
+
+  /**
+   * Set the timer resolution.
+   * The default timer resolution is 20 milliseconds. 
+   * This means that a search required to take no longer than 
+   * 800 milliseconds may be stopped after 780 to 820 milliseconds.
+   * <br>Note that: 
+   * <ul>
+   * <li>Finer (smaller) resolution is more accurate but less efficient.</li>
+   * <li>Setting resolution to less than 5 milliseconds will be silently modified to
5 milliseconds.</li>
+   * <li>Setting resolution smaller than current resolution might take effect only
after current 
+   * resolution. (Assume current resolution of 20 milliseconds is modified to 5 milliseconds,

+   * then it can take up to 20 milliseconds for the change to have effect.</li>
+   * </ul>      
+   */
+  public static void setResolution(long newResolution) {
+    resolution = Math.max(newResolution,5); // 5 milliseconds is about the minimum reasonable
time for a Object.wait(long) call.
+  }
+}

Propchange: lucene/java/trunk/src/java/org/apache/lucene/search/TimeLimitedCollector.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/java/trunk/src/java/org/apache/lucene/search/TimeLimitedCollector.java
------------------------------------------------------------------------------
    svn:executable = *

Added: lucene/java/trunk/src/test/org/apache/lucene/search/TestTimeLimitedCollector.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/search/TestTimeLimitedCollector.java?rev=627101&view=auto
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/search/TestTimeLimitedCollector.java (added)
+++ lucene/java/trunk/src/test/org/apache/lucene/search/TestTimeLimitedCollector.java Tue
Feb 12 12:55:32 2008
@@ -0,0 +1,264 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriter.MaxFieldLength;
+import org.apache.lucene.queryParser.QueryParser;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.LuceneTestCase;
+
+import java.io.IOException;
+import java.util.BitSet;
+
+/**
+ * Tests the TimeLimitedCollector.  This test checks (1) search
+ * correctness (regardless of timeout), (2) expected timeout behavior,
+ * and (3) a sanity test with multiple searching threads.
+ */
+public class TestTimeLimitedCollector extends LuceneTestCase {
+  private static final int SLOW_DOWN = 10;
+  private static final int N_DOCS = 2000;
+  private static final int N_THREADS = 50;
+
+  private Searcher searcher;
+  private final String FIELD_NAME = "body";
+  private Query query;
+
+  public TestTimeLimitedCollector(String name) {
+    super(name);
+  }
+
+  /**
+   * initializes searcher with a document set
+   */
+  protected void setUp() throws Exception {
+    final String docText[] = {
+        "one blah three",
+        "one foo three multiOne",
+        "one foobar three multiThree",
+        "blueberry pancakes",
+        "blueberry pie",
+        "blueberry strudel",
+        "blueberry pizza",
+    };
+    Directory directory = new RAMDirectory();
+    IndexWriter iw = new IndexWriter(directory, new StandardAnalyzer(), true, MaxFieldLength.UNLIMITED);
+    
+    for (int i=0; i<N_DOCS; i++) {
+      add(docText[i%docText.length], iw);
+    }
+    iw.close();
+    searcher = new IndexSearcher(directory);
+
+    String qtxt = "one";
+    for (int i = 0; i < docText.length; i++) {
+      qtxt += ' ' + docText[i]; // large query so that search will be longer
+    }
+    QueryParser queryParser = new QueryParser(FIELD_NAME, new StandardAnalyzer());
+    query = queryParser.parse(qtxt);
+
+  }
+
+  public void tearDown() throws Exception {
+    searcher.close();
+  }
+
+  private void add(String value, IndexWriter iw) throws IOException {
+    Document d = new Document();
+    d.add(new Field(FIELD_NAME, value, Field.Store.NO, Field.Index.TOKENIZED));
+    iw.addDocument(d);
+  }
+
+  private void search(HitCollector collector) throws Exception {
+    searcher.search(query, collector);
+  }
+
+  /**
+   * test search correctness with no timeout
+   */
+  public void testSearch() {
+    doTestSearch();
+  }
+  
+  private void doTestSearch() {
+    int totalResults = 0;
+    int totalTLCResults = 0;
+    try {
+      MyHitCollector myHc = new MyHitCollector();
+      search(myHc);
+      totalResults = myHc.hitCount();
+      
+      myHc = new MyHitCollector();
+      HitCollector tlCollector = new TimeLimitedCollector(myHc, 3600000); // 1 hour
+      search(tlCollector);
+      totalTLCResults = myHc.hitCount();
+    } catch (Exception e) {
+      assertTrue("Unexpected exception: "+e, false); //==fail
+    }
+    assertEquals( "Wrong number of results!", totalResults, totalTLCResults );
+  }
+
+  /**
+   * Test that timeout is obtained, and soon enough!
+   */
+  public void testTimeout() {
+    doTestTimeout(false);
+  }
+  
+  private void doTestTimeout(boolean multiThreaded) {
+    MyHitCollector myHc = new MyHitCollector();
+    myHc.setSlowDown(SLOW_DOWN);
+    long timeAllowed = timeAllowed(multiThreaded);
+    HitCollector tlCollector = new TimeLimitedCollector(myHc, timeAllowed);
+
+    TimeLimitedCollector.TimeExceededException exception = null;
+    try {
+      search(tlCollector);
+    } catch (TimeLimitedCollector.TimeExceededException x) {
+      exception = x;
+    } catch (Exception e) {
+      assertTrue("Unexpected exception: "+e, false); //==fail
+    }
+    assertNotNull( "Timeout expected!", exception );
+    assertTrue( "no hits found!", myHc.hitCount() > 0 );
+    assertTrue( "last doc collected cannot be 0!", exception.getLastDocCollected() > 0
);
+    assertEquals( exception.getTimeAllowed(), timeAllowed);
+    assertTrue ( "elapsed="+exception.getTimeElapsed()+" <= (allowed-resolution)="+(timeAllowed-TimeLimitedCollector.getResolution()),
+        exception.getTimeElapsed() > timeAllowed-TimeLimitedCollector.getResolution());
+    assertTrue ( "lastDoc="+exception.getLastDocCollected()+" ,&& elapsed="+exception.getTimeElapsed()+
+        " >= (allowed+resolution+slowdown)="+(timeAllowed+TimeLimitedCollector.getResolution()+SLOW_DOWN),
+        exception.getTimeElapsed() < timeAllowed+TimeLimitedCollector.getResolution()+SLOW_DOWN);
+  }
+
+  private long timeAllowed(boolean multiThreaded) {
+    if (!multiThreaded) {
+      return 2 * TimeLimitedCollector.getResolution() + SLOW_DOWN; // be on the safe side
with this test
+    }
+    return 3 * (TimeLimitedCollector.getResolution() + SLOW_DOWN); // even safer (avoid noise
in tests)
+  }
+
+  /**
+   * Test timeout behavior when resolution is modified. 
+   */
+  public void testModifyResolution() {
+    try {
+      // increase and test
+      long resolution = 20 * TimeLimitedCollector.DEFAULT_RESOLUTION; //400
+      TimeLimitedCollector.setResolution(resolution);
+      assertEquals(resolution, TimeLimitedCollector.getResolution());
+      doTestTimeout(false);
+      // decrease much and test
+      resolution = 5;
+      TimeLimitedCollector.setResolution(resolution);
+      assertEquals(resolution, TimeLimitedCollector.getResolution());
+      doTestTimeout(false);
+      // return to default and test
+      resolution = TimeLimitedCollector.DEFAULT_RESOLUTION;
+      TimeLimitedCollector.setResolution(resolution);
+      assertEquals(resolution, TimeLimitedCollector.getResolution());
+      doTestTimeout(false);
+    } finally {
+      TimeLimitedCollector.setResolution(TimeLimitedCollector.DEFAULT_RESOLUTION);
+    }
+  }
+  
+  /** 
+   * Test correctness with multiple searching threads.
+   */
+  public void testSearchMultiThreaded() {
+    doTestMultiThreads(false);
+  }
+
+  /** 
+   * Test correctness with multiple searching threads.
+   */
+  public void testTimeoutMultiThreaded() {
+    doTestMultiThreads(true);
+  }
+  
+  private void doTestMultiThreads(final boolean withTimeout) {
+    Thread [] threadArray = new Thread[N_THREADS];
+    final BitSet success = new BitSet(N_THREADS);
+    for( int i = 0; i < threadArray.length; ++i ) {
+      final int num = i;
+      threadArray[num] = new Thread() {
+          public void run() {
+            if (withTimeout) {
+              doTestTimeout(true);
+            } else {
+              doTestSearch();
+            }
+            success.set(num);
+          }
+      };
+    }
+    for( int i = 0; i < threadArray.length; ++i ) {
+      threadArray[i].start();
+    }
+    boolean interrupted = false;
+    for( int i = 0; i < threadArray.length; ++i ) {
+      try {
+        threadArray[i].join();
+      } catch (InterruptedException e) {
+        interrupted = true;
+      }
+    }
+    if (interrupted) {
+      Thread.currentThread().interrupt();
+    }
+    assertEquals(N_THREADS,success.cardinality());
+  }
+  
+  // counting hit collector that can slow down at collect().
+  private class MyHitCollector extends HitCollector
+  {
+    private final BitSet bits = new BitSet();
+    private int slowdown = 0;
+
+    /**
+     * amount of time to wait on each collect to simulate a long iteration
+     */
+    public void setSlowDown( int milliseconds ) {
+      slowdown = milliseconds;
+    }
+    
+    public void collect( final int doc, final float score ) {
+      if( slowdown > 0 ) {
+        try {
+          Thread.sleep(slowdown);
+        }
+        catch(InterruptedException x) {
+          System.out.println("caught " + x);
+        }
+      }
+      bits.set( doc );
+    }
+    
+    public int hitCount() {
+      return bits.cardinality();
+    }
+    
+  }
+
+}
+  

Propchange: lucene/java/trunk/src/test/org/apache/lucene/search/TestTimeLimitedCollector.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/java/trunk/src/test/org/apache/lucene/search/TestTimeLimitedCollector.java
------------------------------------------------------------------------------
    svn:executable = *



Mime
View raw message