lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jpou...@apache.org
Subject svn commit: r1719490 - in /lucene/dev/branches/branch_5x: ./ lucene/ lucene/core/ lucene/core/src/java/org/apache/lucene/search/ lucene/core/src/java/org/apache/lucene/util/ lucene/core/src/test/org/apache/lucene/util/
Date Fri, 11 Dec 2015 18:43:38 GMT
Author: jpountz
Date: Fri Dec 11 18:43:38 2015
New Revision: 1719490

URL: http://svn.apache.org/viewvc?rev=1719490&view=rev
Log:
LUCENE-6815: Make DisjunctionScorer advance lazily.

Modified:
    lucene/dev/branches/branch_5x/   (props changed)
    lucene/dev/branches/branch_5x/lucene/   (props changed)
    lucene/dev/branches/branch_5x/lucene/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/branch_5x/lucene/core/   (props changed)
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/PriorityQueue.java
    lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/util/TestPriorityQueue.java

Modified: lucene/dev/branches/branch_5x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/CHANGES.txt?rev=1719490&r1=1719489&r2=1719490&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_5x/lucene/CHANGES.txt Fri Dec 11 18:43:38 2015
@@ -34,6 +34,10 @@ Optimizations
 * LUCENE-6912: Grouping's Collectors now calculate a response to needsScores()
   instead of always 'true'. (David Smiley)
 
+* LUCENE-6815: DisjunctionScorer now advances two-phased iterators lazily,
+  stopping to evaluate them as soon as a single one matches. The other iterators
+  will be confirmed lazily when computing score() or freq(). (Adrien Grand)
+
 Bug Fixes
 
 * LUCENE-6918: LRUQueryCache.onDocIdSetEviction is only called when at least

Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java?rev=1719490&r1=1719489&r2=1719490&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java
(original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java
Fri Dec 11 18:43:38 2015
@@ -27,6 +27,7 @@ public class DisiWrapper {
   public final DocIdSetIterator iterator;
   public final Scorer scorer;
   public final long cost;
+  public final float matchCost; // the match cost for two-phase iterators, 0 otherwise
   public int doc; // the current doc, used for comparison
   public DisiWrapper next; // reference to a next element, see #topList
 
@@ -52,8 +53,10 @@ public class DisiWrapper {
       
     if (twoPhaseView != null) {
       approximation = twoPhaseView.approximation();
+      matchCost = twoPhaseView.matchCost();
     } else {
       approximation = iterator;
+      matchCost = 0f;
     }
   }
 
@@ -67,8 +70,10 @@ public class DisiWrapper {
       
     if (twoPhaseView != null) {
       approximation = twoPhaseView.approximation();
+      matchCost = twoPhaseView.matchCost();
     } else {
       approximation = iterator;
+      matchCost = 0f;
     }
     this.lastApproxNonMatchDoc = -2;
     this.lastApproxMatchDoc = -2;

Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java?rev=1719490&r1=1719489&r2=1719490&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java
(original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java
Fri Dec 11 18:43:38 2015
@@ -22,17 +22,18 @@ import java.util.ArrayList;
 import java.util.Collection;
 import java.util.List;
 
+import org.apache.lucene.util.PriorityQueue;
+
 /**
  * Base class for Scorers that score disjunctions.
  */
 abstract class DisjunctionScorer extends Scorer {
 
   private final boolean needsScores;
-  private final DisiPriorityQueue subScorers;
-  private final long cost;
 
-  /** Linked list of scorers which are on the current doc */
-  private DisiWrapper topScorers;
+  private final DisiPriorityQueue subScorers;
+  private final DisjunctionDISIApproximation approximation;
+  private final TwoPhase twoPhase;
 
   protected DisjunctionScorer(Weight weight, List<Scorer> subScorers, boolean needsScores)
{
     super(weight);
@@ -40,125 +41,125 @@ abstract class DisjunctionScorer extends
       throw new IllegalArgumentException("There must be at least 2 subScorers");
     }
     this.subScorers = new DisiPriorityQueue(subScorers.size());
-    long cost = 0;
     for (Scorer scorer : subScorers) {
       final DisiWrapper w = new DisiWrapper(scorer);
-      cost += w.cost;
       this.subScorers.add(w);
     }
-    this.cost = cost;
     this.needsScores = needsScores;
-  }
-
-  @Override
-  public DocIdSetIterator iterator() {
-    return new DocIdSetIterator() {
-
-      @Override
-      public int docID() {
-        return subScorers.top().doc;
-      }
-
-      @Override
-      public final int nextDoc() throws IOException {
-        topScorers = null;
-        DisiWrapper top = subScorers.top();
-        final int doc = top.doc;
-        do {
-          top.doc = top.iterator.nextDoc();
-          top = subScorers.updateTop();
-        } while (top.doc == doc);
-
-        return top.doc;
-      }
+    this.approximation = new DisjunctionDISIApproximation(this.subScorers);
 
-      @Override
-      public final int advance(int target) throws IOException {
-        topScorers = null;
-        DisiWrapper top = subScorers.top();
-        do {
-          top.doc = top.iterator.advance(target);
-          top = subScorers.updateTop();
-        } while (top.doc < target);
-
-        return top.doc;
+    boolean hasApproximation = false;
+    float sumMatchCost = 0;
+    long sumApproxCost = 0;
+    // Compute matchCost as the average over the matchCost of the subScorers.
+    // This is weighted by the cost, which is an expected number of matching documents.
+    for (DisiWrapper w : this.subScorers) {
+      long costWeight = (w.cost <= 1) ? 1 : w.cost;
+      sumApproxCost += costWeight;
+      if (w.twoPhaseView != null) {
+        hasApproximation = true;
+        sumMatchCost += w.matchCost * costWeight;
       }
+    }
 
-      @Override
-      public final long cost() {
-        return cost;
-      }
+    if (hasApproximation == false) { // no sub scorer supports approximations
+      twoPhase = null;
+    } else {
+      final float matchCost = sumMatchCost / sumApproxCost;
+      twoPhase = new TwoPhase(approximation, matchCost);
+    }
+  }
 
-    };
+  @Override
+  public DocIdSetIterator iterator() {
+    if (twoPhase != null) {
+      return TwoPhaseIterator.asDocIdSetIterator(twoPhase);
+    } else {
+      return approximation;
+    }
   }
 
   @Override
   public TwoPhaseIterator twoPhaseIterator() {
-    float sumMatchCost = 0;
-    long sumApproxCost = 0;
+    return twoPhase;
+  }
 
-    // Compute matchCost as the avarage over the matchCost of the subScorers.
-    // This is weighted by the cost, which is an expected number of matching documents.
-    for (DisiWrapper w : subScorers) {
-      if (w.twoPhaseView != null) {
-        long costWeight = (w.cost <= 1) ? 1 : w.cost;
-        sumMatchCost += w.twoPhaseView.matchCost() * costWeight;
-        sumApproxCost += costWeight;
-      }
-    }
+  private class TwoPhase extends TwoPhaseIterator {
 
-    if (sumApproxCost == 0) { // no sub scorer supports approximations
-      return null;
+    private final float matchCost;
+    // list of verified matches on the current doc
+    DisiWrapper verifiedMatches;
+    // priority queue of approximations on the current doc that have not been verified yet
+    final PriorityQueue<DisiWrapper> unverifiedMatches;
+
+    private TwoPhase(DocIdSetIterator approximation, float matchCost) {
+      super(approximation);
+      this.matchCost = matchCost;
+      unverifiedMatches = new PriorityQueue<DisiWrapper>(DisjunctionScorer.this.subScorers.size())
{
+        @Override
+        protected boolean lessThan(DisiWrapper a, DisiWrapper b) {
+          return a.matchCost < b.matchCost;
+        }
+      };
     }
 
-    final float matchCost = sumMatchCost / sumApproxCost;
-
-    // note it is important to share the same pq as this scorer so that
-    // rebalancing the pq through the approximation will also rebalance
-    // the pq in this scorer.
-    return new TwoPhaseIterator(new DisjunctionDISIApproximation(subScorers)) {
-
-      @Override
-      public boolean matches() throws IOException {
-        DisiWrapper topScorers = subScorers.topList();
-        // remove the head of the list as long as it does not match
-        while (topScorers.twoPhaseView != null && ! topScorers.twoPhaseView.matches())
{
-          topScorers = topScorers.next;
-          if (topScorers == null) {
-            return false;
-          }
+    DisiWrapper getSubMatches() throws IOException {
+      // iteration order does not matter
+      for (DisiWrapper w : unverifiedMatches) {
+        if (w.twoPhaseView.matches()) {
+          w.next = verifiedMatches;
+          verifiedMatches = w;
         }
-        // now we know we have at least one match since the first element of 'matchList'
matches
-        if (needsScores) {
-          // if scores or freqs are needed, we also need to remove scorers
-          // from the top list that do not actually match
-          DisiWrapper previous = topScorers;
-          for (DisiWrapper w = topScorers.next; w != null; w = w.next) {
-            if (w.twoPhaseView != null && ! w.twoPhaseView.matches()) {
-              // w does not match, remove it
-              previous.next = w.next;
-            } else {
-              previous = w;
-            }
+      }
+      unverifiedMatches.clear();
+      return verifiedMatches;
+    }
+    
+    @Override
+    public boolean matches() throws IOException {
+      verifiedMatches = null;
+      unverifiedMatches.clear();
+      
+      for (DisiWrapper w = subScorers.topList(); w != null; ) {
+        DisiWrapper next = w.next;
+        
+        if (w.twoPhaseView == null) {
+          // implicitly verified, move it to verifiedMatches
+          w.next = verifiedMatches;
+          verifiedMatches = w;
+          
+          if (needsScores == false) {
+            // we can stop here
+            return true;
           }
         } else {
-          // since we don't need scores, let's pretend we have a single match
-          topScorers.next = null;
+          unverifiedMatches.add(w);
         }
-
-        // We need to explicitely set the list of top scorers to avoid the
-        // laziness of DisjunctionScorer.score() that would take all scorers
-        // positioned on the same doc as the top of the pq, including
-        // non-matching scorers
-        DisjunctionScorer.this.topScorers = topScorers;
+        w = next;
+      }
+      
+      if (verifiedMatches != null) {
         return true;
       }
-
-      @Override
-      public float matchCost() {
-        return matchCost;
+      
+      // verify subs that have an two-phase iterator
+      // least-costly ones first
+      while (unverifiedMatches.size() > 0) {
+        DisiWrapper w = unverifiedMatches.pop();
+        if (w.twoPhaseView.matches()) {
+          w.next = null;
+          verifiedMatches = w;
+          return true;
+        }
       }
-    };
+      
+      return false;
+    }
+    
+    @Override
+    public float matchCost() {
+      return matchCost;
+    }
   }
 
   @Override
@@ -166,13 +167,19 @@ abstract class DisjunctionScorer extends
    return subScorers.top().doc;
   }
 
+  DisiWrapper getSubMatches() throws IOException {
+    if (twoPhase == null) {
+      return subScorers.topList();
+    } else {
+      return twoPhase.getSubMatches();
+    }
+  }
+
   @Override
   public final int freq() throws IOException {
-    if (topScorers == null) {
-      topScorers = subScorers.topList();
-    }
+    DisiWrapper subMatches = getSubMatches();
     int freq = 1;
-    for (DisiWrapper w = topScorers.next; w != null; w = w.next) {
+    for (DisiWrapper w = subMatches.next; w != null; w = w.next) {
       freq += 1;
     }
     return freq;
@@ -180,10 +187,7 @@ abstract class DisjunctionScorer extends
 
   @Override
   public final float score() throws IOException {
-    if (topScorers == null) {
-      topScorers = subScorers.topList();
-    }
-    return score(topScorers);
+    return score(getSubMatches());
   }
 
   /** Compute the score for the given linked list of scorers. */

Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/PriorityQueue.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/PriorityQueue.java?rev=1719490&r1=1719489&r2=1719490&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/PriorityQueue.java
(original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/PriorityQueue.java
Fri Dec 11 18:43:38 2015
@@ -1,5 +1,8 @@
 package org.apache.lucene.util;
 
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -27,10 +30,12 @@ package org.apache.lucene.util;
  * <code>maxSize+1</code> if instantiated via the
  * {@link #PriorityQueue(int,boolean)} constructor with <code>prepopulate</code>
  * set to <code>true</code>.
- * 
+ *
+ * <b>NOTE</b>: Iteration order is not specified.
+ *
  * @lucene.internal
  */
-public abstract class PriorityQueue<T> {
+public abstract class PriorityQueue<T> implements Iterable<T> {
   private int size = 0;
   private final int maxSize;
   private final T[] heap;
@@ -58,7 +63,7 @@ public abstract class PriorityQueue<T> {
     @SuppressWarnings("unchecked") final T[] h = (T[]) new Object[heapSize];
     this.heap = h;
     this.maxSize = maxSize;
-    
+
     if (prepopulate) {
       // If sentinel objects are supported, populate the queue with them
       T sentinel = getSentinelObject();
@@ -80,41 +85,41 @@ public abstract class PriorityQueue<T> {
 
   /**
    * This method can be overridden by extending classes to return a sentinel
-   * object which will be used by the {@link PriorityQueue#PriorityQueue(int,boolean)} 
+   * object which will be used by the {@link PriorityQueue#PriorityQueue(int,boolean)}
    * constructor to fill the queue, so that the code which uses that queue can always
    * assume it's full and only change the top without attempting to insert any new
    * object.<br>
-   * 
+   *
    * Those sentinel values should always compare worse than any non-sentinel
    * value (i.e., {@link #lessThan} should always favor the
    * non-sentinel values).<br>
-   * 
+   *
    * By default, this method returns null, which means the queue will not be
    * filled with sentinel values. Otherwise, the value returned will be used to
    * pre-populate the queue. Adds sentinel values to the queue.<br>
-   * 
+   *
    * If this method is extended to return a non-null value, then the following
    * usage pattern is recommended:
-   * 
+   *
    * <pre class="prettyprint">
    * // extends getSentinelObject() to return a non-null value.
    * PriorityQueue&lt;MyObject&gt; pq = new MyQueue&lt;MyObject&gt;(numHits);
    * // save the 'top' element, which is guaranteed to not be null.
    * MyObject pqTop = pq.top();
    * &lt;...&gt;
-   * // now in order to add a new element, which is 'better' than top (after 
+   * // now in order to add a new element, which is 'better' than top (after
    * // you've verified it is better), it is as simple as:
    * pqTop.change().
    * pqTop = pq.updateTop();
    * </pre>
-   * 
+   *
    * <b>NOTE:</b> if this method returns a non-null value, it will be called
by
-   * the {@link PriorityQueue#PriorityQueue(int,boolean)} constructor 
+   * the {@link PriorityQueue#PriorityQueue(int,boolean)} constructor
    * {@link #size()} times, relying on a new object to be returned and will not
    * check if it's null again. Therefore you should ensure any call to this
    * method creates a new instance and behaves consistently, e.g., it cannot
    * return null if it previously returned non-null.
-   * 
+   *
    * @return the sentinel object to use to pre-populate the queue, or null if
    *         sentinel objects are not supported.
    */
@@ -126,7 +131,7 @@ public abstract class PriorityQueue<T> {
    * Adds an Object to a PriorityQueue in log(size) time. If one tries to add
    * more objects than maxSize from initialize an
    * {@link ArrayIndexOutOfBoundsException} is thrown.
-   * 
+   *
    * @return the new 'top' element in the queue.
    */
   public final T add(T element) {
@@ -182,24 +187,24 @@ public abstract class PriorityQueue<T> {
       return null;
     }
   }
-  
+
   /**
    * Should be called when the Object at top changes values. Still log(n) worst
    * case, but it's at least twice as fast to
-   * 
+   *
    * <pre class="prettyprint">
    * pq.top().change();
    * pq.updateTop();
    * </pre>
-   * 
+   *
    * instead of
-   * 
+   *
    * <pre class="prettyprint">
    * o = pq.pop();
    * o.change();
    * pq.push(o);
    * </pre>
-   * 
+   *
    * @return the new 'top' element.
    */
   public final T updateTop() {
@@ -263,7 +268,7 @@ public abstract class PriorityQueue<T> {
     heap[i] = node;            // install saved node
     return i != origPos;
   }
-  
+
   private final void downHeap(int i) {
     T node = heap[i];          // save top node
     int j = i << 1;            // find smaller child
@@ -289,4 +294,26 @@ public abstract class PriorityQueue<T> {
   protected final Object[] getHeapArray() {
     return (Object[]) heap;
   }
+
+  @Override
+  public Iterator<T> iterator() {
+    return new Iterator<T>() {
+
+      int i = 1;
+
+      @Override
+      public boolean hasNext() {
+        return i <= size;
+      }
+
+      @Override
+      public T next() {
+        if (hasNext() == false) {
+          throw new NoSuchElementException();
+        }
+        return heap[i++];
+      }
+
+    };
+  }
 }

Modified: lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/util/TestPriorityQueue.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/util/TestPriorityQueue.java?rev=1719490&r1=1719489&r2=1719490&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/util/TestPriorityQueue.java
(original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/util/TestPriorityQueue.java
Fri Dec 11 18:43:38 2015
@@ -18,6 +18,9 @@ package org.apache.lucene.util;
  */
 
 import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.NoSuchElementException;
 import java.util.Random;
 
 public class TestPriorityQueue extends LuceneTestCase {
@@ -188,4 +191,65 @@ public class TestPriorityQueue extends L
     }
   }
 
+  public void testIterator() {
+    IntegerQueue queue = new IntegerQueue(3);
+    
+    Iterator<Integer> it = queue.iterator();
+    assertFalse(it.hasNext());
+    try {
+      it.next();
+      fail();
+    } catch (NoSuchElementException e) {
+      // ok
+    }
+
+    queue.add(1);
+    it = queue.iterator();
+    assertTrue(it.hasNext());
+    assertEquals(Integer.valueOf(1), it.next());
+    assertFalse(it.hasNext());
+    try {
+      it.next();
+      fail();
+    } catch (NoSuchElementException e) {
+      // ok
+    }
+
+    queue.add(2);
+    it = queue.iterator();
+    assertTrue(it.hasNext());
+    assertEquals(Integer.valueOf(1), it.next());
+    assertTrue(it.hasNext());
+    assertEquals(Integer.valueOf(2), it.next());
+    assertFalse(it.hasNext());
+    try {
+      it.next();
+      fail();
+    } catch (NoSuchElementException e) {
+      // ok
+    }
+  }
+
+  public void testIteratorRandom() {
+    final int maxSize = TestUtil.nextInt(random(), 1, 20);
+    IntegerQueue queue = new IntegerQueue(maxSize);
+    final int iters = atLeast(100);
+    final List<Integer> expected = new ArrayList<>();
+    for (int iter = 0; iter < iters; ++iter) {
+      if (queue.size() == 0 || (queue.size() < maxSize && random().nextBoolean()))
{
+        final Integer value = new Integer(random().nextInt(10));
+        queue.add(value);
+        expected.add(value);
+      } else {
+        expected.remove(queue.pop());
+      }
+      List<Integer> actual = new ArrayList<>();
+      for (Integer value : queue) {
+        actual.add(value);
+      }
+      CollectionUtil.introSort(expected);
+      CollectionUtil.introSort(actual);
+      assertEquals(expected, actual);
+    }
+  }
 }



Mime
View raw message