lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jpou...@apache.org
Subject svn commit: r1678164 - in /lucene/dev/trunk/lucene: ./ core/src/java/org/apache/lucene/index/ core/src/java/org/apache/lucene/store/ queries/src/java/org/apache/lucene/queries/ queries/src/test/org/apache/lucene/queries/
Date Thu, 07 May 2015 10:51:52 GMT
Author: jpountz
Date: Thu May  7 10:51:52 2015
New Revision: 1678164

URL: http://svn.apache.org/r1678164
Log:
LUCENE-6350: TermsQuery is now compressed with PrefixCodedTerms.

Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FieldTermIterator.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/PrefixCodedTerms.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/store/RAMFile.java
    lucene/dev/trunk/lucene/queries/src/java/org/apache/lucene/queries/TermsQuery.java
    lucene/dev/trunk/lucene/queries/src/test/org/apache/lucene/queries/TermsQueryTest.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1678164&r1=1678163&r2=1678164&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Thu May  7 10:51:52 2015
@@ -123,6 +123,9 @@ Optimizations
 * LUCENE-6330: BooleanScorer (used for top-level disjunctions) does not decode
   norms when not necessary anymore. (Adrien Grand)
 
+* LUCENE-6350: TermsQuery is now compressed with PrefixCodedTerms.
+  (Robert Muir, Mike McCandless, Adrien Grand)
+
 Bug Fixes
 
 * LUCENE-6378: Fix all RuntimeExceptions to throw the underlying root cause.

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FieldTermIterator.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FieldTermIterator.java?rev=1678164&r1=1678163&r2=1678164&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FieldTermIterator.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FieldTermIterator.java Thu
May  7 10:51:52 2015
@@ -19,8 +19,6 @@ package org.apache.lucene.index;
 
 import org.apache.lucene.util.BytesRefIterator;
 
-// TODO: maybe TermsFilter could use this?
-
 /** Iterates over terms in across multiple fields.  The caller must
  *  check {@link #field} after each {@link #next} to see if the field
  *  changed, but {@code ==} can be used since the iterator

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/PrefixCodedTerms.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/PrefixCodedTerms.java?rev=1678164&r1=1678163&r2=1678164&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/PrefixCodedTerms.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/PrefixCodedTerms.java Thu
May  7 10:51:52 2015
@@ -18,6 +18,7 @@ package org.apache.lucene.index;
  */
 
 import java.io.IOException;
+import java.util.Objects;
 
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.RAMFile;
@@ -29,14 +30,14 @@ import org.apache.lucene.util.BytesRefBu
 
 /**
  * Prefix codes term instances (prefixes are shared)
- * @lucene.experimental
+ * @lucene.internal
  */
-class PrefixCodedTerms implements Accountable {
+public class PrefixCodedTerms implements Accountable {
   final RAMFile buffer;
   private long delGen;
 
   private PrefixCodedTerms(RAMFile buffer) {
-    this.buffer = buffer;
+    this.buffer = Objects.requireNonNull(buffer);
   }
 
   @Override
@@ -56,6 +57,9 @@ class PrefixCodedTerms implements Accoun
     private Term lastTerm = new Term("");
     private BytesRefBuilder lastTermBytes = new BytesRefBuilder();
 
+    /** Sole constructor. */
+    public Builder() {}
+
     /** add a term */
     public void add(Term term) {
       assert lastTerm.equals(new Term("")) || term.compareTo(lastTerm) > 0;
@@ -104,6 +108,7 @@ class PrefixCodedTerms implements Accoun
     }
   }
 
+  /** An iterator over the list of terms stored in a {@link PrefixCodedTerms}. */
   public static class TermIterator extends FieldTermIterator {
     final IndexInput input;
     final BytesRefBuilder builder = new BytesRefBuilder();
@@ -112,7 +117,7 @@ class PrefixCodedTerms implements Accoun
     final long delGen;
     String field = "";
 
-    public TermIterator(long delGen, RAMFile buffer) {
+    private TermIterator(long delGen, RAMFile buffer) {
       try {
         input = new RAMInputStream("MergedPrefixCodedTermsIterator", buffer);
       } catch (IOException e) {
@@ -162,7 +167,24 @@ class PrefixCodedTerms implements Accoun
     }
   }
 
+  /** Return an iterator over the terms stored in this {@link PrefixCodedTerms}. */
   public TermIterator iterator() {
     return new TermIterator(delGen, buffer);
   }
+
+  @Override
+  public int hashCode() {
+    int h = buffer.hashCode();
+    h = 31 * h + (int) (delGen ^ (delGen >>> 32));
+    return h;
+  }
+
+  @Override
+  public boolean equals(Object obj) {
+    if (this == obj) return true;
+    if (obj == null) return false;
+    if (getClass() != obj.getClass()) return false;
+    PrefixCodedTerms other = (PrefixCodedTerms) obj;
+    return buffer.equals(other.buffer) && delGen == other.delGen;
+  }
 }

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/store/RAMFile.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/store/RAMFile.java?rev=1678164&r1=1678163&r2=1678164&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/store/RAMFile.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/store/RAMFile.java Thu May  7
10:51:52 2015
@@ -18,6 +18,7 @@ package org.apache.lucene.store;
  */
 
 import java.util.ArrayList;
+import java.util.Arrays;
 
 import org.apache.lucene.util.Accountable;
 
@@ -25,7 +26,7 @@ import org.apache.lucene.util.Accountabl
  * Represents a file in RAM as a list of byte[] buffers.
  * @lucene.internal */
 public class RAMFile implements Accountable {
-  protected ArrayList<byte[]> buffers = new ArrayList<>();
+  protected final ArrayList<byte[]> buffers = new ArrayList<>();
   long length;
   RAMDirectory directory;
   protected long sizeInBytes;
@@ -86,4 +87,31 @@ public class RAMFile implements Accounta
   public String toString() {
     return getClass().getSimpleName() + "(length=" + length + ")";
   }
+
+  @Override
+  public int hashCode() {
+    int h = (int) (length ^ (length >>> 32));
+    for (byte[] block : buffers) {
+      h = 31 * h + Arrays.hashCode(block);
+    }
+    return h;
+  }
+
+  @Override
+  public boolean equals(Object obj) {
+    if (this == obj) return true;
+    if (obj == null) return false;
+    if (getClass() != obj.getClass()) return false;
+    RAMFile other = (RAMFile) obj;
+    if (length != other.length) return false;
+    if (buffers.size() != other.buffers.size()) {
+      return false;
+    }
+    for (int i = 0; i < buffers.size(); i++) {
+      if (!Arrays.equals(buffers.get(i), other.buffers.get(i))) {
+        return false;
+      }
+    }
+    return true;
+  }
 }

Modified: lucene/dev/trunk/lucene/queries/src/java/org/apache/lucene/queries/TermsQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/queries/src/java/org/apache/lucene/queries/TermsQuery.java?rev=1678164&r1=1678163&r2=1678164&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/queries/src/java/org/apache/lucene/queries/TermsQuery.java (original)
+++ lucene/dev/trunk/lucene/queries/src/java/org/apache/lucene/queries/TermsQuery.java Thu
May  7 10:51:52 2015
@@ -18,11 +18,9 @@ package org.apache.lucene.queries;
  */
 
 import java.io.IOException;
-import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
-import java.util.Iterator;
 import java.util.List;
 import java.util.Set;
 
@@ -30,13 +28,15 @@ import org.apache.lucene.index.Fields;
 import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.PostingsEnum;
+import org.apache.lucene.index.PrefixCodedTerms;
+import org.apache.lucene.index.PrefixCodedTerms.TermIterator;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.ConstantScoreQuery;
+import org.apache.lucene.search.ConstantScoreWeight;
 import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.search.Explanation;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.Scorer;
@@ -73,38 +73,35 @@ public class TermsQuery extends Query im
 
   private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(TermsQuery.class);
 
-  /*
-   * this class is often used for large number of terms in a single field.
-   * to optimize for this case and to be filter-cache friendly we
-   * serialize all terms into a single byte array and store offsets
-   * in a parallel array to keep the # of object constant and speed up
-   * equals / hashcode.
-   *
-   * This adds quite a bit of complexity but allows large term queries to
-   * be efficient for GC and cache-lookups
-   */
-  private final int[] offsets;
-  private final byte[] termsBytes;
-  private final TermsAndField[] termsAndFields;
-  private final int hashCode; // cached hashcode for fast cache lookups, not including the
boost
+  private final PrefixCodedTerms termData;
+  private final int termDataHashCode; // cached hashcode of termData
+
+  private static Term[] toTermArray(String field, List<BytesRef> termBytes) {
+    Term[] array = new Term[termBytes.size()];
+    int i = 0;
+    for (BytesRef t : termBytes) {
+      array[i++] = new Term(field, t);
+    }
+    return array;
+  }
 
   /**
    * Creates a new {@link TermsQuery} from the given list. The list
    * can contain duplicate terms and multiple fields.
    */
   public TermsQuery(final List<Term> terms) {
-    this(new FieldAndTermEnum() {
-      // we need to sort for deduplication and to have a common cache key
-      final Iterator<Term> iter = sort(terms).iterator();
-      @Override
-      public BytesRef next() {
-        if (iter.hasNext()) {
-          Term next = iter.next();
-          field = next.field();
-          return next.bytes();
-        }
-        return null;
-      }}, terms.size());
+    Term[] sortedTerms = terms.toArray(new Term[terms.size()]);
+    ArrayUtil.timSort(sortedTerms);
+    PrefixCodedTerms.Builder builder = new PrefixCodedTerms.Builder();
+    Term previous = null;
+    for (Term term : sortedTerms) {
+      if (term.equals(previous) == false) {
+        builder.add(term);
+      }
+      previous = term;
+    }
+    termData = builder.finish();
+    termDataHashCode = termData.hashCode();
   }
 
   /**
@@ -112,17 +109,7 @@ public class TermsQuery extends Query im
    * a single field.
    */
   public TermsQuery(final String field, final List<BytesRef> terms) {
-    this(new FieldAndTermEnum(field) {
-      // we need to sort for deduplication and to have a common cache key
-      final Iterator<BytesRef> iter = sort(terms).iterator();
-      @Override
-      public BytesRef next() {
-        if (iter.hasNext()) {
-          return iter.next();
-        }
-        return null;
-      }
-    }, terms.size());
+    this(toTermArray(field, terms));
   }
 
   /**
@@ -142,106 +129,37 @@ public class TermsQuery extends Query im
     this(Arrays.asList(terms));
   }
 
-  private TermsQuery(FieldAndTermEnum iter, int length) {
-    // TODO: maybe use oal.index.PrefixCodedTerms instead?
-    // If number of terms is more than a few hundred it
-    // should be a win
-
-    // TODO: we also pack terms in FieldCache/DocValues
-    // ... maybe we can refactor to share that code
-
-    // TODO: yet another option is to build the union of the terms in
-    // an automaton an call intersect on the termsenum if the density is high
-
-    int hash = 9;
-    byte[] serializedTerms = new byte[0];
-    this.offsets = new int[length+1];
-    int lastEndOffset = 0;
-    int index = 0;
-    ArrayList<TermsAndField> termsAndFields = new ArrayList<>();
-    TermsAndField lastTermsAndField = null;
-    BytesRef previousTerm = null;
-    String previousField = null;
-    BytesRef currentTerm;
-    String currentField;
-    while((currentTerm = iter.next()) != null) {
-      currentField = iter.field();
-      if (currentField == null) {
-        throw new IllegalArgumentException("Field must not be null");
-      }
-      if (previousField != null) {
-        // deduplicate
-        if (previousField.equals(currentField)) {
-          if (previousTerm.bytesEquals(currentTerm)){
-            continue;
-          }
-        } else {
-          final int start = lastTermsAndField == null ? 0 : lastTermsAndField.end;
-          lastTermsAndField = new TermsAndField(start, index, previousField);
-          termsAndFields.add(lastTermsAndField);
-        }
-      }
-      hash = 31 *  hash + currentField.hashCode();
-      hash = 31 *  hash + currentTerm.hashCode();
-      if (serializedTerms.length < lastEndOffset+currentTerm.length) {
-        serializedTerms = ArrayUtil.grow(serializedTerms, lastEndOffset+currentTerm.length);
-      }
-      System.arraycopy(currentTerm.bytes, currentTerm.offset, serializedTerms, lastEndOffset,
currentTerm.length);
-      offsets[index] = lastEndOffset;
-      lastEndOffset += currentTerm.length;
-      index++;
-      previousTerm = currentTerm;
-      previousField = currentField;
-    }
-    offsets[index] = lastEndOffset;
-    final int start = lastTermsAndField == null ? 0 : lastTermsAndField.end;
-    lastTermsAndField = new TermsAndField(start, index, previousField);
-    termsAndFields.add(lastTermsAndField);
-    this.termsBytes = ArrayUtil.shrink(serializedTerms, lastEndOffset);
-    this.termsAndFields = termsAndFields.toArray(new TermsAndField[termsAndFields.size()]);
-    this.hashCode = hash;
-  }
-
   @Override
   public boolean equals(Object obj) {
+    if (this == obj) {
+      return true;
+    }
     if (!super.equals(obj)) {
       return false;
     }
     TermsQuery that = (TermsQuery) obj;
-    // first check the fields before even comparing the bytes
-    if (that.hashCode == hashCode && getBoost() == that.getBoost() && Arrays.equals(termsAndFields,
that.termsAndFields)) {
-      int lastOffset = termsAndFields[termsAndFields.length - 1].end;
-      // compare offsets since we sort they must be identical
-      if (ArrayUtil.equals(offsets, 0, that.offsets, 0, lastOffset + 1)) {
-        // straight byte comparison since we sort they must be identical
-        return  ArrayUtil.equals(termsBytes, 0, that.termsBytes, 0, offsets[lastOffset]);
-      }
-    }
-    return false;
+    // termData might be heavy to compare so check the hash code first
+    return termDataHashCode == that.termDataHashCode
+        && termData.equals(that.termData);
   }
 
   @Override
   public int hashCode() {
-      return super.hashCode() ^ this.hashCode;
+    return 31 * super.hashCode() + termDataHashCode;
   }
 
   @Override
   public String toString(String defaultField) {
     StringBuilder builder = new StringBuilder();
-    BytesRef spare = new BytesRef(termsBytes);
     boolean first = true;
-    for (int i = 0; i < termsAndFields.length; i++) {
-      TermsAndField current = termsAndFields[i];
-      for (int j = current.start; j < current.end; j++) {
-        spare.offset = offsets[j];
-        spare.length = offsets[j+1] - offsets[j];
-        if (!first) {
-          builder.append(' ');
-        }
-        first = false;
-        builder.append(current.field).append(':');
-        builder.append(spare.utf8ToString());
-      }
+    TermIterator iterator = termData.iterator();
+    for (BytesRef term = iterator.next(); term != null; term = iterator.next()) {
+      if (!first) {
+        builder.append(' ');
+      }
+      first = false;
+      builder.append(iterator.field()).append(':');
+      builder.append(term.utf8ToString());
     }
     builder.append(ToStringUtils.boost(getBoost()));
 
@@ -250,10 +168,7 @@ public class TermsQuery extends Query im
 
   @Override
   public long ramBytesUsed() {
-    return BASE_RAM_BYTES_USED
-        + RamUsageEstimator.sizeOf(termsAndFields)
-        + RamUsageEstimator.sizeOf(termsBytes)
-        + RamUsageEstimator.sizeOf(offsets);
+    return BASE_RAM_BYTES_USED + termData.ramBytesUsed();
   }
 
   @Override
@@ -261,95 +176,9 @@ public class TermsQuery extends Query im
     return Collections.emptyList();
   }
 
-  private static final class TermsAndField implements Accountable {
-
-    private static final long BASE_RAM_BYTES_USED =
-        RamUsageEstimator.shallowSizeOfInstance(TermsAndField.class)
-        + RamUsageEstimator.shallowSizeOfInstance(String.class)
-        + RamUsageEstimator.NUM_BYTES_ARRAY_HEADER; // header of the array held by the String
-
-    final int start;
-    final int end;
-    final String field;
-
-
-    TermsAndField(int start, int end, String field) {
-      super();
-      this.start = start;
-      this.end = end;
-      this.field = field;
-    }
-
-    @Override
-    public long ramBytesUsed() {
-      // this is an approximation since we don't actually know how strings store
-      // their data, which can be JVM-dependent
-      return BASE_RAM_BYTES_USED + field.length() * RamUsageEstimator.NUM_BYTES_CHAR;
-    }
-
-    @Override
-    public Collection<Accountable> getChildResources() {
-      return Collections.emptyList();
-    }
-
-    @Override
-    public int hashCode() {
-      final int prime = 31;
-      int result = 1;
-      result = prime * result + ((field == null) ? 0 : field.hashCode());
-      result = prime * result + end;
-      result = prime * result + start;
-      return result;
-    }
-
-    @Override
-    public boolean equals(Object obj) {
-      if (this == obj) return true;
-      if (obj == null) return false;
-      if (getClass() != obj.getClass()) return false;
-      TermsAndField other = (TermsAndField) obj;
-      if (field == null) {
-        if (other.field != null) return false;
-      } else if (!field.equals(other.field)) return false;
-      if (end != other.end) return false;
-      if (start != other.start) return false;
-      return true;
-    }
-
-  }
-
-  private static abstract class FieldAndTermEnum {
-    protected String field;
-
-    public abstract BytesRef next();
-
-    public FieldAndTermEnum() {}
-
-    public FieldAndTermEnum(String field) { this.field = field; }
-
-    public String field() {
-      return field;
-    }
-  }
-
-  /*
-   * simple utility that returns the in-place sorted list
-   */
-  private static <T extends Comparable<? super T>> List<T> sort(List<T>
toSort) {
-    if (toSort.isEmpty()) {
-      throw new IllegalArgumentException("no terms provided");
-    }
-    Collections.sort(toSort);
-    return toSort;
-  }
-
   @Override
-  public Weight createWeight(IndexSearcher searcher, boolean needsScores)
-      throws IOException {
-    return new Weight(this) {
-
-      private float queryNorm;
-      private float queryWeight;
+  public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException
{
+    return new ConstantScoreWeight(this) {
 
       @Override
       public void extractTerms(Set<Term> terms) {
@@ -360,51 +189,30 @@ public class TermsQuery extends Query im
       }
 
       @Override
-      public float getValueForNormalization() throws IOException {
-        queryWeight = getBoost();
-        return queryWeight * queryWeight;
-      }
-
-      @Override
-      public void normalize(float norm, float topLevelBoost) {
-        queryNorm = norm * topLevelBoost;
-        queryWeight *= queryNorm;
-      }
-
-      @Override
-      public Explanation explain(LeafReaderContext context, int doc) throws IOException {
-        final Scorer s = scorer(context, context.reader().getLiveDocs());
-        final boolean exists = (s != null && s.advance(doc) == doc);
-
-        if (exists) {
-          return Explanation.match(queryWeight, TermsQuery.this.toString() + ", product of:",
-              Explanation.match(getBoost(), "boost"), Explanation.match(queryNorm, "queryNorm"));
-        } else {
-          return Explanation.noMatch(TermsQuery.this.toString() + " doesn't match id " +
doc);
-        }
-      }
-
-      @Override
-      public Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException
{
+      public Scorer scorer(LeafReaderContext context, Bits acceptDocs, float score) throws
IOException {
         final LeafReader reader = context.reader();
         BitDocIdSet.Builder builder = new BitDocIdSet.Builder(reader.maxDoc());
         final Fields fields = reader.fields();
-        final BytesRef spare = new BytesRef(termsBytes);
+        String lastField = null;
         Terms terms = null;
         TermsEnum termsEnum = null;
         PostingsEnum docs = null;
-        for (TermsAndField termsAndField : termsAndFields) {
-          if ((terms = fields.terms(termsAndField.field)) != null) {
-            termsEnum = terms.iterator(); // this won't return null
-            for (int i = termsAndField.start; i < termsAndField.end; i++) {
-              spare.offset = offsets[i];
-              spare.length = offsets[i+1] - offsets[i];
-              if (termsEnum.seekExact(spare)) {
-                docs = termsEnum.postings(acceptDocs, docs, PostingsEnum.NONE); // no freq
since we don't need them
-                builder.or(docs);
-              }
+        TermIterator iterator = termData.iterator();
+        for (BytesRef term = iterator.next(); term != null; term = iterator.next()) {
+          String field = iterator.field();
+          // comparing references is fine here
+          if (field != lastField) {
+            terms = fields.terms(field);
+            if (terms == null) {
+              termsEnum = null;
+            } else {
+              termsEnum = terms.iterator();
             }
           }
+          if (termsEnum != null && termsEnum.seekExact(term)) {
+            docs = termsEnum.postings(acceptDocs, docs, PostingsEnum.NONE);
+            builder.or(docs);
+          }
         }
         BitDocIdSet result = builder.build();
         if (result == null) {
@@ -412,11 +220,15 @@ public class TermsQuery extends Query im
         }
 
         final DocIdSetIterator disi = result.iterator();
+        if (disi == null) {
+          return null;
+        }
+
         return new Scorer(this) {
 
           @Override
           public float score() throws IOException {
-            return queryWeight;
+            return score;
           }
 
           @Override

Modified: lucene/dev/trunk/lucene/queries/src/test/org/apache/lucene/queries/TermsQueryTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/queries/src/test/org/apache/lucene/queries/TermsQueryTest.java?rev=1678164&r1=1678163&r2=1678164&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/queries/src/test/org/apache/lucene/queries/TermsQueryTest.java
(original)
+++ lucene/dev/trunk/lucene/queries/src/test/org/apache/lucene/queries/TermsQueryTest.java
Thu May  7 10:51:52 2015
@@ -37,6 +37,7 @@ import org.apache.lucene.search.BooleanQ
 import org.apache.lucene.search.ConstantScoreQuery;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.search.QueryUtils;
 import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TopDocs;
@@ -185,30 +186,6 @@ public class TermsQueryTest extends Luce
     assertFalse(left.equals(right));
   }
 
-  public void testNoTerms() {
-    List<Term> emptyTerms = Collections.emptyList();
-    List<BytesRef> emptyBytesRef = Collections.emptyList();
-    try {
-      new TermsQuery(emptyTerms);
-      fail("must fail - no terms!");
-    } catch (IllegalArgumentException e) {}
-
-    try {
-      new TermsQuery(emptyTerms.toArray(new Term[0]));
-      fail("must fail - no terms!");
-    } catch (IllegalArgumentException e) {}
-
-    try {
-      new TermsQuery(null, emptyBytesRef.toArray(new BytesRef[0]));
-      fail("must fail - no terms!");
-    } catch (IllegalArgumentException e) {}
-
-    try {
-      new TermsQuery(null, emptyBytesRef);
-      fail("must fail - no terms!");
-    } catch (IllegalArgumentException e) {}
-  }
-
   public void testToString() {
     TermsQuery termsQuery = new TermsQuery(new Term("field1", "a"),
                                               new Term("field1", "b"),
@@ -216,6 +193,24 @@ public class TermsQueryTest extends Luce
     assertEquals("field1:a field1:b field1:c", termsQuery.toString());
   }
 
+  public void testDedup() {
+    Query query1 = new TermsQuery(new Term("foo", "bar"));
+    Query query2 = new TermsQuery(new Term("foo", "bar"), new Term("foo", "bar"));
+    QueryUtils.checkEqual(query1, query2);
+  }
+
+  public void testOrderDoesNotMatter() {
+    // order of terms if different
+    Query query1 = new TermsQuery(new Term("foo", "bar"), new Term("foo", "baz"));
+    Query query2 = new TermsQuery(new Term("foo", "baz"), new Term("foo", "bar"));
+    QueryUtils.checkEqual(query1, query2);
+
+    // order of fields is different
+    query1 = new TermsQuery(new Term("foo", "bar"), new Term("bar", "bar"));
+    query2 = new TermsQuery(new Term("bar", "bar"), new Term("foo", "bar"));
+    QueryUtils.checkEqual(query1, query2);
+  }
+
   public void testRamBytesUsed() {
     List<Term> terms = new ArrayList<>();
     final int numTerms = 1000 + random().nextInt(1000);
@@ -225,8 +220,7 @@ public class TermsQueryTest extends Luce
     TermsQuery query = new TermsQuery(terms);
     final long actualRamBytesUsed = RamUsageTester.sizeOf(query);
     final long expectedRamBytesUsed = query.ramBytesUsed();
-    // error margin within 1%
-    assertEquals(actualRamBytesUsed, expectedRamBytesUsed, actualRamBytesUsed / 100);
+    // error margin within 5%
+    assertEquals(actualRamBytesUsed, expectedRamBytesUsed, actualRamBytesUsed / 20);
   }
-
 }



Mime
View raw message