lucene-commits mailing list archives

From jpou...@apache.org
Subject [1/3] lucene-solr:branch_6x: LUCENE-7637: Require that all terms of a TermsQuery come from the same field.
Date Tue, 17 Jan 2017 13:32:37 GMT
Repository: lucene-solr
Updated Branches:
  refs/heads/branch_6x 9224065cb -> 5c2f5bd02


LUCENE-7637: Require that all terms of a TermsQuery come from the same field.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/5c2f5bd0
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/5c2f5bd0
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/5c2f5bd0

Branch: refs/heads/branch_6x
Commit: 5c2f5bd028d370f43fbeee07fc6d197449609af3
Parents: aba6539
Author: Adrien Grand <jpountz@gmail.com>
Authored: Tue Jan 17 14:03:46 2017 +0100
Committer: Adrien Grand <jpountz@gmail.com>
Committed: Tue Jan 17 14:28:12 2017 +0100

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |   8 +
 .../apache/lucene/search/TermInSetQuery.java    |  93 ++----
 .../lucene/search/TermInSetQueryTest.java       | 123 +++-----
 .../apache/lucene/facet/MultiFacetQuery.java    |  13 +-
 .../org/apache/lucene/queries/TermsQuery.java   |  71 ++++-
 .../apache/lucene/queries/TermsQueryTest.java   | 310 +++++++++++++++++++
 6 files changed, 458 insertions(+), 160 deletions(-)
----------------------------------------------------------------------
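
For reference, a minimal sketch (not part of the commit) of how the single-field TermInSetQuery API is used after this change; the field name and term values below are made up for illustration:

  import java.util.Arrays;
  import org.apache.lucene.search.Query;
  import org.apache.lucene.search.TermInSetQuery;
  import org.apache.lucene.util.BytesRef;

  public class TermInSetQueryExample {
    public static void main(String[] args) {
      // All terms must now belong to a single field, given once as the first argument.
      Query byVarargs = new TermInSetQuery("id",
          new BytesRef("doc-1"), new BytesRef("doc-7"), new BytesRef("doc-42"));
      // Equivalent collection-based constructor:
      Query byCollection = new TermInSetQuery("id",
          Arrays.asList(new BytesRef("doc-1"), new BytesRef("doc-7"), new BytesRef("doc-42")));
      System.out.println(byVarargs + " | " + byCollection);
    }
  }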


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5c2f5bd0/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 392c3f0..9743459 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -5,6 +5,14 @@ http://s.apache.org/luceneversions
 
 ======================= Lucene 6.5.0 =======================
 
+API Changes
+
+* LUCENE-7624: TermsQuery has been renamed as TermInSetQuery and moved to core.
+  (Alan Woodward)
+
+* LUCENE-7637: TermInSetQuery requires that all terms come from the same field.
+  (Adrien Grand)
+
 New Features
 
 * LUCENE-7623: Add FunctionScoreQuery and FunctionMatchQuery (Alan Woodward,

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5c2f5bd0/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java b/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java
index 978bd2d..100513b 100644
--- a/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java
@@ -21,7 +21,6 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
-import java.util.HashSet;
 import java.util.List;
 import java.util.Objects;
 import java.util.Set;
@@ -73,39 +72,12 @@ public class TermInSetQuery extends Query implements Accountable {
   // Same threshold as MultiTermQueryConstantScoreWrapper
   static final int BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD = 16;
 
-  private final boolean singleField; // whether all terms are from the same field
+  private final String field;
   private final PrefixCodedTerms termData;
   private final int termDataHashCode; // cached hashcode of termData
 
   /**
-   * Creates a new {@link TermInSetQuery} from the given collection. It
-   * can contain duplicate terms and multiple fields.
-   */
-  public TermInSetQuery(Collection<Term> terms) {
-    Term[] sortedTerms = terms.toArray(new Term[terms.size()]);
-    // already sorted if we are a SortedSet with natural order
-    boolean sorted = terms instanceof SortedSet && ((SortedSet<Term>)terms).comparator() == null;
-    if (!sorted) {
-      ArrayUtil.timSort(sortedTerms);
-    }
-    PrefixCodedTerms.Builder builder = new PrefixCodedTerms.Builder();
-    Set<String> fields = new HashSet<>();
-    Term previous = null;
-    for (Term term : sortedTerms) {
-      if (term.equals(previous) == false) {
-        fields.add(term.field());
-        builder.add(term);
-      }
-      previous = term;
-    }
-    singleField = fields.size() == 1;
-    termData = builder.finish();
-    termDataHashCode = termData.hashCode();
-  }
-
-  /**
-   * Creates a new {@link TermInSetQuery} from the given collection for
-   * a single field. It can contain duplicate terms.
+   * Creates a new {@link TermInSetQuery} from the given collection of terms.
    */
   public TermInSetQuery(String field, Collection<BytesRef> terms) {
     BytesRef[] sortedTerms = terms.toArray(new BytesRef[terms.size()]);
@@ -125,27 +97,18 @@ public class TermInSetQuery extends Query implements Accountable {
       builder.add(field, term);
       previous.copyBytes(term);
     }
-    singleField = true;
+    this.field = field;
     termData = builder.finish();
     termDataHashCode = termData.hashCode();
   }
 
   /**
-   * Creates a new {@link TermInSetQuery} from the given {@link BytesRef} array for
-   * a single field.
+   * Creates a new {@link TermInSetQuery} from the given array of terms.
    */
   public TermInSetQuery(String field, BytesRef...terms) {
     this(field, Arrays.asList(terms));
   }
 
-  /**
-   * Creates a new {@link TermInSetQuery} from the given array. The array can
-   * contain duplicate terms and multiple fields.
-   */
-  public TermInSetQuery(final Term... terms) {
-    this(Arrays.asList(terms));
-  }
-
   @Override
   public Query rewrite(IndexReader reader) throws IOException {
     final int threshold = Math.min(BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD, BooleanQuery.getMaxClauseCount());
@@ -167,6 +130,7 @@ public class TermInSetQuery extends Query implements Accountable {
   }
 
   private boolean equalsTo(TermInSetQuery other) {
+    // no need to check 'field' explicitly since it is encoded in 'termData'
     // termData might be heavy to compare so check the hash code first
     return termDataHashCode == other.termDataHashCode &&
         termData.equals(other.termData);
@@ -260,6 +224,15 @@ public class TermInSetQuery extends Query implements Accountable {
       private WeightOrDocIdSet rewrite(LeafReaderContext context) throws IOException {
         final LeafReader reader = context.reader();
 
+        final Fields fields = reader.fields();
+        Terms terms = fields.terms(field);
+        if (terms == null) {
+          return null;
+        }
+        TermsEnum termsEnum = terms.iterator();
+        PostingsEnum docs = null;
+        TermIterator iterator = termData.iterator();
+
         // We will first try to collect up to 'threshold' terms into 'matchingTerms'
         // if there are two many terms, we will fall back to building the 'builder'
         final int threshold = Math.min(BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD, BooleanQuery.getMaxClauseCount());
@@ -267,25 +240,9 @@ public class TermInSetQuery extends Query implements Accountable {
         List<TermAndState> matchingTerms = new ArrayList<>(threshold);
         DocIdSetBuilder builder = null;
 
-        final Fields fields = reader.fields();
-        String lastField = null;
-        Terms terms = null;
-        TermsEnum termsEnum = null;
-        PostingsEnum docs = null;
-        TermIterator iterator = termData.iterator();
         for (BytesRef term = iterator.next(); term != null; term = iterator.next()) {
-          String field = iterator.field();
-          // comparing references is fine here
-          if (field != lastField) {
-            terms = fields.terms(field);
-            if (terms == null) {
-              termsEnum = null;
-            } else {
-              termsEnum = terms.iterator();
-            }
-            lastField = field;
-          }
-          if (termsEnum != null && termsEnum.seekExact(term)) {
+          assert field.equals(iterator.field());
+          if (termsEnum.seekExact(term)) {
             if (matchingTerms == null) {
               docs = termsEnum.postings(docs, PostingsEnum.NONE);
               builder.add(docs);
@@ -293,15 +250,7 @@ public class TermInSetQuery extends Query implements Accountable {
               matchingTerms.add(new TermAndState(field, termsEnum));
             } else {
               assert matchingTerms.size() == threshold;
-              if (singleField) {
-                // common case: all terms are in the same field
-                // use an optimized builder that leverages terms stats to be more efficient
-                builder = new DocIdSetBuilder(reader.maxDoc(), terms);
-              } else {
-                // corner case: different fields
-                // don't make assumptions about the docs we will get
-                builder = new DocIdSetBuilder(reader.maxDoc());
-              }
+              builder = new DocIdSetBuilder(reader.maxDoc(), terms);
               docs = termsEnum.postings(docs, PostingsEnum.NONE);
               builder.add(docs);
               for (TermAndState t : matchingTerms) {
@@ -345,7 +294,9 @@ public class TermInSetQuery extends Query implements Accountable {
       @Override
       public BulkScorer bulkScorer(LeafReaderContext context) throws IOException {
         final WeightOrDocIdSet weightOrBitSet = rewrite(context);
-        if (weightOrBitSet.weight != null) {
+        if (weightOrBitSet == null) {
+          return null;
+        } else if (weightOrBitSet.weight != null) {
           return weightOrBitSet.weight.bulkScorer(context);
         } else {
           final Scorer scorer = scorer(weightOrBitSet.set);
@@ -359,7 +310,9 @@ public class TermInSetQuery extends Query implements Accountable {
       @Override
       public Scorer scorer(LeafReaderContext context) throws IOException {
         final WeightOrDocIdSet weightOrBitSet = rewrite(context);
-        if (weightOrBitSet.weight != null) {
+        if (weightOrBitSet == null) {
+          return null;
+        } else if (weightOrBitSet.weight != null) {
           return weightOrBitSet.weight.scorer(context);
         } else {
           return scorer(weightOrBitSet.set);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5c2f5bd0/lucene/core/src/test/org/apache/lucene/search/TermInSetQueryTest.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/search/TermInSetQueryTest.java b/lucene/core/src/test/org/apache/lucene/search/TermInSetQueryTest.java
index e694d97..3878d59 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TermInSetQueryTest.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TermInSetQueryTest.java
@@ -18,15 +18,12 @@ package org.apache.lucene.search;
 
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collection;
 import java.util.Collections;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
 import java.util.concurrent.atomic.AtomicInteger;
 
-import com.carrotsearch.randomizedtesting.generators.RandomPicks;
 import com.carrotsearch.randomizedtesting.generators.RandomStrings;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field.Store;
@@ -53,25 +50,25 @@ public class TermInSetQueryTest extends LuceneTestCase {
 
   public void testDuel() throws IOException {
     final int iters = atLeast(2);
+    final String field = "f";
     for (int iter = 0; iter < iters; ++iter) {
-      final List<Term> allTerms = new ArrayList<>();
+      final List<BytesRef> allTerms = new ArrayList<>();
       final int numTerms = TestUtil.nextInt(random(), 1, 1 << TestUtil.nextInt(random(), 1, 10));
       for (int i = 0; i < numTerms; ++i) {
-        final String field = usually() ? "f" : "g";
         final String value = TestUtil.randomAnalysisString(random(), 10, true);
-        allTerms.add(new Term(field, value));
+        allTerms.add(new BytesRef(value));
       }
       Directory dir = newDirectory();
       RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
       final int numDocs = atLeast(100);
       for (int i = 0; i < numDocs; ++i) {
         Document doc = new Document();
-        final Term term = allTerms.get(random().nextInt(allTerms.size()));
-        doc.add(new StringField(term.field(), term.text(), Store.NO));
+        final BytesRef term = allTerms.get(random().nextInt(allTerms.size()));
+        doc.add(new StringField(field, term, Store.NO));
         iw.addDocument(doc);
       }
       if (numTerms > 1 && random().nextBoolean()) {
-        iw.deleteDocuments(new TermQuery(allTerms.get(0)));
+        iw.deleteDocuments(new TermQuery(new Term(field, allTerms.get(0))));
       }
       iw.commit();
       final IndexReader reader = iw.getReader();
@@ -87,16 +84,16 @@ public class TermInSetQueryTest extends LuceneTestCase {
       for (int i = 0; i < 100; ++i) {
         final float boost = random().nextFloat() * 10;
         final int numQueryTerms = TestUtil.nextInt(random(), 1, 1 << TestUtil.nextInt(random(), 1, 8));
-        List<Term> queryTerms = new ArrayList<>();
+        List<BytesRef> queryTerms = new ArrayList<>();
         for (int j = 0; j < numQueryTerms; ++j) {
           queryTerms.add(allTerms.get(random().nextInt(allTerms.size())));
         }
         final BooleanQuery.Builder bq = new BooleanQuery.Builder();
-        for (Term t : queryTerms) {
-          bq.add(new TermQuery(t), Occur.SHOULD);
+        for (BytesRef t : queryTerms) {
+          bq.add(new TermQuery(new Term(field, t)), Occur.SHOULD);
         }
         final Query q1 = new ConstantScoreQuery(bq.build());
-        final Query q2 = new TermInSetQuery(queryTerms);
+        final Query q2 = new TermInSetQuery(field, queryTerms);
         assertSameMatches(searcher, new BoostQuery(q1, boost), new BoostQuery(q2, boost), true);
       }
 
@@ -118,103 +115,72 @@ public class TermInSetQueryTest extends LuceneTestCase {
     }
   }
 
-  private TermInSetQuery termsQuery(boolean singleField, Term...terms) {
-    return termsQuery(singleField, Arrays.asList(terms));
-  }
-
-  private TermInSetQuery termsQuery(boolean singleField, Collection<Term> termList) {
-    if (!singleField) {
-      return new TermInSetQuery(new ArrayList<>(termList));
-    }
-    final TermInSetQuery filter;
-    List<BytesRef> bytes = new ArrayList<>();
-    String field = null;
-    for (Term term : termList) {
-        bytes.add(term.bytes());
-        if (field != null) {
-          assertEquals(term.field(), field);
-        }
-        field = term.field();
-    }
-    assertNotNull(field);
-    filter = new TermInSetQuery(field, bytes);
-    return filter;
-  }
-
   public void testHashCodeAndEquals() {
     int num = atLeast(100);
-    final boolean singleField = random().nextBoolean();
-    List<Term> terms = new ArrayList<>();
-    Set<Term> uniqueTerms = new HashSet<>();
+    List<BytesRef> terms = new ArrayList<>();
+    Set<BytesRef> uniqueTerms = new HashSet<>();
     for (int i = 0; i < num; i++) {
-      String field = "field" + (singleField ? "1" : random().nextInt(100));
       String string = TestUtil.randomRealisticUnicodeString(random());
-      terms.add(new Term(field, string));
-      uniqueTerms.add(new Term(field, string));
-      TermInSetQuery left = termsQuery(singleField ? random().nextBoolean() : false, uniqueTerms);
+      terms.add(new BytesRef(string));
+      uniqueTerms.add(new BytesRef(string));
+      TermInSetQuery left = new TermInSetQuery("field", uniqueTerms);
       Collections.shuffle(terms, random());
-      TermInSetQuery right = termsQuery(singleField ? random().nextBoolean() : false, terms);
+      TermInSetQuery right = new TermInSetQuery("field", terms);
       assertEquals(right, left);
       assertEquals(right.hashCode(), left.hashCode());
       if (uniqueTerms.size() > 1) {
-        List<Term> asList = new ArrayList<>(uniqueTerms);
+        List<BytesRef> asList = new ArrayList<>(uniqueTerms);
         asList.remove(0);
-        TermInSetQuery notEqual = termsQuery(singleField ? random().nextBoolean() : false, asList);
+        TermInSetQuery notEqual = new TermInSetQuery("field", asList);
         assertFalse(left.equals(notEqual));
         assertFalse(right.equals(notEqual));
       }
     }
 
-    TermInSetQuery tq1 = new TermInSetQuery(new Term("thing", "apple"));
-    TermInSetQuery tq2 = new TermInSetQuery(new Term("thing", "orange"));
+    TermInSetQuery tq1 = new TermInSetQuery("thing", new BytesRef("apple"));
+    TermInSetQuery tq2 = new TermInSetQuery("thing", new BytesRef("orange"));
     assertFalse(tq1.hashCode() == tq2.hashCode());
 
     // different fields with the same term should have differing hashcodes
-    tq1 = new TermInSetQuery(new Term("thing1", "apple"));
-    tq2 = new TermInSetQuery(new Term("thing2", "apple"));
+    tq1 = new TermInSetQuery("thing", new BytesRef("apple"));
+    tq2 = new TermInSetQuery("thing2", new BytesRef("apple"));
     assertFalse(tq1.hashCode() == tq2.hashCode());
   }
 
-  public void testSingleFieldEquals() {
+  public void testSimpleEquals() {
     // Two terms with the same hash code
     assertEquals("AaAaBB".hashCode(), "BBBBBB".hashCode());
-    TermInSetQuery left = termsQuery(true, new Term("id", "AaAaAa"), new Term("id", "AaAaBB"));
-    TermInSetQuery right = termsQuery(true, new Term("id", "AaAaAa"), new Term("id", "BBBBBB"));
+    TermInSetQuery left = new TermInSetQuery("id", new BytesRef("AaAaAa"), new BytesRef("AaAaBB"));
+    TermInSetQuery right = new TermInSetQuery("id", new BytesRef("AaAaAa"), new BytesRef("BBBBBB"));
     assertFalse(left.equals(right));
   }
 
   public void testToString() {
-    TermInSetQuery termsQuery = new TermInSetQuery(new Term("field1", "a"),
-                                              new Term("field1", "b"),
-                                              new Term("field1", "c"));
+    TermInSetQuery termsQuery = new TermInSetQuery("field1",
+        new BytesRef("a"), new BytesRef("b"), new BytesRef("c"));
     assertEquals("field1:a field1:b field1:c", termsQuery.toString());
   }
 
   public void testDedup() {
-    Query query1 = new TermInSetQuery(new Term("foo", "bar"));
-    Query query2 = new TermInSetQuery(new Term("foo", "bar"), new Term("foo", "bar"));
+    Query query1 = new TermInSetQuery("foo", new BytesRef("bar"));
+    Query query2 = new TermInSetQuery("foo", new BytesRef("bar"), new BytesRef("bar"));
     QueryUtils.checkEqual(query1, query2);
   }
 
   public void testOrderDoesNotMatter() {
     // order of terms if different
-    Query query1 = new TermInSetQuery(new Term("foo", "bar"), new Term("foo", "baz"));
-    Query query2 = new TermInSetQuery(new Term("foo", "baz"), new Term("foo", "bar"));
-    QueryUtils.checkEqual(query1, query2);
-
-    // order of fields is different
-    query1 = new TermInSetQuery(new Term("foo", "bar"), new Term("bar", "bar"));
-    query2 = new TermInSetQuery(new Term("bar", "bar"), new Term("foo", "bar"));
+    Query query1 = new TermInSetQuery("foo", new BytesRef("bar"), new BytesRef("baz"));
+    Query query2 = new TermInSetQuery("foo", new BytesRef("baz"), new BytesRef("bar"));
     QueryUtils.checkEqual(query1, query2);
   }
 
   public void testRamBytesUsed() {
-    List<Term> terms = new ArrayList<>();
+    List<BytesRef> terms = new ArrayList<>();
     final int numTerms = 1000 + random().nextInt(1000);
     for (int i = 0; i < numTerms; ++i) {
-      terms.add(new Term("f", RandomStrings.randomUnicodeOfLength(random(), 10)));
+      terms.add(new BytesRef(RandomStrings.randomUnicodeOfLength(random(), 10)));
     }
-    TermInSetQuery query = new TermInSetQuery(terms);
+    TermInSetQuery query = new TermInSetQuery("f", terms);
     final long actualRamBytesUsed = RamUsageTester.sizeOf(query);
     final long expectedRamBytesUsed = query.ramBytesUsed();
     // error margin within 5%
@@ -281,43 +247,40 @@ public class TermInSetQueryTest extends LuceneTestCase {
 
   }
 
-  public void testPullOneTermsEnumPerField() throws Exception {
+  public void testPullOneTermsEnum() throws Exception {
     Directory dir = newDirectory();
     RandomIndexWriter w = new RandomIndexWriter(random(), dir);
     Document doc = new Document();
     doc.add(new StringField("foo", "1", Store.NO));
-    doc.add(new StringField("bar", "2", Store.NO));
-    doc.add(new StringField("baz", "3", Store.NO));
     w.addDocument(doc);
     DirectoryReader reader = w.getReader();
     w.close();
     final AtomicInteger counter = new AtomicInteger();
     DirectoryReader wrapped = new TermsCountingDirectoryReaderWrapper(reader, counter);
 
-    final List<Term> terms = new ArrayList<>();
-    final Set<String> fields = new HashSet<>();
+    final List<BytesRef> terms = new ArrayList<>();
     // enough terms to avoid the rewrite
     final int numTerms = TestUtil.nextInt(random(), TermInSetQuery.BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD + 1, 100);
     for (int i = 0; i < numTerms; ++i) {
-      final String field = RandomPicks.randomFrom(random(), new String[] {"foo", "bar", "baz"});
       final BytesRef term = new BytesRef(RandomStrings.randomUnicodeOfCodepointLength(random(), 10));
-      fields.add(field);
-      terms.add(new Term(field, term));
+      terms.add(term);
     }
 
-    new IndexSearcher(wrapped).count(new TermInSetQuery(terms));
-    assertEquals(fields.size(), counter.get());
+    assertEquals(0, new IndexSearcher(wrapped).count(new TermInSetQuery("bar", terms)));
+    assertEquals(0, counter.get()); // missing field
+    new IndexSearcher(wrapped).count(new TermInSetQuery("foo", terms));
+    assertEquals(1, counter.get());
     wrapped.close();
     dir.close();
   }
   
   public void testBinaryToString() {
-    TermInSetQuery query = new TermInSetQuery(new Term("field", new BytesRef(new byte[] { (byte) 0xff, (byte) 0xfe })));
+    TermInSetQuery query = new TermInSetQuery("field", new BytesRef(new byte[] { (byte) 0xff, (byte) 0xfe }));
     assertEquals("field:[ff fe]", query.toString());
   }
 
   public void testIsConsideredCostlyByQueryCache() throws IOException {
-    TermInSetQuery query = new TermInSetQuery(new Term("foo", "bar"), new Term("foo", "baz"));
+    TermInSetQuery query = new TermInSetQuery("foo", new BytesRef("bar"), new BytesRef("baz"));
     UsageTrackingQueryCachingPolicy policy = new UsageTrackingQueryCachingPolicy();
     assertFalse(policy.shouldCache(query));
     policy.onUse(query);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5c2f5bd0/lucene/facet/src/java/org/apache/lucene/facet/MultiFacetQuery.java
----------------------------------------------------------------------
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/MultiFacetQuery.java b/lucene/facet/src/java/org/apache/lucene/facet/MultiFacetQuery.java
index a010709..72c2773 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/MultiFacetQuery.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/MultiFacetQuery.java
@@ -19,9 +19,9 @@ package org.apache.lucene.facet;
 import java.util.ArrayList;
 import java.util.Collection;
 
-import org.apache.lucene.index.Term;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermInSetQuery;
+import org.apache.lucene.util.BytesRef;
 
 /**
  * A multi-terms {@link Query} over a {@link FacetField}.
@@ -38,7 +38,7 @@ public class MultiFacetQuery extends TermInSetQuery {
    * Creates a new {@code MultiFacetQuery} filtering the query on the given dimension.
    */
   public MultiFacetQuery(final FacetsConfig facetsConfig, final String dimension, final String[]... paths) {
-    super(toTerms(facetsConfig.getDimConfig(dimension), dimension, paths));
+    super(facetsConfig.getDimConfig(dimension).indexFieldName, toTerms(dimension, paths));
   }
 
   /**
@@ -47,14 +47,13 @@ public class MultiFacetQuery extends TermInSetQuery {
    * <b>NOTE:</b>Uses FacetsConfig.DEFAULT_DIM_CONFIG.
    */
   public MultiFacetQuery(final String dimension, final String[]... paths) {
-    super(toTerms(FacetsConfig.DEFAULT_DIM_CONFIG, dimension, paths));
+    super(FacetsConfig.DEFAULT_DIM_CONFIG.indexFieldName, toTerms(dimension, paths));
   }
 
-  static Collection<Term> toTerms(final FacetsConfig.DimConfig dimConfig, final String dimension,
-          final String[]... paths) {
-    final Collection<Term> terms = new ArrayList<>(paths.length);
+  static Collection<BytesRef> toTerms(final String dimension, final String[]... paths) {
+    final Collection<BytesRef> terms = new ArrayList<>(paths.length);
     for (String[] path : paths)
-      terms.add(FacetQuery.toTerm(dimConfig, dimension, path));
+      terms.add(new BytesRef(FacetsConfig.pathToString(dimension, path)));
     return terms;
   }
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5c2f5bd0/lucene/queries/src/java/org/apache/lucene/queries/TermsQuery.java
----------------------------------------------------------------------
diff --git a/lucene/queries/src/java/org/apache/lucene/queries/TermsQuery.java b/lucene/queries/src/java/org/apache/lucene/queries/TermsQuery.java
index 994c60f..9b45a9c 100644
--- a/lucene/queries/src/java/org/apache/lucene/queries/TermsQuery.java
+++ b/lucene/queries/src/java/org/apache/lucene/queries/TermsQuery.java
@@ -16,25 +16,61 @@
  */
 package org.apache.lucene.queries;
 
+import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
 
+import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.ConstantScoreQuery;
+import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermInSetQuery;
+import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.util.Accountable;
 import org.apache.lucene.util.BytesRef;
 
 /**
  * @deprecated Use {@link org.apache.lucene.search.TermInSetQuery}
  */
 @Deprecated
-public class TermsQuery extends TermInSetQuery {
+public class TermsQuery extends Query implements Accountable {
+
+  private final Query rewritten;
+  private final long ramBytesUsed;
 
   /**
    * Creates a new {@link TermsQuery} from the given collection. It
    * can contain duplicate terms and multiple fields.
    */
   public TermsQuery(Collection<Term> terms) {
-    super(terms);
+    Map<String, List<BytesRef>> termsPerField = new HashMap<>();
+    for (Term term : terms) {
+      List<BytesRef> t = termsPerField.computeIfAbsent(term.field(), s -> new ArrayList<>());
+      t.add(term.bytes());
+    }
+    if (termsPerField.size() == 1) {
+      Map.Entry<String, List<BytesRef>> entry = termsPerField.entrySet().iterator().next();
+      TermInSetQuery tisq = new TermInSetQuery(entry.getKey(), entry.getValue());
+      rewritten = tisq;
+      ramBytesUsed = tisq.ramBytesUsed();
+    } else {
+      BooleanQuery.Builder bq = new BooleanQuery.Builder()
+          .setDisableCoord(true);
+      long ramBytesUsed = 0;
+      for (Map.Entry<String, List<BytesRef>> entry : termsPerField.entrySet()) {
+        TermInSetQuery tisq = new TermInSetQuery(entry.getKey(), entry.getValue());
+        bq.add(tisq, Occur.SHOULD);
+        ramBytesUsed += tisq.ramBytesUsed();
+      }
+      rewritten = new ConstantScoreQuery(bq.build());
+      this.ramBytesUsed = ramBytesUsed;
+    }
   }
 
   /**
@@ -42,7 +78,9 @@ public class TermsQuery extends TermInSetQuery {
    * a single field. It can contain duplicate terms.
    */
   public TermsQuery(String field, Collection<BytesRef> terms) {
-    super(field, terms);
+    TermInSetQuery tisq = new TermInSetQuery(field, terms);
+    rewritten = tisq;
+    ramBytesUsed = tisq.ramBytesUsed();
   }
 
   /**
@@ -61,4 +99,31 @@ public class TermsQuery extends TermInSetQuery {
     this(Arrays.asList(terms));
   }
 
+  @Override
+  public Query rewrite(IndexReader reader) throws IOException {
+    return rewritten;
+  }
+
+  @Override
+  public boolean equals(Object obj) {
+    if (sameClassAs(obj) == false) {
+      return false;
+    }
+    return Objects.equals(rewritten, ((TermsQuery) obj).rewritten);
+  }
+
+  @Override
+  public int hashCode() {
+    return 31 * classHash() + rewritten.hashCode();
+  }
+
+  @Override
+  public String toString(String field) {
+    return rewritten.toString(field);
+  }
+
+  @Override
+  public long ramBytesUsed() {
+    return ramBytesUsed;
+  }
 }
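
To make the back-compat behaviour above concrete, a hedged sketch of the equivalence the deprecated TermsQuery now establishes; the field names and term values are illustrative only:

  import org.apache.lucene.index.MultiReader;
  import org.apache.lucene.index.Term;
  import org.apache.lucene.queries.TermsQuery;
  import org.apache.lucene.search.Query;

  public class TermsQueryRewriteExample {
    public static void main(String[] args) throws Exception {
      // Deprecated multi-field form: terms target two different fields.
      Query legacy = new TermsQuery(new Term("title", "lucene"), new Term("body", "search"));
      // rewrite() returns the precomputed query: a single TermInSetQuery when all terms
      // share one field, otherwise a constant-score BooleanQuery with one
      // TermInSetQuery SHOULD clause per field.
      Query rewritten = legacy.rewrite(new MultiReader());
      System.out.println(rewritten);
    }
  }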

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5c2f5bd0/lucene/queries/src/test/org/apache/lucene/queries/TermsQueryTest.java
----------------------------------------------------------------------
diff --git a/lucene/queries/src/test/org/apache/lucene/queries/TermsQueryTest.java b/lucene/queries/src/test/org/apache/lucene/queries/TermsQueryTest.java
new file mode 100644
index 0000000..c5549da
--- /dev/null
+++ b/lucene/queries/src/test/org/apache/lucene/queries/TermsQueryTest.java
@@ -0,0 +1,310 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.queries;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.FilterDirectoryReader;
+import org.apache.lucene.index.FilterLeafReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.MultiReader;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.BoostQuery;
+import org.apache.lucene.search.ConstantScoreQuery;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.QueryUtils;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.UsageTrackingQueryCachingPolicy;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.RamUsageTester;
+import org.apache.lucene.util.TestUtil;
+
+import com.carrotsearch.randomizedtesting.generators.RandomStrings;
+
+public class TermsQueryTest extends LuceneTestCase {
+
+  public void testDuel() throws IOException {
+    final int iters = atLeast(2);
+    for (int iter = 0; iter < iters; ++iter) {
+      final List<Term> allTerms = new ArrayList<>();
+      final int numTerms = TestUtil.nextInt(random(), 1, 1 << TestUtil.nextInt(random(), 1, 10));
+      for (int i = 0; i < numTerms; ++i) {
+        final String field = usually() ? "f" : "g";
+        final String value = TestUtil.randomAnalysisString(random(), 10, true);
+        allTerms.add(new Term(field, value));
+      }
+      Directory dir = newDirectory();
+      RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
+      final int numDocs = atLeast(100);
+      for (int i = 0; i < numDocs; ++i) {
+        Document doc = new Document();
+        final Term term = allTerms.get(random().nextInt(allTerms.size()));
+        doc.add(new StringField(term.field(), term.text(), Store.NO));
+        iw.addDocument(doc);
+      }
+      if (numTerms > 1 && random().nextBoolean()) {
+        iw.deleteDocuments(new TermQuery(allTerms.get(0)));
+      }
+      iw.commit();
+      final IndexReader reader = iw.getReader();
+      final IndexSearcher searcher = newSearcher(reader);
+      iw.close();
+
+      if (reader.numDocs() == 0) {
+        // may occasionally happen if all documents got the same term
+        IOUtils.close(reader, dir);
+        continue;
+      }
+
+      for (int i = 0; i < 100; ++i) {
+        final float boost = random().nextFloat() * 10;
+        final int numQueryTerms = TestUtil.nextInt(random(), 1, 1 << TestUtil.nextInt(random(), 1, 8));
+        List<Term> queryTerms = new ArrayList<>();
+        for (int j = 0; j < numQueryTerms; ++j) {
+          queryTerms.add(allTerms.get(random().nextInt(allTerms.size())));
+        }
+        final BooleanQuery.Builder bq = new BooleanQuery.Builder();
+        for (Term t : queryTerms) {
+          bq.add(new TermQuery(t), Occur.SHOULD);
+        }
+        final Query q1 = new ConstantScoreQuery(bq.build());
+        final Query q2 = new TermsQuery(queryTerms);
+        assertSameMatches(searcher, new BoostQuery(q1, boost), new BoostQuery(q2, boost), true);
+      }
+
+      reader.close();
+      dir.close();
+    }
+  }
+
+  private void assertSameMatches(IndexSearcher searcher, Query q1, Query q2, boolean scores) throws IOException {
+    final int maxDoc = searcher.getIndexReader().maxDoc();
+    final TopDocs td1 = searcher.search(q1, maxDoc, scores ? Sort.RELEVANCE : Sort.INDEXORDER);
+    final TopDocs td2 = searcher.search(q2, maxDoc, scores ? Sort.RELEVANCE : Sort.INDEXORDER);
+    assertEquals(td1.totalHits, td2.totalHits);
+    for (int i = 0; i < td1.scoreDocs.length; ++i) {
+      assertEquals(td1.scoreDocs[i].doc, td2.scoreDocs[i].doc);
+      if (scores) {
+        assertEquals(td1.scoreDocs[i].score, td2.scoreDocs[i].score, 10e-7);
+      }
+    }
+  }
+
+  private TermsQuery termsQuery(boolean singleField, Term...terms) {
+    return termsQuery(singleField, Arrays.asList(terms));
+  }
+
+  private TermsQuery termsQuery(boolean singleField, Collection<Term> termList) {
+    if (!singleField) {
+      return new TermsQuery(new ArrayList<>(termList));
+    }
+    final TermsQuery filter;
+    List<BytesRef> bytes = new ArrayList<>();
+    String field = null;
+    for (Term term : termList) {
+        bytes.add(term.bytes());
+        if (field != null) {
+          assertEquals(term.field(), field);
+        }
+        field = term.field();
+    }
+    assertNotNull(field);
+    filter = new TermsQuery(field, bytes);
+    return filter;
+  }
+
+  public void testHashCodeAndEquals() {
+    int num = atLeast(100);
+    final boolean singleField = random().nextBoolean();
+    List<Term> terms = new ArrayList<>();
+    Set<Term> uniqueTerms = new HashSet<>();
+    for (int i = 0; i < num; i++) {
+      String field = "field" + (singleField ? "1" : random().nextInt(100));
+      String string = TestUtil.randomRealisticUnicodeString(random());
+      terms.add(new Term(field, string));
+      uniqueTerms.add(new Term(field, string));
+      TermsQuery left = termsQuery(singleField ? random().nextBoolean() : false, uniqueTerms);
+      Collections.shuffle(terms, random());
+      TermsQuery right = termsQuery(singleField ? random().nextBoolean() : false, terms);
+      assertEquals(right, left);
+      assertEquals(right.hashCode(), left.hashCode());
+      if (uniqueTerms.size() > 1) {
+        List<Term> asList = new ArrayList<>(uniqueTerms);
+        asList.remove(0);
+        TermsQuery notEqual = termsQuery(singleField ? random().nextBoolean() : false, asList);
+        assertFalse(left.equals(notEqual));
+        assertFalse(right.equals(notEqual));
+      }
+    }
+
+    TermsQuery tq1 = new TermsQuery(new Term("thing", "apple"));
+    TermsQuery tq2 = new TermsQuery(new Term("thing", "orange"));
+    assertFalse(tq1.hashCode() == tq2.hashCode());
+
+    // different fields with the same term should have differing hashcodes
+    tq1 = new TermsQuery(new Term("thing1", "apple"));
+    tq2 = new TermsQuery(new Term("thing2", "apple"));
+    assertFalse(tq1.hashCode() == tq2.hashCode());
+  }
+
+  public void testSingleFieldEquals() {
+    // Two terms with the same hash code
+    assertEquals("AaAaBB".hashCode(), "BBBBBB".hashCode());
+    TermsQuery left = termsQuery(true, new Term("id", "AaAaAa"), new Term("id", "AaAaBB"));
+    TermsQuery right = termsQuery(true, new Term("id", "AaAaAa"), new Term("id", "BBBBBB"));
+    assertFalse(left.equals(right));
+  }
+
+  public void testToString() {
+    TermsQuery termsQuery = new TermsQuery(new Term("field1", "a"),
+                                              new Term("field1", "b"),
+                                              new Term("field1", "c"));
+    assertEquals("field1:a field1:b field1:c", termsQuery.toString());
+  }
+
+  public void testDedup() {
+    Query query1 = new TermsQuery(new Term("foo", "bar"));
+    Query query2 = new TermsQuery(new Term("foo", "bar"), new Term("foo", "bar"));
+    QueryUtils.checkEqual(query1, query2);
+  }
+
+  public void testOrderDoesNotMatter() {
+    // order of terms if different
+    Query query1 = new TermsQuery(new Term("foo", "bar"), new Term("foo", "baz"));
+    Query query2 = new TermsQuery(new Term("foo", "baz"), new Term("foo", "bar"));
+    QueryUtils.checkEqual(query1, query2);
+
+    // order of fields is different
+    query1 = new TermsQuery(new Term("foo", "bar"), new Term("bar", "bar"));
+    query2 = new TermsQuery(new Term("bar", "bar"), new Term("foo", "bar"));
+    QueryUtils.checkEqual(query1, query2);
+  }
+
+  public void testRamBytesUsed() {
+    List<Term> terms = new ArrayList<>();
+    final int numTerms = 1000 + random().nextInt(1000);
+    for (int i = 0; i < numTerms; ++i) {
+      terms.add(new Term("f", RandomStrings.randomUnicodeOfLength(random(), 10)));
+    }
+    TermsQuery query = new TermsQuery(terms);
+    final long actualRamBytesUsed = RamUsageTester.sizeOf(query);
+    final long expectedRamBytesUsed = query.ramBytesUsed();
+    // error margin within 5%
+    assertEquals(actualRamBytesUsed, expectedRamBytesUsed, actualRamBytesUsed / 20);
+  }
+
+  private static class TermsCountingDirectoryReaderWrapper extends FilterDirectoryReader {
+
+    private final AtomicInteger counter;
+    
+    public TermsCountingDirectoryReaderWrapper(DirectoryReader in, AtomicInteger counter) throws IOException {
+      super(in, new TermsCountingSubReaderWrapper(counter));
+      this.counter = counter;
+    }
+
+    private static class TermsCountingSubReaderWrapper extends SubReaderWrapper {
+      private final AtomicInteger counter;
+
+      public TermsCountingSubReaderWrapper(AtomicInteger counter) {
+        this.counter = counter;
+      }
+
+      @Override
+      public LeafReader wrap(LeafReader reader) {
+        return new TermsCountingLeafReaderWrapper(reader, counter);
+      }
+    }
+
+    private static class TermsCountingLeafReaderWrapper extends FilterLeafReader {
+
+      private final AtomicInteger counter;
+
+      public TermsCountingLeafReaderWrapper(LeafReader in, AtomicInteger counter) {
+        super(in);
+        this.counter = counter;
+      }
+
+      @Override
+      public Fields fields() throws IOException {
+        return new FilterFields(in.fields()) {
+          @Override
+          public Terms terms(String field) throws IOException {
+            final Terms in = this.in.terms(field);
+            if (in == null) {
+              return null;
+            }
+            return new FilterTerms(in) {
+              @Override
+              public TermsEnum iterator() throws IOException {
+                counter.incrementAndGet();
+                return super.iterator();
+              }
+            };
+          }
+        };
+      }
+      
+    }
+
+    @Override
+    protected DirectoryReader doWrapDirectoryReader(DirectoryReader in) throws IOException {
+      return new TermsCountingDirectoryReaderWrapper(in, counter);
+    }
+
+  }
+  
+  public void testBinaryToString() {
+    TermsQuery query = new TermsQuery(new Term("field", new BytesRef(new byte[] { (byte) 0xff, (byte) 0xfe })));
+    assertEquals("field:[ff fe]", query.toString());
+  }
+
+  public void testIsConsideredCostlyByQueryCache() throws IOException {
+    Query query = new TermsQuery(new Term("foo", "bar"), new Term("foo", "baz"));
+    query = query.rewrite(new MultiReader());
+    UsageTrackingQueryCachingPolicy policy = new UsageTrackingQueryCachingPolicy();
+    assertFalse(policy.shouldCache(query));
+    policy.onUse(query);
+    policy.onUse(query);
+    // cached after two uses
+    assertTrue(policy.shouldCache(query));
+  }
+}

