Mailing-List: contact commits-help@lucene.apache.org; run by ezmlm
Precedence: bulk
Reply-To: dev@lucene.apache.org
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
From: ab@apache.org
To: commits@lucene.apache.org
Date: Thu, 05 Oct 2017 11:56:57 -0000
Message-Id: <dc6f39745d004bed929de1838272cbfa@git.apache.org>
In-Reply-To: <37920e774e8342a9839dc9955c9e23a7@git.apache.org>
References: <37920e774e8342a9839dc9955c9e23a7@git.apache.org>
Subject: [36/50] lucene-solr:feature/autoscaling_72: LUCENE-7982: add
 NormsFieldExistsQuery
archived-at: Thu, 05 Oct 2017 12:13:04 -0000

LUCENE-7982: add NormsFieldExistsQuery


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/0b11ee55
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/0b11ee55
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/0b11ee55

Branch: refs/heads/feature/autoscaling_72
Commit: 0b11ee5578c7930137d32c424d1173e23e3e158c
Parents: 3012239
Author: Mike McCandless <mikemccand@apache.org>
Authored: Wed Oct 4 10:20:54 2017 -0400
Committer: Mike McCandless <mikemccand@apache.org>
Committed: Wed Oct 4 10:20:54 2017 -0400

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |   3 +
 .../lucene/search/NormsFieldExistsQuery.java    |  79 ++++++++
 .../search/TestNormsFieldExistsQuery.java       | 197 +++++++++++++++++++
 3 files changed, 279 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0b11ee55/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index f3f04ed..7c35503 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -41,6 +41,9 @@ New Features
 * LUCENE-7975: Change the default taxonomy facets cache to a faster
   byte[] (UTF-8) based cache.
 
+* LUCENE-7982: A new NormsFieldExistsQuery matches documents that have
+  norms in a specified field (Colin Goodheart-Smithe via Mike McCandless)
+
 Optimizations
 
 * LUCENE-7905: Optimize how OrdinalMap (used by

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0b11ee55/lucene/core/src/java/org/apache/lucene/search/NormsFieldExistsQuery.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/NormsFieldExistsQuery.java b/lucene/core/src/java/org/apache/lucene/search/NormsFieldExistsQuery.java
new file mode 100644
index 0000000..be0311e
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/search/NormsFieldExistsQuery.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search;
+
+
+import java.io.IOException;
+import java.util.Objects;
+
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.LeafReaderContext;
+
+/**
+ * A {@link Query} that matches documents that have a value for a given field
+ * as reported by field norms.  This will not work for fields that omit norms,
+ * e.g. {@link org.apache.lucene.document.StringField}.
+ */
+public final class NormsFieldExistsQuery extends Query {
+
+  private final String field;
+
+  /** Create a query that will match documents that have a value for the given
+   *  {@code field}; a {@code null} field is rejected. */
+  public NormsFieldExistsQuery(String field) {
+    this.field = Objects.requireNonNull(field);
+  }
+
+  public String getField() { // the field name passed to the constructor
+    return field;
+  }
+
+  @Override
+  public boolean equals(Object other) { // equal when sameClassAs(other) holds and the field names match
+    return sameClassAs(other) &&
+           field.equals(((NormsFieldExistsQuery) other).field);
+  }
+
+  @Override
+  public int hashCode() { // combines classHash() with the field name's hash
+    return 31 * classHash() + field.hashCode();
+  }
+
+  @Override
+  public String toString(String field) { // the argument is unused; this query's own field is printed
+    return "NormsFieldExistsQuery [field=" + this.field + "]";
+  }
+
+  @Override
+  public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
+    return new ConstantScoreWeight(this, boost) {
+      @Override
+      public Scorer scorer(LeafReaderContext context) throws IOException {
+        FieldInfos fieldInfos = context.reader().getFieldInfos();
+        FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
+        if (fieldInfo == null || fieldInfo.hasNorms() == false) { // field unknown to this segment, or it has no norms
+          return null;
+        }
+        LeafReader reader = context.reader();
+        DocIdSetIterator iterator = reader.getNormValues(field); // the norm values themselves serve as the doc iterator
+        return new ConstantScoreScorer(this, score(), iterator);
+      }
+    };
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0b11ee55/lucene/core/src/test/org/apache/lucene/search/TestNormsFieldExistsQuery.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestNormsFieldExistsQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestNormsFieldExistsQuery.java
new file mode 100644
index 0000000..fba9e2f
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/search/TestNormsFieldExistsQuery.java
@@ -0,0 +1,197 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search;
+
+
+import java.io.IOException;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.LuceneTestCase;
+
+public class TestNormsFieldExistsQuery extends LuceneTestCase {
+
+  public void testRandom() throws IOException { // docs with "text1" also get has_value=yes, so both queries must agree
+    final int iters = atLeast(10);
+    for (int iter = 0; iter < iters; ++iter) {
+      Directory dir = newDirectory();
+      RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
+      final int numDocs = atLeast(100);
+      for (int i = 0; i < numDocs; ++i) {
+        Document doc = new Document();
+        final boolean hasValue = random().nextBoolean();
+        if (hasValue) {
+          doc.add(new TextField("text1", "value", Store.NO));
+          doc.add(new StringField("has_value", "yes", Store.NO));
+        }
+        doc.add(new StringField("f", random().nextBoolean() ? "yes" : "no", Store.NO));
+        iw.addDocument(doc);
+      }
+      if (random().nextBoolean()) {
+        iw.deleteDocuments(new TermQuery(new Term("f", "no")));
+      }
+      iw.commit();
+      final IndexReader reader = iw.getReader();
+      final IndexSearcher searcher = newSearcher(reader);
+      iw.close();
+
+      assertSameMatches(searcher, new TermQuery(new Term("has_value", "yes")), new NormsFieldExistsQuery("text1"), false);
+
+      reader.close();
+      dir.close();
+    }
+  }
+
+  public void testApproximation() throws IOException { // same comparison with the query as a FILTER clause beside a MUST clause
+    final int iters = atLeast(10);
+    for (int iter = 0; iter < iters; ++iter) {
+      Directory dir = newDirectory();
+      RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
+      final int numDocs = atLeast(100);
+      for (int i = 0; i < numDocs; ++i) {
+        Document doc = new Document();
+        final boolean hasValue = random().nextBoolean();
+        if (hasValue) {
+          doc.add(new TextField("text1", "value", Store.NO));
+          doc.add(new StringField("has_value", "yes", Store.NO));
+        }
+        doc.add(new StringField("f", random().nextBoolean() ? "yes" : "no", Store.NO));
+        iw.addDocument(doc);
+      }
+      if (random().nextBoolean()) {
+        iw.deleteDocuments(new TermQuery(new Term("f", "no")));
+      }
+      iw.commit();
+      final IndexReader reader = iw.getReader();
+      final IndexSearcher searcher = newSearcher(reader);
+      iw.close();
+
+      BooleanQuery.Builder ref = new BooleanQuery.Builder();
+      ref.add(new TermQuery(new Term("f", "yes")), Occur.MUST);
+      ref.add(new TermQuery(new Term("has_value", "yes")), Occur.FILTER);
+
+      BooleanQuery.Builder bq1 = new BooleanQuery.Builder();
+      bq1.add(new TermQuery(new Term("f", "yes")), Occur.MUST);
+      bq1.add(new NormsFieldExistsQuery("text1"), Occur.FILTER);
+      assertSameMatches(searcher, ref.build(), bq1.build(), true);
+
+      reader.close();
+      dir.close();
+    }
+  }
+
+  public void testScore() throws IOException { // same comparison under a random boost, with scores compared too
+    final int iters = atLeast(10);
+    for (int iter = 0; iter < iters; ++iter) {
+      Directory dir = newDirectory();
+      RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
+      final int numDocs = atLeast(100);
+      for (int i = 0; i < numDocs; ++i) {
+        Document doc = new Document();
+        final boolean hasValue = random().nextBoolean();
+        if (hasValue) {
+          doc.add(new TextField("text1", "value", Store.NO));
+          doc.add(new StringField("has_value", "yes", Store.NO));
+        }
+        doc.add(new StringField("f", random().nextBoolean() ? "yes" : "no", Store.NO));
+        iw.addDocument(doc);
+      }
+      if (random().nextBoolean()) {
+        iw.deleteDocuments(new TermQuery(new Term("f", "no")));
+      }
+      iw.commit();
+      final IndexReader reader = iw.getReader();
+      final IndexSearcher searcher = newSearcher(reader);
+      iw.close();
+
+      final float boost = random().nextFloat() * 10;
+      final Query ref = new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("has_value", "yes"))), boost);
+
+      final Query q1 = new BoostQuery(new NormsFieldExistsQuery("text1"), boost);
+      assertSameMatches(searcher, ref, q1, true);
+
+      reader.close();
+      dir.close();
+    }
+  }
+
+  public void testMissingField() throws IOException { // field "f" was never indexed: expect zero hits
+    Directory dir = newDirectory();
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
+    iw.addDocument(new Document());
+    iw.commit();
+    final IndexReader reader = iw.getReader();
+    final IndexSearcher searcher = newSearcher(reader);
+    iw.close();
+    assertEquals(0, searcher.search(new NormsFieldExistsQuery("f"), 1).totalHits);
+    reader.close();
+    dir.close();
+  }
+
+  public void testAllDocsHaveField() throws IOException { // the only doc has "f" as a TextField: expect one hit
+    Directory dir = newDirectory();
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
+    Document doc = new Document();
+    doc.add(new TextField("f", "value", Store.NO));
+    iw.addDocument(doc);
+    iw.commit();
+    final IndexReader reader = iw.getReader();
+    final IndexSearcher searcher = newSearcher(reader);
+    iw.close();
+    assertEquals(1, searcher.search(new NormsFieldExistsQuery("f"), 1).totalHits);
+    reader.close();
+    dir.close();
+  }
+
+  public void testFieldExistsButNoDocsHaveField() throws IOException {
+    Directory dir = newDirectory();
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
+    // the doc in the 1st commit has the field, the doc in the 2nd does not; only the former should match
+    Document doc = new Document();
+    doc.add(new TextField("f", "value", Store.NO));
+    iw.addDocument(doc);
+    iw.commit();
+    iw.addDocument(new Document());
+    iw.commit();
+    final IndexReader reader = iw.getReader();
+    final IndexSearcher searcher = newSearcher(reader);
+    iw.close();
+    assertEquals(1, searcher.search(new NormsFieldExistsQuery("f"), 1).totalHits);
+    reader.close();
+    dir.close();
+  }
+
+  private void assertSameMatches(IndexSearcher searcher, Query q1, Query q2, boolean scores) throws IOException { // equal hit counts, same doc order, and (if scores) equal scores
+    final int maxDoc = searcher.getIndexReader().maxDoc();
+    final TopDocs td1 = searcher.search(q1, maxDoc, scores ? Sort.RELEVANCE : Sort.INDEXORDER);
+    final TopDocs td2 = searcher.search(q2, maxDoc, scores ? Sort.RELEVANCE : Sort.INDEXORDER);
+    assertEquals(td1.totalHits, td2.totalHits);
+    for (int i = 0; i < td1.scoreDocs.length; ++i) {
+      assertEquals(td1.scoreDocs[i].doc, td2.scoreDocs[i].doc);
+      if (scores) {
+        assertEquals(td1.scoreDocs[i].score, td2.scoreDocs[i].score, 10e-7);
+      }
+    }
+  }
+}