Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 1A64B200D26 for ; Thu, 5 Oct 2017 14:13:04 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id 18C281609E1; Thu, 5 Oct 2017 12:13:04 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 0FC39160BDA for ; Thu, 5 Oct 2017 14:13:02 +0200 (CEST) Received: (qmail 61463 invoked by uid 500); 5 Oct 2017 11:57:01 -0000 Mailing-List: contact commits-help@lucene.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@lucene.apache.org Delivered-To: mailing list commits@lucene.apache.org Received: (qmail 61323 invoked by uid 99); 5 Oct 2017 11:57:01 -0000 Received: from Unknown (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 05 Oct 2017 11:57:01 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 83E9FF5CB2; Thu, 5 Oct 2017 11:56:23 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: ab@apache.org To: commits@lucene.apache.org Date: Thu, 05 Oct 2017 11:56:57 -0000 Message-Id: In-Reply-To: <37920e774e8342a9839dc9955c9e23a7@git.apache.org> References: <37920e774e8342a9839dc9955c9e23a7@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [36/50] lucene-solr:feature/autoscaling_72: LUCENE-7982: add NormsFieldExistsQuery archived-at: Thu, 05 Oct 2017 12:13:04 -0000 LUCENE-7982: add NormsFieldExistsQuery Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/0b11ee55 Tree: 
http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/0b11ee55 Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/0b11ee55 Branch: refs/heads/feature/autoscaling_72 Commit: 0b11ee5578c7930137d32c424d1173e23e3e158c Parents: 3012239 Author: Mike McCandless Authored: Wed Oct 4 10:20:54 2017 -0400 Committer: Mike McCandless Committed: Wed Oct 4 10:20:54 2017 -0400 ---------------------------------------------------------------------- lucene/CHANGES.txt | 3 + .../lucene/search/NormsFieldExistsQuery.java | 79 ++++++++ .../search/TestNormsFieldExistsQuery.java | 197 +++++++++++++++++++ 3 files changed, 279 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0b11ee55/lucene/CHANGES.txt ---------------------------------------------------------------------- diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index f3f04ed..7c35503 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -41,6 +41,9 @@ New Features * LUCENE-7975: Change the default taxonomy facets cache to a faster byte[] (UTF-8) based cache. +* LUCENE-7982: A new NormsFieldExistsQuery matches documents that have + norms in a specified field (Colin Goodheart-Smithe via Mike McCandless) + Optimizations * LUCENE-7905: Optimize how OrdinalMap (used by http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0b11ee55/lucene/core/src/java/org/apache/lucene/search/NormsFieldExistsQuery.java ---------------------------------------------------------------------- diff --git a/lucene/core/src/java/org/apache/lucene/search/NormsFieldExistsQuery.java b/lucene/core/src/java/org/apache/lucene/search/NormsFieldExistsQuery.java new file mode 100644 index 0000000..be0311e --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/NormsFieldExistsQuery.java @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.search; + + +import java.io.IOException; +import java.util.Objects; + +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.LeafReaderContext; + +/** + * A {@link Query} that matches documents that have a value for a given field + * as reported by field norms. This will not work for fields that omit norms, + * e.g. {@link StringField}. + */ +public final class NormsFieldExistsQuery extends Query { + + private final String field; + + /** Create a query that will match documents that have a value for the given + * {@code field}. 
*/ + public NormsFieldExistsQuery(String field) { + this.field = Objects.requireNonNull(field); + } + + public String getField() { + return field; + } + + @Override + public boolean equals(Object other) { + return sameClassAs(other) && + field.equals(((NormsFieldExistsQuery) other).field); + } + + @Override + public int hashCode() { + return 31 * classHash() + field.hashCode(); + } + + @Override + public String toString(String field) { + return "NormsFieldExistsQuery [field=" + this.field + "]"; + } + + @Override + public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException { + return new ConstantScoreWeight(this, boost) { + @Override + public Scorer scorer(LeafReaderContext context) throws IOException { + FieldInfos fieldInfos = context.reader().getFieldInfos(); + FieldInfo fieldInfo = fieldInfos.fieldInfo(field); + if (fieldInfo == null || fieldInfo.hasNorms() == false) { + return null; + } + LeafReader reader = context.reader(); + DocIdSetIterator iterator = reader.getNormValues(field); + return new ConstantScoreScorer(this, score(), iterator); + } + }; + } +} http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0b11ee55/lucene/core/src/test/org/apache/lucene/search/TestNormsFieldExistsQuery.java ---------------------------------------------------------------------- diff --git a/lucene/core/src/test/org/apache/lucene/search/TestNormsFieldExistsQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestNormsFieldExistsQuery.java new file mode 100644 index 0000000..fba9e2f --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/search/TestNormsFieldExistsQuery.java @@ -0,0 +1,197 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.search; + + +import java.io.IOException; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.StringField; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; + +public class TestNormsFieldExistsQuery extends LuceneTestCase { + + public void testRandom() throws IOException { + final int iters = atLeast(10); + for (int iter = 0; iter < iters; ++iter) { + Directory dir = newDirectory(); + RandomIndexWriter iw = new RandomIndexWriter(random(), dir); + final int numDocs = atLeast(100); + for (int i = 0; i < numDocs; ++i) { + Document doc = new Document(); + final boolean hasValue = random().nextBoolean(); + if (hasValue) { + doc.add(new TextField("text1", "value", Store.NO)); + doc.add(new StringField("has_value", "yes", Store.NO)); + } + doc.add(new StringField("f", random().nextBoolean() ? 
"yes" : "no", Store.NO)); + iw.addDocument(doc); + } + if (random().nextBoolean()) { + iw.deleteDocuments(new TermQuery(new Term("f", "no"))); + } + iw.commit(); + final IndexReader reader = iw.getReader(); + final IndexSearcher searcher = newSearcher(reader); + iw.close(); + + assertSameMatches(searcher, new TermQuery(new Term("has_value", "yes")), new NormsFieldExistsQuery("text1"), false); + + reader.close(); + dir.close(); + } + } + + public void testApproximation() throws IOException { + final int iters = atLeast(10); + for (int iter = 0; iter < iters; ++iter) { + Directory dir = newDirectory(); + RandomIndexWriter iw = new RandomIndexWriter(random(), dir); + final int numDocs = atLeast(100); + for (int i = 0; i < numDocs; ++i) { + Document doc = new Document(); + final boolean hasValue = random().nextBoolean(); + if (hasValue) { + doc.add(new TextField("text1", "value", Store.NO)); + doc.add(new StringField("has_value", "yes", Store.NO)); + } + doc.add(new StringField("f", random().nextBoolean() ? 
"yes" : "no", Store.NO)); + iw.addDocument(doc); + } + if (random().nextBoolean()) { + iw.deleteDocuments(new TermQuery(new Term("f", "no"))); + } + iw.commit(); + final IndexReader reader = iw.getReader(); + final IndexSearcher searcher = newSearcher(reader); + iw.close(); + + BooleanQuery.Builder ref = new BooleanQuery.Builder(); + ref.add(new TermQuery(new Term("f", "yes")), Occur.MUST); + ref.add(new TermQuery(new Term("has_value", "yes")), Occur.FILTER); + + BooleanQuery.Builder bq1 = new BooleanQuery.Builder(); + bq1.add(new TermQuery(new Term("f", "yes")), Occur.MUST); + bq1.add(new NormsFieldExistsQuery("text1"), Occur.FILTER); + assertSameMatches(searcher, ref.build(), bq1.build(), true); + + reader.close(); + dir.close(); + } + } + + public void testScore() throws IOException { + final int iters = atLeast(10); + for (int iter = 0; iter < iters; ++iter) { + Directory dir = newDirectory(); + RandomIndexWriter iw = new RandomIndexWriter(random(), dir); + final int numDocs = atLeast(100); + for (int i = 0; i < numDocs; ++i) { + Document doc = new Document(); + final boolean hasValue = random().nextBoolean(); + if (hasValue) { + doc.add(new TextField("text1", "value", Store.NO)); + doc.add(new StringField("has_value", "yes", Store.NO)); + } + doc.add(new StringField("f", random().nextBoolean() ? 
"yes" : "no", Store.NO)); + iw.addDocument(doc); + } + if (random().nextBoolean()) { + iw.deleteDocuments(new TermQuery(new Term("f", "no"))); + } + iw.commit(); + final IndexReader reader = iw.getReader(); + final IndexSearcher searcher = newSearcher(reader); + iw.close(); + + final float boost = random().nextFloat() * 10; + final Query ref = new BoostQuery(new ConstantScoreQuery(new TermQuery(new Term("has_value", "yes"))), boost); + + final Query q1 = new BoostQuery(new NormsFieldExistsQuery("text1"), boost); + assertSameMatches(searcher, ref, q1, true); + + reader.close(); + dir.close(); + } + } + + public void testMissingField() throws IOException { + Directory dir = newDirectory(); + RandomIndexWriter iw = new RandomIndexWriter(random(), dir); + iw.addDocument(new Document()); + iw.commit(); + final IndexReader reader = iw.getReader(); + final IndexSearcher searcher = newSearcher(reader); + iw.close(); + assertEquals(0, searcher.search(new NormsFieldExistsQuery("f"), 1).totalHits); + reader.close(); + dir.close(); + } + + public void testAllDocsHaveField() throws IOException { + Directory dir = newDirectory(); + RandomIndexWriter iw = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + doc.add(new TextField("f", "value", Store.NO)); + iw.addDocument(doc); + iw.commit(); + final IndexReader reader = iw.getReader(); + final IndexSearcher searcher = newSearcher(reader); + iw.close(); + assertEquals(1, searcher.search(new NormsFieldExistsQuery("f"), 1).totalHits); + reader.close(); + dir.close(); + } + + public void testFieldExistsButNoDocsHaveField() throws IOException { + Directory dir = newDirectory(); + RandomIndexWriter iw = new RandomIndexWriter(random(), dir); + // 1st segment has the field, but 2nd one does not + Document doc = new Document(); + doc.add(new TextField("f", "value", Store.NO)); + iw.addDocument(doc); + iw.commit(); + iw.addDocument(new Document()); + iw.commit(); + final IndexReader reader = iw.getReader(); + final 
IndexSearcher searcher = newSearcher(reader); + iw.close(); + assertEquals(1, searcher.search(new NormsFieldExistsQuery("f"), 1).totalHits); + reader.close(); + dir.close(); + } + + private void assertSameMatches(IndexSearcher searcher, Query q1, Query q2, boolean scores) throws IOException { + final int maxDoc = searcher.getIndexReader().maxDoc(); + final TopDocs td1 = searcher.search(q1, maxDoc, scores ? Sort.RELEVANCE : Sort.INDEXORDER); + final TopDocs td2 = searcher.search(q2, maxDoc, scores ? Sort.RELEVANCE : Sort.INDEXORDER); + assertEquals(td1.totalHits, td2.totalHits); + for (int i = 0; i < td1.scoreDocs.length; ++i) { + assertEquals(td1.scoreDocs[i].doc, td2.scoreDocs[i].doc); + if (scores) { + assertEquals(td1.scoreDocs[i].score, td2.scoreDocs[i].score, 10e-7); + } + } + } +}