lucene-java-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Wei Wang <welshw...@gmail.com>
Subject Re: Filter based on the sum of values of two fields
Date Wed, 27 Mar 2013 10:46:09 GMT
Hi Yann-Erwan,

Thank you for the detailed reply. Your idea seems reasonable. I will
give it a try for our environment settings.

Wei

On Tue, Mar 26, 2013 at 5:22 PM, Yann-Erwan Perio <ye.perio@gmail.com> wrote:
> On Sun, Mar 24, 2013 at 10:46 AM, Wei Wang <welshwang@gmail.com> wrote:
>
> Hi,
>
>> For example, assume we have fields F1 and F2, we would like to find
>> all documents with condition F1+F2 > 5.0. This filter may be combined
>> with other filters to form a BooleanFilter.
>>
>> The question is, is there any way to construct an efficient filter to do this?
>
> I don't know - but the API looked interesting, so I gave it a try (see
> below). I had never worked with search filters before writing that
> code, so please proceed with caution, as I am not sure of many things
> (iteration of all documents, treatment of deleted documents, what is
> that "acceptDocs" variable, what threading constraints to respect...).
>
> ---
> // add your package declaration
>
>
> import static org.junit.Assert.assertEquals;
> import static org.junit.Assert.assertTrue;
>
> import java.io.IOException;
> import java.util.Arrays;
> import java.util.HashSet;
> import java.util.Iterator;
> import java.util.Set;
> import java.util.TreeSet;
>
> import org.apache.lucene.analysis.Analyzer;
> import org.apache.lucene.analysis.standard.StandardAnalyzer;
> import org.apache.lucene.document.Document;
> import org.apache.lucene.document.Field;
> import org.apache.lucene.document.IntField;
> import org.apache.lucene.index.AtomicReader;
> import org.apache.lucene.index.AtomicReaderContext;
> import org.apache.lucene.index.DirectoryReader;
> import org.apache.lucene.index.IndexReader;
> import org.apache.lucene.index.IndexWriter;
> import org.apache.lucene.index.IndexWriterConfig;
> import org.apache.lucene.search.DocIdSet;
> import org.apache.lucene.search.DocIdSetIterator;
> import org.apache.lucene.search.FieldCache;
> import org.apache.lucene.search.FieldCache.Ints;
> import org.apache.lucene.search.Filter;
> import org.apache.lucene.search.IndexSearcher;
> import org.apache.lucene.search.MatchAllDocsQuery;
> import org.apache.lucene.search.TopDocs;
> import org.apache.lucene.store.Directory;
> import org.apache.lucene.store.RAMDirectory;
> import org.apache.lucene.util.Bits;
> import org.apache.lucene.util.Version;
> import org.junit.Before;
> import org.junit.Test;
>
> /**
>  * Self-contained JUnit test demonstrating a Lucene {@link Filter} that keeps
>  * only the documents whose two integer fields add up to more than a threshold.
>  *
>  * <p>{@link #setUp()} builds a small in-memory index; {@link #testSumFilter()}
>  * searches it with a {@code SumFilter} and checks the surviving documents.
>  */
> public class FilterTest {
>
>         private static final Version VERSION = Version.LUCENE_42;
>
>         private static final String FIELD_ID = "id";
>         private static final String FIELD_ALPHA = "alpha";
>         private static final String FIELD_OMEGA = "omega";
>
>         /** Exclusive lower bound: a document matches when alpha + omega > SUM_THRESHOLD. */
>         private static final int SUM_THRESHOLD = 5;
>         private static final int[] VALUES_ALPHA = new int[] { 1, 2, 3, 4, 5 };
>         private static final int[] VALUES_OMEGA = new int[] { 5, 0, 5, 0, 5 };
>         /** Values of the "id" field for the documents whose sum exceeds the threshold (6, 8, 10). */
>         private static final Set<Integer> EXPECTED_MATCHED_DOCUMENT_IDS =
>                         new HashSet<Integer>(Arrays.asList(0, 2, 4));
>
>         private Directory directory;
>
>         /**
>          * Creates a fresh in-memory index holding one document per entry of
>          * {@code VALUES_ALPHA} / {@code VALUES_OMEGA}, each with stored
>          * "id", "alpha" and "omega" int fields.
>          *
>          * @throws IOException if the index cannot be written
>          */
>         @Before
>         public void setUp() throws IOException {
>                 directory = new RAMDirectory();
>
>                 Analyzer analyzer = new StandardAnalyzer(VERSION);
>                 IndexWriterConfig config = new IndexWriterConfig(VERSION, analyzer);
>                 config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
>                 IndexWriter writer = new IndexWriter(directory, config);
>
>                 // Close the writer even if adding a document fails, so that the
>                 // write lock on the directory is always released.
>                 try {
>                         for (int ii = 0; ii < VALUES_ALPHA.length; ii++) {
>                                 Document doc = new Document();
>                                 doc.add(new IntField(FIELD_ID, ii, IntField.Store.YES));
>                                 doc.add(new IntField(FIELD_ALPHA, VALUES_ALPHA[ii], IntField.Store.YES));
>                                 doc.add(new IntField(FIELD_OMEGA, VALUES_OMEGA[ii], IntField.Store.YES));
>                                 writer.addDocument(doc);
>                         }
>                 } finally {
>                         writer.close();
>                 }
>         }
>
>         /**
>          * Runs a match-all query restricted by {@code SumFilter} and verifies
>          * both the number of hits and that every hit really satisfies
>          * alpha + omega > SUM_THRESHOLD.
>          *
>          * @throws IOException if the index cannot be read
>          */
>         @Test
>         public void testSumFilter() throws IOException {
>                 IndexReader reader = DirectoryReader.open(directory);
>
>                 // The search itself is inside the try block so that the reader
>                 // is closed even when searching throws.
>                 try {
>                         IndexSearcher searcher = new IndexSearcher(reader);
>                         TopDocs results = searcher.search(new MatchAllDocsQuery(),
>                                         new SumFilter(SUM_THRESHOLD), VALUES_ALPHA.length);
>
>                         assertEquals(EXPECTED_MATCHED_DOCUMENT_IDS.size(), results.totalHits);
>                         for (int ii = 0; ii < results.scoreDocs.length; ii++) {
>                                 int docId = results.scoreDocs[ii].doc;
>                                 Document doc = reader.document(docId);
>                                 int idValue = doc.getField(FIELD_ID).numericValue().intValue();
>                                 int alphaValue = doc.getField(FIELD_ALPHA).numericValue().intValue();
>                                 int omegaValue = doc.getField(FIELD_OMEGA).numericValue().intValue();
>
>                                 assertTrue(EXPECTED_MATCHED_DOCUMENT_IDS.contains(idValue));
>                                 assertTrue(alphaValue + omegaValue > SUM_THRESHOLD);
>                         }
>                 } finally {
>                         reader.close();
>                 }
>         }
>
>         /**
>          * Filter accepting the documents for which the sum of the "alpha" and
>          * "omega" int fields is strictly greater than a given value.
>          */
>         private static final class SumFilter extends Filter {
>
>                 /** Exclusive lower bound for alpha + omega. */
>                 private final int minValue;
>
>                 /**
>                  * @param minValue a document matches only when its sum is strictly
>                  *                 greater than this value
>                  */
>                 public SumFilter(int minValue) {
>                         this.minValue = minValue;
>                 }
>
>                 /**
>                  * Scans every document of the given segment and collects those
>                  * whose field sum exceeds {@code minValue}.
>                  *
>                  * @param context    the segment to filter
>                  * @param acceptDocs documents allowed to match, or {@code null}
>                  *                   when every document of the segment is allowed
>                  * @return the segment-relative ids of the matching documents
>                  * @throws IOException if the field values cannot be loaded
>                  */
>                 @Override
>                 public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs)
>                                 throws IOException {
>                         AtomicReader reader = context.reader();
>                         Ints alphaCache = FieldCache.DEFAULT.getInts(reader, FIELD_ALPHA, false);
>                         Ints omegaCache = FieldCache.DEFAULT.getInts(reader, FIELD_OMEGA, false);
>                         SimpleDocIdSet docIdSet = new SimpleDocIdSet();
>
>                         // NOTE(review): a document without a value for one of the
>                         // fields is presumably read as 0 from the field cache - confirm.
>                         int maxDoc = reader.maxDoc();
>                         for (int docId = 0; docId < maxDoc; docId++) {
>                                 // Skip documents the caller has excluded (for
>                                 // instance deleted ones); the filter must not
>                                 // hand them back.
>                                 if (acceptDocs != null && !acceptDocs.get(docId)) {
>                                         continue;
>                                 }
>                                 // Add as long so that two large ints cannot
>                                 // overflow into a negative sum.
>                                 long sum = (long) alphaCache.get(docId) + omegaCache.get(docId);
>                                 if (sum > minValue) {
>                                         docIdSet.add(docId);
>                                 }
>                         }
>
>                         return docIdSet;
>                 }
>         }
>
>         /**
>          * Minimal {@link DocIdSet} backed by a sorted set of document ids.
>          */
>         private static final class SimpleDocIdSet extends DocIdSet {
>
>                 // Sorted, because a DocIdSetIterator must return ids in increasing order.
>                 private final TreeSet<Integer> sortedDocIdSet = new TreeSet<Integer>();
>
>                 /**
>                  * Records a matching document.
>                  *
>                  * @param docId the segment-relative document id
>                  */
>                 public void add(int docId) {
>                         sortedDocIdSet.add(docId);
>                 }
>
>                 /**
>                  * @return a new iterator over the recorded ids, in ascending order
>                  */
>                 @Override
>                 public DocIdSetIterator iterator() throws IOException {
>                         return new DocIdSetIterator() {
>
>                                 private final Iterator<Integer> sortedDocIdSetIterator =
>                                                 sortedDocIdSet.iterator();
>                                 // -1 before the first call to nextDoc()/advance(),
>                                 // NO_MORE_DOCS once exhausted, else the current id.
>                                 private int currentDocId = -1;
>
>                                 /**
>                                  * Moves forward until an id greater than or equal to
>                                  * {@code target} is reached.
>                                  *
>                                  * @return that id, or NO_MORE_DOCS if there is none
>                                  */
>                                 @Override
>                                 public int advance(int target) throws IOException {
>                                         int doc;
>                                         // Always steps at least once; terminates because
>                                         // nextDoc() ends with NO_MORE_DOCS
>                                         // (Integer.MAX_VALUE), which is >= any target.
>                                         do {
>                                                 doc = nextDoc();
>                                         } while (doc < target);
>                                         return doc;
>                                 }
>
>                                 /**
>                                  * @return the id the iterator is positioned on; the
>                                  *         last matching id stays visible until
>                                  *         nextDoc()/advance() moves past it
>                                  */
>                                 @Override
>                                 public int docID() {
>                                         return currentDocId;
>                                 }
>
>                                 /**
>                                  * @return the next recorded id, or NO_MORE_DOCS once
>                                  *         every id has been returned
>                                  */
>                                 @Override
>                                 public int nextDoc() throws IOException {
>                                         if (sortedDocIdSetIterator.hasNext()) {
>                                                 currentDocId = sortedDocIdSetIterator.next();
>                                         } else {
>                                                 // Remember exhaustion so that docID()
>                                                 // reports it as well.
>                                                 currentDocId = NO_MORE_DOCS;
>                                         }
>                                         return currentDocId;
>                                 }
>                         };
>                 }
>         }
>
> }
>
> ---
>
> Regards,
> Yep.
>
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org
> For additional commands, e-mail: java-user-help@lucene.apache.org
>

---------------------------------------------------------------------
To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org
For additional commands, e-mail: java-user-help@lucene.apache.org


Mime
View raw message