lucene-java-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Brendan Grainger <brendan.grain...@gmail.com>
Subject Re: lucene 4.0.0
Date Fri, 24 May 2013 22:59:13 GMT
According to
http://lucene.apache.org/core/4_0_0/core/org/apache/lucene/index/IndexReader.html#leaves()
it is.


On Fri, May 24, 2013 at 6:49 PM, mary meriem <mel-meriem@hotmail.fr> wrote:

>
>
> I have a problem with AtomicReader reader =
> indexReader.leaves().get(0).reader(); is this still current in Lucene 4.0.0?
>
> > Date: Fri, 24 May 2013 13:41:05 -0400
> > Subject: Re: lucene 4.0.0
> > From: brendan.grainger@gmail.com
> > To: java-user@lucene.apache.org
> >
> > Hi Mary,
> >
> > I've been out of the loop with Lucene and Java for a bit, so this might
> > not be entirely correct, but here is an example of how it might be
> accomplished
> > (also you can see it in this gist:
> https://gist.github.com/rainkinz/5645139).
> > The output looks like this:
> >
> > ** Also note I'm using Lucene 4.3, however I set the version to be
> > Version.LUCENE_40 for you. I don't think the APIs are different in this
> > case.
> >
> > ---------------------------------------------------
> > Term 'mary' appears 5 in the index
> > in doc 0 the term mary appears 1 times at positions 1
> > in doc 2 the term mary appears 1 times at positions 3
> > in doc 4 the term mary appears 1 times at positions 1
> > in doc 8 the term mary appears 1 times at positions 3
> > in doc 9 the term mary appears 1 times at positions 6
> > etc
> >
> >
> >
> > import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
> > import org.apache.lucene.document.Document;
> > import org.apache.lucene.document.Field;
> > import org.apache.lucene.document.TextField;
> > import org.apache.lucene.index.*;
> > import org.apache.lucene.store.Directory;
> > import org.apache.lucene.store.RAMDirectory;
> > import org.apache.lucene.util.BytesRef;
> > import org.apache.lucene.util.Version;
> >
> > import java.io.IOException;
> > import java.util.Random;
> >
> > public class CountingTerms {
> >
> >   private static final Version VERSION = Version.LUCENE_40;
> >
> >   private static final String[] terms = "hi am mary and i have a problem
> > with lucene".split(" ");
> >
> >   private final Directory indexDir = new RAMDirectory();
> >
> >   private String randomTerms() {
> >     Random rand = new Random();
> >     StringBuilder sb = new StringBuilder();
> >     int numTerms = rand.nextInt(terms.length);
> >     for (int i = 0; i < numTerms; i++) {
> >       sb.append(terms[rand.nextInt(terms.length)]).append(" ");
> >     }
> >     return sb.toString();
> >   }
> >
> >   private void addDocs(IndexWriter writer) throws IOException {
> >     for (int i = 0; i < 10; i++) {
> >       Document doc = new Document();
> >       String randomStr = randomTerms();
> >       puts("Adding random str: " + randomStr);
> >       IndexableField field = new TextField("text", randomStr,
> > Field.Store.YES);
> >       doc.add(field);
> >       writer.addDocument(doc);
> >     }
> >   }
> >
> >   private void countTerms() throws IOException {
> >     DirectoryReader indexReader = DirectoryReader.open(indexDir);
> >     AtomicReader reader = indexReader.leaves().get(0).reader();
> >
> >     Fields fields = reader.fields();
> >     Terms terms = fields.terms("text");
> >     TermsEnum termsEnum = terms.iterator(null);
> >     BytesRef term;
> >
> >     while ((term = termsEnum.next()) != null) {
> >       puts("---------------------------------------------------");
> >       puts("Term '" + term.utf8ToString() + "' appears " +
> > termsEnum.totalTermFreq() + " in the index");
> >       DocsAndPositionsEnum docPosEnum =
> > termsEnum.docsAndPositions(reader.getLiveDocs(),
> >               null,
> >               DocsAndPositionsEnum.FLAG_OFFSETS);
> >       int docid;
> >       while ((docid = docPosEnum.nextDoc()) !=
> > DocsAndPositionsEnum.NO_MORE_DOCS) {
> >
> >         int freq = docPosEnum.freq();
> >         int[] positions = new int[freq];
> >         for (int i = 0; i < freq; i++) {
> >           int position = docPosEnum.nextPosition();
> >           positions[i]=position;
> >         }
> >
> >         puts("in doc " + docid + " the term " + term.utf8ToString() + "
> > appears " + freq + " times at positions " + ppArray(positions));
> >       }
> >
> >     }
> >
> >     indexReader.close();
> >   }
> >
> >   private String ppArray(int[] arr) {
> >     StringBuilder sb = new StringBuilder();
> >     for (int i = 0; i < arr.length; i++) {
> >       sb.append(arr[i]);
> >       if (i + 1 < arr.length) sb.append(", ");
> >     }
> >     return sb.toString();
> >   }
> >
> >   private void puts(Object msg) {
> >     System.out.println(msg);
> >   }
> >
> >   private void index() throws IOException {
> >     IndexWriter indexWriter = new IndexWriter(indexDir,
> >             new IndexWriterConfig(VERSION, new
> > WhitespaceAnalyzer(VERSION)));
> >     addDocs(indexWriter);
> >     indexWriter.commit();
> >     indexWriter.close();
> >   }
> >
> >   public static void main(String[] args) throws Exception {
> >     CountingTerms ct = new CountingTerms();
> >     ct.index();
> >     ct.countTerms();
> >   }
> >
> > }
> >
> >
> >
> > On Fri, May 24, 2013 at 12:14 PM, mary meriem <mel-meriem@hotmail.fr>
> wrote:
> >
> > > Hi, I am Mary and I have a problem with Lucene. I am working with Lucene
> > > 4.0.0. My problem is: how can I list all the terms and display the
> > > position of each term in each document, along with their frequency? Please help.
> > >
> >
> >
> >
> >
> > --
> > Brendan Grainger
> > www.kuripai.com
>
>



-- 
Brendan Grainger
www.kuripai.com

Mime
  • Unnamed multipart/alternative (inline, None, 0 bytes)
View raw message