lucene-java-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mary meriem <mel-mer...@hotmail.fr>
Subject RE: lucene 4.0.0
Date Fri, 24 May 2013 23:19:48 GMT
yes thank you

> Date: Fri, 24 May 2013 18:59:13 -0400
> Subject: Re: lucene 4.0.0
> From: brendan.grainger@gmail.com
> To: java-user@lucene.apache.org
> 
> According to:
> http://lucene.apache.org/core/4_0_0/core/org/apache/lucene/index/IndexReader.html#leaves()
> it is.
> 
> 
> On Fri, May 24, 2013 at 6:49 PM, mary meriem <mel-meriem@hotmail.fr> wrote:
> 
> >
> >
> > I have a problem with AtomicReader reader =
> > indexReader.leaves().get(0).reader(); is it still valid in Lucene 4.0.0?
> >
> > > Date: Fri, 24 May 2013 13:41:05 -0400
> > > Subject: Re: lucene 4.0.0
> > > From: brendan.grainger@gmail.com
> > > To: java-user@lucene.apache.org
> > >
> > > Hi Mary,
> > >
> > > > I've been out of the loop with Lucene and Java for a bit, so this might
> > > > not be entirely correct, but here is an example of how it might be
> > accomplished
> > > (also you can see it in this gist:
> > https://gist.github.com/rainkinz/5645139).
> > > The output looks like this:
> > >
> > > ** Also note I'm using Lucene 4.3, however I set the version to be
> > > Version.LUCENE_40 for you. I don't think the APIs are different in this
> > > case.
> > >
> > > ---------------------------------------------------
> > > Term 'mary' appears 5 in the index
> > > in doc 0 the term mary appears 1 times at positions 1
> > > in doc 2 the term mary appears 1 times at positions 3
> > > in doc 4 the term mary appears 1 times at positions 1
> > > in doc 8 the term mary appears 1 times at positions 3
> > > in doc 9 the term mary appears 1 times at positions 6
> > > etc
> > >
> > >
> > >
> > > import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
> > > import org.apache.lucene.document.Document;
> > > import org.apache.lucene.document.Field;
> > > import org.apache.lucene.document.TextField;
> > > import org.apache.lucene.index.*;
> > > import org.apache.lucene.store.Directory;
> > > import org.apache.lucene.store.RAMDirectory;
> > > import org.apache.lucene.util.BytesRef;
> > > import org.apache.lucene.util.Version;
> > >
> > > import java.io.IOException;
> > > import java.util.Random;
> > >
> > > > /**
> > > >  * Small demo: indexes ten documents made of random words into an
> > > >  * in-memory directory, then prints every term of the "text" field
> > > >  * with its total frequency and its positions in each document.
> > > >  */
> > > > public class CountingTerms {
> > > >
> > > >   private static final Version VERSION = Version.LUCENE_40;
> > > >
> > > >   /** Vocabulary the random documents are drawn from. */
> > > >   private static final String[] terms =
> > > >       "hi am mary and i have a problem with lucene".split(" ");
> > > >
> > > >   private final Directory indexDir = new RAMDirectory();
> > > >
> > > >   /** One generator for the whole run instead of one per document. */
> > > >   private final Random rand = new Random();
> > > >
> > > >   /**
> > > >    * Builds a space-separated string of randomly chosen words.
> > > >    *
> > > >    * @return between 0 and terms.length - 1 words (repeats allowed),
> > > >    *         each followed by a space; may be the empty string
> > > >    */
> > > >   private String randomTerms() {
> > > >     StringBuilder sb = new StringBuilder();
> > > >     int numTerms = rand.nextInt(terms.length);
> > > >     for (int i = 0; i < numTerms; i++) {
> > > >       sb.append(terms[rand.nextInt(terms.length)]).append(" ");
> > > >     }
> > > >     return sb.toString();
> > > >   }
> > > >
> > > >   /**
> > > >    * Adds ten documents, each with one stored "text" field holding a
> > > >    * random string, and echoes every string that is added.
> > > >    *
> > > >    * @param writer the writer the documents are added to
> > > >    * @throws IOException if the writer fails to add a document
> > > >    */
> > > >   private void addDocs(IndexWriter writer) throws IOException {
> > > >     for (int i = 0; i < 10; i++) {
> > > >       Document doc = new Document();
> > > >       String randomStr = randomTerms();
> > > >       puts("Adding random str: " + randomStr);
> > > >       IndexableField field =
> > > >           new TextField("text", randomStr, Field.Store.YES);
> > > >       doc.add(field);
> > > >       writer.addDocument(doc);
> > > >     }
> > > >   }
> > > >
> > > >   /**
> > > >    * Prints each term of the "text" field with its total frequency,
> > > >    * then one line per document listing the positions of the term.
> > > >    *
> > > >    * @throws IOException if the index cannot be opened or read
> > > >    */
> > > >   private void countTerms() throws IOException {
> > > >     DirectoryReader indexReader = DirectoryReader.open(indexDir);
> > > >     try {
> > > >       // Only the first segment is read. NOTE(review): presumably
> > > >       // the single commit in index() yields one segment; an index
> > > >       // with several segments needs a loop over leaves().
> > > >       AtomicReader reader = indexReader.leaves().get(0).reader();
> > > >
> > > >       // Null when no document contributed a term, which happens
> > > >       // if randomTerms() returned "" for every document.
> > > >       Terms fieldTerms = reader.terms("text");
> > > >       if (fieldTerms == null) {
> > > >         puts("No terms indexed for field 'text'");
> > > >         return;
> > > >       }
> > > >
> > > >       TermsEnum termsEnum = fieldTerms.iterator(null);
> > > >       BytesRef term;
> > > >       while ((term = termsEnum.next()) != null) {
> > > >         String text = term.utf8ToString();
> > > >         puts("---------------------------------------------------");
> > > >         puts("Term '" + text + "' appears "
> > > >             + termsEnum.totalTermFreq() + " in the index");
> > > >         DocsAndPositionsEnum docPosEnum =
> > > >             termsEnum.docsAndPositions(reader.getLiveDocs(),
> > > >                 null,
> > > >                 DocsAndPositionsEnum.FLAG_OFFSETS);
> > > >         if (docPosEnum == null) {
> > > >           // No positions available for this term; skip it.
> > > >           continue;
> > > >         }
> > > >         int docid;
> > > >         while ((docid = docPosEnum.nextDoc())
> > > >             != DocsAndPositionsEnum.NO_MORE_DOCS) {
> > > >           // nextPosition() is called exactly freq() times per doc.
> > > >           int freq = docPosEnum.freq();
> > > >           int[] positions = new int[freq];
> > > >           for (int i = 0; i < freq; i++) {
> > > >             positions[i] = docPosEnum.nextPosition();
> > > >           }
> > > >           puts("in doc " + docid + " the term " + text
> > > >               + " appears " + freq + " times at positions "
> > > >               + ppArray(positions));
> > > >         }
> > > >       }
> > > >     } finally {
> > > >       // Close the reader even if reading the postings fails.
> > > >       indexReader.close();
> > > >     }
> > > >   }
> > > >
> > > >   /**
> > > >    * Formats an int array as a comma-separated list.
> > > >    *
> > > >    * @param arr the values to format
> > > >    * @return the values joined by ", "; "" for an empty array
> > > >    */
> > > >   private String ppArray(int[] arr) {
> > > >     StringBuilder sb = new StringBuilder();
> > > >     for (int i = 0; i < arr.length; i++) {
> > > >       sb.append(arr[i]);
> > > >       if (i + 1 < arr.length) {
> > > >         sb.append(", ");
> > > >       }
> > > >     }
> > > >     return sb.toString();
> > > >   }
> > > >
> > > >   /** Prints the message on its own line on standard output. */
> > > >   private void puts(Object msg) {
> > > >     System.out.println(msg);
> > > >   }
> > > >
> > > >   /**
> > > >    * Creates the index: adds the random documents and commits them.
> > > >    *
> > > >    * @throws IOException if writing the index fails
> > > >    */
> > > >   private void index() throws IOException {
> > > >     IndexWriterConfig config = new IndexWriterConfig(VERSION,
> > > >         new WhitespaceAnalyzer(VERSION));
> > > >     IndexWriter indexWriter = new IndexWriter(indexDir, config);
> > > >     try {
> > > >       addDocs(indexWriter);
> > > >       indexWriter.commit();
> > > >     } finally {
> > > >       // Close the writer even if adding or committing fails.
> > > >       indexWriter.close();
> > > >     }
> > > >   }
> > > >
> > > >   public static void main(String[] args) throws Exception {
> > > >     CountingTerms ct = new CountingTerms();
> > > >     ct.index();
> > > >     ct.countTerms();
> > > >   }
> > > >
> > > > }
> > >
> > >
> > >
> > > On Fri, May 24, 2013 at 12:14 PM, mary meriem <mel-meriem@hotmail.fr>
> > wrote:
> > >
> > > > > Hi, I am Mary and I have a problem with Lucene. I am working with
> > > > > Lucene 4.0.0. My problem is: how can I list all the terms, and
> > > > > display the position of each term in each document and its
> > > > > frequency? Please help.
> > > >
> > >
> > >
> > >
> > >
> > > --
> > > Brendan Grainger
> > > www.kuripai.com
> >
> >
> 
> 
> 
> -- 
> Brendan Grainger
> www.kuripai.com
 		 	   		  
Mime
  • Unnamed multipart/alternative (inline, None, 0 bytes)
View raw message