lucene-java-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mary meriem <mel-mer...@hotmail.fr>
Subject RE: lucene 4.0.0
Date Fri, 24 May 2013 22:49:36 GMT


I have a problem with the line  AtomicReader reader = indexReader.leaves().get(0).reader();
Is it valid in Lucene 4.0.0?

> Date: Fri, 24 May 2013 13:41:05 -0400
> Subject: Re: lucene 4.0.0
> From: brendan.grainger@gmail.com
> To: java-user@lucene.apache.org
> 
> Hi Mary,
> 
> I've been out of the loop with Lucene and Java for a bit, so this might
> not be entirely correct, but here is an example of how it might be accomplished
> (also you can see it in this gist: https://gist.github.com/rainkinz/5645139).
> The output looks like this:
> 
> ** Also note I'm using Lucene 4.3, however I set the version to be
> Version.LUCENE_40 for you. I don't think the APIs are different in this
> case.
> 
> ---------------------------------------------------
> Term 'mary' appears 5 in the index
> in doc 0 the term mary appears 1 times at positions 1
> in doc 2 the term mary appears 1 times at positions 3
> in doc 4 the term mary appears 1 times at positions 1
> in doc 8 the term mary appears 1 times at positions 3
> in doc 9 the term mary appears 1 times at positions 6
> etc
> 
> 
> 
> import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
> import org.apache.lucene.document.Document;
> import org.apache.lucene.document.Field;
> import org.apache.lucene.document.TextField;
> import org.apache.lucene.index.*;
> import org.apache.lucene.store.Directory;
> import org.apache.lucene.store.RAMDirectory;
> import org.apache.lucene.util.BytesRef;
> import org.apache.lucene.util.Version;
> 
> import java.io.IOException;
> import java.util.Random;
> 
> /**
>  * Small Lucene 4.0 demo: indexes ten documents of random words into an
>  * in-memory directory, then prints every term of the "text" field with
>  * its total frequency and, per document, its frequency and positions.
>  */
> public class CountingTerms {
> 
>   private static final Version VERSION = Version.LUCENE_40;
> 
>   /** Name of the single field that is indexed and inspected. */
>   private static final String FIELD = "text";
> 
>   /** Vocabulary the random documents are drawn from. */
>   private static final String[] terms =
>       "hi am mary and i have a problem with lucene".split(" ");
> 
>   private final Directory indexDir = new RAMDirectory();
> 
>   /** Shared generator so each call does not create a new Random. */
>   private final Random rand = new Random();
> 
>   /**
>    * Builds a space-separated string of randomly chosen vocabulary words.
>    *
>    * @return between 0 and terms.length - 1 words, each followed by a
>    *         space; the empty string when zero words are drawn
>    */
>   private String randomTerms() {
>     StringBuilder sb = new StringBuilder();
>     int numTerms = rand.nextInt(terms.length);
>     for (int i = 0; i < numTerms; i++) {
>       sb.append(terms[rand.nextInt(terms.length)]).append(" ");
>     }
>     return sb.toString();
>   }
> 
>   /**
>    * Adds ten documents, each with one stored "text" field holding a
>    * random string, and echoes every string that is added.
>    *
>    * @param writer the writer that receives the documents
>    * @throws IOException if the writer fails to add a document
>    */
>   private void addDocs(IndexWriter writer) throws IOException {
>     for (int i = 0; i < 10; i++) {
>       Document doc = new Document();
>       String randomStr = randomTerms();
>       puts("Adding random str: " + randomStr);
>       doc.add(new TextField(FIELD, randomStr, Field.Store.YES));
>       writer.addDocument(doc);
>     }
>   }
> 
>   /**
>    * Prints, for every term of the "text" field, its total frequency and
>    * the frequency and positions of the term in each document.
>    *
>    * @throws IOException if the index cannot be opened or read
>    */
>   private void countTerms() throws IOException {
>     DirectoryReader indexReader = DirectoryReader.open(indexDir);
>     try {
>       // MultiFields gives one view over all segments, so nothing is
>       // missed when the index has more than one leaf (the previous
>       // leaves().get(0) only looked at the first one).
>       Terms fieldTerms = MultiFields.getTerms(indexReader, FIELD);
>       if (fieldTerms == null) {
>         // e.g. every random document happened to be empty
>         puts("No terms indexed for field '" + FIELD + "'");
>         return;
>       }
>       // Fully qualified because the file does not import Bits.
>       org.apache.lucene.util.Bits liveDocs =
>           MultiFields.getLiveDocs(indexReader);
>       TermsEnum termsEnum = fieldTerms.iterator(null);
>       BytesRef term;
> 
>       while ((term = termsEnum.next()) != null) {
>         String text = term.utf8ToString();
>         puts("---------------------------------------------------");
>         puts("Term '" + text + "' appears "
>             + termsEnum.totalTermFreq() + " in the index");
>         DocsAndPositionsEnum docPosEnum = termsEnum.docsAndPositions(
>             liveDocs, null, DocsAndPositionsEnum.FLAG_OFFSETS);
>         if (docPosEnum == null) {
>           // NOTE(review): Lucene documents null here when the field
>           // was indexed without positions; skip rather than crash.
>           continue;
>         }
>         int docid;
>         while ((docid = docPosEnum.nextDoc())
>             != DocsAndPositionsEnum.NO_MORE_DOCS) {
>           int freq = docPosEnum.freq();
>           int[] positions = new int[freq];
>           for (int i = 0; i < freq; i++) {
>             positions[i] = docPosEnum.nextPosition();
>           }
>           puts("in doc " + docid + " the term " + text + " appears "
>               + freq + " times at positions " + ppArray(positions));
>         }
>       }
>     } finally {
>       // Release the reader even if reading the postings fails.
>       indexReader.close();
>     }
>   }
> 
>   /**
>    * Formats an int array as a comma-separated list.
>    *
>    * @param arr the values to format
>    * @return the values joined by ", "; empty string for an empty array
>    */
>   private String ppArray(int[] arr) {
>     StringBuilder sb = new StringBuilder();
>     for (int i = 0; i < arr.length; i++) {
>       sb.append(arr[i]);
>       if (i + 1 < arr.length) {
>         sb.append(", ");
>       }
>     }
>     return sb.toString();
>   }
> 
>   /** Prints the message on its own line on standard output. */
>   private void puts(Object msg) {
>     System.out.println(msg);
>   }
> 
>   /**
>    * Creates the index in the in-memory directory and commits it.
>    *
>    * @throws IOException if the writer cannot be created or written
>    */
>   private void index() throws IOException {
>     IndexWriter indexWriter = new IndexWriter(indexDir,
>         new IndexWriterConfig(VERSION, new WhitespaceAnalyzer(VERSION)));
>     try {
>       addDocs(indexWriter);
>       indexWriter.commit();
>     } finally {
>       // Always close the writer, even if adding documents fails.
>       indexWriter.close();
>     }
>   }
> 
>   /** Builds the index, then prints the term statistics. */
>   public static void main(String[] args) throws Exception {
>     CountingTerms ct = new CountingTerms();
>     ct.index();
>     ct.countTerms();
>   }
> 
> }
> 
> 
> 
> On Fri, May 24, 2013 at 12:14 PM, mary meriem <mel-meriem@hotmail.fr> wrote:
> 
> > Hi, I am Mary and I have a problem with Lucene. I work with Lucene
> > 4.0.0. My problem is: how can I list all the terms, and display the
> > positions of each term in each document and its frequency? Please help.
> >
> 
> 
> 
> 
> -- 
> Brendan Grainger
> www.kuripai.com
 		 	   		  
Mime
  • Unnamed multipart/alternative (inline, None, 0 bytes)
View raw message