lucene-java-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Thomas D'Silva <>
Subject Using TermVectorMapper to compute term frequency across documents
Date Tue, 13 Oct 2009 02:46:36 GMT

I am trying to count the terms that occur in the documents returned
by a query, using a TermVectorMapper.
Does anyone know of a faster way to do this than calling
getTermFreqVector() with a TermVectorMapper that stores the term
counts in a HashMap?
I do not require the term frequency within a document.


// Map handed to every DocTermVectorMapper created in the loop below.
// NOTE(review): this snippet comes from an archived e-mail and appears to have
// lost lines (closing braces, parts of statements) — confirm against the raw message.
HashMap termDocCount = new HashMap();
TermQuery tagQuery = new TermQuery(tagTerm);
// NOTE(review): the right-hand side of this assignment looks truncated; presumably
// it was a search call taking tagQuery and numDocs — confirm against the raw message.
TopDocs docs =, numDocs);
// Visit every hit returned for the query.
for (int i=0 ; i<docs.scoreDocs.length; ++i) {
	ScoreDoc sdoc=docs.scoreDocs[i];
	// NOTE(review): doc is not referenced in the lines visible here.
	Document doc = ir.document(sdoc.doc);
	//iterate over a subset of index fields
	for (int j=0; j <fieldNames.length; ++j) {
		String fieldName=fieldNames[j];
		// A new mapper is created for each (document, field) pair; all of them
		// receive the same termDocCount map.
		DocTermVectorMapper vMapper=new DocTermVectorMapper(termDocCount);
		ir.getTermFreqVector(sdoc.doc, fieldName,vMapper);

  // TermVectorMapper subclass that records the terms it is given into the
  // HashMap supplied at construction, keyed by Term (field + text).
  // NOTE(review): several method bodies and closing braces are missing from this
  // archived copy — confirm against the raw message before relying on it.
  private class DocTermVectorMapper extends TermVectorMapper {
     	private HashMap termDocCount;
     	private String currField;
     	// NOTE(review): constructor body is not visible; presumably it stores the
     	// termDocCount argument in the field of the same name — confirm.
     	DocTermVectorMapper(HashMap termDocCount) {
     	// Always reports true.
     	public boolean isIgnoringOffsets() {
     		return true;
     	// Always reports true.
     	public boolean isIgnoringPositions() {
     		return true;

	// Builds a Term from currField and the given term text; if the map has no
	// entry for it yet, inserts a new Int. The frequency, offsets and positions
	// arguments are not used in the lines visible here.
	public void map(String term, int frequency, TermVectorOffsetInfo[]  
offsets, int[] positions) {
		Term t=new Term(currField,term);
		if (!termDocCount.containsKey(t))
			termDocCount.put(t, new Int());
		// NOTE(review): the else branch body is missing; presumably it increments
		// the existing Int for this term — confirm.
		else {

	// NOTE(review): body is not visible; presumably it sets currField from the
	// field argument, since map() reads currField — confirm.
	public void setExpectations(String field, int numTerms, boolean  
storeOffsets, boolean storePositions) {

  // Holder for a single int field x, which the constructor initialises to 1.
  // NOTE(review): closing braces are missing from this archived copy.
  private class Int {
       	int x;
         // Initialises x to 1.
         Int() {
           x = 1;
  • Unnamed multipart/alternative (inline, None, 0 bytes)
View raw message