lucene-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mark harwood <markharw...@yahoo.co.uk>
Subject Re: Proposed Lucene modification - FieldCollector
Date Thu, 10 Mar 2005 07:36:15 GMT
>>To get complete statistics like
>>above, you currently have to iterate through the
result
>> set and pull each Document from the Hits.

Not necessarily true.  You can use TermVectors or an
indexed field (e.g. "doctype") to derive these
statistics without stored fields. Here's an example of
how I've done it before using indexed fields. I've been
meaning to tidy this up and contribute it, as it looks
like it could be generally useful. The
"GroupingKeyFactory" is an abstraction which allows you
to process a term before using it for totalling, e.g.
to group dates by year rather than by full date.


    protected GroupTotal[] 
groupByIndexTokens(GroupQueryParams params)throws
ParseException, IOException
    {
        final HashMap totals = new HashMap();
        final GroupingKeyFactory groupKeyFactory =
params.getGroupKeyFactory();
        String groupFieldName =
params.getGroupFieldName();
        //TODO IndexSearcher should be passed in and
resused?
        IndexSearcher searcher = new
IndexSearcher(reader);
        float minScore = params.getMinDocScore();
        final float scores[] = new
float[reader.numDocs()];
        String queryString=params.getQuery();
       
if((queryString==null)||(queryString.trim().length()==0))
        {
            //TODO if query is null then we could
optimise counting by just taking docFreq             
            // from TermEnum and avoding use of
TermDocs? 
            Arrays.fill(scores,1);
        }
        else
        {
	        Query query = null;
	        query = QueryParser.parse(params.getQuery(),
"contents", analyzer);
	        searcher.search(query, null, new
HitCollector()
	        {
	            public void collect(int docID, float
score)
	            {
	                scores[docID] = score;
	            }
	        });
        } 

        TermEnum te = reader.terms(new
Term(groupFieldName, ""));
        Term term = te.term();
        while (term!=null)
        {            
            if (term.field().equals(groupFieldName))
            {
                TermDocs termDocs =
reader.termDocs(term);
                GroupTotal groupTotal = null;

                boolean continueThisTerm = true;
                while ((continueThisTerm) &&
(termDocs.next()))
                {
                    int docID = termDocs.doc();
                    float docScore = scores[docID];
                    //TODO include logic to test
queryParams.includeZeroScore groups
                    if ((docScore > 0) && (docScore >
minScore))
                    //                       
if(docScore>minScore)
                    {
                        if (groupTotal == null)
                        {
                            //look up the group key
and initialize
                            String termText =
term.text();
                            Object key = termText;
                            if (groupKeyFactory !=
null)
                            {
                                key =
groupKeyFactory.getGroupingKey(termText,docID);
                                if (key == null)
                                {
                                    continueThisTerm =
false;
                                    continue;
                                }
                            }
                            groupTotal = (GroupTotal)
totals.get(key);
                            if (groupTotal == null)
                            {
                                //no totals exist yet,
create new one.
                                groupTotal = new
GroupTotal(params
                                       
.getReturnDocIdsWithGroups());
                               
groupTotal.setGroupKey(key);
                                totals.put(key,
groupTotal);
                               
groupTotal.addToTotalDocFreq(te.docFreq());
                            }
                        }
                       
groupTotal.addQueryMatchDoc(docID, scores[docID]);
                    }
                }
            } else
            {
                break;
            }
           if(te.next())
           {
               term=te.term();
           }
           else
           {
               break;
           }
        }
        Collection result = totals.values();
        GroupTotal[] results = (GroupTotal[])
result.toArray(new GroupTotal[result.size()]);
        return results;
    }


Send instant messages to your online friends http://uk.messenger.yahoo.com 

---------------------------------------------------------------------
To unsubscribe, e-mail: java-dev-unsubscribe@lucene.apache.org
For additional commands, e-mail: java-dev-help@lucene.apache.org


Mime
View raw message