Return-Path: Delivered-To: apmail-lucene-java-user-archive@www.apache.org Received: (qmail 73273 invoked from network); 20 Sep 2005 06:56:08 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (209.237.227.199) by minotaur.apache.org with SMTP; 20 Sep 2005 06:56:08 -0000 Received: (qmail 74948 invoked by uid 500); 20 Sep 2005 06:56:02 -0000 Delivered-To: apmail-lucene-java-user-archive@lucene.apache.org Received: (qmail 74040 invoked by uid 500); 20 Sep 2005 06:55:54 -0000 Mailing-List: contact java-user-help@lucene.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: java-user@lucene.apache.org Delivered-To: mailing list java-user@lucene.apache.org Received: (qmail 74027 invoked by uid 99); 20 Sep 2005 06:55:53 -0000 Received: from asf.osuosl.org (HELO asf.osuosl.org) (140.211.166.49) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 19 Sep 2005 23:55:53 -0700 X-ASF-Spam-Status: No, hits=0.4 required=10.0 tests=DNS_FROM_RFC_ABUSE X-Spam-Check-By: apache.org Received: from [206.190.38.233] (HELO web51802.mail.yahoo.com) (206.190.38.233) by apache.org (qpsmtpd/0.29) with SMTP; Mon, 19 Sep 2005 23:56:01 -0700 Received: (qmail 58976 invoked by uid 60001); 20 Sep 2005 06:55:39 -0000 DomainKey-Signature: a=rsa-sha1; q=dns; c=nofws; s=s1024; d=yahoo.com; h=Message-ID:Received:Date:From:Subject:To:In-Reply-To:MIME-Version:Content-Type:Content-Transfer-Encoding; b=2Ntc7UT6sRh/KsWKktdC6TPKQ2Lt3pn9wCSjwc8jpCyqriQem9puf/IlabbDJNvfeTdWiWkYrBoRbFYDtigfUO38UIICyoaTIWtiCHSUpIXSjMh9Avtey7pdB5iCjPFqXeIYopWw3YIp6VAgQhVM+vQiKpnIXNr33Pc32Czy7Ug= ; Message-ID: <20050920065539.58974.qmail@web51802.mail.yahoo.com> Received: from [66.127.61.92] by web51802.mail.yahoo.com via HTTP; Mon, 19 Sep 2005 23:55:38 PDT Date: Mon, 19 Sep 2005 23:55:38 -0700 (PDT) From: James Huang Subject: Re: Sort by relevance+distance To: java-user@lucene.apache.org In-Reply-To: <432F387E.10209@yahoo.co.uk> MIME-Version: 1.0 Content-Type: text/plain; 
charset=iso-8859-1 Content-Transfer-Encoding: 8bit X-Virus-Checked: Checked by ClamAV on apache.org X-Spam-Rating: minotaur.apache.org 1.6.2 0/1000/N Cool! Only one question: if we have class RelevanceAndDistanceCollector extends HitCollector { public ScoreDoc[] getMatches(int start, int size) { ... } } and make a call such as getMatches(10000, 25), the collector would not need to cache as many as 10000+ docs, would it? Remember, scalability is the whole point of this exercise -- I just want to make sure. Thanks, -James --- markharw00d wrote: > Here's an example I put together to illustrate the > point. > > > package distance; > > import java.io.IOException; > import java.util.ArrayList; > > import org.apache.lucene.analysis.Analyzer; > import > org.apache.lucene.analysis.WhitespaceAnalyzer; > import org.apache.lucene.document.Document; > import org.apache.lucene.document.Field; > import org.apache.lucene.index.IndexReader; > import org.apache.lucene.index.IndexWriter; > import org.apache.lucene.queryParser.ParseException; > import org.apache.lucene.queryParser.QueryParser; > import org.apache.lucene.search.HitCollector; > import org.apache.lucene.search.IndexSearcher; > import org.apache.lucene.search.Query; > import org.apache.lucene.search.ScoreDoc; > import org.apache.lucene.store.RAMDirectory; > import org.apache.lucene.util.PriorityQueue; > > public class TestDistance > { > > private static QueryParser parser; > private static IndexReader reader; > private static Location[] locsCache; > private static IndexSearcher searcher; > /** > * @param args > */ > public static void main(String[] args) throws > Exception > { > Analyzer analyzer=new WhitespaceAnalyzer(); > RAMDirectory dir=new RAMDirectory(); > IndexWriter writer=new > IndexWriter(dir,analyzer,true); > addDoc(writer,"the faraway mouse", 500,500); > addDoc(writer,"the semilocal cat", 50,50); > addDoc(writer,"the local dog", 20,20); > writer.close(); > searcher=new IndexSearcher(dir); > parser=new QueryParser("description", > 
analyzer); > > //create location cache > reader = searcher.getIndexReader(); > ArrayList allLocs=new ArrayList(); > int docCount=reader.numDocs(); > for (int i = 0; i < docCount; i++) > { > Document doc=reader.document(i); > allLocs.add(new Location( > > Float.parseFloat(doc.get("lat")), > > Float.parseFloat(doc.get("lon")) > ) > ); > } > locsCache=new Location[reader.numDocs()]; > locsCache= (Location[]) allLocs.toArray(new > Location[allLocs.size()]); > > //example search 1 > runSearch("the cat"); > > runSearch("the dog"); > > runSearch("the mouse"); > > > } > > private static void runSearch(String > queryString) throws > ParseException, IOException > { > System.out.println("query:"+queryString); > Query query=parser.parse(queryString); > Location queryLocation=new Location(1f,1f); > RelevanceAndDistanceCollector collector=new > RelevanceAndDistanceCollector(10, > queryLocation,locsCache); > searcher.search(query,collector); > ScoreDoc[] results = collector.getMatches(); > for (int i = 0; i < results.length; i++) > { > Document > doc=reader.document(results[i].doc); > > System.out.print("["+results[i].doc+"]"); > > System.out.print("("+results[i].score+")"); > > System.out.println("\t"+doc.get("description")); > } > System.out.println(""); > } > > > public static void addDoc(IndexWriter > writer,String description, > float lat, float lon) throws IOException > { > Document doc=new Document(); > doc.add(Field.UnIndexed("lat", ""+lat)); > doc.add(Field.UnIndexed("lon", ""+lon)); > > doc.add(Field.Text("description",description)); > writer.addDocument(doc); > } > static class Location > { > float lat; > float lon; > public Location(float lat, float lon) > { > this.lat=lat; > this.lon=lon; > } > public float distance(Location loc) > { > float latDiff = Math.abs(loc.lat-lat); > float lonDiff = Math.abs(loc.lon-lon); > float dist=(float) > Math.sqrt((latDiff*latDiff)+(lonDiff*lonDiff)); > return dist; > } > > } > static class RelevanceAndDistanceCollector > extends 
HitCollector > { > HitQueue hq; > Location queryLocation; > float maxDistance=5000; > private Location[] docLocs; > > public RelevanceAndDistanceCollector(int > numDocs, Location > queryLocation, Location[] docLocs) > { > this.queryLocation=queryLocation; > this.docLocs=docLocs; > hq=new HitQueue(numDocs); > } > public void collect(int doc, float score) > { > > score=score*(maxDistance-queryLocation.distance(docLocs[doc])); > hq.insert(new ScoreDoc(doc,score)); > > } > public ScoreDoc[] getMatches() > { > ScoreDoc sd[]=new ScoreDoc[hq.size()]; > while(hq.size()>0) > { > sd[hq.size()-1]=(ScoreDoc) hq.pop(); > } > return sd; > } > } > static class HitQueue extends PriorityQueue { > public HitQueue(int size) { > initialize(size); > } > public final boolean lessThan(Object a, > Object b) { > ScoreDoc hitA = (ScoreDoc)a; > ScoreDoc hitB = (ScoreDoc)b; > if (hitA.score == hitB.score) > return hitA.doc > hitB.doc; > else > return hitA.score < hitB.score; > } > } > > } > > > > > ___________________________________________________________ > > How much free photo storage do you get? Store your > holiday > snaps for FREE with Yahoo! Photos > http://uk.photos.yahoo.com > > --------------------------------------------------------------------- > To unsubscribe, e-mail: > java-user-unsubscribe@lucene.apache.org > For additional commands, e-mail: > java-user-help@lucene.apache.org > === message truncated === __________________________________________________ Do You Yahoo!? Tired of spam? Yahoo! Mail has the best spam protection around http://mail.yahoo.com --------------------------------------------------------------------- To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org For additional commands, e-mail: java-user-help@lucene.apache.org