Return-Path: X-Original-To: apmail-lucene-dev-archive@www.apache.org Delivered-To: apmail-lucene-dev-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id F2BF11053E for ; Thu, 27 Feb 2014 00:31:35 +0000 (UTC) Received: (qmail 78990 invoked by uid 500); 27 Feb 2014 00:31:30 -0000 Delivered-To: apmail-lucene-dev-archive@lucene.apache.org Received: (qmail 78834 invoked by uid 500); 27 Feb 2014 00:31:29 -0000 Mailing-List: contact dev-help@lucene.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@lucene.apache.org Delivered-To: mailing list dev@lucene.apache.org Received: (qmail 78691 invoked by uid 99); 27 Feb 2014 00:31:27 -0000 Received: from arcas.apache.org (HELO arcas.apache.org) (140.211.11.28) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 27 Feb 2014 00:31:27 +0000 Date: Thu, 27 Feb 2014 00:31:27 +0000 (UTC) From: "David (JIRA)" To: dev@lucene.apache.org Message-ID: In-Reply-To: References: Subject: [jira] [Comment Edited] (SOLR-5773) CollapsingQParserPlugin problem with ElevateComponent MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 7bit X-JIRA-FingerPrint: 30527f35849b9dde25b450d4833f0394 [ https://issues.apache.org/jira/browse/SOLR-5773?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13913797#comment-13913797 ] David edited comment on SOLR-5773 at 2/27/14 12:31 AM: ------------------------------------------------------- Last fix had a bug. This is working for me. {code} public CollapsingScoreCollector(int maxDoc, int segments, SortedDocValues values, int nullPolicy, IntOpenHashSet boostDocs) { this.maxDoc = maxDoc; this.contexts = new AtomicReaderContext[segments]; this.collapsedSet = new OpenBitSet(maxDoc); this.boostDocs = boostDocs; if(this.boostDocs != null) { //Set the elevated docs now. 
Iterator it = this.boostDocs.iterator(); while(it.hasNext()) { IntCursor cursor = it.next(); this.collapsedSet.fastSet(cursor.value); } } this.values = values; int valueCount = values.getValueCount(); this.ords = new int[valueCount]; this.groupIsBoosted = new boolean[valueCount]; Arrays.fill(this.ords, -1); this.scores = new float[valueCount]; Arrays.fill(this.scores, -Float.MAX_VALUE); this.nullPolicy = nullPolicy; if(nullPolicy == CollapsingPostFilter.NULL_POLICY_EXPAND) { nullScores = new FloatArrayList(); } } public boolean acceptsDocsOutOfOrder() { //Documents must be sent in order to this collector. return false; } public void setNextReader(AtomicReaderContext context) throws IOException { this.contexts[context.ord] = context; this.docBase = context.docBase; } public void collect(int docId) throws IOException { int globalDoc = docId+this.docBase; int ord = values.getOrd(globalDoc); if(ord > -1) { if (this.collapsedSet.fastGet(globalDoc)) { //If we have a document in the group that is potentially not //the top scorer but also exists as an elevated document //set it as the globalDoc and it will be removed in //favor of the elevated document groupIsBoosted[ord] = true; ords[ord] = globalDoc; } else if (!groupIsBoosted[ord]) { float score = scorer.score(); if(score > scores[ord]) { ords[ord] = globalDoc; scores[ord] = score; } } } if (this.collapsedSet.fastGet(globalDoc)) { //The doc is elevated so score does not matter //We just want to be sure it doesn't fall into the null policy ords[ord] = globalDoc; } else if(nullPolicy == CollapsingPostFilter.NULL_POLICY_COLLAPSE) { float score = scorer.score(); if(score > nullScore) { nullScore = score; nullDoc = globalDoc; } } else if(nullPolicy == CollapsingPostFilter.NULL_POLICY_EXPAND) { collapsedSet.fastSet(globalDoc); nullScores.add(scorer.score()); } } {code} This approach will work for default grouping. Will still have to implement fixes for min max grouping. 
I will probably also want to make this a toggle-able feature. was (Author: dboychuck): Actually I had to change my approach. {code} public CollapsingScoreCollector(int maxDoc, int segments, SortedDocValues values, int nullPolicy, IntOpenHashSet boostDocs) { this.maxDoc = maxDoc; this.contexts = new AtomicReaderContext[segments]; this.collapsedSet = new OpenBitSet(maxDoc); this.boostDocs = boostDocs; if(this.boostDocs != null) { //Set the elevated docs now. Iterator it = this.boostDocs.iterator(); while(it.hasNext()) { IntCursor cursor = it.next(); this.collapsedSet.fastSet(cursor.value); } } this.values = values; int valueCount = values.getValueCount(); this.ords = new int[valueCount]; this.groupIsBoosted = new boolean[valueCount]; Arrays.fill(this.ords, -1); this.scores = new float[valueCount]; Arrays.fill(this.scores, -Float.MAX_VALUE); this.nullPolicy = nullPolicy; if(nullPolicy == CollapsingPostFilter.NULL_POLICY_EXPAND) { nullScores = new FloatArrayList(); } } public boolean acceptsDocsOutOfOrder() { //Documents must be sent in order to this collector. 
return false; } public void setNextReader(AtomicReaderContext context) throws IOException { this.contexts[context.ord] = context; this.docBase = context.docBase; } public void collect(int docId) throws IOException { int globalDoc = docId+this.docBase; int ord = values.getOrd(globalDoc); if(ord > -1) { if (this.collapsedSet.fastGet(globalDoc)) { //If we have a document in the group that is potentially not //the top scorer but also exists as an elevated document //set it as the globalDoc and it will be removed in //favor of the elevated document groupIsBoosted[ord] = true; ords[ord] = globalDoc; } else if (!groupIsBoosted[ord]) { float score = scorer.score(); if(score > scores[ord]) { ords[ord] = globalDoc; scores[ord] = score; } } } if (this.collapsedSet.fastGet(globalDoc)) { //The doc is elevated so score does not matter //We just want to be sure it doesn't fall into the null policy ords[ord] = globalDoc; } else if(nullPolicy == CollapsingPostFilter.NULL_POLICY_COLLAPSE) { float score = scorer.score(); if(score > nullScore) { nullScore = score; nullDoc = globalDoc; } } else if(nullPolicy == CollapsingPostFilter.NULL_POLICY_EXPAND) { collapsedSet.fastSet(globalDoc); nullScores.add(scorer.score()); } } {code} This approach will work for default grouping. Will still have to implement fixes for min max grouping. I will probably also want to make this a toggle-able feature. > CollapsingQParserPlugin problem with ElevateComponent > ----------------------------------------------------- > > Key: SOLR-5773 > URL: https://issues.apache.org/jira/browse/SOLR-5773 > Project: Solr > Issue Type: Improvement > Components: query parsers > Affects Versions: 4.6.1 > Reporter: David > Labels: collapse, solr > Fix For: 4.7 > > Original Estimate: 8h > Remaining Estimate: 8h > > Hi Joel, > I sent you an email but I'm not sure if you received it or not. I ran into a bit of trouble using the CollapsingQParserPlugin with elevated documents. 
To explain it simply, I want to exclude grouped documents when one of the members of the group is contained in the elevated document set. I'm not sure this is possible currently because, as you explain above, elevated documents are added to the request context after the original query is constructed. > To better illustrate the problem: suppose I have 2 documents, docid=1 and docid=2, and both have a groupid of 'a'. If a grouped query scores docid 2 first in the results but I have elevated docid 1, then both documents are shown in the results when I really only want the elevated document to be shown in the results. > Is this something that would be difficult to implement? Any help is appreciated. > I think the solution would be to remove the documents from liveDocs that share the same groupid in the getBoostDocs() function. Let me know if this makes any sense. I'll continue working towards a solution in the meantime. > {code} > private IntOpenHashSet getBoostDocs(SolrIndexSearcher indexSearcher, Set boosted) throws IOException { > IntOpenHashSet boostDocs = null; > if(boosted != null) { > SchemaField idField = indexSearcher.getSchema().getUniqueKeyField(); > String fieldName = idField.getName(); > HashSet localBoosts = new HashSet(boosted.size()*2); > Iterator boostedIt = boosted.iterator(); > while(boostedIt.hasNext()) { > localBoosts.add(new BytesRef(boostedIt.next())); > } > boostDocs = new IntOpenHashSet(boosted.size()*2); > Listleaves = indexSearcher.getTopReaderContext().leaves(); > TermsEnum termsEnum = null; > DocsEnum docsEnum = null; > for(AtomicReaderContext leaf : leaves) { > AtomicReader reader = leaf.reader(); > int docBase = leaf.docBase; > Bits liveDocs = reader.getLiveDocs(); > Terms terms = reader.terms(fieldName); > termsEnum = terms.iterator(termsEnum); > Iterator it = localBoosts.iterator(); > while(it.hasNext()) { > BytesRef ref = it.next(); > if(termsEnum.seekExact(ref)) { > docsEnum = termsEnum.docs(liveDocs, docsEnum); > int doc = 
docsEnum.nextDoc(); > if(doc != -1) { > //Found the document. > boostDocs.add(doc+docBase); > *// HERE REMOVE ANY DOCUMENTS THAT SHARE THE GROUPID NOT ONLY THE DOCID //* > it.remove(); > } > } > } > } > } > return boostDocs; > } > {code} -- This message was sent by Atlassian JIRA (v6.1.5#6160) --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscribe@lucene.apache.org For additional commands, e-mail: dev-help@lucene.apache.org