Return-Path: X-Original-To: apmail-lucene-java-user-archive@www.apache.org Delivered-To: apmail-lucene-java-user-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id A97E9D836 for ; Wed, 22 Aug 2012 02:06:38 +0000 (UTC) Received: (qmail 54727 invoked by uid 500); 22 Aug 2012 02:06:36 -0000 Delivered-To: apmail-lucene-java-user-archive@lucene.apache.org Received: (qmail 54685 invoked by uid 500); 22 Aug 2012 02:06:36 -0000 Mailing-List: contact java-user-help@lucene.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: java-user@lucene.apache.org Delivered-To: mailing list java-user@lucene.apache.org Received: (qmail 54675 invoked by uid 99); 22 Aug 2012 02:06:36 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 22 Aug 2012 02:06:36 +0000 X-ASF-Spam-Status: No, hits=2.2 required=5.0 tests=HTML_MESSAGE,RCVD_IN_DNSWL_LOW,SPF_NEUTRAL X-Spam-Check-By: apache.org Received-SPF: neutral (athena.apache.org: local policy) Received: from [209.85.216.48] (HELO mail-qa0-f48.google.com) (209.85.216.48) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 22 Aug 2012 02:06:28 +0000 Received: by qady1 with SMTP id y1so549259qad.14 for ; Tue, 21 Aug 2012 19:06:07 -0700 (PDT) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20120113; h=mime-version:in-reply-to:references:date:message-id:subject:from:to :content-type:x-gm-message-state; bh=DQemcdfyGzoDc7oY7VzjscctR1o+OZnPLTFdHVr2+Ao=; b=YJm8DLwQBiBvFub6ZZqG8/WbsN1ucs6Au2XUtouMLQ/mLaRmCYxwI2DjKlsGAnAa/x ViKkAbp4+C3oNXQXvThagxlbJfzHTuTs0+P7QA4KFS08tfw5SkcQDMSmbVoyEZQWjqKB HmXtfY8csFnfIm05Hlwqo7FT8+AG3G7K2qIyJAA590wJwp3ogoT0ehGgOswe0yxitVmE 14yDxwpRsbrk4oIqZk2uuCoUymJ5muEC9QeuAfhYHqgzix5qp96j3XU0cA61/BeWCIqE wh2pg8WUtapOAtAgUpYBpKYEAHf5LgRdK4PUYy+yAc0n2Q2kk7M1fbdaOSj1k7sq19vp aPwA== MIME-Version: 1.0 Received: 
by 10.229.136.9 with SMTP id p9mr983923qct.73.1345601167390; Tue, 21 Aug 2012 19:06:07 -0700 (PDT) Received: by 10.229.28.69 with HTTP; Tue, 21 Aug 2012 19:06:07 -0700 (PDT) In-Reply-To: References: <0AB7FDA54FDD46C9920B2ED379BEA2FC@JackKrupansky> Date: Tue, 21 Aug 2012 22:06:07 -0400 Message-ID: Subject: Re: Creating Span Queries from Boolean Queries From: Dave Seltzer To: java-user@lucene.apache.org Content-Type: multipart/alternative; boundary=00248c7118e99779ac04c7d12f9a X-Gm-Message-State: ALoCoQnJQPjkmgsGDZgl1JXRTDV9jHzOhDkbj5i9hR3m2tB+DQx5EasFwDA+lkKO+8+WZfa/o5iX X-Virus-Checked: Checked by ClamAV on apache.org --00248c7118e99779ac04c7d12f9a Content-Type: text/plain; charset=ISO-8859-1 So I've taken my first shot at solving my problem using the three functions below. When I set the slop to 10 it produces the following result: This BooleanQuery +content:"london olympics" +(+content:football +content:or +content:soccer) -content:nfl becomes this SpanQuery: spanNot(spanNear([spanNear([content:london, content:olympics], 0, true), spanNear([content:football, content:or, content:soccer], 10, false)], 10, false), spanOr([content:nfl])) Right now I've implemented TermQuery, PhraseQuery and BooleanQuery. Is there a list of queries that could be produced using the Lucene Query Parser? Any thoughts on how I should implement Wildcard queries? Thanks! 
-Dave

/**
 * Converts a Lucene {@code Query} into a {@code SpanQuery}.
 *
 * Only {@code TermQuery}, {@code PhraseQuery} and {@code BooleanQuery} are
 * handled; nested boolean queries are converted recursively.
 *
 * @param input the query to convert
 * @param slop  the slop used when clauses of a boolean query are joined
 *              with a {@code SpanNearQuery}
 * @return the converted span query, or {@code null} when {@code input} is
 *         of an unsupported type (or is a boolean query with no usable
 *         MUST/SHOULD clause)
 */
public static SpanQuery ConvertQuery(Query input, int slop) {
    if (input instanceof TermQuery) {
        // A single term maps one-to-one onto a span term.
        return new SpanTermQuery(((TermQuery) input).getTerm());
    }
    if (input instanceof PhraseQuery) {
        return ConvertPhraseQueryToSpanQuery((PhraseQuery) input);
    }
    if (input instanceof BooleanQuery) {
        // Nested boolean queries recurse back into ConvertQuery per clause.
        return ConvertBooleanQuery((BooleanQuery) input, slop);
    }
    // Unsupported query type (e.g. wildcard queries are not handled yet).
    return null;
}

/**
 * Converts a {@code PhraseQuery} into a {@code SpanNearQuery} over its terms.
 *
 * The phrase's own slop is carried over instead of being forced to 0, so a
 * sloppy phrase is no longer silently tightened to an exact phrase. An exact
 * phrase (slop 0) still produces an in-order span with slop 0.
 *
 * NOTE(review): span slop and phrase slop are not measured identically, so
 * for slop &gt; 0 this is an approximation of the phrase match - confirm it
 * is acceptable for the intended use. Position gaps inside the phrase
 * (e.g. from removed stop words) are not taken into account.
 *
 * @param input the phrase query to convert
 * @return a span-near query over the phrase's terms, in term order
 */
public static SpanQuery ConvertPhraseQueryToSpanQuery(PhraseQuery input) {
    ArrayList<SpanQuery> termSpans = new ArrayList<SpanQuery>();
    for (Term term : input.getTerms()) {
        termSpans.add(new SpanTermQuery(term));
    }

    int phraseSlop = input.getSlop();
    // Only an exact phrase demands strict ordering; a sloppy phrase may
    // match with its terms moved around, so it is converted unordered.
    boolean inOrder = (phraseSlop == 0);

    return new SpanNearQuery(
            termSpans.toArray(new SpanQuery[termSpans.size()]), phraseSlop, inOrder);
}

/**
 * Converts a {@code BooleanQuery} into a {@code SpanQuery}.
 *
 * Clauses are converted with {@link #ConvertQuery} and grouped by occurrence:
 * MUST clauses are joined with an unordered {@code SpanNearQuery}, SHOULD
 * clauses with a {@code SpanOrQuery}, and MUST_NOT clauses are excluded via
 * a {@code SpanNotQuery}. Clauses that cannot be converted are skipped.
 *
 * NOTE(review): when both MUST and SHOULD clauses are present, the SHOULD
 * group is joined to the MUST group with a {@code SpanNearQuery}, which makes
 * at least one SHOULD clause required within {@code slop} - stricter than the
 * source boolean query. Confirm this is intended.
 * NOTE(review): presumably all clauses target the same field, as
 * {@code SpanNearQuery} expects - verify against callers.
 *
 * @param input the boolean query to convert
 * @param slop  the slop used for every {@code SpanNearQuery} built here
 * @return the converted span query, or {@code null} when no MUST or SHOULD
 *         clause could be converted
 */
public static SpanQuery ConvertBooleanQuery(BooleanQuery input, int slop) {
    ArrayList<SpanQuery> andClauses = new ArrayList<SpanQuery>();
    ArrayList<SpanQuery> orClauses = new ArrayList<SpanQuery>();
    ArrayList<SpanQuery> notClauses = new ArrayList<SpanQuery>();

    // Convert every child clause first and bucket it by its occurrence,
    // prior to assembling the final span query.
    for (BooleanClause clause : input.clauses()) {
        SpanQuery converted = ConvertQuery(clause.getQuery(), slop);
        if (converted == null) {
            continue; // unsupported child query: leave it out
        }
        BooleanClause.Occur occur = clause.getOccur();
        if (occur == BooleanClause.Occur.MUST) {
            andClauses.add(converted);
        } else if (occur == BooleanClause.Occur.SHOULD) {
            orClauses.add(converted);
        } else if (occur == BooleanClause.Occur.MUST_NOT) {
            notClauses.add(converted);
        }
    }

    // A span query cannot be purely negative: without any AND or OR
    // clause there is nothing to match, so give up.
    if (andClauses.isEmpty() && orClauses.isEmpty()) {
        return null;
    }

    SpanQuery andSpans = null;
    if (andClauses.size() > 1) {
        andSpans = new SpanNearQuery(
                andClauses.toArray(new SpanQuery[andClauses.size()]), slop, false);
    } else if (andClauses.size() == 1) {
        // A lone MUST clause needs no wrapper.
        andSpans = andClauses.get(0);
    }

    SpanQuery orSpans = null;
    if (!orClauses.isEmpty()) {
        orSpans = new SpanOrQuery(orClauses.toArray(new SpanQuery[orClauses.size()]));
    }

    // Combine the positive parts into one intermediate query.
    SpanQuery intermediateQuery;
    if (orSpans == null) {
        intermediateQuery = andSpans;
    } else if (andSpans == null) {
        intermediateQuery = orSpans;
    } else {
        intermediateQuery = new SpanNearQuery(new SpanQuery[] {andSpans, orSpans}, slop, false);
    }

    // No exclusions: the positive part is the whole answer.
    if (notClauses.isEmpty()) {
        return intermediateQuery;
    }

    // Exclude any span that overlaps one of the MUST_NOT clauses.
    SpanQuery notSpans = new SpanOrQuery(notClauses.toArray(new SpanQuery[notClauses.size()]));
    return new SpanNotQuery(intermediateQuery, notSpans);
}

--00248c7118e99779ac04c7d12f9a--