lucene-java-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Mark Miller <markrmil...@gmail.com>
Subject Re: Queries spanning paragraphs
Date Mon, 22 Oct 2007 11:42:31 GMT
I implemented this for my qsol query parser: myhardshadow.com/qsol

It uses a modified SpanNotQuery that takes an additional parameter saying how 
many times the span may cross the specified marker. Index a special 
paragraph marker with your text to delimit paragraphs, and then the rest 
is easy.

- Mark

public class SpanWithinQuery extends SpanQuery {
 
    private SpanQuery include;
    private SpanQuery exclude;
    private int proximity;

    /** Construct a SpanWithinQuery matching spans from 
<code>include</code> which
     *  overlap with spans from <code>exclude</code> up to 
<code>proximity</code> times.*/
    public SpanWithinQuery(SpanQuery include, SpanQuery exclude, int 
proximity) {
        this.include = include;
        this.exclude = exclude;
        this.proximity = proximity;

        if (!include.getField().equals(exclude.getField())) {
            throw new IllegalArgumentException("Clauses must have same 
field.");
        }
    }

    /** Return the SpanQuery whose matches are filtered. */
    public SpanQuery getInclude() {
        return include;
    }

    /** Return the SpanQuery whose matches must not overlap those 
returned. */
    public SpanQuery getExclude() {
        return exclude;
    }

    public String getField() {
        return include.getField();
    }

    /** Returns a collection of all terms matched by this query.
     * @deprecated use extractTerms instead
     * @see #extractTerms(Set)
     */
    public Collection getTerms() {
        return include.getTerms();
    }

    public void extractTerms(Set terms) {
        include.extractTerms(terms);
    }

    public String toString(String field) {
        StringBuffer buffer = new StringBuffer();
        buffer.append("spanWithin(");
        buffer.append(include.toString(field));
        buffer.append(", ");
        buffer.append(proximity + " ,");
        buffer.append(exclude.toString(field));
        buffer.append(")");
        buffer.append(ToStringUtils.boost(getBoost()));

        return buffer.toString();
    }

    public Spans getSpans(final IndexReader reader) throws IOException {
        return new Spans() {
                private Spans includeSpans = include.getSpans(reader);
                private boolean moreInclude = true;
                private Spans excludeSpans = exclude.getSpans(reader);
                private boolean moreExclude = true;

                public boolean next() throws IOException {
                    if (moreInclude) { // move to next include
                        moreInclude = includeSpans.next();
                    }

                    while (moreInclude && moreExclude) {
                        if (includeSpans.doc() > excludeSpans.doc()) { 
// skip exclude
                            moreExclude = 
excludeSpans.skipTo(includeSpans.doc());
                        }

                        int count = 0;

                        while (moreExclude // while exclude is before
                                 &&(includeSpans.doc() == 
excludeSpans.doc())) {
                            if ((!(excludeSpans.end() <= 
includeSpans.start()))) {
                                count += 1;

                                if (count > proximity) {
                                    break;
                                }
                            }

                            moreExclude = excludeSpans.next(); // 
increment exclude
                        }

                        if (!moreExclude // if no intersection
                                 ||(includeSpans.doc() != 
excludeSpans.doc()) ||
                                (includeSpans.end() <= 
excludeSpans.start())) {
                            break; // we found a match
                        }

                        moreInclude = includeSpans.next(); // 
intersected: keep scanning
                    }

                    return moreInclude;
                }

                public boolean skipTo(int target) throws IOException {
                    if (moreInclude) { // skip include
                        moreInclude = includeSpans.skipTo(target);
                    }

                    if (!moreInclude) {
                        return false;
                    }

                    if (moreExclude // skip exclude
                             &&(includeSpans.doc() > excludeSpans.doc())) {
                        moreExclude = 
excludeSpans.skipTo(includeSpans.doc());
                    }

                    int count = 0;

                    while (moreExclude // while exclude is before
                             &&(includeSpans.doc() == excludeSpans.doc())) {
                        if ((!(excludeSpans.end() <= 
includeSpans.start()))) {
                            count += 1;

                            if (count > proximity) {
                                break;
                            }
                        }

                        moreExclude = excludeSpans.next(); // increment 
exclude
                    }

                    if (!moreExclude // if no intersection
                             ||(includeSpans.doc() != excludeSpans.doc()) ||
                            (includeSpans.end() <= excludeSpans.start())) {
                        return true; // we found a match
                    }

                    boolean returnboolean = next();

                    return returnboolean; // scan to next match
                }

                public int doc() {
                    return includeSpans.doc();
                }

                public int start() {
                    return includeSpans.start();
                }

                public int end() {
                    return includeSpans.end();
                }

                public String toString() {
                    return "spans(" + SpanWithinQuery.this.toString() + ")";
                }
            };
    }

    public Query rewrite(IndexReader reader) throws IOException {
        SpanWithinQuery clone = null;

        SpanQuery rewrittenInclude = (SpanQuery) include.rewrite(reader);

        if (rewrittenInclude != include) {
            clone = (SpanWithinQuery) this.clone();
            clone.include = rewrittenInclude;
        }

        SpanQuery rewrittenExclude = (SpanQuery) exclude.rewrite(reader);

        if (rewrittenExclude != exclude) {
            if (clone == null) {
                clone = (SpanWithinQuery) this.clone();
            }

            clone.exclude = rewrittenExclude;
        }

        if (clone != null) {
            return clone; // some clauses rewrote
        } else {
            return this; // no clauses rewrote
        }
    }

    /** Returns true iff <code>o</code> is equal to this. */
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }

        if (!(o instanceof SpanWithinQuery)) {
            return false;
        }

        SpanWithinQuery other = (SpanWithinQuery) o;

        return this.include.equals(other.include) &&
        this.exclude.equals(other.exclude) &&
        (this.getBoost() == other.getBoost()) && (proximity == 
other.proximity);
    }

    public int hashCode() {
        int h = include.hashCode();
        h = (h << 1) | (h >>> 31); // rotate left
        h ^= exclude.hashCode();
        h = (h << 1) | (h >>> 31); // rotate left
        h ^= Float.floatToRawIntBits(getBoost());
        h ^= proximity;

        return h;
    }
}


---------------------------------------------------------------------
To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org
For additional commands, e-mail: java-user-help@lucene.apache.org


Mime
View raw message