jackrabbit-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mreut...@apache.org
Subject svn commit: r523251 - in /jackrabbit/trunk/jackrabbit-core/src/main: java/org/apache/jackrabbit/core/query/lucene/ java/org/apache/jackrabbit/core/query/sql/ java/org/apache/jackrabbit/core/query/xpath/ javacc/sql/
Date Wed, 28 Mar 2007 09:45:46 GMT
Author: mreutegg
Date: Wed Mar 28 02:45:44 2007
New Revision: 523251

URL: http://svn.apache.org/viewvc?view=rev&rev=523251
Log:
JCR-820: Add support for query result highlighting
- initial version

Added:
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DefaultHighlighter.java   (with props)
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DefaultXMLExcerpt.java   (with props)
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/ExcerptProvider.java   (with props)
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SimpleExcerptProvider.java   (with props)
Modified:
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractIndex.java
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/ChildAxisQuery.java
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DescendantSelfAxisQuery.java
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/LazyQueryResultImpl.java
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/NodeIndexer.java
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/ParentAxisQuery.java
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/RowIteratorImpl.java
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/sql/DefaultParserVisitor.java
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/sql/JCRSQLQueryBuilder.java
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/xpath/XPathQueryBuilder.java
    jackrabbit/trunk/jackrabbit-core/src/main/javacc/sql/JCRSQL.jjt

Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractIndex.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractIndex.java?view=diff&rev=523251&r1=523250&r2=523251
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractIndex.java (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractIndex.java Wed Mar 28 02:45:44 2007
@@ -221,8 +221,8 @@
         }
         if (indexWriter == null) {
             indexWriter = new IndexWriter(getDirectory(), analyzer, false);
-			// since lucene 2.0 setMaxBuffereDocs is equivalent to previous minMergeDocs attribute
-			indexWriter.setMaxBufferedDocs(minMergeDocs);
+            // since lucene 2.0 setMaxBufferedDocs is equivalent to the previous minMergeDocs attribute
+            indexWriter.setMaxBufferedDocs(minMergeDocs);
             indexWriter.setMaxMergeDocs(maxMergeDocs);
             indexWriter.setMergeFactor(mergeFactor);
             indexWriter.setMaxFieldLength(maxFieldLength);
@@ -335,26 +335,9 @@
             for (Enumeration fields = doc.fields(); fields.hasMoreElements(); ) {
                 Field f = (Field) fields.nextElement();
                 Field field = null;
-                Field.TermVector tv;
-                if (f.isTermVectorStored()) {
-                    tv = Field.TermVector.YES;
-                } else {
-                    tv = Field.TermVector.NO;
-                }
-                Field.Store stored;
-                if (f.isStored()) {
-                    stored = Field.Store.YES;
-                } else {
-                    stored = Field.Store.NO;
-                }
-                Field.Index indexed;
-                if (!f.isIndexed()) {
-                    indexed = Field.Index.NO;
-                } else if (f.isTokenized()) {
-                    indexed = Field.Index.TOKENIZED;
-                } else {
-                    indexed = Field.Index.UN_TOKENIZED;
-                }
+                Field.TermVector tv = getTermVectorParameter(f);
+                Field.Store stored = getStoreParameter(f);
+                Field.Index indexed = getIndexParameter(f);
                 if (f.readerValue() != null) {
                     // replace all readers with empty string reader
                     field = new Field(f.name(), new StringReader(""), tv);
@@ -365,6 +348,7 @@
                     field = new Field(f.name(), f.binaryValue(), stored);
                 }
                 if (field != null) {
+                    field.setOmitNorms(f.getOmitNorms());
                     copy.add(field);
                 }
             }
@@ -399,8 +383,8 @@
     void setMinMergeDocs(int minMergeDocs) {
         this.minMergeDocs = minMergeDocs;
         if (indexWriter != null) {
-			// since lucene 2.0 setMaxBuffereDocs is equivalent to previous minMergeDocs attribute
-			indexWriter.setMaxBufferedDocs(minMergeDocs);
+            // since lucene 2.0 setMaxBufferedDocs is equivalent to the previous minMergeDocs attribute
+            indexWriter.setMaxBufferedDocs(minMergeDocs);
         }
     }
 
@@ -431,6 +415,60 @@
         this.maxFieldLength = maxFieldLength;
         if (indexWriter != null) {
             indexWriter.setMaxFieldLength(maxFieldLength);
+        }
+    }
+
+    //------------------------------< internal >--------------------------------
+
+    /**
+     * Returns the index parameter set on <code>f</code>.
+     *
+     * @param f a lucene field.
+     * @return the index parameter on <code>f</code>.
+     */
+    private Field.Index getIndexParameter(Field f) {
+        if (!f.isIndexed()) {
+            return Field.Index.NO;
+        } else if (f.isTokenized()) {
+            return Field.Index.TOKENIZED;
+        } else {
+            return Field.Index.UN_TOKENIZED;
+        }
+    }
+
+    /**
+     * Returns the store parameter set on <code>f</code>.
+     *
+     * @param f a lucene field.
+     * @return the store parameter on <code>f</code>.
+     */
+    private Field.Store getStoreParameter(Field f) {
+        if (f.isCompressed()) {
+            return Field.Store.COMPRESS;
+        } else if (f.isStored()) {
+            return Field.Store.YES;
+        } else {
+            return Field.Store.NO;
+        }
+    }
+
+    /**
+     * Returns the term vector parameter set on <code>f</code>.
+     *
+     * @param f a lucene field.
+     * @return the term vector parameter on <code>f</code>.
+     */
+    private Field.TermVector getTermVectorParameter(Field f) {
+        if (f.isStorePositionWithTermVector() && f.isStoreOffsetWithTermVector()) {
+            return Field.TermVector.WITH_POSITIONS_OFFSETS;
+        } else if (f.isStorePositionWithTermVector()) {
+            return Field.TermVector.WITH_POSITIONS;
+        } else if (f.isStoreOffsetWithTermVector()) {
+            return Field.TermVector.WITH_OFFSETS;
+        } else if (f.isTermVectorStored()) {
+            return Field.TermVector.YES;
+        } else {
+            return Field.TermVector.NO;
         }
     }
 

Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/ChildAxisQuery.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/ChildAxisQuery.java?view=diff&rev=523251&r1=523250&r2=523251
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/ChildAxisQuery.java (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/ChildAxisQuery.java Wed Mar 28 02:45:44 2007
@@ -40,6 +40,7 @@
 import java.util.BitSet;
 import java.util.Iterator;
 import java.util.List;
+import java.util.Set;
 
 /**
  * Implements a lucene <code>Query</code> which returns the child nodes of the
@@ -119,6 +120,13 @@
      */
     protected Weight createWeight(Searcher searcher) {
         return new ChildAxisWeight(searcher);
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    public void extractTerms(Set terms) {
+        contextQuery.extractTerms(terms);
     }
 
     /**

Added: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DefaultHighlighter.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DefaultHighlighter.java?view=auto&rev=523251
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DefaultHighlighter.java (added)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DefaultHighlighter.java Wed Mar 28 02:45:44 2007
@@ -0,0 +1,385 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.query.lucene;
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.TermPositionVector;
+import org.apache.lucene.index.TermVectorOffsetInfo;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.Query;
+
+/**
+ * This is an adapted version of the <code>FulltextHighlighter</code> posted in
+ * issue: <a href="http://issues.apache.org/jira/browse/LUCENE-644">LUCENE-644</a>.
+ * <p/>
+ * Important: for this highlighter to function properly, the field must be
+ * stored with token offsets.<br/> Use the Field constructor {@link
+ * Field#Field(String,String,Field.Store,Field.Index,Field.TermVector)
+ * Field(String, String, Field.Store, Field.Index, Field.TermVector)} where the
+ * last argument is either {@link Field.TermVector#WITH_POSITIONS_OFFSETS} or
+ * {@link org.apache.lucene.document.Field.TermVector#WITH_OFFSETS}
+ *
+ * @see org.apache.lucene.index.TermPositionVector
+ * @see org.apache.lucene.index.TermFreqVector
+ */
+class DefaultHighlighter {
+
+    /**
+     * The default maximum number of fragments: <tt>3</tt>.
+     */
+    public static final int DEFAULT_MAXFRAGMENTS = 3;
+
+    /**
+     * The default maximum number of characters surrounding a highlighted token: <tt>80</tt>.
+     */
+    public static final int DEFAULT_SURROUND = 80;
+
+    public static final String START_EXCERPT = "<excerpt>";
+
+    public static final String END_EXCERPT = "</excerpt>";
+
+    public static final String START_FRAGMENT_SEPARATOR = "<fragment>";
+
+    public static final String END_FRAGMENT_SEPARATOR = "</fragment>";
+
+    public static final String EMPTY_EXCERPT = "<excerpt/>";
+
+    private DefaultHighlighter() {
+    }
+
+    /**
+     * @param tvec    the term position vector for this hit
+     * @param query   the {@link Query query object} which must already have
+     *                been rewritten into primitive types
+     * @param field   the field name
+     * @param text    the original text that was used to create the tokens.
+     * @param prepend the string used to prepend a highlighted token, for
+     *                example <tt>&quot;&lt;b&gt;&quot;</tt>
+     * @param append  the string used to append a highlighted token, for example
+     *                <tt>&quot;&lt;/b&gt;&quot;</tt>
+     * @return a String with text fragments where tokens from the query are
+     *         highlighted
+     */
+    public static String highlight(TermPositionVector tvec,
+                                   Query query,
+                                   String field,
+                                   String text,
+                                   String prepend,
+                                   String append)
+            throws IOException {
+        return highlight(tvec, query, field, text, prepend, append,
+                DEFAULT_MAXFRAGMENTS, DEFAULT_SURROUND);
+    }
+
+    /**
+     * @param tvec         the term position vector for this hit
+     * @param query        the {@link Query query object} which must already
+     *                     have been rewritten into primitive types
+     * @param field        the field name
+     * @param text         the original text that was used to create the tokens.
+     * @param prepend      the string used to prepend a highlighted token, for
+     *                     example <tt>&quot;&lt;b&gt;&quot;</tt>
+     * @param append       the string used to append a highlighted token, for
+     *                     example <tt>&quot;&lt;/b&gt;&quot;</tt>
+     * @param maxFragments the maximum number of fragments
+     * @param surround     the maximum number of chars surrounding a highlighted
+     *                     token
+     * @return a String with text fragments where tokens from the query are
+     *         highlighted
+     */
+    public static String highlight(TermPositionVector tvec,
+                                   Query query,
+                                   String field,
+                                   String text,
+                                   String prepend,
+                                   String append,
+                                   int maxFragments,
+                                   int surround)
+            throws IOException {
+        Set extractedTerms = new HashSet();
+        query.extractTerms(extractedTerms);
+        // only keep terms for given field
+        for (Iterator it = extractedTerms.iterator(); it.hasNext(); ) {
+            if (!((Term) it.next()).field().equals(field)) {
+                it.remove();
+            }
+        }
+        String[] terms = new String[extractedTerms.size()];
+        Iterator it = extractedTerms.iterator();
+        for (int i = 0; it.hasNext(); i++) {
+            terms[i] = ((Term) it.next()).text();
+        }
+        ArrayList list = new ArrayList();
+        int[] tvecindexes = tvec.indexesOf(terms, 0, terms.length);
+        for (int i = 0; i < tvecindexes.length; i++) {
+            TermVectorOffsetInfo[] termoffsets = tvec.getOffsets(tvecindexes[i]);
+            for (int ii = 0; ii < termoffsets.length; ii++) {
+                list.add(termoffsets[ii]);
+            }
+        }
+
+        TermVectorOffsetInfo[] offsets = (TermVectorOffsetInfo[]) list.toArray(new TermVectorOffsetInfo[0]);
+        // sort offsets
+        if (terms.length > 1) {
+            java.util.Arrays.sort(offsets, new TermVectorOffsetInfoSorter());
+        }
+
+        return mergeFragments(offsets, new StringReader(text), prepend,
+                append, maxFragments, surround);
+    }
+
+    private static String mergeFragments(TermVectorOffsetInfo[] offsets,
+                                         StringReader reader,
+                                         String prefix,
+                                         String suffix,
+                                         int maxFragments,
+                                         int surround)
+            throws IOException {
+        if (offsets == null || offsets.length == 0) {
+            // nothing to highlight
+            return EMPTY_EXCERPT;
+        }
+        int lastOffset = offsets.length; // Math.min(10, offsets.length); // 10 terms is plenty?
+        ArrayList fragmentInfoList = new ArrayList();
+        FragmentInfo fi = new FragmentInfo(offsets[0], surround * 2);
+        for (int i = 1; i < lastOffset; i++) {
+            if (fi.add(offsets[i])) {
+                continue;
+            }
+            fragmentInfoList.add(fi);
+            fi = new FragmentInfo(offsets[i], surround * 2);
+        }
+        fragmentInfoList.add(fi);
+
+        // sort with score
+        java.util.Collections.sort(fragmentInfoList, new FragmentInfoScoreSorter());
+
+        // extract best fragments
+        ArrayList bestFragmentsList = new ArrayList();
+        for (int i = 0; i < Math.min(fragmentInfoList.size(), maxFragments); i++) {
+            bestFragmentsList.add(fragmentInfoList.get(i));
+        }
+
+        // re-sort with positions
+        java.util.Collections.sort(bestFragmentsList, new FragmentInfoPositionSorter());
+
+        // merge #maxFragments fragments
+        StringBuffer sb = new StringBuffer(START_EXCERPT);
+        int pos = 0;
+        char[] cbuf;
+        int skip;
+        int nextStart;
+        int skippedChars;
+        for (int i = 0; i < bestFragmentsList.size(); i++) {
+            fi = (FragmentInfo) bestFragmentsList.get(i);
+            nextStart = fi.getStartOffset();
+            skip = nextStart - pos;
+            if (skip > surround * 2) {
+                skip -= surround;
+                if (i > 0) {
+                    // end last fragment
+                    cbuf = new char[surround];
+                    reader.read(cbuf, 0, surround);
+                    // find last whitespace
+                    skippedChars = 1;
+                    for (; skippedChars < surround + 1; skippedChars++) {
+                        if (Character.isWhitespace(cbuf[surround - skippedChars])) {
+                            break;
+                        }
+                    }
+                    pos += surround;
+                    if (skippedChars > surround) {
+                        skippedChars = surround;
+                    }
+                    sb.append(cbuf, 0, surround - skippedChars);
+                    sb.append(END_FRAGMENT_SEPARATOR);
+                }
+            }
+
+            if (skip >= surround) {
+                if (i > 0) {
+                    skip -= surround;
+                }
+                // skip
+                reader.skip((long) skip);
+                pos += skip;
+            }
+            // start fragment
+            skippedChars = 0;
+            cbuf = new char[nextStart - pos];
+            reader.read(cbuf, 0, nextStart - pos);
+            pos += (nextStart - pos);
+            sb.append(START_FRAGMENT_SEPARATOR);
+            // find first whitespace
+            for (; skippedChars < cbuf.length; skippedChars++) {
+                if (Character.isWhitespace(cbuf[skippedChars])) {
+                    skippedChars += 1;
+                    break;
+                }
+            }
+
+            sb.append(cbuf, skippedChars, cbuf.length - skippedChars);
+
+            // iterate terms
+            for (Iterator iter = fi.iterator(); iter.hasNext();) {
+                TermVectorOffsetInfo ti = (TermVectorOffsetInfo) iter.next();
+                nextStart = ti.getStartOffset();
+                if (nextStart - pos > 0) {
+                    cbuf = new char[nextStart - pos];
+                    int charsRead = reader.read(cbuf, 0, nextStart - pos);
+                    pos += (nextStart - pos);
+                    sb.append(cbuf, 0, charsRead);
+                }
+                sb.append(prefix);
+                nextStart = ti.getEndOffset();
+                // print term
+                cbuf = new char[nextStart - pos];
+                reader.read(cbuf, 0, nextStart - pos);
+                pos += (nextStart - pos);
+                sb.append(cbuf);
+                sb.append(suffix);
+            }
+        }
+        if (pos != 0) {
+            // end fragment
+            if (offsets.length > lastOffset) {
+                surround = Math.min(offsets[lastOffset].getStartOffset() - pos, surround);
+            }
+            cbuf = new char[surround];
+            skip = reader.read(cbuf, 0, surround);
+            boolean EOF = reader.read() == -1;
+            if (skip >= 0) {
+                if (!EOF) {
+                    skippedChars = 1;
+                    for (; skippedChars < surround + 1; skippedChars++) {
+                        if (Character.isWhitespace(cbuf[surround - skippedChars])) {
+                            break;
+                        }
+                    }
+                    if (skippedChars > surround) {
+                        skippedChars = surround;
+                    }
+                } else {
+                    skippedChars = 0;
+                }
+                sb.append(cbuf, 0, EOF ? skip : (surround - skippedChars));
+                sb.append(END_FRAGMENT_SEPARATOR);
+            }
+        }
+        sb.append(END_EXCERPT);
+        return sb.toString();
+    }
+
+    private static class FragmentInfo {
+        ArrayList offsetInfosList;
+        int startOffset;
+        int endOffset;
+        int mergeGap;
+        int numTerms;
+
+        public FragmentInfo(TermVectorOffsetInfo offsetinfo, int mergeGap) {
+            offsetInfosList = new ArrayList();
+            offsetInfosList.add(offsetinfo);
+            startOffset = offsetinfo.getStartOffset();
+            endOffset = offsetinfo.getEndOffset();
+            this.mergeGap = mergeGap;
+            numTerms = 1;
+        }
+
+        public boolean add(TermVectorOffsetInfo offsetinfo) {
+            if (offsetinfo.getStartOffset() > (endOffset + mergeGap)) {
+                return false;
+            }
+            offsetInfosList.add(offsetinfo);
+            numTerms++;
+            endOffset = offsetinfo.getEndOffset();
+            return true;
+        }
+
+        public Iterator iterator() {
+            return offsetInfosList.iterator();
+        }
+
+        public int getStartOffset() {
+            return startOffset;
+        }
+
+        public int getEndOffset() {
+            return endOffset;
+        }
+
+        public int numTerms() {
+            return numTerms;
+        }
+    }
+
+    private static class FragmentInfoScoreSorter
+            implements java.util.Comparator {
+        public int compare(Object o1, Object o2) {
+            int s1 = ((FragmentInfo) o1).numTerms();
+            int s2 = ((FragmentInfo) o2).numTerms();
+            if (s1 == s2) {
+                return ((FragmentInfo) o1).getStartOffset() < ((FragmentInfo) o2).getStartOffset() ? -1 : 1;
+            }
+            return s1 > s2 ? -1 : 1;
+        }
+
+        public boolean equals(Object obj) {
+            return false;
+        }
+    }
+
+    private static class FragmentInfoPositionSorter
+            implements java.util.Comparator {
+        public int compare(Object o1, Object o2) {
+            int s1 = ((FragmentInfo) o1).getStartOffset();
+            int s2 = ((FragmentInfo) o2).getStartOffset();
+            if (s1 == s2) {
+                return 0;
+            }
+            return s1 < s2 ? -1 : 1;
+        }
+
+        public boolean equals(Object obj) {
+            return false;
+        }
+    }
+
+    private static class TermVectorOffsetInfoSorter
+            implements java.util.Comparator {
+        public int compare(Object o1, Object o2) {
+            int s1 = ((TermVectorOffsetInfo) o1).getStartOffset();
+            int s2 = ((TermVectorOffsetInfo) o2).getStartOffset();
+            if (s1 == s2) {
+                return 0;
+            }
+            return s1 < s2 ? -1 : 1;
+        }
+
+        public boolean equals(Object obj) {
+            return false;
+        }
+    }
+
+}

Propchange: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DefaultHighlighter.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DefaultXMLExcerpt.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DefaultXMLExcerpt.java?view=auto&rev=523251
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DefaultXMLExcerpt.java (added)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DefaultXMLExcerpt.java Wed Mar 28 02:45:44 2007
@@ -0,0 +1,149 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.query.lucene;
+
+import org.apache.jackrabbit.core.NodeId;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermDocs;
+import org.apache.lucene.index.TermFreqVector;
+import org.apache.lucene.index.TermPositionVector;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+
+/**
+ * <code>DefaultXMLExcerpt</code> implements an ExcerptProvider.
+ */
+class DefaultXMLExcerpt implements ExcerptProvider {
+
+    /**
+     * Logger instance for this class.
+     */
+    private static final Logger log = LoggerFactory.getLogger(DefaultXMLExcerpt.class);
+
+    /**
+     * The search index.
+     */
+    private SearchIndex index;
+
+    /**
+     * The current query.
+     */
+    private Query query;
+
+    /**
+     * {@inheritDoc}
+     */
+    public void init(Query query, SearchIndex index) throws IOException {
+        this.index = index;
+        this.query = query;
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    public String getExcerpt(NodeId id, int maxFragments, int maxFragmentSize)
+            throws IOException {
+        IndexReader reader = index.getIndexReader();
+        try {
+            Term idTerm = new Term(FieldNames.UUID, id.getUUID().toString());
+            TermDocs tDocs = reader.termDocs(idTerm);
+            int docNumber;
+            Document doc;
+            try {
+                if (tDocs.next()) {
+                    docNumber = tDocs.doc();
+                    doc = reader.document(docNumber);
+                } else {
+                    // node not found in index
+                    return null;
+                }
+            } finally {
+                tDocs.close();
+            }
+            Field[] fields = doc.getFields(FieldNames.FULLTEXT);
+            if (fields == null) {
+                log.debug("Fulltext field not stored, using {}",
+                        SimpleExcerptProvider.class.getName());
+                SimpleExcerptProvider exProvider = new SimpleExcerptProvider();
+                exProvider.init(query, index);
+                return exProvider.getExcerpt(id, maxFragments, maxFragmentSize);
+            }
+            StringBuffer text = new StringBuffer();
+            String separator = "";
+            for (int i = 0; i < fields.length; i++) {
+                text.append(separator);
+                text.append(fields[i].stringValue());
+                // This is a hack! In general, multiple fields with the same
+                // name are handled properly, that is, offsets and positions are
+                // calculated correctly. There is one case, however, where
+                // the offset is wrong:
+                // if a term text ends with characters that are considered noise,
+                // then the offset of the next field will be off by the number
+                // of noise characters.
+                // Therefore we delete noise characters at the end of the text.
+                for (int j = text.length() - 1; j >= 0; j--) {
+                    if (Character.isLetterOrDigit(text.charAt(j))) {
+                        break;
+                    } else {
+                        text.deleteCharAt(j);
+                    }
+                }
+                separator = " ";
+            }
+            TermFreqVector tfv = reader.getTermFreqVector(
+                    docNumber, FieldNames.FULLTEXT);
+            if (tfv instanceof TermPositionVector) {
+                return createExcerpt((TermPositionVector) tfv, text.toString(),
+                        maxFragments, maxFragmentSize);
+            } else {
+                log.debug("No TermPositionVector on Fulltext field, using {}",
+                        SimpleExcerptProvider.class.getName());
+                SimpleExcerptProvider exProvider = new SimpleExcerptProvider();
+                exProvider.init(query, index);
+                return exProvider.getExcerpt(id, maxFragments, maxFragmentSize);
+            }
+        } finally {
+            reader.close();
+        }
+    }
+
+    /**
+     * Creates an excerpt for the given <code>text</code> using token offset
+     * information provided by <code>tpv</code>.
+     *
+     * @param tpv             the term position vector for the fulltext field.
+     * @param text            the original text.
+     * @param maxFragments    the maximum number of fragments to create.
+     * @param maxFragmentSize the maximum number of characters in a fragment.
+     * @return the xml excerpt.
+     * @throws IOException if an error occurs while creating the excerpt.
+     */
+    private String createExcerpt(TermPositionVector tpv,
+                                 String text,
+                                 int maxFragments,
+                                 int maxFragmentSize)
+            throws IOException {
+        return DefaultHighlighter.highlight(tpv, query, FieldNames.FULLTEXT,
+                text, "<highlight>", "</highlight>", maxFragments, maxFragmentSize / 2);
+    }
+}

Propchange: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DefaultXMLExcerpt.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DescendantSelfAxisQuery.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DescendantSelfAxisQuery.java?view=diff&rev=523251&r1=523250&r2=523251
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DescendantSelfAxisQuery.java (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DescendantSelfAxisQuery.java Wed Mar 28 02:45:44 2007
@@ -27,6 +27,7 @@
 
 import java.io.IOException;
 import java.util.BitSet;
+import java.util.Set;
 
 /**
  * Implements a lucene <code>Query</code> which filters a sub query by checking
@@ -106,6 +107,14 @@
      */
     public String toString(String field) {
         return "DescendantSelfAxisQuery";
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    public void extractTerms(Set terms) {
+        contextQuery.extractTerms(terms);
+        subQuery.extractTerms(terms);
     }
 
     //------------------------< DescendantSelfAxisWeight >--------------------------

Added: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/ExcerptProvider.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/ExcerptProvider.java?view=auto&rev=523251
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/ExcerptProvider.java (added)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/ExcerptProvider.java Wed Mar 28 02:45:44 2007
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.query.lucene;
+
+import org.apache.jackrabbit.core.NodeId;
+import org.apache.jackrabbit.name.QName;
+import org.apache.lucene.search.Query;
+
+import java.io.IOException;
+
+/**
+ * <code>ExcerptProvider</code> defines an interface to get an XML excerpt
+ * of a matching node.<br/>
+ * E.g. if you search for 'jackrabbit' and 'query' you may get the following
+ * result for a node:
+ * <pre>
+ * &lt;excerpt>
+ *     &lt;fragment>&lt;highlight>Jackrabbit&lt;/highlight> implements both the mandatory XPath and optional SQL &lt;highlight>query&lt;/highlight> syntax.&lt;/fragment>
+ *     &lt;fragment>Before parsing the XPath &lt;highlight>query&lt;/highlight> in &lt;highlight>Jackrabbit&lt;/highlight>, the statement is surrounded&lt;/fragment>
+ * &lt;/excerpt>
+ * </pre>
+ */
+public interface ExcerptProvider {
+
+    /**
+     * QName of the rep:excerpt function.
+     */
+    public final QName REP_EXCERPT = new QName(QName.NS_REP_URI, "excerpt(.)");
+
+    /**
+     * Initializes this excerpt provider.
+     *
+     * @param query excerpts will be based on this query.
+     * @param index provides access to the search index.
+     * @throws IOException if an error occurs while initializing this excerpt
+     *                     provider.
+     */
+    public void init(Query query, SearchIndex index) throws IOException;
+
+    /**
+     * Returns the XML excerpt for the node with <code>id</code>.
+     *
+     * @param id              a node id.
+     * @param maxFragments    the maximum number of fragments to create.
+     * @param maxFragmentSize the maximum number of characters in a fragment.
+     * @return the XML excerpt or <code>null</code> if there is no node with
+     *         <code>id</code>.
+     * @throws IOException if an error occurs while creating the excerpt.
+     */
+    public String getExcerpt(NodeId id, int maxFragments, int maxFragmentSize)
+            throws IOException;
+}

Propchange: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/ExcerptProvider.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/LazyQueryResultImpl.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/LazyQueryResultImpl.java?view=diff&rev=523251&r1=523250&r2=523251
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/LazyQueryResultImpl.java (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/LazyQueryResultImpl.java Wed Mar 28 02:45:44 2007
@@ -120,6 +120,11 @@
     private final boolean docOrder;
 
     /**
+     * The excerpt provider or <code>null</code> if none was created yet.
+     */
+    private ExcerptProvider excerptProvider;
+
+    /**
      * Creates a new query result.
      *
      * @param index         the search index where the query is executed.
@@ -190,7 +195,15 @@
      * {@inheritDoc}
      */
     public RowIterator getRows() throws RepositoryException {
-        return new RowIteratorImpl(getNodeIterator(), selectProps, resolver);
+        if (excerptProvider == null) {
+            try {
+                excerptProvider = index.createExcerptProvider(query);
+            } catch (IOException e) {
+                throw new RepositoryException(e);
+            }
+        }
+        return new RowIteratorImpl(getNodeIterator(),
+                selectProps, resolver, excerptProvider);
     }
 
     /**

Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/NodeIndexer.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/NodeIndexer.java?view=diff&rev=523251&r1=523250&r2=523251
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/NodeIndexer.java (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/NodeIndexer.java Wed Mar 28 02:45:44 2007
@@ -42,6 +42,7 @@
 
 import java.io.InputStream;
 import java.io.Reader;
+import java.io.IOException;
 import java.util.Calendar;
 import java.util.Iterator;
 import java.util.Set;
@@ -78,6 +79,12 @@
     protected final TextExtractor extractor;
 
     /**
+     * If set to <code>true</code> the fulltext field is stored and a term
+     * vector is created with offset information.
+     */
+    protected boolean supportHighlighting = false;
+
+    /**
      * Creates a new node indexer.
      *
      * @param node          the node state to index.
@@ -104,6 +111,16 @@
     }
 
     /**
+     * If set to <code>true</code> additional information is stored in the index
+     * to support highlighting using the rep:excerpt pseudo property.
+     *
+     * @param b <code>true</code> to enable highlighting support.
+     */
+    public void setSupportHighlighting(boolean b) {
+        supportHighlighting = b;
+    }
+
+    /**
      * Creates a lucene Document.
      *
      * @return the lucene Document with the index layout.
@@ -276,7 +293,7 @@
                 InputStream stream =
                         ((BLOBFileValue) internalValue).getStream();
                 Reader reader = extractor.extractText(stream, type, encoding);
-                doc.add(new Field(FieldNames.FULLTEXT, reader));
+                doc.add(createFulltextField(reader));
             }
         } catch (Exception e) {
             // TODO: How to recover from a transient indexing failure?
@@ -458,11 +475,7 @@
                 Field.TermVector.NO));
         if (tokenized) {
             // also create fulltext index of this value
-            doc.add(new Field(FieldNames.FULLTEXT,
-                    stringValue,
-                    Field.Store.NO,
-                    Field.Index.TOKENIZED,
-                    Field.TermVector.NO));
+            doc.add(createFulltextField(stringValue));
             // create fulltext index on property
             int idx = fieldName.indexOf(':');
             fieldName = fieldName.substring(0, idx + 1)
@@ -498,5 +511,61 @@
                 Field.Store.NO,
                 Field.Index.UN_TOKENIZED,
                 Field.TermVector.NO));
+    }
+
+    /**
+     * Creates a fulltext field for the string <code>value</code>.
+     *
+     * @param value the string value.
+     * @return a lucene field.
+     */
+    protected Field createFulltextField(String value) {
+        if (supportHighlighting) {
+            // store field compressed if greater than 16k
+            Field.Store stored;
+            if (value.length() > 0x4000) {
+                stored = Field.Store.COMPRESS;
+            } else {
+                stored = Field.Store.YES;
+            }
+            return new Field(FieldNames.FULLTEXT, value, stored,
+                    Field.Index.TOKENIZED, Field.TermVector.WITH_OFFSETS);
+        } else {
+            return new Field(FieldNames.FULLTEXT, value,
+                    Field.Store.NO, Field.Index.TOKENIZED);
+        }
+    }
+
+    /**
+     * Creates a fulltext field for the reader <code>value</code>.
+     *
+     * @param value the reader value.
+     * @return a lucene field.
+     */
+    protected Field createFulltextField(Reader value) {
+        if (supportHighlighting) {
+            // need to create a string value
+            StringBuffer textExtract = new StringBuffer();
+            char[] buffer = new char[1024];
+            int len;
+            try {
+                while ((len = value.read(buffer)) > -1) {
+                    textExtract.append(buffer, 0, len);
+                }
+            } catch (IOException e) {
+                log.warn("Exception reading value for fulltext field: " +
+                        e.getMessage());
+                log.debug("Dump:", e);
+            } finally {
+                try {
+                    value.close();
+                } catch (IOException e) {
+                    // ignore
+                }
+            }
+            return createFulltextField(textExtract.toString());
+        } else {
+            return new Field(FieldNames.FULLTEXT, value);
+        }
     }
 }

Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/ParentAxisQuery.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/ParentAxisQuery.java?view=diff&rev=523251&r1=523250&r2=523251
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/ParentAxisQuery.java (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/ParentAxisQuery.java Wed Mar 28 02:45:44 2007
@@ -31,6 +31,7 @@
 import java.util.BitSet;
 import java.util.Map;
 import java.util.HashMap;
+import java.util.Set;
 
 /**
  * <code>ParentAxisQuery</code> selects the parent nodes of a context query.
@@ -79,6 +80,13 @@
      */
     protected Weight createWeight(Searcher searcher) {
         return new ParentAxisWeight(searcher);
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    public void extractTerms(Set terms) {
+        contextQuery.extractTerms(terms);
     }
 
     /**

Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/RowIteratorImpl.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/RowIteratorImpl.java?view=diff&rev=523251&r1=523250&r2=523251
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/RowIteratorImpl.java (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/RowIteratorImpl.java Wed Mar 28 02:45:44 2007
@@ -25,6 +25,8 @@
 import org.apache.jackrabbit.value.LongValue;
 import org.apache.jackrabbit.value.PathValue;
 import org.apache.jackrabbit.value.StringValue;
+import org.slf4j.LoggerFactory;
+import org.slf4j.Logger;
 
 import javax.jcr.ItemNotFoundException;
 import javax.jcr.Property;
@@ -37,6 +39,7 @@
 import java.util.HashSet;
 import java.util.NoSuchElementException;
 import java.util.Set;
+import java.io.IOException;
 
 /**
  * Implements the {@link javax.jcr.query.RowIterator} interface returned by
@@ -45,6 +48,11 @@
 class RowIteratorImpl implements RowIterator {
 
     /**
+     * The logger instance for this class.
+     */
+    private static final Logger log = LoggerFactory.getLogger(RowIteratorImpl.class);
+
+    /**
      * Iterator over nodes, that constitute the result set.
      */
     private final ScoreNodeIterator nodes;
@@ -60,6 +68,11 @@
     private final NamespaceResolver resolver;
 
     /**
+     * The excerpt provider.
+     */
+    private final ExcerptProvider excerptProvider;
+
+    /**
      * Creates a new <code>RowIteratorImpl</code> that iterates over the result
      * nodes.
      *
@@ -70,11 +83,31 @@
      *                   <code>Session</code>.
      */
     RowIteratorImpl(ScoreNodeIterator nodes, QName[] properties, NamespaceResolver resolver) {
+        this(nodes, properties, resolver, null);
+    }
+
+    /**
+     * Creates a new <code>RowIteratorImpl</code> that iterates over the result
+     * nodes.
+     *
+     * @param nodes      a <code>ScoreNodeIterator</code> that contains the
+     *                   nodes of the query result.
+     * @param properties <code>QName</code> of the select properties.
+     * @param resolver   <code>NamespaceResolver</code> of the user
+     *                   <code>Session</code>.
+     * @param exProvider the excerpt provider associated with the query result
+     *                   that created this row iterator.
+     */
+    RowIteratorImpl(ScoreNodeIterator nodes,
+                    QName[] properties,
+                    NamespaceResolver resolver,
+                    ExcerptProvider exProvider) {
         this.nodes = nodes;
         this.properties = properties;
         this.resolver = resolver;
+        this.excerptProvider = exProvider;
     }
-
+    
     /**
      * Returns the next <code>Row</code> in the iteration.
      *
@@ -213,11 +246,13 @@
                             tmp[i] = null;
                         }
                     } else {
-                        // property not set or jcr:path / jcr:score
+                        // property not set or jcr:path / jcr:score / rep:excerpt
                         if (QName.JCR_PATH.equals(properties[i])) {
                             tmp[i] = PathValue.valueOf(node.getPath());
                         } else if (QName.JCR_SCORE.equals(properties[i])) {
-                            tmp[i] = new LongValue((int) (score * 1000f));
+                            tmp[i] = new LongValue(Math.round(score * 1000f));
+                        } else if (isExcerptFunction(properties[i])) {
+                            tmp[i] = getExcerpt();
                         } else {
                             tmp[i] = null;
                         }
@@ -267,13 +302,50 @@
                     if (QName.JCR_PATH.equals(prop)) {
                         return PathValue.valueOf(node.getPath());
                     } else if (QName.JCR_SCORE.equals(prop)) {
-                        return new LongValue((int) (score * 1000f));
+                        return new LongValue(Math.round(score * 1000f));
+                    } else if (isExcerptFunction(prop)) {
+                        return getExcerpt();
                     } else {
                         return null;
                     }
                 }
             } catch (NameException e) {
                 throw new RepositoryException(e.getMessage(), e);
+            }
+        }
+
+        /**
+         * @param name a QName.
+         * @return <code>true</code> if <code>name</code> is the rep:excerpt
+         *         function, <code>false</code> otherwise.
+         */
+        private boolean isExcerptFunction(QName name) {
+            return name.getNamespaceURI().equals(QName.NS_REP_URI) &&
+                    name.getLocalName().startsWith("excerpt(");
+        }
+
+        /**
+         * Returns an excerpt for the node associated with this row.
+         *
+         * @return a StringValue or <code>null</code> if the excerpt cannot be
+         *         created or an error occurs.
+         */
+        private Value getExcerpt() {
+            if (excerptProvider == null) {
+                return null;
+            }
+            try {
+                long time = System.currentTimeMillis();
+                String excerpt = excerptProvider.getExcerpt(node.getNodeId(), 3, 150);
+                time = System.currentTimeMillis() - time;
+                log.debug("Created excerpt in {} ms.", new Long(time));
+                if (excerpt != null) {
+                    return new StringValue(excerpt);
+                } else {
+                    return null;
+                }
+            } catch (IOException e) {
+                return null;
             }
         }
     }

Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java?view=diff&rev=523251&r1=523250&r2=523251
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java Wed Mar 28 02:45:44 2007
@@ -43,6 +43,7 @@
 import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.SortField;
 import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
 import org.apache.commons.collections.iterators.AbstractIteratorDecorator;
 
 import javax.jcr.RepositoryException;
@@ -224,6 +225,19 @@
     private int resultFetchSize = Integer.MAX_VALUE;
 
     /**
+     * If set to <code>true</code> the fulltext field is stored and a term
+     * vector is created with offset information.
+     * <p/>
+     * Default value is: <code>false</code>.
+     */
+    private boolean supportHighlighting = false;
+
+    /**
+     * The excerpt provider class. Implements {@link ExcerptProvider}.
+     */
+    private Class excerptProviderClass = DefaultXMLExcerpt.class;
+
+    /**
      * Indicates if this <code>SearchIndex</code> is closed and cannot be used
      * anymore.
      */
@@ -418,22 +432,9 @@
                                   QName[] orderProps,
                                   boolean[] orderSpecs) throws IOException {
         checkOpen();
-        QueryHandler parentHandler = getContext().getParentHandler();
-        IndexReader parentReader = null;
-        if (parentHandler instanceof SearchIndex) {
-            parentReader = ((SearchIndex) parentHandler).index.getIndexReader();
-        }
-
         SortField[] sortFields = createSortFields(orderProps, orderSpecs);
 
-        IndexReader reader = index.getIndexReader();
-        if (parentReader != null) {
-            // todo FIXME not type safe
-            CachingMultiReader[] readers = {(CachingMultiReader) reader,
-                                            (CachingMultiReader) parentReader};
-            reader = new CombinedIndexReader(readers);
-        }
-
+        IndexReader reader = getIndexReader();
         IndexSearcher searcher = new IndexSearcher(reader);
         Hits hits;
         if (sortFields.length > 0) {
@@ -445,6 +446,27 @@
     }
 
     /**
+     * Creates an excerpt provider for the given <code>query</code>.
+     *
+     * @param query the query.
+     * @return an excerpt provider for the given <code>query</code>.
+     * @throws IOException if the provider cannot be created.
+     */
+    public ExcerptProvider createExcerptProvider(Query query)
+            throws IOException {
+        ExcerptProvider ep;
+        try {
+            ep = (ExcerptProvider) excerptProviderClass.newInstance();
+        } catch (Exception e) {
+            IOException ex = new IOException();
+            ex.initCause(e);
+            throw ex;
+        }
+        ep.init(query, this);
+        return ep;
+    }
+
+    /**
      * Returns the analyzer in use for indexing.
      * @return the analyzer in use for indexing.
      */
@@ -470,6 +492,31 @@
     }
 
     /**
+     * Returns an index reader for this search index. The caller of this method
+     * is responsible for closing the index reader when they are finished using
+     * it.
+     *
+     * @return an index reader for this search index.
+     * @throws IOException if the index reader cannot be obtained.
+     */
+    public IndexReader getIndexReader() throws IOException {
+        QueryHandler parentHandler = getContext().getParentHandler();
+        IndexReader parentReader = null;
+        if (parentHandler instanceof SearchIndex) {
+            parentReader = ((SearchIndex) parentHandler).index.getIndexReader();
+        }
+
+        IndexReader reader = index.getIndexReader();
+        if (parentReader != null) {
+            // todo FIXME not type safe
+            CachingMultiReader[] readers = {(CachingMultiReader) reader,
+                                            (CachingMultiReader) parentReader};
+            reader = new CombinedIndexReader(readers);
+        }
+        return reader;
+    }
+
+    /**
      * Creates the SortFields for the order properties.
      *
      * @param orderProps the order properties.
@@ -513,8 +560,10 @@
     protected Document createDocument(NodeState node,
                                       NamespaceMappings nsMappings)
             throws RepositoryException {
-        return new NodeIndexer(node, getContext().getItemStateManager(),
-                nsMappings, extractor).createDoc();
+        NodeIndexer indexer = new NodeIndexer(node,
+                getContext().getItemStateManager(), nsMappings, extractor);
+        indexer.setSupportHighlighting(supportHighlighting);
+        return indexer.createDoc();
     }
 
     /**
@@ -934,6 +983,52 @@
         return extractorTimeout;
     }
 
+    /**
+     * If set to <code>true</code> additional information is stored in the index
+     * to support highlighting using the rep:excerpt pseudo property.
+     *
+     * @param b <code>true</code> to enable highlighting support.
+     */
+    public void setSupportHighlighting(boolean b) {
+        supportHighlighting = b;
+    }
+
+    /**
+     * @return <code>true</code> if highlighting support is enabled.
+     */
+    public boolean getSupportHighlighting() {
+        return supportHighlighting;
+    }
+
+    /**
+     * Sets the class name for the {@link ExcerptProvider} that should be used
+     * for the rep:excerpt pseudo property in a query.
+     *
+     * @param className the name of a class that implements {@link
+     *                  ExcerptProvider}.
+     */
+    public void setExcerptProviderClass(String className) {
+        try {
+            Class clazz = Class.forName(className);
+            if (ExcerptProvider.class.isAssignableFrom(clazz)) {
+                excerptProviderClass = clazz;
+            } else {
+                log.warn("Invalid value for excerptProviderClass, {} does " +
+                        "not implement ExcerptProvider interface.", className);
+            }
+        } catch (ClassNotFoundException e) {
+            log.warn("Invalid value for excerptProviderClass, class {} not " +
+                    "found.", className);
+        }
+    }
+
+    /**
+     * @return the class name of the excerpt provider implementation.
+     */
+    public String getExcerptProviderClass() {
+        return excerptProviderClass.getName();
+    }
+
     //----------------------------< internal >----------------------------------
 
     /**
@@ -945,6 +1040,6 @@
     private void checkOpen() throws IOException {
         if (closed) {
             throw new IOException("query handler closed and cannot be used anymore.");
-}
+        }
     }
 }

Added: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SimpleExcerptProvider.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SimpleExcerptProvider.java?view=auto&rev=523251
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SimpleExcerptProvider.java (added)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SimpleExcerptProvider.java Wed Mar 28 02:45:44 2007
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.query.lucene;
+
+import org.apache.lucene.search.Query;
+import org.apache.jackrabbit.core.NodeId;
+import org.apache.jackrabbit.core.PropertyId;
+import org.apache.jackrabbit.core.value.InternalValue;
+import org.apache.jackrabbit.core.state.ItemStateManager;
+import org.apache.jackrabbit.core.state.NodeState;
+import org.apache.jackrabbit.core.state.PropertyState;
+import org.apache.jackrabbit.core.state.ItemStateException;
+import org.apache.jackrabbit.name.QName;
+
+import javax.jcr.PropertyType;
+import java.io.IOException;
+import java.util.Iterator;
+
+/**
+ * <code>SimpleExcerptProvider</code> is a <b>very</b> simple excerpt provider.
+ * It does not do any highlighting and simply returns up to
+ * <code>maxFragmentSize</code> characters of string properties for a given
+ * node.
+ * @see #getExcerpt(org.apache.jackrabbit.core.NodeId, int, int)
+ */
+public class SimpleExcerptProvider implements ExcerptProvider {
+
+    /**
+     * The item state manager.
+     */
+    private ItemStateManager ism;
+
+    /**
+     * {@inheritDoc}
+     */
+    public void init(Query query, SearchIndex index) throws IOException {
+        ism = index.getContext().getItemStateManager();
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    public String getExcerpt(NodeId id, int maxFragments, int maxFragmentSize)
+            throws IOException {
+        StringBuffer text = new StringBuffer();
+        try {
+            NodeState nodeState = (NodeState) ism.getItemState(id);
+            String separator = "";
+            for (Iterator it = nodeState.getPropertyNames().iterator();
+                 it.hasNext() && text.length() < maxFragmentSize; ) {
+                PropertyId propId = new PropertyId(id, (QName) it.next());
+                PropertyState propState = (PropertyState) ism.getItemState(propId);
+                if (propState.getType() == PropertyType.STRING) {
+                    text.append(separator);
+                    separator = " ... ";
+                    InternalValue[] values = propState.getValues();
+                    for (int i = 0; i < values.length; i++) {
+                        text.append(values[i].toString());
+                    }
+                }
+            }
+        } catch (ItemStateException e) {
+            // ignore
+        }
+        if (text.length() > maxFragmentSize) {
+            int lastSpace = text.lastIndexOf(" ", maxFragmentSize);
+            if (lastSpace != -1) {
+                text.setLength(lastSpace);
+            } else {
+                text.setLength(maxFragmentSize);
+            }
+            text.append(" ...");
+        }
+        return "<excerpt><fragment>" + text.toString() + "</fragment></excerpt>";
+    }
+}

Propchange: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SimpleExcerptProvider.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/sql/DefaultParserVisitor.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/sql/DefaultParserVisitor.java?view=diff&rev=523251&r1=523250&r2=523251
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/sql/DefaultParserVisitor.java (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/sql/DefaultParserVisitor.java Wed Mar 28 02:45:44 2007
@@ -97,4 +97,8 @@
     public Object visit(ASTUpperFunction node, Object data) {
         return data;
     }
+
+    public Object visit(ASTExcerptFunction node, Object data) {
+        return data;
+    }
 }

Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/sql/JCRSQLQueryBuilder.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/sql/JCRSQLQueryBuilder.java?view=diff&rev=523251&r1=523250&r2=523251
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/sql/JCRSQLQueryBuilder.java (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/sql/JCRSQLQueryBuilder.java Wed Mar 28 02:45:44 2007
@@ -262,6 +262,11 @@
                 root.addSelectProperty(node.getName());
                 return data;
             }
+
+            public Object visit(ASTExcerptFunction node, Object data) {
+                root.addSelectProperty(new QName(QName.NS_REP_URI, "excerpt(.)"));
+                return data;
+            }
         }, root);
 
         return data;
@@ -538,6 +543,11 @@
         }
         parent.addOperand(new PropertyFunctionQueryNode(parent, PropertyFunctionQueryNode.UPPER_CASE));
         return parent;
+    }
+
+    public Object visit(ASTExcerptFunction node, Object data) {
+        // do nothing
+        return data;
     }
 
     //------------------------< internal >--------------------------------------

Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/xpath/XPathQueryBuilder.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/xpath/XPathQueryBuilder.java?view=diff&rev=523251&r1=523250&r2=523251
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/xpath/XPathQueryBuilder.java (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/xpath/XPathQueryBuilder.java Wed Mar 28 02:45:44 2007
@@ -928,6 +928,14 @@
                 } catch (NameException e) {
                     exceptions.add(e);
                 }
+            } else if (queryNode.getType() == QueryNode.TYPE_PATH) {
+                // use function name as name of a pseudo property in select clause
+                try {
+                    QName name = NameFormat.parse(fName + "()", resolver);
+                    root.addSelectProperty(name);
+                } catch (NameException e) {
+                    exceptions.add(e);
+                }
             } else {
                 exceptions.add(new InvalidQueryException("Unsupported function: " + fName));
             }

Modified: jackrabbit/trunk/jackrabbit-core/src/main/javacc/sql/JCRSQL.jjt
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/javacc/sql/JCRSQL.jjt?view=diff&rev=523251&r1=523250&r2=523251
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/javacc/sql/JCRSQL.jjt (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/javacc/sql/JCRSQL.jjt Wed Mar 28 02:45:44 2007
@@ -106,6 +106,7 @@
 | < ESCAPE: "ESCAPE" >
 | < SELECT: "SELECT" >
 | < BETWEEN: "BETWEEN" >
+| < EXCERPT: "EXCERPT" >
 | < CONTAINS: "CONTAINS" >
 }
 
@@ -272,7 +273,7 @@
 {}
 {
     (<ASTERISK>)
-  | (Identifier() (<PERIOD> Identifier() { Node n = jjtree.popNode(); jjtree.popNode(); jjtree.pushNode(n); } )? (<COMMA> Identifier() (<PERIOD> Identifier() { Node n = jjtree.popNode(); jjtree.popNode(); jjtree.pushNode(n); } )? )*)
+  | ((ExcerptFunction() | Identifier() (<PERIOD> Identifier() { Node n = jjtree.popNode(); jjtree.popNode(); jjtree.pushNode(n); } )?) (<COMMA> (ExcerptFunction() | Identifier() (<PERIOD> Identifier() { Node n = jjtree.popNode(); jjtree.popNode(); jjtree.pushNode(n); } )?) )*)
 }
 
 void TableExpression() #void :
@@ -605,6 +606,15 @@
   )
   {
     return jjtThis.getName();
+  }
+}
+
+QName ExcerptFunction() :
+{}
+{
+  <EXCERPT> "(" <PERIOD> ")"
+  {
+    return new QName(QName.NS_REP_URI, "excerpt(.)");
   }
 }
 



Mime
View raw message