jackrabbit-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ju...@apache.org
Subject svn commit: r1174630 - in /jackrabbit/branches/2.1: ./ jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/ jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/
Date Fri, 23 Sep 2011 10:11:33 GMT
Author: jukka
Date: Fri Sep 23 10:11:33 2011
New Revision: 1174630

URL: http://svn.apache.org/viewvc?rev=1174630&view=rev
Log:
2.1: Merged revisions 1173250 and 1173706 (JCR-3077, JCR-3075)

Modified:
    jackrabbit/branches/2.1/   (props changed)
    jackrabbit/branches/2.1/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractExcerpt.java
    jackrabbit/branches/2.1/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DefaultHighlighter.java
    jackrabbit/branches/2.1/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/WeightedHighlighter.java
    jackrabbit/branches/2.1/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/ExcerptTest.java

Propchange: jackrabbit/branches/2.1/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Fri Sep 23 10:11:33 2011
@@ -1,6 +1,6 @@
 /jackrabbit/branches/1.5:794012,794100,794102
-/jackrabbit/branches/2.2:1065610,1067910,1071647,1071665,1071668,1089453,1126998
+/jackrabbit/branches/2.2:1065610,1067910,1071647,1071665,1071668,1089453,1126998,1173250,1173706
 /jackrabbit/sandbox/JCR-1456:774917-886178
 /jackrabbit/sandbox/JCR-2170:812417-816332
 /jackrabbit/sandbox/tripod-JCR-2209:795441-795863
-/jackrabbit/trunk:931121,931479,931483-931484,931504,931609,931613,931838,931919,932318-932319,933144,933197,933203,933213,933216,933554,933646,933694,934405,934412,934849,935557,936668,938099,945528,950440,950680,955222,955229,955307,955852,961487,961626,964362,965539,986682,986686,986715,991144,995411-995412,996810,999298-999299,999965,1000947,1001707,1002065-1002066,1002084,1002101-1002102,1002168,1002170,1002589,1002608,1002657,1002729,1003423,1003470,1003542,1003773,1004182,1004184,1004223-1004224,1004652,1005057,1005112,1036117,1036336-1036337,1038201,1039064,1039423,1040090,1065599,1069831,1071562,1071573,1087304,1089436,1104027
+/jackrabbit/trunk:931121,931479,931483-931484,931504,931609,931613,931838,931919,932318-932319,933144,933197,933203,933213,933216,933554,933646,933694,934405,934412,934849,935557,936668,938099,945528,950440,950680,955222,955229,955307,955852,961487,961626,964362,965539,986682,986686,986715,991144,995411-995412,996810,999298-999299,999965,1000947,1001707,1002065-1002066,1002084,1002101-1002102,1002168,1002170,1002589,1002608,1002657,1002729,1003423,1003470,1003542,1003773,1004182,1004184,1004223-1004224,1004652,1005057,1005112,1036117,1036336-1036337,1038201,1039064,1039423,1040090,1065599,1069831,1071562,1071573,1087304,1089436,1104027,1173196

Modified: jackrabbit/branches/2.1/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractExcerpt.java
URL: http://svn.apache.org/viewvc/jackrabbit/branches/2.1/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractExcerpt.java?rev=1174630&r1=1174629&r2=1174630&view=diff
==============================================================================
--- jackrabbit/branches/2.1/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractExcerpt.java
(original)
+++ jackrabbit/branches/2.1/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractExcerpt.java
Fri Sep 23 10:11:33 2011
@@ -16,29 +16,32 @@
  */
 package org.apache.jackrabbit.core.query.lucene;
 
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.apache.lucene.search.Query;
+import java.io.IOException;
+import java.io.Reader;
+import java.io.StringReader;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.LinkedHashSet;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+import org.apache.jackrabbit.core.id.NodeId;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Fieldable;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermDocs;
 import org.apache.lucene.index.TermFreqVector;
 import org.apache.lucene.index.TermPositionVector;
 import org.apache.lucene.index.TermVectorOffsetInfo;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Fieldable;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Token;
-import org.apache.jackrabbit.core.id.NodeId;
-
-import java.io.IOException;
-import java.io.StringReader;
-import java.io.Reader;
-import java.util.Set;
-import java.util.HashSet;
-import java.util.TreeMap;
-import java.util.SortedMap;
-import java.util.Arrays;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.Query;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * <code>AbstractExcerpt</code> implements base functionality for an excerpt
@@ -176,10 +179,32 @@ public abstract class AbstractExcerpt im
     /**
      * @return the extracted terms from the query.
      */
-    protected final Set<Term> getQueryTerms() {
-        Set<Term> extractedTerms = new HashSet<Term>();
-        Set<Term> relevantTerms = new HashSet<Term>();
-        query.extractTerms(extractedTerms);
+    protected final Set<Term[]> getQueryTerms() {
+        Set<Term[]> relevantTerms = new HashSet<Term[]>();
+        getQueryTerms(query, relevantTerms);
+        return relevantTerms;
+    }
+
+    private static void getQueryTerms(Query q, Set<Term[]> relevantTerms) {
+        if (q instanceof BooleanQuery) {
+            final BooleanQuery bq = (BooleanQuery) q;
+            for (BooleanClause clause : bq.getClauses()) {
+                getQueryTerms(clause.getQuery(), relevantTerms);
+            }
+            return;
+        }
+        //need to preserve insertion order
+        Set<Term> extractedTerms = new LinkedHashSet<Term>();
+        q.extractTerms(extractedTerms);
+        Set<Term> filteredTerms = filterRelevantTerms(extractedTerms);
+        if (!filteredTerms.isEmpty()) {
+            relevantTerms.add(filteredTerms.toArray(new Term[] {}));
+        }
+    }
+
+    private static Set<Term> filterRelevantTerms(Set<Term> extractedTerms) {
+      //need to preserve insertion order
+        Set<Term> relevantTerms = new LinkedHashSet<Term>();
         // only keep terms for fulltext fields
         for (Term t : extractedTerms) {
             if (t.field().equals(FieldNames.FULLTEXT)) {

Modified: jackrabbit/branches/2.1/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DefaultHighlighter.java
URL: http://svn.apache.org/viewvc/jackrabbit/branches/2.1/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DefaultHighlighter.java?rev=1174630&r1=1174629&r2=1174630&view=diff
==============================================================================
--- jackrabbit/branches/2.1/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DefaultHighlighter.java
(original)
+++ jackrabbit/branches/2.1/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DefaultHighlighter.java
Fri Sep 23 10:11:33 2011
@@ -19,17 +19,19 @@ package org.apache.jackrabbit.core.query
 import java.io.IOException;
 import java.io.StringReader;
 import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.Set;
 import java.util.Arrays;
 import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
 import java.util.List;
+import java.util.Map;
+import java.util.Set;
 
+import org.apache.jackrabbit.util.Text;
 import org.apache.lucene.document.Field;
+import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermPositionVector;
 import org.apache.lucene.index.TermVectorOffsetInfo;
-import org.apache.lucene.index.Term;
-import org.apache.jackrabbit.util.Text;
 
 /**
  * This is an adapted version of the <code>FulltextHighlighter</code> posted
in
@@ -93,7 +95,7 @@ public class DefaultHighlighter {
      *         highlighted
      */
     public static String highlight(TermPositionVector tvec,
-                                   Set<Term> queryTerms,
+                                   Set<Term[]> queryTerms,
                                    String text,
                                    String excerptStart,
                                    String excerptEnd,
@@ -120,7 +122,7 @@ public class DefaultHighlighter {
      *         highlighted
      */
     public static String highlight(TermPositionVector tvec,
-                                   Set<Term> queryTerms,
+                                   Set<Term[]> queryTerms,
                                    String text,
                                    int maxFragments,
                                    int surround)
@@ -134,7 +136,7 @@ public class DefaultHighlighter {
      * @see #highlight(TermPositionVector, Set, String, String, String, String, String, String,
String, int, int)
      */
     protected String doHighlight(TermPositionVector tvec,
-                                 Set<Term> queryTerms,
+                                 Set<Term[]> queryTerms,
                                  String text,
                                  String excerptStart,
                                  String excerptEnd,
@@ -144,21 +146,102 @@ public class DefaultHighlighter {
                                  String hlEnd,
                                  int maxFragments,
                                  int surround) throws IOException {
-        String[] terms = new String[queryTerms.size()];
-        Iterator<Term> it = queryTerms.iterator();
-        for (int i = 0; it.hasNext(); i++) {
-            terms[i] = it.next().text();
-        }
-        List<TermVectorOffsetInfo> list = new ArrayList<TermVectorOffsetInfo>();
-        int[] tvecindexes = tvec.indexesOf(terms, 0, terms.length);
-        for (int tvecindex : tvecindexes) {
-            TermVectorOffsetInfo[] termoffsets = tvec.getOffsets(tvecindex);
-            list.addAll(Arrays.asList(termoffsets));
+
+        List<TermVectorOffsetInfo> termOffsetInfo = new ArrayList<TermVectorOffsetInfo>();
+        
+        Iterator<Term[]> it = queryTerms.iterator();
+        while (it.hasNext()) {
+            Term[] qt = it.next();
+            final int qtLen = qt.length;
+            if (qt == null || qtLen == 0) {
+                continue;
+            }
+            String[] qtText = new String[qtLen];
+            for (int i = 0; i < qtLen; i++) {
+                qtText[i] = qt[i].text();
+            }
+            int[] tvecindexes = tvec.indexesOf(qtText, 0, qtText.length);
+            Map<Integer, TermVectorOffsetInfo[]> localTermOffsetInfo = new HashMap<Integer,
TermVectorOffsetInfo[]>();
+            for (int tvecindex : tvecindexes) {
+                TermVectorOffsetInfo[] termoffsets = tvec.getOffsets(tvecindex);
+                if (termoffsets == null || termoffsets.length == 0) {
+                    continue;
+                }
+                localTermOffsetInfo.put(tvecindex, termoffsets);
+            }
+
+            // to keep the order of the keys, use tvecindexes,
+            // if a term is not found tvecindexes[] = -1
+            // when dealing with multiple terms that have to exist, just check
+            // if the first one is there
+            if (tvecindexes.length > 0 && tvecindexes[0] >= 0) {
+                // we have to build one interval TermVectorOffsetInfo for each
+                // hit;
+                List<TermVectorOffsetInfo> intervalTermOffsetInfo = new ArrayList<TermVectorOffsetInfo>();
+
+                // pick all the first key's hist as interval start
+                TermVectorOffsetInfo[] firstKeyTermOffsets = localTermOffsetInfo
+                        .get(tvecindexes[0]);
+                Arrays.sort(firstKeyTermOffsets,
+                        new TermVectorOffsetInfoSorter());
+                intervalTermOffsetInfo.addAll(Arrays
+                        .asList(firstKeyTermOffsets));                
+
+                // check if each key is part of an interval, if not, it is
+                // dropped from the list
+                for (int i = 1; i < tvecindexes.length; i++) {
+                    final Integer key = tvecindexes[i];
+                    TermVectorOffsetInfo[] termoffsets = localTermOffsetInfo
+                            .get(key);
+                    if (termoffsets == null) {
+                        continue;
+                    }
+                    Arrays.sort(termoffsets, new TermVectorOffsetInfoSorter());
+
+                    Iterator<TermVectorOffsetInfo> intervalIterator = intervalTermOffsetInfo
+                            .iterator();
+
+                    int index = 0;
+                    while (intervalIterator.hasNext()) {
+                        TermVectorOffsetInfo intervalOI = intervalIterator
+                                .next();
+                        if (index >= termoffsets.length) {
+                            intervalIterator.remove();
+                            continue;
+                        }
+                        boolean matchSearch = true;
+                        boolean matchFound = false;
+                        while (matchSearch) {
+                            TermVectorOffsetInfo localOI = termoffsets[index];
+                            // check interval match
+                            // CJK languages will have the tokens from the PhraseQuery glued
together (see LUCENE-2458)
+                            int diff = localOI.getStartOffset()
+                                    - intervalOI.getEndOffset();
+                            // TODO we'll probably have to remove 'diff == 0'
+                            // after upgrading to lucene 3.1
+                            if (diff == 1 || diff == 0) {
+                                intervalOI.setEndOffset(localOI.getEndOffset());
+                                matchSearch = false;
+                                matchFound = true;
+                            }
+                            index++;
+                            if (index >= termoffsets.length) {
+                                matchSearch = false;
+                            }
+                        }
+                        if (!matchFound) {
+                            index--;
+                            intervalIterator.remove();
+                        }
+                    }
+                }
+                termOffsetInfo.addAll(intervalTermOffsetInfo);
+            }
         }
 
-        TermVectorOffsetInfo[] offsets = list.toArray(new TermVectorOffsetInfo[list.size()]);
+        TermVectorOffsetInfo[] offsets = termOffsetInfo.toArray(new TermVectorOffsetInfo[termOffsetInfo.size()]);
         // sort offsets
-        if (terms.length > 1) {
+        if (offsets != null && offsets.length > 1) {
             Arrays.sort(offsets, new TermVectorOffsetInfoSorter());
         }
 
@@ -248,8 +331,8 @@ public class DefaultHighlighter {
                     if (skippedChars > surround) {
                         skippedChars = surround;
                     }
-                    sb.append(Text.encodeIllegalXMLCharacters(
-                            new String(cbuf, 0, surround - skippedChars)));
+                    sb.append(escape(new String(cbuf, 0, surround
+                            - skippedChars)));
                     sb.append(fragmentEnd);
                 }
             }
@@ -296,8 +379,8 @@ public class DefaultHighlighter {
             if (!sentenceStart) {
                 sb.append("... ");
             }
-            sb.append(Text.encodeIllegalXMLCharacters(
-                    new String(cbuf, skippedChars, cbuf.length - skippedChars)));
+            sb.append(escape(new String(cbuf, skippedChars, cbuf.length
+                    - skippedChars)));
 
             // iterate terms
             for (Iterator iter = fi.iterator(); iter.hasNext();) {
@@ -307,8 +390,7 @@ public class DefaultHighlighter {
                     cbuf = new char[nextStart - pos];
                     int charsRead = reader.read(cbuf, 0, nextStart - pos);
                     pos += (nextStart - pos);
-                    sb.append(Text.encodeIllegalXMLCharacters(
-                            new String(cbuf, 0, charsRead)));
+                    sb.append(escape(new String(cbuf, 0, charsRead)));
                 }
                 sb.append(hlStart);
                 nextStart = ti.getEndOffset();
@@ -316,8 +398,7 @@ public class DefaultHighlighter {
                 cbuf = new char[nextStart - pos];
                 reader.read(cbuf, 0, nextStart - pos);
                 pos += (nextStart - pos);
-                sb.append(Text.encodeIllegalXMLCharacters(
-                        new String(cbuf)));
+                sb.append(escape(new String(cbuf)));
                 sb.append(hlEnd);
             }
         }
@@ -343,8 +424,8 @@ public class DefaultHighlighter {
                 } else {
                     skippedChars = 0;
                 }
-                sb.append(Text.encodeIllegalXMLCharacters(
-                        new String(cbuf, 0, EOF ? skip : (surround - skippedChars))));
+                sb.append(escape(new String(cbuf, 0, EOF ? skip
+                        : (surround - skippedChars))));
                 if (!EOF) {
                     char lastChar = sb.charAt(sb.length() - 1);
                     if (lastChar != '.' && lastChar != '!' && lastChar !=
'?') {
@@ -364,7 +445,7 @@ public class DefaultHighlighter {
      * @param text the text.
      * @param excerptStart the excerpt start.
      * @param excerptEnd the excerpt end.
-     * @param fragmentStart the fragement start.
+     * @param fragmentStart the fragment start.
      * @param fragmentEnd the fragment end.
      * @param maxLength the maximum length of the fragment.
      * @return a default excerpt.
@@ -393,10 +474,24 @@ public class DefaultHighlighter {
                 }
             }
         }
-        excerpt.append(Text.encodeIllegalXMLCharacters(tmp.toString()));
+        excerpt.append(escape(tmp.toString()));
         excerpt.append(fragmentEnd).append(excerptEnd);
         return excerpt.toString();
     }
+    
+    
+    /**
+     * Escapes input text suitable for the output format.
+     * <p>
+     * By default does XML-escaping. Can be overridden for
+     * other formats.
+     * 
+     * @param input raw text.
+     * @return text suitably escaped.
+     */
+    protected String escape(String input) {
+        return Text.encodeIllegalXMLCharacters(input);
+    }
 
     private static class FragmentInfo {
         List<TermVectorOffsetInfo> offsetInfosList;

Modified: jackrabbit/branches/2.1/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/WeightedHighlighter.java
URL: http://svn.apache.org/viewvc/jackrabbit/branches/2.1/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/WeightedHighlighter.java?rev=1174630&r1=1174629&r2=1174630&view=diff
==============================================================================
--- jackrabbit/branches/2.1/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/WeightedHighlighter.java
(original)
+++ jackrabbit/branches/2.1/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/WeightedHighlighter.java
Fri Sep 23 10:11:33 2011
@@ -77,7 +77,7 @@ public class WeightedHighlighter extends
      *         highlighted
      */
     public static String highlight(TermPositionVector tvec,
-                                   Set<Term> queryTerms,
+                                   Set<Term[]> queryTerms,
                                    String text,
                                    String excerptStart,
                                    String excerptEnd,
@@ -103,7 +103,7 @@ public class WeightedHighlighter extends
      *         highlighted
      */
     public static String highlight(TermPositionVector tvec,
-                                   Set<Term> queryTerms,
+                                   Set<Term[]> queryTerms,
                                    String text,
                                    int maxFragments,
                                    int surround) throws IOException {
@@ -194,14 +194,17 @@ public class WeightedHighlighter extends
                 TermVectorOffsetInfo oi = (TermVectorOffsetInfo) fIt.next();
                 if (lastOffsetInfo != null) {
                     // fill in text between terms
-                    sb.append(text.substring(lastOffsetInfo.getEndOffset(), oi.getStartOffset()));
+                    sb.append(escape(text.substring(
+                            lastOffsetInfo.getEndOffset(), oi.getStartOffset())));
                 }
                 sb.append(hlStart);
-                sb.append(text.substring(oi.getStartOffset(), oi.getEndOffset()));
+                sb.append(escape(text.substring(oi.getStartOffset(),
+                        oi.getEndOffset())));
                 sb.append(hlEnd);
                 lastOffsetInfo = oi;
             }
-            limit = Math.min(text.length(), fi.getStartOffset() - len + (surround * 2));
+            limit = Math.min(text.length(), fi.getStartOffset() - len
+                    + (surround * 2));
             endFragment(sb, text, fi.getEndOffset(), limit);
             sb.append(fragmentEnd);
         }
@@ -222,10 +225,10 @@ public class WeightedHighlighter extends
      * @return the length of the start fragment that was appended to
      *         <code>sb</code>.
      */
-    private static int startFragment(StringBuffer sb, String text, int offset, int limit)
{
+    private int startFragment(StringBuffer sb, String text, int offset, int limit) {
         if (limit == 0) {
             // append all
-            sb.append(text.substring(0, offset));
+            sb.append(escape(text.substring(0, offset)));
             return offset;
         }
         String intro = "... ";
@@ -241,7 +244,7 @@ public class WeightedHighlighter extends
                 }
             }
         }
-        sb.append(intro).append(text.substring(start, offset));
+        sb.append(intro).append(escape(text.substring(start, offset)));
         return offset - start;
     }
 
@@ -255,10 +258,10 @@ public class WeightedHighlighter extends
      * @param offset the end offset of the last matching term in the fragment.
      * @param limit  do not go further than <code>limit</code>.
      */
-    private static void endFragment(StringBuffer sb, String text, int offset, int limit)
{
+    private void endFragment(StringBuffer sb, String text, int offset, int limit) {
         if (limit == text.length()) {
             // append all
-            sb.append(text.substring(offset));
+            sb.append(escape(text.substring(offset)));
             return;
         }
         int end = offset;
@@ -268,7 +271,7 @@ public class WeightedHighlighter extends
                 end = i;
             }
         }
-        sb.append(text.substring(offset, end)).append(" ...");
+        sb.append(escape(text.substring(offset, end))).append(" ...");
     }
 
     private static class FragmentInfo {

Modified: jackrabbit/branches/2.1/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/ExcerptTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/branches/2.1/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/ExcerptTest.java?rev=1174630&r1=1174629&r2=1174630&view=diff
==============================================================================
--- jackrabbit/branches/2.1/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/ExcerptTest.java
(original)
+++ jackrabbit/branches/2.1/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/ExcerptTest.java
Fri Sep 23 10:11:33 2011
@@ -16,9 +16,9 @@
  */
 package org.apache.jackrabbit.core.query;
 
+import javax.jcr.Node;
 import javax.jcr.RepositoryException;
 import javax.jcr.Value;
-import javax.jcr.Node;
 import javax.jcr.query.QueryResult;
 import javax.jcr.query.Row;
 import javax.jcr.query.RowIterator;
@@ -104,6 +104,10 @@ public class ExcerptTest extends Abstrac
                 "apache jackrabbit");
     }
 
+    /**
+     * Verifies character encoding on a node property that does not contain any
+     * excerpt info
+     */
     public void testEncodeIllegalCharsNoHighlights() throws RepositoryException {
         String text = "bla <strong>bla</strong> bla";
         String excerpt = createExcerpt("bla &lt;strong&gt;bla&lt;/strong&gt;
bla");
@@ -116,9 +120,126 @@ public class ExcerptTest extends Abstrac
         QueryResult result = executeQuery(stmt);
         RowIterator rows = result.getRows();
         assertEquals(1, rows.getSize());
-        assertEquals(excerpt, rows.nextRow().getValue("rep:excerpt(text)").getString());
+        String ex = rows.nextRow().getValue("rep:excerpt(text)").getString();
+        assertEquals("Expected " + excerpt + ", but got ", excerpt, ex);
     }
 
+    /**
+     * Verifies character encoding on a node property that contains excerpt info
+     */
+    public void testEncodeIllegalCharsHighlights() throws RepositoryException {
+        checkExcerpt("bla <strong>bla</strong> foo",
+                "bla &lt;strong&gt;bla&lt;/strong&gt; <strong>foo</strong>",
+                "foo");
+    }
+
+    /**
+     * test for https://issues.apache.org/jira/browse/JCR-3077
+     * 
+     * when given a quoted phrase, the excerpt should evaluate it whole as a
+     * token (not break is down)
+     * 
+     */
+    public void testQuotedPhrase() throws RepositoryException {
+        checkExcerpt("one two three four",
+                "one <strong>two three</strong> four", "\"two three\"");
+    }
+
+    /**
+     * Verifies excerpt generation on a node property that does not contain any
+     * excerpt info for a quoted phrase
+     */
+    public void testQuotedPhraseNoMatch() throws RepositoryException {
+        String text = "one two three four";
+        String excerpt = createExcerpt("one two three four");
+        String terms = "\"five six\"";
+
+        Node n = testRootNode.addNode(nodeName1);
+        n.setProperty("text", text);
+        n.setProperty("other", terms);
+        superuser.save();
+
+        String stmt = getStatement(terms);
+        QueryResult result = executeQuery(stmt);
+        RowIterator rows = result.getRows();
+        assertEquals(1, rows.getSize());
+        String ex = rows.nextRow().getValue("rep:excerpt(text)").getString();
+        assertEquals("Expected " + excerpt + ", but got ", excerpt, ex);
+    }
+
+    /**
+     * 
+     * Verifies excerpt generation on a node property that contains the exact
+     * quoted phrase but with scrambled words.
+     * 
+     * More clearly it actually checks that the order of tokens is respected for
+     * a quoted phrase.
+     */
+    public void testQuotedPhraseNoMatchScrambled() throws RepositoryException {
+        String text = "one two three four";
+        String excerpt = createExcerpt("one two three four");
+        String terms = "\"three two\"";
+
+        Node n = testRootNode.addNode(nodeName1);
+        n.setProperty("text", text);
+        n.setProperty("other", terms);
+        superuser.save();
+
+        String stmt = getStatement(terms);
+        QueryResult result = executeQuery(stmt);
+        RowIterator rows = result.getRows();
+        assertEquals(1, rows.getSize());
+        String ex = rows.nextRow().getValue("rep:excerpt(text)").getString();
+        assertEquals("Expected " + excerpt + ", but got ", excerpt, ex);
+    }
+    
+    /**
+     * Verifies excerpt generation on a node property that does not contain the
+     * exact quoted phrase, but contains fragments of it.
+     * 
+     */
+    public void testQuotedPhraseNoMatchGap() throws RepositoryException {
+        String text = "one two three four";
+        String excerpt = createExcerpt("one two three four");
+        String terms = "\"two four\"";
+
+        Node n = testRootNode.addNode(nodeName1);
+        n.setProperty("text", text);
+        n.setProperty("other", terms);
+        superuser.save();
+
+        String stmt = getStatement(terms);
+        QueryResult result = executeQuery(stmt);
+        RowIterator rows = result.getRows();
+        assertEquals(1, rows.getSize());
+        String ex = rows.nextRow().getValue("rep:excerpt(text)").getString();
+        assertEquals("Expected " + excerpt + ", but got ", excerpt, ex);
+    }
+    
+    /**
+     * test for https://issues.apache.org/jira/browse/JCR-3077
+     * 
+     * JA search acts as a PhraseQuery, thanks to LUCENE-2458. so it should be
+     * covered by the QuotedTest search.
+     * 
+     */
+    public void testHighlightJa() throws RepositoryException {
+
+        // http://translate.google.com/#auto|en|%E3%82%B3%E3%83%B3%E3%83%86%E3%83%B3%E3%83%88
+        String jContent = "\u30b3\u30fe\u30c6\u30f3\u30c8";
+        // http://translate.google.com/#auto|en|%E3%83%86%E3%82%B9%E3%83%88
+        String jTest = "\u30c6\u30b9\u30c8";
+
+        String content = "some text with japanese: " + jContent + " (content)"
+                + " and " + jTest + " (test).";
+
+        // expected excerpt; note this may change if excerpt providers change
+        String expectedExcerpt = "some text with japanese: " + jContent
+                + " (content) and <strong>" + jTest + "</strong> (test).";
+        checkExcerpt(content, expectedExcerpt, jTest);
+    }
+
+
     private void checkExcerpt(String text, String fragmentText, String terms)
             throws RepositoryException {
         String excerpt = createExcerpt(fragmentText);
@@ -137,7 +258,7 @@ public class ExcerptTest extends Abstrac
     private void createTestData(String text) throws RepositoryException {
         Node n = testRootNode.addNode(nodeName1);
         n.setProperty("text", text);
-        testRootNode.save();
+        superuser.save();
     }
 
     private String getExcerpt(Row row) throws RepositoryException {



Mime
View raw message