lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From uschind...@apache.org
Subject svn commit: r960484 [2/2] - in /lucene/dev/trunk: lucene/ lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/ lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/ lucene/contrib/instantiated/src/java/org/apa...
Date Mon, 05 Jul 2010 08:33:27 GMT
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiTermQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiTermQuery.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiTermQuery.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiTermQuery.java Mon Jul  5 08:33:25 2010
@@ -32,6 +32,7 @@ import org.apache.lucene.index.Terms;
 import org.apache.lucene.queryParser.QueryParser; // for javadoc
 import org.apache.lucene.util.Attribute;
 import org.apache.lucene.util.AttributeImpl;
+import org.apache.lucene.util.PagedBytes;
 
 /**
  * An abstract {@link Query} that matches documents
@@ -177,11 +178,6 @@ public abstract class MultiTermQuery ext
   private abstract static class BooleanQueryRewrite extends RewriteMethod {
   
     protected final int collectTerms(IndexReader reader, MultiTermQuery query, TermCollector collector) throws IOException {
-
-      if (query.field == null) {
-        throw new NullPointerException("If you implement getTermsEnum(), you must specify a non-null field in the constructor of MultiTermQuery.");
-      }
-
       final Fields fields = MultiFields.getFields(reader);
       if (fields == null) {
         // reader has no fields
@@ -203,10 +199,9 @@ public abstract class MultiTermQuery ext
         termsEnum.attributes().addAttribute(BoostAttribute.class);
       collector.boostAtt = boostAtt;
       int count = 0;
-      BytesRef term;
-      final Term placeholderTerm = new Term(query.field);
-      while ((term = termsEnum.next()) != null) {
-        if (collector.collect(placeholderTerm.createTerm(term.utf8ToString()), boostAtt.getBoost())) {
+      BytesRef bytes;
+      while ((bytes = termsEnum.next()) != null) {
+        if (collector.collect(bytes, boostAtt.getBoost())) {
           count++;
         } else {
           break;
@@ -217,15 +212,15 @@ public abstract class MultiTermQuery ext
     }
     
     protected static abstract class TermCollector {
-      /** this field is only set if a boostAttribute is used (e.g. {@link FuzzyTermsEnum}) */
       private BoostAttribute boostAtt = null;
     
       /** return false to stop collecting */
-      public abstract boolean collect(Term t, float boost) throws IOException;
+      public abstract boolean collect(BytesRef bytes, float boost) throws IOException;
       
       /** set the minimum boost as a hint for the term producer */
       protected final void setMaxNonCompetitiveBoost(float maxNonCompetitiveBoost) {
-        if (boostAtt != null) boostAtt.setMaxNonCompetitiveBoost(maxNonCompetitiveBoost);
+        assert boostAtt != null;
+        boostAtt.setMaxNonCompetitiveBoost(maxNonCompetitiveBoost);
       }
     }
   }
@@ -234,9 +229,11 @@ public abstract class MultiTermQuery ext
     @Override
     public Query rewrite(final IndexReader reader, final MultiTermQuery query) throws IOException {
       final BooleanQuery result = new BooleanQuery(true);
+      final Term placeholderTerm = new Term(query.field);
       query.incTotalNumberOfTerms(collectTerms(reader, query, new TermCollector() {
-        public boolean collect(Term t, float boost) {
-          TermQuery tq = new TermQuery(t); // found a match
+        public boolean collect(BytesRef bytes, float boost) {
+          // add new TQ, we must clone the term, else it may get overwritten!
+          TermQuery tq = new TermQuery(placeholderTerm.createTerm(new BytesRef(bytes)));
           tq.setBoost(query.getBoost() * boost); // set the boost
           result.add(tq, BooleanClause.Occur.SHOULD); // add to query
           return true;
@@ -297,16 +294,16 @@ public abstract class MultiTermQuery ext
     protected abstract Query getQuery(Term term);
 
     @Override
-    public Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException {
+    public Query rewrite(final IndexReader reader, final MultiTermQuery query) throws IOException {
       final int maxSize = Math.min(size, BooleanQuery.getMaxClauseCount());
       final PriorityQueue<ScoreTerm> stQueue = new PriorityQueue<ScoreTerm>();
       collectTerms(reader, query, new TermCollector() {
-        public boolean collect(Term t, float boost) {
+        public boolean collect(BytesRef bytes, float boost) {
           // ignore uncompetetive hits
           if (stQueue.size() >= maxSize && boost <= stQueue.peek().boost)
             return true;
-          // add new entry in PQ
-          st.term = t;
+          // add new entry in PQ, we must clone the term, else it may get overwritten!
+          st.bytes.copy(bytes);
           st.boost = boost;
           stQueue.offer(st);
           // possibly drop entries from queue
@@ -319,9 +316,11 @@ public abstract class MultiTermQuery ext
         private ScoreTerm st = new ScoreTerm();
       });
       
+      final Term placeholderTerm = new Term(query.field);
       final BooleanQuery bq = new BooleanQuery(true);
       for (final ScoreTerm st : stQueue) {
-        Query tq = getQuery(st.term);    // found a match
+        // add new query, we must clone the term, else it may get overwritten!
+        Query tq = getQuery(placeholderTerm.createTerm(st.bytes));
         tq.setBoost(query.getBoost() * st.boost); // set the boost
         bq.add(tq, BooleanClause.Occur.SHOULD);   // add to query
       }
@@ -348,12 +347,13 @@ public abstract class MultiTermQuery ext
     }
   
     private static class ScoreTerm implements Comparable<ScoreTerm> {
-      public Term term;
+      public final BytesRef bytes = new BytesRef();
       public float boost;
       
       public int compareTo(ScoreTerm other) {
         if (this.boost == other.boost)
-          return other.term.compareTo(this.term);
+          // TODO: is it OK to use default compare here?
+          return other.bytes.compareTo(this.bytes);
         else
           return Float.compare(this.boost, other.boost);
       }
@@ -530,58 +530,67 @@ public abstract class MultiTermQuery ext
       final int docCountCutoff = (int) ((docCountPercent / 100.) * reader.maxDoc());
       final int termCountLimit = Math.min(BooleanQuery.getMaxClauseCount(), termCountCutoff);
 
-      final CutOffTermCollector col = new CutOffTermCollector(reader, docCountCutoff, termCountLimit);
+      final CutOffTermCollector col = new CutOffTermCollector(reader, query.field, docCountCutoff, termCountLimit);
       collectTerms(reader, query, col);
       
       if (col.hasCutOff) {
         return CONSTANT_SCORE_FILTER_REWRITE.rewrite(reader, query);
+      } else if (col.termCount == 0) {
+        return new BooleanQuery(true);
       } else {
-        final Query result;
-        if (col.pendingTerms.isEmpty()) {
-          result = new BooleanQuery(true);
-        } else {
-          BooleanQuery bq = new BooleanQuery(true);
-          for(Term term : col.pendingTerms) {
-            TermQuery tq = new TermQuery(term);
-            bq.add(tq, BooleanClause.Occur.SHOULD);
+        final PagedBytes.Reader bytesReader = col.pendingTerms.freeze(false);
+        try {
+          final BooleanQuery bq = new BooleanQuery(true);
+          final Term placeholderTerm = new Term(query.field);
+          long start = col.startOffset;
+          for(int i = 0; i < col.termCount; i++) {
+            final BytesRef bytes = new BytesRef();
+            start = bytesReader.fillUsingLengthPrefix3(bytes, start);
+            bq.add(new TermQuery(placeholderTerm.createTerm(bytes)), BooleanClause.Occur.SHOULD);
           }
           // Strip scores
-          result = new ConstantScoreQuery(new QueryWrapperFilter(bq));
+          final Query result = new ConstantScoreQuery(new QueryWrapperFilter(bq));
           result.setBoost(query.getBoost());
+          query.incTotalNumberOfTerms(col.termCount);
+          return result;
+        } finally {
+          bytesReader.close();
         }
-        query.incTotalNumberOfTerms(col.pendingTerms.size());
-        return result;
       }
     }
     
     private static final class CutOffTermCollector extends TermCollector {
-      CutOffTermCollector(IndexReader reader, int docCountCutoff, int termCountLimit) {
+      CutOffTermCollector(IndexReader reader, String field, int docCountCutoff, int termCountLimit) {
         this.reader = reader;
+        this.field = field;
         this.docCountCutoff = docCountCutoff;
         this.termCountLimit = termCountLimit;
       }
     
-      public boolean collect(Term t, float boost) throws IOException {
-        pendingTerms.add(t);
-        if (pendingTerms.size() >= termCountLimit || docVisitCount >= docCountCutoff) {
+      public boolean collect(BytesRef bytes, float boost) throws IOException {
+        termCount++;
+        if (termCount >= termCountLimit || docVisitCount >= docCountCutoff) {
           hasCutOff = true;
           return false;
         }
+        pendingTerms.copyUsingLengthPrefix(bytes);
         // Loading the TermInfo from the terms dict here
         // should not be costly, because 1) the
         // query/filter will load the TermInfo when it
         // runs, and 2) the terms dict has a cache:
-        // @deprecated: in 4.0 use BytesRef for collectTerms()
-        docVisitCount += reader.docFreq(t);
+        docVisitCount += reader.docFreq(field, bytes);
         return true;
       }
       
       int docVisitCount = 0;
       boolean hasCutOff = false;
+      int termCount = 0;
       
       final IndexReader reader;
+      final String field;
       final int docCountCutoff, termCountLimit;
-      final ArrayList<Term> pendingTerms = new ArrayList<Term>();
+      final PagedBytes pendingTerms = new PagedBytes(15); // max term size is 32 KiB
+      final long startOffset = pendingTerms.getPointer();
     }
 
     @Override
@@ -647,18 +656,7 @@ public abstract class MultiTermQuery ext
    */
   public MultiTermQuery(final String field) {
     this.field = field;
-  }
-
-  /**
-   * Constructs a query matching terms that cannot be represented with a single
-   * Term.
-   * @deprecated Use {@link #MultiTermQuery(String)}, as the flex branch can
-   * only work on one field per terms enum. If you override
-   * {@link #getTermsEnum(IndexReader)}, you cannot use this ctor.
-   */
-  @Deprecated
-  public MultiTermQuery() {
-    this(null);
+    assert field != null;
   }
 
   /** Returns the field name for this query */

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java Mon Jul  5 08:33:25 2010
@@ -106,10 +106,6 @@ public class MultiTermQueryWrapperFilter
    */
   @Override
   public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
-    if (query.field == null) {
-      throw new NullPointerException("If you implement getTermsEnum(), you must specify a non-null field in the constructor of MultiTermQuery.");
-    }
-
     final Fields fields = MultiFields.getFields(reader);
     if (fields == null) {
       // reader has no fields

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/PhraseQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/PhraseQuery.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/PhraseQuery.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/PhraseQuery.java Mon Jul  5 08:33:25 2010
@@ -184,15 +184,14 @@ public class PhraseQuery extends Query {
       final Bits delDocs = MultiFields.getDeletedDocs(reader);
       for (int i = 0; i < terms.size(); i++) {
         final Term t = terms.get(i);
-        final BytesRef text = new BytesRef(t.text());
         DocsAndPositionsEnum postingsEnum = MultiFields.getTermPositionsEnum(reader,
                                                                              delDocs,
                                                                              t.field(),
-                                                                             text);
+                                                                             t.bytes());
         // PhraseQuery on a field that did not index
         // positions.
         if (postingsEnum == null) {
-          if (MultiFields.getTermDocsEnum(reader, delDocs, t.field(), text) != null) {
+          if (MultiFields.getTermDocsEnum(reader, delDocs, t.field(), t.bytes()) != null) {
             // term does exist, but has no positions
             throw new IllegalStateException("field \"" + t.field() + "\" was indexed with Field.omitTermFreqAndPositions=true; cannot run PhraseQuery (term=" + t.text() + ")");
           } else {
@@ -200,7 +199,7 @@ public class PhraseQuery extends Query {
             return null;
           }
         }
-        postingsFreqs[i] = new PostingsAndFreq(postingsEnum, reader.docFreq(t.field(), text), positions.get(i).intValue());
+        postingsFreqs[i] = new PostingsAndFreq(postingsEnum, reader.docFreq(t.field(), t.bytes()), positions.get(i).intValue());
       }
 
       // sort by increasing docFreq order

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/PrefixQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/PrefixQuery.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/PrefixQuery.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/PrefixQuery.java Mon Jul  5 08:33:25 2010
@@ -46,7 +46,7 @@ public class PrefixQuery extends MultiTe
   
   @Override  
   protected TermsEnum getTermsEnum(IndexReader reader) throws IOException {
-    if (prefix.text().length() == 0) {
+    if (prefix.bytes().length == 0) {
       // no prefix -- match all terms for this field:
       final Terms terms = MultiFields.getTerms(reader, getField());
       return (terms != null) ? terms.iterator() : TermsEnum.EMPTY;

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/PrefixTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/PrefixTermsEnum.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/PrefixTermsEnum.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/PrefixTermsEnum.java Mon Jul  5 08:33:25 2010
@@ -36,7 +36,7 @@ public class PrefixTermsEnum extends Fil
 
   public PrefixTermsEnum(IndexReader reader, Term prefix) throws IOException {
     super(reader, prefix.field());
-    setInitialSeekTerm(prefixRef = new BytesRef(prefix.text()));
+    setInitialSeekTerm(prefixRef = prefix.bytes());
   }
 
   @Override

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/QueryTermVector.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/QueryTermVector.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/QueryTermVector.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/QueryTermVector.java Mon Jul  5 08:33:25 2010
@@ -29,14 +29,16 @@ import java.util.Map;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
 import org.apache.lucene.index.TermFreqVector;
+import org.apache.lucene.util.BytesRef;
 
 /**
  *
  *
  **/
 public class QueryTermVector implements TermFreqVector {
-  private String [] terms = new String[0];
+  private BytesRef [] terms = new BytesRef[0];
   private int [] termFreqs = new int[0];
 
   public String getField() { return null;  }
@@ -45,7 +47,7 @@ public class QueryTermVector implements 
    * 
    * @param queryTerms The original list of terms from the query, can contain duplicates
    */ 
-  public QueryTermVector(String [] queryTerms) {
+  public QueryTermVector(BytesRef [] queryTerms) {
 
     processTerms(queryTerms);
   }
@@ -56,35 +58,37 @@ public class QueryTermVector implements 
       TokenStream stream = analyzer.tokenStream("", new StringReader(queryString));
       if (stream != null)
       {
-        List<String> terms = new ArrayList<String>();
+        List<BytesRef> terms = new ArrayList<BytesRef>();
         try {
           boolean hasMoreTokens = false;
           
           stream.reset(); 
-          final CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
+          final TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
 
           hasMoreTokens = stream.incrementToken();
           while (hasMoreTokens) {
-            terms.add(termAtt.toString());
+            BytesRef bytes = new BytesRef();
+            termAtt.toBytesRef(bytes);
+            terms.add(bytes);
             hasMoreTokens = stream.incrementToken();
           }
-          processTerms(terms.toArray(new String[terms.size()]));
+          processTerms(terms.toArray(new BytesRef[terms.size()]));
         } catch (IOException e) {
         }
       }
     }                                                              
   }
   
-  private void processTerms(String[] queryTerms) {
+  private void processTerms(BytesRef[] queryTerms) {
     if (queryTerms != null) {
       Arrays.sort(queryTerms);
-      Map<String,Integer> tmpSet = new HashMap<String,Integer>(queryTerms.length);
+      Map<BytesRef,Integer> tmpSet = new HashMap<BytesRef,Integer>(queryTerms.length);
       //filter out duplicates
-      List<String> tmpList = new ArrayList<String>(queryTerms.length);
+      List<BytesRef> tmpList = new ArrayList<BytesRef>(queryTerms.length);
       List<Integer> tmpFreqs = new ArrayList<Integer>(queryTerms.length);
       int j = 0;
       for (int i = 0; i < queryTerms.length; i++) {
-        String term = queryTerms[i];
+        BytesRef term = queryTerms[i];
         Integer position = tmpSet.get(term);
         if (position == null) {
           tmpSet.put(term, Integer.valueOf(j++));
@@ -112,7 +116,7 @@ public class QueryTermVector implements 
         sb.append('{');
         for (int i=0; i<terms.length; i++) {
             if (i>0) sb.append(", ");
-            sb.append(terms[i]).append('/').append(termFreqs[i]);
+            sb.append(terms[i].utf8ToString()).append('/').append(termFreqs[i]);
         }
         sb.append('}');
         return sb.toString();
@@ -123,7 +127,7 @@ public class QueryTermVector implements 
     return terms.length;
   }
 
-  public String[] getTerms() {
+  public BytesRef[] getTerms() {
     return terms;
   }
 
@@ -131,12 +135,12 @@ public class QueryTermVector implements 
     return termFreqs;
   }
 
-  public int indexOf(String term) {
+  public int indexOf(BytesRef term) {
     int res = Arrays.binarySearch(terms, term);
         return res >= 0 ? res : -1;
   }
 
-  public int[] indexesOf(String[] terms, int start, int len) {
+  public int[] indexesOf(BytesRef[] terms, int start, int len) {
     int res[] = new int[len];
 
     for (int i=0; i < len; i++) {

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/SingleTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/SingleTermsEnum.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/SingleTermsEnum.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/SingleTermsEnum.java Mon Jul  5 08:33:25 2010
@@ -41,7 +41,7 @@ public final class SingleTermsEnum exten
    */
   public SingleTermsEnum(IndexReader reader, Term singleTerm) throws IOException {
     super(reader, singleTerm.field());
-    singleRef = new BytesRef(singleTerm.text());
+    singleRef = singleTerm.bytes();
     setInitialSeekTerm(singleRef);
   }
 

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/TermQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/TermQuery.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/TermQuery.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/TermQuery.java Mon Jul  5 08:33:25 2010
@@ -75,7 +75,7 @@ public class TermQuery extends Query {
     public Scorer scorer(IndexReader reader, boolean scoreDocsInOrder, boolean topScorer) throws IOException {
       // NOTE: debateably, the caller should never pass in a
       // multi reader...
-      DocsEnum docs = MultiFields.getTermDocsEnum(reader, MultiFields.getDeletedDocs(reader), term.field(), new BytesRef(term.text()));
+      DocsEnum docs = MultiFields.getTermDocsEnum(reader, MultiFields.getDeletedDocs(reader), term.field(), term.bytes());
       if (docs == null) {
         return null;
       }
@@ -118,7 +118,7 @@ public class TermQuery extends Query {
 
       Explanation tfExplanation = new Explanation();
       int tf = 0;
-      DocsEnum docs = reader.termDocsEnum(MultiFields.getDeletedDocs(reader), term.field(), new BytesRef(term.text()));
+      DocsEnum docs = reader.termDocsEnum(MultiFields.getDeletedDocs(reader), term.field(), term.bytes());
       if (docs != null) {
           int newDoc = docs.advance(doc);
           if (newDoc == doc) {

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanTermQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanTermQuery.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanTermQuery.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanTermQuery.java Mon Jul  5 08:33:25 2010
@@ -85,16 +85,15 @@ public class SpanTermQuery extends SpanQ
   public Spans getSpans(final IndexReader reader) throws IOException {
     // NOTE: debateably, the caller should never pass in a
     // multi reader...
-    final BytesRef textBytes = new BytesRef(term.text());
     final DocsAndPositionsEnum postings = MultiFields.getTermPositionsEnum(reader,
                                                                            MultiFields.getDeletedDocs(reader),
                                                                            term.field(),
-                                                                           textBytes);
+                                                                           term.bytes());
 
     if (postings != null) {
       return new TermSpans(postings, term);
     } else {
-      if (MultiFields.getTermDocsEnum(reader, MultiFields.getDeletedDocs(reader), term.field(), textBytes) != null) {
+      if (MultiFields.getTermDocsEnum(reader, MultiFields.getDeletedDocs(reader), term.field(), term.bytes()) != null) {
         // term does exist, but has no positions
         throw new IllegalStateException("field \"" + term.field() + "\" was indexed with Field.omitTermFreqAndPositions=true; cannot run SpanTermQuery (term=" + term.text() + ")");
       } else {

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/BytesRef.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/BytesRef.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/BytesRef.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/BytesRef.java Mon Jul  5 08:33:25 2010
@@ -77,6 +77,16 @@ public final class BytesRef implements C
     this();
     copy(text);
   }
+  
+  /**
+   * @param text Initialize the byte[] from the UTF8 bytes
+   * for the provided array.  This must be well-formed
+   * unicode text, with no unpaired surrogates or U+FFFF.
+   */
+  public BytesRef(char text[], int offset, int length) {
+    this(length * 4);
+    copy(text, offset, length);
+  }
 
   public BytesRef(BytesRef other) {
     this();
@@ -106,6 +116,15 @@ public final class BytesRef implements C
     UnicodeUtil.UTF16toUTF8(text, 0, text.length(), this);
   }
 
+  /**
+   * Copies the UTF8 bytes for the given range of the char array.
+   * 
+   * @param text Must be well-formed unicode text, with no
+   * unpaired surrogates or invalid UTF16 code units.
+   */
+  public void copy(char text[], int offset, int length) {
+    UnicodeUtil.UTF16toUTF8(text, offset, length, this);
+  }
   public boolean bytesEquals(BytesRef other) {
     if (length == other.length) {
       int otherUpto = other.offset;
@@ -277,6 +296,62 @@ public final class BytesRef implements C
     }    
   }
 
+  private final static Comparator<BytesRef> utf8SortedAsUTF16SortOrder = new UTF8SortedAsUTF16Comparator();
+
+  public static Comparator<BytesRef> getUTF8SortedAsUTF16Comparator() {
+    return utf8SortedAsUTF16SortOrder;
+  }
+
+  private static class UTF8SortedAsUTF16Comparator implements Comparator<BytesRef> {
+    // Only singleton
+    private UTF8SortedAsUTF16Comparator() {};
+
+    public int compare(BytesRef a, BytesRef b) {
+
+      final byte[] aBytes = a.bytes;
+      int aUpto = a.offset;
+      final byte[] bBytes = b.bytes;
+      int bUpto = b.offset;
+      
+      final int aStop;
+      if (a.length < b.length) {
+        aStop = aUpto + a.length;
+      } else {
+        aStop = aUpto + b.length;
+      }
+
+      while(aUpto < aStop) {
+        int aByte = aBytes[aUpto++] & 0xff;
+        int bByte = bBytes[bUpto++] & 0xff;
+
+        if (aByte != bByte) {
+
+          // See http://icu-project.org/docs/papers/utf16_code_point_order.html#utf-8-in-utf-16-order
+
+          // We know the terms are not equal, but, we may
+          // have to carefully fixup the bytes at the
+          // difference to match UTF16's sort order:
+          if (aByte >= 0xee && bByte >= 0xee) {
+            if ((aByte & 0xfe) == 0xee) {
+              aByte += 0x10;
+            }
+            if ((bByte&0xfe) == 0xee) {
+              bByte += 0x10;
+            }
+          }
+          return aByte - bByte;
+        }
+      }
+
+      // One is a prefix of the other, or, they are equal:
+      return a.length - b.length;
+    }
+
+    public boolean equals(Object other) {
+      return this == other;
+    }
+  }
+
   public void writeExternal(ObjectOutput out)
     throws IOException
   {

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/PagedBytes.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/PagedBytes.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/PagedBytes.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/PagedBytes.java Mon Jul  5 08:33:25 2010
@@ -125,6 +125,26 @@ public final class PagedBytes {
       return index;
     }
 
+    /** @lucene.internal  Reads the length as a 1- or 2-byte vInt prefix, starting at {@code start}.
+     * Returns the start offset of the next part, suitable as start parameter on next call
+     * to sequentially read all BytesRefs. */
+    public long fillUsingLengthPrefix3(BytesRef b, long start) {
+      final int index = (int) (start >> blockBits);
+      final int offset = (int) (start & blockMask);
+      final byte[] block = b.bytes = blocks[index];
+
+      if ((block[offset] & 128) == 0) {
+        b.length = block[offset];
+        b.offset = offset+1;
+        start += 1L + b.length;
+      } else {
+        b.length = (((int) (block[offset] & 0x7f)) << 8) | (block[1+offset] & 0xff);
+        b.offset = offset+2;
+        start += 2L + b.length;
+        assert b.length > 0;
+      }
+      return start;
+    }
 
     /** @lucene.internal */
     public byte[][] getBlocks() {
@@ -230,7 +250,7 @@ public final class PagedBytes {
 
   /** Commits final byte[], trimming it if necessary and if trim=true */
   public Reader freeze(boolean trim) {
-    if (upto < blockSize) {
+    if (trim && upto < blockSize) {
       final byte[] newBlock = new byte[upto];
       System.arraycopy(currentBlock, 0, newBlock, 0, upto);
       currentBlock = newBlock;

Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java Mon Jul  5 08:33:25 2010
@@ -464,7 +464,7 @@ public class TestAddIndexes extends Luce
   private void verifyTermDocs(Directory dir, Term term, int numDocs)
       throws IOException {
     IndexReader reader = IndexReader.open(dir, true);
-    DocsEnum docsEnum = MultiFields.getTermDocsEnum(reader, null, term.field, new BytesRef(term.text));
+    DocsEnum docsEnum = MultiFields.getTermDocsEnum(reader, null, term.field, term.bytes);
     int count = 0;
     while (docsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS)
       count++;

Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestPayloads.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestPayloads.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestPayloads.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestPayloads.java Mon Jul  5 08:33:25 2010
@@ -188,7 +188,7 @@ public class TestPayloads extends Lucene
         Term[] terms = generateTerms(fieldName, numTerms);
         StringBuilder sb = new StringBuilder();
         for (int i = 0; i < terms.length; i++) {
-            sb.append(terms[i].text);
+            sb.append(terms[i].text());
             sb.append(" ");
         }
         String content = sb.toString();

Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestPositionBasedTermVectorMapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestPositionBasedTermVectorMapper.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestPositionBasedTermVectorMapper.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestPositionBasedTermVectorMapper.java Mon Jul  5 08:33:25 2010
@@ -15,6 +15,7 @@ package org.apache.lucene.index;
  * limitations under the License.
  */
 
+import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase;
 
 import java.io.IOException;
@@ -65,7 +66,7 @@ public class TestPositionBasedTermVector
     //Test single position
     for (int i = 0; i < tokens.length; i++) {
       String token = tokens[i];
-      mapper.map(token, 1, null, thePositions[i]);
+      mapper.map(new BytesRef(token), 1, null, thePositions[i]);
 
     }
     Map<String,Map<Integer,PositionBasedTermVectorMapper.TVPositionInfo>> map = mapper.getFieldToTerms();

Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java Mon Jul  5 08:33:25 2010
@@ -100,7 +100,7 @@ public class TestSegmentMerger extends L
     
     TermFreqVector vector = mergedReader.getTermFreqVector(0, DocHelper.TEXT_FIELD_2_KEY);
     assertTrue(vector != null);
-    String [] terms = vector.getTerms();
+    BytesRef [] terms = vector.getTerms();
     assertTrue(terms != null);
     //System.out.println("Terms size: " + terms.length);
     assertTrue(terms.length == 3);
@@ -110,7 +110,7 @@ public class TestSegmentMerger extends L
     assertTrue(vector instanceof TermPositionVector == true);
     
     for (int i = 0; i < terms.length; i++) {
-      String term = terms[i];
+      String term = terms[i].utf8ToString();
       int freq = freqs[i];
       //System.out.println("Term: " + term + " Freq: " + freq);
       assertTrue(DocHelper.FIELD_2_TEXT.indexOf(term) != -1);

Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestSegmentReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestSegmentReader.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestSegmentReader.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestSegmentReader.java Mon Jul  5 08:33:25 2010
@@ -192,11 +192,11 @@ public class TestSegmentReader extends L
   public void testTermVectors() throws IOException {
     TermFreqVector result = reader.getTermFreqVector(0, DocHelper.TEXT_FIELD_2_KEY);
     assertTrue(result != null);
-    String [] terms = result.getTerms();
+    BytesRef [] terms = result.getTerms();
     int [] freqs = result.getTermFrequencies();
     assertTrue(terms != null && terms.length == 3 && freqs != null && freqs.length == 3);
     for (int i = 0; i < terms.length; i++) {
-      String term = terms[i];
+      String term = terms[i].utf8ToString();
       int freq = freqs[i];
       assertTrue(DocHelper.FIELD_2_TEXT.indexOf(term) != -1);
       assertTrue(freq > 0);

Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java Mon Jul  5 08:33:25 2010
@@ -516,8 +516,8 @@ public class TestStressIndexing2 extends
         System.out.println("v1=" + v1 + " v2=" + v2 + " i=" + i + " of " + d1.length);
       assertEquals(v1.size(), v2.size());
       int numTerms = v1.size();
-      String[] terms1 = v1.getTerms();
-      String[] terms2 = v2.getTerms();
+      BytesRef[] terms1 = v1.getTerms();
+      BytesRef[] terms2 = v2.getTerms();
       int[] freq1 = v1.getTermFrequencies();
       int[] freq2 = v2.getTermFrequencies();
       for(int j=0;j<numTerms;j++) {

Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestTermVectorsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestTermVectorsReader.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestTermVectorsReader.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestTermVectorsReader.java Mon Jul  5 08:33:25 2010
@@ -32,6 +32,7 @@ import org.apache.lucene.analysis.tokena
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.store.MockRAMDirectory;
+import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase;
 
 public class TestTermVectorsReader extends LuceneTestCase {
@@ -170,11 +171,11 @@ public class TestTermVectorsReader exten
     for (int j = 0; j < 5; j++) {
       TermFreqVector vector = reader.get(j, testFields[0]);
       assertTrue(vector != null);
-      String[] terms = vector.getTerms();
+      BytesRef[] terms = vector.getTerms();
       assertTrue(terms != null);
       assertTrue(terms.length == testTerms.length);
       for (int i = 0; i < terms.length; i++) {
-        String term = terms[i];
+        String term = terms[i].utf8ToString();
         //System.out.println("Term: " + term);
         assertTrue(term.equals(testTerms[i]));
       }
@@ -184,14 +185,14 @@ public class TestTermVectorsReader exten
   public void testPositionReader() throws IOException {
     TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
     TermPositionVector vector;
-    String[] terms;
+    BytesRef[] terms;
     vector = (TermPositionVector) reader.get(0, testFields[0]);
     assertTrue(vector != null);
     terms = vector.getTerms();
     assertTrue(terms != null);
     assertTrue(terms.length == testTerms.length);
     for (int i = 0; i < terms.length; i++) {
-      String term = terms[i];
+      String term = terms[i].utf8ToString();
       //System.out.println("Term: " + term);
       assertTrue(term.equals(testTerms[i]));
       int[] positions = vector.getTermPositions(i);
@@ -217,7 +218,7 @@ public class TestTermVectorsReader exten
     assertTrue(terms != null);
     assertTrue(terms.length == testTerms.length);
     for (int i = 0; i < terms.length; i++) {
-      String term = terms[i];
+      String term = terms[i].utf8ToString();
       //System.out.println("Term: " + term);
       assertTrue(term.equals(testTerms[i]));
     }
@@ -227,11 +228,11 @@ public class TestTermVectorsReader exten
     TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos);
     TermPositionVector vector = (TermPositionVector) reader.get(0, testFields[0]);
     assertTrue(vector != null);
-    String[] terms = vector.getTerms();
+    BytesRef[] terms = vector.getTerms();
     assertTrue(terms != null);
     assertTrue(terms.length == testTerms.length);
     for (int i = 0; i < terms.length; i++) {
-      String term = terms[i];
+      String term = terms[i].utf8ToString();
       //System.out.println("Term: " + term);
       assertTrue(term.equals(testTerms[i]));
       int[] positions = vector.getTermPositions(i);
@@ -413,7 +414,7 @@ public class TestTermVectorsReader exten
     }
 
     @Override
-    public void map(String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
+    public void map(BytesRef term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
       if (documentNumber == -1) {
         throw new RuntimeException("Documentnumber should be set at this point!");
       }

Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/codecs/preflex/TermInfosWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/codecs/preflex/TermInfosWriter.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/codecs/preflex/TermInfosWriter.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/codecs/preflex/TermInfosWriter.java Mon Jul  5 08:33:25 2010
@@ -76,7 +76,6 @@ final class TermInfosWriter {
   private int lastFieldNumber = -1;
 
   private TermInfosWriter other;
-  private BytesRef utf8Result = new BytesRef(10);
 
   TermInfosWriter(Directory directory, String segment, FieldInfos fis,
                   int interval)
@@ -106,8 +105,7 @@ final class TermInfosWriter {
   }
 
   void add(Term term, TermInfo ti) throws IOException {
-    UnicodeUtil.UTF16toUTF8(term.text(), 0, term.text().length(), utf8Result);
-    add(fieldInfos.fieldNumber(term.field()), utf8Result.bytes, utf8Result.length, ti);
+    add(fieldInfos.fieldNumber(term.field()), term.bytes().bytes, term.bytes().length, ti);
   }
 
   // Currently used only by assert statements

Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/codecs/preflex/TestSurrogates.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/codecs/preflex/TestSurrogates.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/codecs/preflex/TestSurrogates.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/codecs/preflex/TestSurrogates.java Mon Jul  5 08:33:25 2010
@@ -30,25 +30,6 @@ import org.junit.Test;
 
 public class TestSurrogates extends LuceneTestCaseJ4 {
 
-  // like Term, but uses BytesRef for text
-  private static class FieldAndText implements Comparable<FieldAndText> {
-    String field;
-    BytesRef text;
-
-    public FieldAndText(Term t) {
-      field = t.field();
-      text = new BytesRef(t.text());
-    }
-    
-    public int compareTo(FieldAndText other) {
-      if (other.field == field) {
-        return text.compareTo(other.text);
-      } else {
-        return field.compareTo(other.field);
-      }
-    }
-  }
-
   // chooses from a very limited alphabet to exacerbate the
   // surrogate seeking required
   private static String makeDifficultRandomUnicodeString(Random r) {
@@ -76,7 +57,7 @@ public class TestSurrogates extends Luce
     return new String(buffer, 0, end);
   }
 
-  private SegmentInfo makePreFlexSegment(Random r, String segName, Directory dir, FieldInfos fieldInfos, Codec codec, List<FieldAndText> fieldTerms) throws IOException {
+  private SegmentInfo makePreFlexSegment(Random r, String segName, Directory dir, FieldInfos fieldInfos, Codec codec, List<Term> fieldTerms) throws IOException {
 
     final int numField = _TestUtil.nextInt(r, 2, 5);
 
@@ -110,11 +91,14 @@ public class TestSurrogates extends Luce
     fieldInfos.write(dir, segName);
 
     // sorts in UTF16 order, just like preflex:
-    Collections.sort(terms);
+    Collections.sort(terms, new Comparator<Term>() {
+      public int compare(Term o1, Term o2) {
+        return o1.compareToUTF16(o2);
+      }
+    });
 
     TermInfosWriter w = new TermInfosWriter(dir, segName, fieldInfos, 128);
     TermInfo ti = new TermInfo();
-    BytesRef utf8 = new BytesRef(10);
     String lastText = null;
     int uniqueTermCount = 0;
     if (VERBOSE) {
@@ -127,23 +111,22 @@ public class TestSurrogates extends Luce
       if (lastText != null && lastText.equals(text)) {
         continue;
       }
-      fieldTerms.add(new FieldAndText(t));
+      fieldTerms.add(t);
       uniqueTermCount++;
       lastText = text;
-      UnicodeUtil.UTF16toUTF8(text, 0, text.length(), utf8);
 
       if (VERBOSE) {
         System.out.println("  " + toHexString(t));
       }
-      w.add(fi.number, utf8.bytes, utf8.length, ti);
+      w.add(fi.number, t.bytes().bytes, t.bytes().length, ti);
     }
     w.close();
 
     Collections.sort(fieldTerms);
     if (VERBOSE) {
       System.out.println("\nTEST: codepoint order");
-      for(FieldAndText t: fieldTerms) {
-        System.out.println("  " + t.field + ":" + UnicodeUtil.toHexString(t.text.utf8ToString()));
+      for(Term t: fieldTerms) {
+        System.out.println("  " + t.field() + ":" + toHexString(t));
       }
     }
 
@@ -166,7 +149,7 @@ public class TestSurrogates extends Luce
 
     Random r = newRandom();
     FieldInfos fieldInfos = new FieldInfos();
-    List<FieldAndText> fieldTerms = new ArrayList<FieldAndText>();
+    List<Term> fieldTerms = new ArrayList<Term>();
     SegmentInfo si = makePreFlexSegment(r, "_0", dir, fieldInfos, codec, fieldTerms);
 
     // hack alert!!
@@ -188,8 +171,8 @@ public class TestSurrogates extends Luce
       BytesRef text;
       BytesRef lastText = null;
       while((text = termsEnum.next()) != null) {
-        UnicodeUtil.UTF8toUTF16(text.bytes, text.offset, text.length, utf16);
         if (VERBOSE) {
+          UnicodeUtil.UTF8toUTF16(text.bytes, text.offset, text.length, utf16);
           System.out.println("got term=" + field + ":" + UnicodeUtil.toHexString(new String(utf16.result, 0, utf16.length)));
           System.out.println();
         }
@@ -199,8 +182,8 @@ public class TestSurrogates extends Luce
           assertTrue(lastText.compareTo(text) < 0);
           lastText.copy(text);
         }
-        assertEquals(fieldTerms.get(termCount).field, field);
-        assertEquals(fieldTerms.get(termCount).text, text);
+        assertEquals(fieldTerms.get(termCount).field(), field);
+        assertEquals(fieldTerms.get(termCount).bytes(), text);
         termCount++;
       }
       if (VERBOSE) {

Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestMultiThreadTermVectors.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestMultiThreadTermVectors.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestMultiThreadTermVectors.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestMultiThreadTermVectors.java Mon Jul  5 08:33:25 2010
@@ -17,6 +17,7 @@ package org.apache.lucene.search;
  * limitations under the License.
  */
 
+import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.*;
@@ -175,11 +176,11 @@ class MultiThreadTermVectorsReader imple
   
   private void verifyVectors(TermFreqVector[] vectors, int num) {
     StringBuilder temp = new StringBuilder();
-    String[] terms = null;
+    BytesRef[] terms = null;
     for (int i = 0; i < vectors.length; i++) {
       terms = vectors[i].getTerms();
       for (int z = 0; z < terms.length; z++) {
-        temp.append(terms[z]);
+        temp.append(terms[z].utf8ToString());
       }
     }
     

Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestQueryTermVector.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestQueryTermVector.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestQueryTermVector.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestQueryTermVector.java Mon Jul  5 08:33:25 2010
@@ -17,6 +17,7 @@ package org.apache.lucene.search;
  * limitations under the License.
  */
 
+import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.analysis.MockAnalyzer;
 
@@ -28,12 +29,14 @@ public class TestQueryTermVector extends
   }
 
   public void testConstructor() {
-    String [] queryTerm = {"foo", "bar", "foo", "again", "foo", "bar", "go", "go", "go"};
+    BytesRef [] queryTerm = {new BytesRef("foo"), new BytesRef("bar"), new BytesRef("foo"), 
+        new BytesRef("again"), new BytesRef("foo"), new BytesRef("bar"), new BytesRef("go"),
+        new BytesRef("go"), new BytesRef("go")};
     //Items are sorted lexicographically
-    String [] gold = {"again", "bar", "foo", "go"};
+    BytesRef [] gold = {new BytesRef("again"), new BytesRef("bar"), new BytesRef("foo"), new BytesRef("go")};
     int [] goldFreqs = {1, 2, 3, 3};
     QueryTermVector result = new QueryTermVector(queryTerm);
-    String [] terms = result.getTerms();
+    BytesRef [] terms = result.getTerms();
     assertTrue(terms.length == 4);
     int [] freq = result.getTermFrequencies();
     assertTrue(freq.length == 4);
@@ -49,7 +52,7 @@ public class TestQueryTermVector extends
     checkGold(terms, gold, freq, goldFreqs);
   }
 
-  private void checkGold(String[] terms, String[] gold, int[] freq, int[] goldFreqs) {
+  private void checkGold(BytesRef[] terms, BytesRef[] gold, int[] freq, int[] goldFreqs) {
     for (int i = 0; i < terms.length; i++) {
       assertTrue(terms[i].equals(gold[i]));
       assertTrue(freq[i] == goldFreqs[i]);

Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestTermVectors.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestTermVectors.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestTermVectors.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestTermVectors.java Mon Jul  5 08:33:25 2010
@@ -17,6 +17,7 @@ package org.apache.lucene.search;
  * limitations under the License.
  */
 
+import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.analysis.MockTokenizer;
@@ -123,11 +124,11 @@ public class TestTermVectors extends Luc
     for(int i=0;i<v.length;i++) {
       TermPositionVector posVec = (TermPositionVector) v[i];
       assertEquals(expectedFields[i], posVec.getField());
-      String[] terms = posVec.getTerms();
+      BytesRef[] terms = posVec.getTerms();
       assertEquals(3, terms.length);
-      assertEquals("content", terms[0]);
-      assertEquals("here", terms[1]);
-      assertEquals("some", terms[2]);
+      assertEquals("content", terms[0].utf8ToString());
+      assertEquals("here", terms[1].utf8ToString());
+      assertEquals("some", terms[2].utf8ToString());
       for(int j=0;j<3;j++) {
         int[] positions = posVec.getTermPositions(j);
         assertEquals(1, positions.length);
@@ -156,7 +157,7 @@ public class TestTermVectors extends Luc
         
         if(shouldBePosVector || shouldBeOffVector){
           TermPositionVector posVec = (TermPositionVector)vector[0];
-          String [] terms = posVec.getTerms();
+          BytesRef [] terms = posVec.getTerms();
           assertTrue(terms != null && terms.length > 0);
           
           for (int j = 0; j < terms.length; j++) {
@@ -184,7 +185,7 @@ public class TestTermVectors extends Luc
           }
           catch(ClassCastException ignore){
             TermFreqVector freqVec = vector[0];
-            String [] terms = freqVec.getTerms();
+            BytesRef [] terms = freqVec.getTerms();
             assertTrue(terms != null && terms.length > 0);
           }
           
@@ -277,11 +278,11 @@ public class TestTermVectors extends Luc
             //float coord = sim.coord()
             //System.out.println("TF: " + tf + " IDF: " + idf + " LenNorm: " + lNorm);
             assertTrue(vector != null);
-            String[] vTerms = vector.getTerms();
+            BytesRef[] vTerms = vector.getTerms();
             int [] freqs = vector.getTermFrequencies();
             for (int i = 0; i < vTerms.length; i++)
               {
-                if (text.equals(vTerms[i]))
+                if (text.equals(vTerms[i].utf8ToString()))
                   {
                     assertTrue(freqs[i] == freq);
                   }
@@ -306,11 +307,11 @@ public class TestTermVectors extends Luc
       TermFreqVector vector = knownSearcher.reader.getTermFreqVector(hits[1].doc, "field");
       assertTrue(vector != null);
       //System.out.println("Vector: " + vector);
-      String[] terms = vector.getTerms();
+      BytesRef[] terms = vector.getTerms();
       int [] freqs = vector.getTermFrequencies();
       assertTrue(terms != null && terms.length == 10);
       for (int i = 0; i < terms.length; i++) {
-        String term = terms[i];
+        String term = terms[i].utf8ToString();
         //System.out.println("Term: " + term);
         int freq = freqs[i];
         assertTrue(test4.indexOf(term) != -1);
@@ -327,7 +328,7 @@ public class TestTermVectors extends Luc
         if (tve != null && last != null)
         {
           assertTrue("terms are not properly sorted", last.getFrequency() >= tve.getFrequency());
-          Integer expectedFreq =  test4Map.get(tve.getTerm());
+          Integer expectedFreq =  test4Map.get(tve.getTerm().utf8ToString());
           //we expect double the expectedFreq, since there are two fields with the exact same text and we are collapsing all fields
           assertTrue("Frequency is not correct:", tve.getFrequency() == 2*expectedFreq.intValue());
         }
@@ -421,9 +422,9 @@ public class TestTermVectors extends Luc
     assertTrue(vector.length == 1);
     TermPositionVector tfv = (TermPositionVector) vector[0];
     assertTrue(tfv.getField().equals("field"));
-    String[] terms = tfv.getTerms();
+    BytesRef[] terms = tfv.getTerms();
     assertEquals(1, terms.length);
-    assertEquals(terms[0], "one");
+    assertEquals(terms[0].utf8ToString(), "one");
     assertEquals(5, tfv.getTermFrequencies()[0]);
 
     int[] positions = tfv.getTermPositions(0);
@@ -447,7 +448,7 @@ public class TestTermVectors extends Luc
     }
 
     @Override
-    public void map(String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
+    public void map(BytesRef term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
 
     }
   }

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java Mon Jul  5 08:33:25 2010
@@ -265,7 +265,7 @@ public class LukeRequestHandler extends 
           if( v != null ) {
             SimpleOrderedMap<Integer> tfv = new SimpleOrderedMap<Integer>();
             for( int i=0; i<v.size(); i++ ) {
-              tfv.add( v.getTerms()[i], v.getTermFrequencies()[i] );
+              tfv.add( v.getTerms()[i].utf8ToString(), v.getTermFrequencies()[i] );
             }
             f.add( "termVector", tfv );
           }

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/handler/component/TermVectorComponent.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/handler/component/TermVectorComponent.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/handler/component/TermVectorComponent.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/handler/component/TermVectorComponent.java Mon Jul  5 08:33:25 2010
@@ -292,9 +292,9 @@ public class TermVectorComponent extends
       this.reader = reader;
     }
 
-    public void map(String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
+    public void map(BytesRef term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
       NamedList termInfo = new NamedList();
-        fieldNL.add(term, termInfo);
+        fieldNL.add(term.utf8ToString(), termInfo);
         if (fieldOptions.termFreq == true) {
           termInfo.add("tf", frequency);
         }
@@ -323,14 +323,14 @@ public class TermVectorComponent extends
         }
     }
 
-    private int getDocFreq(String term) {
+    private int getDocFreq(BytesRef term) {
       int result = 1;
       currentTerm = currentTerm.createTerm(term);
       try {
         Terms terms = MultiFields.getTerms(reader, currentTerm.field());
         if (terms != null) {
           TermsEnum termsEnum = terms.iterator();
-          if (termsEnum.seek(new BytesRef(term)) == TermsEnum.SeekStatus.FOUND) {
+          if (termsEnum.seek(term) == TermsEnum.SeekStatus.FOUND) {
             result = termsEnum.docFreq();
           }
         }

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/request/UnInvertedField.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/request/UnInvertedField.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/request/UnInvertedField.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/request/UnInvertedField.java Mon Jul  5 08:33:25 2010
@@ -256,7 +256,7 @@ public class UnInvertedField {
           deState.termsEnum = te.tenum;
           deState.reuse = te.docsEnum;
         }
-        DocSet set = searcher.getDocSet(new TermQuery(new Term(ti.field, topTerm.term.utf8ToString())), deState);
+        DocSet set = searcher.getDocSet(new TermQuery(new Term(ti.field, topTerm.term)), deState);
         te.docsEnum = deState.reuse;
 
         maxTermCounts[termNum] = set.size();
@@ -514,7 +514,7 @@ public class UnInvertedField {
       for (TopTerm tt : bigTerms.values()) {
         // TODO: counts could be deferred if sorted==false
         if (tt.termNum >= startTerm && tt.termNum < endTerm) {
-          counts[tt.termNum] = searcher.numDocs(new TermQuery(new Term(ti.field, tt.term.utf8ToString())), docs);
+          counts[tt.termNum] = searcher.numDocs(new TermQuery(new Term(ti.field, tt.term)), docs);
         }
       }
 
@@ -712,7 +712,7 @@ public class UnInvertedField {
     for (TopTerm tt : bigTerms.values()) {
       // TODO: counts could be deferred if sorted==false
       if (tt.termNum >= 0 && tt.termNum < numTermsInField) {
-        final Term t = new Term(ti.field, tt.term.utf8ToString());
+        final Term t = new Term(ti.field, tt.term);
         if (finfo.length == 0) {
           counts[tt.termNum] = searcher.numDocs(new TermQuery(t), docs);
         } else {

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/search/SolrIndexSearcher.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/search/SolrIndexSearcher.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/search/SolrIndexSearcher.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/search/SolrIndexSearcher.java Mon Jul  5 08:33:25 2010
@@ -480,7 +480,7 @@ public class SolrIndexSearcher extends I
     if (fields == null) return -1;
     Terms terms = fields.terms(t.field());
     if (terms == null) return -1;
-    BytesRef termBytes = new BytesRef(t.text());
+    BytesRef termBytes = t.bytes();
     DocsEnum docs = terms.docs(MultiFields.getDeletedDocs(reader), termBytes, null);
     if (docs == null) return -1;
     int id = docs.nextDoc();
@@ -754,7 +754,7 @@ public class SolrIndexSearcher extends I
           
           Fields fields = sir.fields();
           Terms terms = fields.terms(t.field());
-          BytesRef termBytes = new BytesRef(t.text());
+          BytesRef termBytes = t.bytes();
           
           Bits skipDocs = sir.getDeletedDocs();
           DocsEnum docsEnum = terms==null ? null : terms.docs(skipDocs, termBytes, null);

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/update/DirectUpdateHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/update/DirectUpdateHandler.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/update/DirectUpdateHandler.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/update/DirectUpdateHandler.java Mon Jul  5 08:33:25 2010
@@ -118,7 +118,7 @@ public class DirectUpdateHandler extends
     DocsEnum tdocs = MultiFields.getTermDocsEnum(ir,
                                                  MultiFields.getDeletedDocs(ir),
                                                  idTerm.field(),
-                                                 new BytesRef(idTerm.text()));
+                                                 idTerm.bytes());
     if (tdocs != null) {
       return tdocs.nextDoc() != DocsEnum.NO_MORE_DOCS;
     } else {



Mime
View raw message