lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From uschind...@apache.org
Subject svn commit: r960484 [1/2] - in /lucene/dev/trunk: lucene/ lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/ lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/ lucene/contrib/instantiated/src/java/org/apa...
Date Mon, 05 Jul 2010 08:33:27 GMT
Author: uschindler
Date: Mon Jul  5 08:33:25 2010
New Revision: 960484

URL: http://svn.apache.org/viewvc?rev=960484&view=rev
Log:
LUCENE-2514: Term is no longer character based

Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java
    lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermPositionVector.java
    lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java
    lucene/dev/trunk/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndex.java
    lucene/dev/trunk/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java
    lucene/dev/trunk/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermFreqVector.java
    lucene/dev/trunk/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java
    lucene/dev/trunk/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
    lucene/dev/trunk/lucene/contrib/misc/src/java/org/apache/lucene/index/TermVectorAccessor.java
    lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/index/TestTermVectorAccessor.java
    lucene/dev/trunk/lucene/contrib/queries/src/java/org/apache/lucene/search/FuzzyLikeThisQuery.java
    lucene/dev/trunk/lucene/contrib/queries/src/java/org/apache/lucene/search/TermsFilter.java
    lucene/dev/trunk/lucene/contrib/queries/src/java/org/apache/lucene/search/similar/MoreLikeThis.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldSortedTermVectorMapper.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexReader.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/PositionBasedTermVectorMapper.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentTermPositionVector.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentTermVector.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SortedTermVectorMapper.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/Term.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermFreqVector.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermVectorEntry.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermVectorMapper.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermVectorsReader.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermVectorsWriter.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/preflex/TermBuffer.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/preflex/TermInfosReader.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCacheTermsFilter.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiTermQuery.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/PhraseQuery.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/PrefixQuery.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/PrefixTermsEnum.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/QueryTermVector.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/SingleTermsEnum.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/TermQuery.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/spans/SpanTermQuery.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/BytesRef.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/PagedBytes.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestPayloads.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestPositionBasedTermVectorMapper.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestSegmentReader.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestTermVectorsReader.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/codecs/preflex/TermInfosWriter.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/codecs/preflex/TestSurrogates.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestMultiThreadTermVectors.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestQueryTermVector.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestTermVectors.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/handler/component/TermVectorComponent.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/request/UnInvertedField.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/search/SolrIndexSearcher.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/update/DirectUpdateHandler.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Mon Jul  5 08:33:25 2010
@@ -102,6 +102,15 @@ Changes in backwards compatibility polic
 
 API Changes
 
+* LUCENE-2302, LUCENE-1458, LUCENE-2111, LUCENE-2514: Terms are no longer
+  required to be character based. Lucene views a term as an arbitrary byte[]:
+  during analysis, character-based terms are converted to UTF8 byte[],
+  but analyzers are free to directly create terms as byte[]
+  (NumericField does this, for example).  The term data is buffered as
+  byte[] during indexing, written as byte[] into the terms dictionary,
+  and iterated as byte[] (wrapped in a BytesRef) by IndexReader for
+  searching.
+
 * LUCENE-1458, LUCENE-2111: IndexReader now directly exposes its
   deleted docs (getDeletedDocs), providing a new Bits interface to
   directly query by doc ID.
@@ -147,15 +156,6 @@ New features
   standard codec), and int block (really a "base" for using
   block-based compressors like PForDelta for storing postings data).
 
-* LUCENE-2302, LUCENE-1458, LUCENE-2111: Terms are no longer required
-  to be character based.  Lucene views a term as an arbitrary byte[]:
-  during analysis, character-based terms are converted to UTF8 byte[],
-  but analyzers are free to directly create terms as byte[]
-  (NumericField does this, for example).  The term data is buffered as
-  byte[] during indexing, written as byte[] into the terms dictionary,
-  and iterated as byte[] (wrapped in a BytesRef) by IndexReader for
-  searching.
-
 * LUCENE-2385: Moved NoDeletionPolicy from benchmark to core. NoDeletionPolicy
   can be used to prevent commits from ever getting deleted from the index.
   (Shai Erera)

Modified: lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java (original)
+++ lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java Mon Jul  5 08:33:25 2010
@@ -36,6 +36,7 @@ import org.apache.lucene.index.IndexRead
 import org.apache.lucene.index.TermFreqVector;
 import org.apache.lucene.index.TermPositionVector;
 import org.apache.lucene.index.TermVectorOffsetInfo;
+import org.apache.lucene.util.BytesRef;
 
 /**
  * Hides implementation issues associated with obtaining a TokenStream for use
@@ -176,7 +177,7 @@ public class TokenSources {
       }
     }
     // code to reconstruct the original sequence of Tokens
-    String[] terms = tpv.getTerms();
+    BytesRef[] terms = tpv.getTerms();
     int[] freq = tpv.getTermFrequencies();
     int totalTokens = 0;
 
@@ -204,7 +205,7 @@ public class TokenSources {
           unsortedTokens = new ArrayList<Token>();
         }
         for (int tp = 0; tp < offsets.length; tp++) {
-          Token token = new Token(terms[t], offsets[tp].getStartOffset(), offsets[tp]
+          Token token = new Token(terms[t].utf8ToString(), offsets[tp].getStartOffset(), offsets[tp]
               .getEndOffset());
           unsortedTokens.add(token);
         }
@@ -220,7 +221,7 @@ public class TokenSources {
         // tokens stored with positions - can use this to index straight into
         // sorted array
         for (int tp = 0; tp < pos.length; tp++) {
-          Token token = new Token(terms[t], offsets[tp].getStartOffset(),
+          Token token = new Token(terms[t].utf8ToString(), offsets[tp].getStartOffset(),
               offsets[tp].getEndOffset());
           tokensInOriginalOrder[pos[tp]] = token;
         }

Modified: lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermPositionVector.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermPositionVector.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermPositionVector.java (original)
+++ lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermPositionVector.java Mon Jul  5 08:33:25 2010
@@ -30,6 +30,7 @@ import org.apache.lucene.analysis.tokena
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.index.TermPositionVector;
 import org.apache.lucene.index.TermVectorOffsetInfo;
+import org.apache.lucene.util.BytesRef;
 
 public final class TokenStreamFromTermPositionVector extends TokenStream {
 
@@ -54,18 +55,18 @@ public final class TokenStreamFromTermPo
     termAttribute = addAttribute(CharTermAttribute.class);
     positionIncrementAttribute = addAttribute(PositionIncrementAttribute.class);
     offsetAttribute = addAttribute(OffsetAttribute.class);
-    final String[] terms = termPositionVector.getTerms();
+    final BytesRef[] terms = termPositionVector.getTerms();
     for (int i = 0; i < terms.length; i++) {
       final TermVectorOffsetInfo[] offsets = termPositionVector.getOffsets(i);
       final int[] termPositions = termPositionVector.getTermPositions(i);
       for (int j = 0; j < termPositions.length; j++) {
         Token token;
         if (offsets != null) {
-          token = new Token(terms[i].toCharArray(), 0, terms[i].length(),
+          token = new Token(terms[i].utf8ToString(),
               offsets[j].getStartOffset(), offsets[j].getEndOffset());
         } else {
           token = new Token();
-          token.setEmpty().append(terms[i]);
+          token.setEmpty().append(terms[i].utf8ToString());
         }
         // Yes - this is the position, not the increment! This is for
         // sorting. This value

Modified: lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java (original)
+++ lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java Mon Jul  5 08:33:25 2010
@@ -25,6 +25,7 @@ import org.apache.lucene.index.IndexRead
 import org.apache.lucene.index.TermFreqVector;
 import org.apache.lucene.index.TermPositionVector;
 import org.apache.lucene.index.TermVectorOffsetInfo;
+import org.apache.lucene.util.BytesRef;
 
 /**
  * <code>FieldTermStack</code> is a stack that keeps query terms in the specified field
@@ -80,15 +81,15 @@ public class FieldTermStack {
     // just return to make null snippet if un-matched fieldName specified when fieldMatch == true
     if( termSet == null ) return;
     
-    for( String term : tpv.getTerms() ){
-      if( !termSet.contains( term ) ) continue;
+    for( BytesRef term : tpv.getTerms() ){
+      if( !termSet.contains( term.utf8ToString() ) ) continue;
       int index = tpv.indexOf( term );
       TermVectorOffsetInfo[] tvois = tpv.getOffsets( index );
       if( tvois == null ) return; // just return to make null snippets
       int[] poss = tpv.getTermPositions( index );
       if( poss == null ) return; // just return to make null snippets
       for( int i = 0; i < tvois.length; i++ )
-        termList.add( new TermInfo( term, tvois[i].getStartOffset(), tvois[i].getEndOffset(), poss[i] ) );
+        termList.add( new TermInfo( term.utf8ToString(), tvois[i].getStartOffset(), tvois[i].getEndOffset(), poss[i] ) );
     }
     
     // sort by position

Modified: lucene/dev/trunk/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndex.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndex.java (original)
+++ lucene/dev/trunk/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndex.java Mon Jul  5 08:33:25 2010
@@ -290,7 +290,7 @@ public class InstantiatedIndex
           TermPositionVector termPositionVector = (TermPositionVector) sourceIndexReader.getTermFreqVector(document.getDocumentNumber(), field.name());
           if (termPositionVector != null) {
             for (int i = 0; i < termPositionVector.getTerms().length; i++) {
-              String token = termPositionVector.getTerms()[i];
+              String token = termPositionVector.getTerms()[i].utf8ToString();
               InstantiatedTerm term = findTerm(field.name(), token);
               InstantiatedTermDocumentInformation termDocumentInformation = term.getAssociatedDocument(document.getDocumentNumber());
               termDocumentInformation.setTermOffsets(termPositionVector.getOffsets(i));

Modified: lucene/dev/trunk/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java (original)
+++ lucene/dev/trunk/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java Mon Jul  5 08:33:25 2010
@@ -464,7 +464,7 @@ public class InstantiatedIndexReader ext
       List<InstantiatedTermDocumentInformation> tv = doc.getVectorSpace().get(field);
       mapper.setExpectations(field, tv.size(), true, true);
       for (InstantiatedTermDocumentInformation tdi : tv) {
-        mapper.map(tdi.getTerm().text(), tdi.getTermPositions().length, tdi.getTermOffsets(), tdi.getTermPositions());
+        mapper.map(tdi.getTerm().getTerm().bytes(), tdi.getTermPositions().length, tdi.getTermOffsets(), tdi.getTermPositions());
       }
     }
   }
@@ -475,7 +475,7 @@ public class InstantiatedIndexReader ext
     for (Map.Entry<String, List<InstantiatedTermDocumentInformation>> e : doc.getVectorSpace().entrySet()) {
       mapper.setExpectations(e.getKey(), e.getValue().size(), true, true);
       for (InstantiatedTermDocumentInformation tdi : e.getValue()) {
-        mapper.map(tdi.getTerm().text(), tdi.getTermPositions().length, tdi.getTermOffsets(), tdi.getTermPositions());
+        mapper.map(tdi.getTerm().getTerm().bytes(), tdi.getTermPositions().length, tdi.getTermOffsets(), tdi.getTermPositions());
       }
     }
   }

Modified: lucene/dev/trunk/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermFreqVector.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermFreqVector.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermFreqVector.java (original)
+++ lucene/dev/trunk/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermFreqVector.java Mon Jul  5 08:33:25 2010
@@ -1,6 +1,7 @@
 package org.apache.lucene.store.instantiated;
 
 import org.apache.lucene.index.TermFreqVector;
+import org.apache.lucene.util.BytesRef;
 
 import java.io.Serializable;
 import java.util.Arrays;
@@ -34,18 +35,18 @@ public class InstantiatedTermFreqVector
 
   private final List<InstantiatedTermDocumentInformation> termDocumentInformations;
   private final String field;
-  private final String terms[];
+  private final BytesRef terms[];
   private final int termFrequencies[];
 
   public InstantiatedTermFreqVector(InstantiatedDocument document, String field) {
     this.field = field;
     termDocumentInformations = document.getVectorSpace().get(field);
-    terms = new String[termDocumentInformations.size()];
+    terms = new BytesRef[termDocumentInformations.size()];
     termFrequencies = new int[termDocumentInformations.size()];
 
     for (int i = 0; i < termDocumentInformations.size(); i++) {
       InstantiatedTermDocumentInformation termDocumentInformation = termDocumentInformations.get(i);
-      terms[i] = termDocumentInformation.getTerm().text();
+      terms[i] = termDocumentInformation.getTerm().getTerm().bytes();
       termFrequencies[i] = termDocumentInformation.getTermPositions().length;
     }
   }
@@ -77,7 +78,7 @@ public class InstantiatedTermFreqVector
     return terms == null ? 0 : terms.length;
   }
 
-  public String[] getTerms() {
+  public BytesRef[] getTerms() {
     return terms;
   }
 
@@ -85,14 +86,14 @@ public class InstantiatedTermFreqVector
     return termFrequencies;
   }
 
-  public int indexOf(String termText) {
+  public int indexOf(BytesRef termText) {
     if (terms == null)
       return -1;
     int res = Arrays.binarySearch(terms, termText);
     return res >= 0 ? res : -1;
   }
 
-  public int[] indexesOf(String[] termNumbers, int start, int len) {
+  public int[] indexesOf(BytesRef[] termNumbers, int start, int len) {
     // TODO: there must be a more efficient way of doing this.
     //       At least, we could advance the lower bound of the terms array
     //       as we find valid indices. Also, it might be possible to leverage

Modified: lucene/dev/trunk/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java (original)
+++ lucene/dev/trunk/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java Mon Jul  5 08:33:25 2010
@@ -41,14 +41,14 @@ public class InstantiatedTermsEnum exten
 
   @Override
   public SeekStatus seek(BytesRef text, boolean useCache) {
-    final Term t = new Term(field, text.utf8ToString());
+    final Term t = new Term(field, text);
     int loc = Arrays.binarySearch(terms, t, InstantiatedTerm.termComparator);
     if (loc < 0) {
       upto = -loc - 1;
       if (upto >= terms.length) {
         return SeekStatus.END;
       } else {
-        br.copy(terms[upto].getTerm().text());
+        br.copy(terms[upto].getTerm().bytes());
         return SeekStatus.NOT_FOUND;
       }
     } else {

Modified: lucene/dev/trunk/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (original)
+++ lucene/dev/trunk/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java Mon Jul  5 08:33:25 2010
@@ -207,7 +207,7 @@ public class MemoryIndex implements Seri
       if (o1 instanceof Map.Entry<?,?>) o1 = ((Map.Entry<?,?>) o1).getKey();
       if (o2 instanceof Map.Entry<?,?>) o2 = ((Map.Entry<?,?>) o2).getKey();
       if (o1 == o2) return 0;
-      return ((String) o1).compareTo((String) o2);
+      return ((Comparable) o1).compareTo((Comparable) o2);
     }
   };
 
@@ -341,21 +341,19 @@ public class MemoryIndex implements Seri
       if (fields.get(fieldName) != null)
         throw new IllegalArgumentException("field must not be added more than once");
       
-      HashMap<String,ArrayIntList> terms = new HashMap<String,ArrayIntList>();
+      HashMap<BytesRef,ArrayIntList> terms = new HashMap<BytesRef,ArrayIntList>();
       int numTokens = 0;
       int numOverlapTokens = 0;
       int pos = -1;
       
-      TermToBytesRefAttribute termAtt = stream.addAttribute(TermToBytesRefAttribute.class);
+      TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
       PositionIncrementAttribute posIncrAttribute = stream.addAttribute(PositionIncrementAttribute.class);
       OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
       BytesRef ref = new BytesRef(10);
       stream.reset();
       while (stream.incrementToken()) {
         termAtt.toBytesRef(ref);
-        // TODO: support non-UTF8 strings (like numerics) here
-        String term = ref.utf8ToString();
-        if (term.length() == 0) continue; // nothing to do
+        if (ref.length == 0) continue; // nothing to do
 //        if (DEBUG) System.err.println("token='" + term + "'");
         numTokens++;
         final int posIncr = posIncrAttribute.getPositionIncrement();
@@ -363,10 +361,10 @@ public class MemoryIndex implements Seri
           numOverlapTokens++;
         pos += posIncr;
         
-        ArrayIntList positions = terms.get(term);
+        ArrayIntList positions = terms.get(ref);
         if (positions == null) { // term not seen before
           positions = new ArrayIntList(stride);
-          terms.put(term, positions);
+          terms.put(new BytesRef(ref), positions);
         }
         if (stride == 1) {
           positions.add(pos);
@@ -490,9 +488,10 @@ public class MemoryIndex implements Seri
       
       int len = info.terms.size();
       size += VM.sizeOfHashMap(len);
-      Iterator<Map.Entry<String,ArrayIntList>> iter2 = info.terms.entrySet().iterator();
+      Iterator<Map.Entry<BytesRef,ArrayIntList>> iter2 = info.terms.entrySet().iterator();
       while (--len >= 0) { // for each term
-        Map.Entry<String,ArrayIntList> e = iter2.next();
+        Map.Entry<BytesRef,ArrayIntList> e = iter2.next();
+        // FIXME: this calculation is probably not correct since we use bytes now.
         size += VM.sizeOfObject(PTR + 3*INT); // assumes substring() memory overlay
 //        size += STR + 2 * ((String) e.getKey()).length();
         ArrayIntList positions = e.getValue();
@@ -534,7 +533,7 @@ public class MemoryIndex implements Seri
   public String toString() {
     StringBuilder result = new StringBuilder(256);    
     sortFields();   
-    int sumChars = 0;
+    int sumBytes = 0;
     int sumPositions = 0;
     int sumTerms = 0;
     
@@ -545,32 +544,32 @@ public class MemoryIndex implements Seri
       info.sortTerms();
       result.append(fieldName + ":\n");
       
-      int numChars = 0;
+      int numBytes = 0;
       int numPositions = 0;
       for (int j=0; j < info.sortedTerms.length; j++) {
-        Map.Entry<String,ArrayIntList> e = info.sortedTerms[j];
-        String term = e.getKey();
+        Map.Entry<BytesRef,ArrayIntList> e = info.sortedTerms[j];
+        BytesRef term = e.getKey();
         ArrayIntList positions = e.getValue();
         result.append("\t'" + term + "':" + numPositions(positions) + ":");
         result.append(positions.toString(stride)); // ignore offsets
         result.append("\n");
         numPositions += numPositions(positions);
-        numChars += term.length();
+        numBytes += term.length;
       }
       
       result.append("\tterms=" + info.sortedTerms.length);
       result.append(", positions=" + numPositions);
-      result.append(", Kchars=" + (numChars/1000.0f));
+      result.append(", Kbytes=" + (numBytes/1000.0f));
       result.append("\n");
       sumPositions += numPositions;
-      sumChars += numChars;
+      sumBytes += numBytes;
       sumTerms += info.sortedTerms.length;
     }
     
     result.append("\nfields=" + sortedFields.length);
     result.append(", terms=" + sumTerms);
     result.append(", positions=" + sumPositions);
-    result.append(", Kchars=" + (sumChars/1000.0f));
+    result.append(", Kbytes=" + (sumBytes/1000.0f));
     return result.toString();
   }
   
@@ -588,10 +587,10 @@ public class MemoryIndex implements Seri
      * Term strings and their positions for this field: Map <String
      * termText, ArrayIntList positions>
      */
-    private final HashMap<String,ArrayIntList> terms; 
+    private final HashMap<BytesRef,ArrayIntList> terms; 
     
     /** Terms sorted ascending by term text; computed on demand */
-    private transient Map.Entry<String,ArrayIntList>[] sortedTerms;
+    private transient Map.Entry<BytesRef,ArrayIntList>[] sortedTerms;
     
     /** Number of added tokens for this field */
     private final int numTokens;
@@ -607,7 +606,7 @@ public class MemoryIndex implements Seri
 
     private static final long serialVersionUID = 2882195016849084649L;  
 
-    public Info(HashMap<String,ArrayIntList> terms, int numTokens, int numOverlapTokens, float boost) {
+    public Info(HashMap<BytesRef,ArrayIntList> terms, int numTokens, int numOverlapTokens, float boost) {
       this.terms = terms;
       this.numTokens = numTokens;
       this.numOverlapTokens = numOverlapTokens;
@@ -627,7 +626,7 @@ public class MemoryIndex implements Seri
     }
         
     /** note that the frequency can be calculated as numPosition(getPositions(x)) */
-    public ArrayIntList getPositions(String term) {
+    public ArrayIntList getPositions(BytesRef term) {
       return terms.get(term);
     }
 
@@ -759,7 +758,7 @@ public class MemoryIndex implements Seri
     public int docFreq(Term term) {
       Info info = getInfo(term.field());
       int freq = 0;
-      if (info != null) freq = info.getPositions(term.text()) != null ? 1 : 0;
+      if (info != null) freq = info.getPositions(term.bytes()) != null ? 1 : 0;
       if (DEBUG) System.err.println("MemoryIndexReader.docFreq: " + term + ", freq:" + freq);
       return freq;
     }
@@ -833,8 +832,7 @@ public class MemoryIndex implements Seri
 
       @Override
       public SeekStatus seek(BytesRef text, boolean useCache) {
-        final String s = text.utf8ToString();
-        termUpto = Arrays.binarySearch(info.sortedTerms, s, termComparator);
+        termUpto = Arrays.binarySearch(info.sortedTerms, text, termComparator);
         if (termUpto < 0) { // not found; choose successor
           termUpto = -termUpto -1;
           if (termUpto >= info.sortedTerms.length) {
@@ -1061,7 +1059,7 @@ public class MemoryIndex implements Seri
       
       return new TermPositionVector() { 
   
-        private final Map.Entry<String,ArrayIntList>[] sortedTerms = info.sortedTerms;
+        private final Map.Entry<BytesRef,ArrayIntList>[] sortedTerms = info.sortedTerms;
         
         public String getField() {
           return fieldName;
@@ -1071,8 +1069,8 @@ public class MemoryIndex implements Seri
           return sortedTerms.length;
         }
   
-        public String[] getTerms() {
-          String[] terms = new String[sortedTerms.length];
+        public BytesRef[] getTerms() {
+          BytesRef[] terms = new BytesRef[sortedTerms.length];
           for (int i=sortedTerms.length; --i >= 0; ) {
             terms[i] = sortedTerms[i].getKey();
           }
@@ -1087,12 +1085,12 @@ public class MemoryIndex implements Seri
           return freqs;
         }
   
-        public int indexOf(String term) {
+        public int indexOf(BytesRef term) {
           int i = Arrays.binarySearch(sortedTerms, term, termComparator);
           return i >= 0 ? i : -1;
         }
   
-        public int[] indexesOf(String[] terms, int start, int len) {
+        public int[] indexesOf(BytesRef[] terms, int start, int len) {
           int[] indexes = new int[len];
           for (int i=0; i < len; i++) {
             indexes[i] = indexOf(terms[start++]);

Modified: lucene/dev/trunk/lucene/contrib/misc/src/java/org/apache/lucene/index/TermVectorAccessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/misc/src/java/org/apache/lucene/index/TermVectorAccessor.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/misc/src/java/org/apache/lucene/index/TermVectorAccessor.java (original)
+++ lucene/dev/trunk/lucene/contrib/misc/src/java/org/apache/lucene/index/TermVectorAccessor.java Mon Jul  5 08:33:25 2010
@@ -69,7 +69,7 @@ public class TermVectorAccessor {
   }
 
   /** Instance reused to save garbage collector some time */
-  private List<String> tokens;
+  private List<BytesRef> tokens;
 
   /** Instance reused to save garbage collector some time */
   private List<int[]> positions;
@@ -91,7 +91,7 @@ public class TermVectorAccessor {
   private void build(IndexReader indexReader, String field, TermVectorMapper mapper, int documentNumber) throws IOException {
 
     if (tokens == null) {
-      tokens = new ArrayList<String>(500);
+      tokens = new ArrayList<BytesRef>(500);
       positions = new ArrayList<int[]>(500);
       frequencies = new ArrayList<Integer>(500);
     } else {
@@ -122,7 +122,7 @@ public class TermVectorAccessor {
           if (docID == documentNumber) {
 
             frequencies.add(Integer.valueOf(docs.freq()));
-            tokens.add(text.utf8ToString());
+            tokens.add(new BytesRef(text));
 
             if (!mapper.isIgnoringPositions()) {
               int[] positions = new int[docs.freq()];
@@ -173,7 +173,7 @@ public class TermVectorAccessor {
     }
 
     @Override
-    public void map(String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
+    public void map(BytesRef term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
       decorated.map(term, frequency, offsets, positions);
     }
 

Modified: lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/index/TestTermVectorAccessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/index/TestTermVectorAccessor.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/index/TestTermVectorAccessor.java (original)
+++ lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/index/TestTermVectorAccessor.java Mon Jul  5 08:33:25 2010
@@ -76,21 +76,21 @@ public class TestTermVectorAccessor exte
       mapper = new ParallelArrayTermVectorMapper();
       accessor.accept(ir, i, "a", mapper);
       tfv = mapper.materializeVector();
-      assertEquals("doc " + i, "a", tfv.getTerms()[0]);
+      assertEquals("doc " + i, "a", tfv.getTerms()[0].utf8ToString());
       assertEquals("doc " + i, 8, tfv.getTermFrequencies()[0]);
 
       mapper = new ParallelArrayTermVectorMapper();
       accessor.accept(ir, i, "b", mapper);
       tfv = mapper.materializeVector();
       assertEquals("doc " + i, 8, tfv.getTermFrequencies().length);
-      assertEquals("doc " + i, "b", tfv.getTerms()[1]);
+      assertEquals("doc " + i, "b", tfv.getTerms()[1].utf8ToString());
       assertEquals("doc " + i, 7, tfv.getTermFrequencies()[1]);
 
       mapper = new ParallelArrayTermVectorMapper();
       accessor.accept(ir, i, "c", mapper);
       tfv = mapper.materializeVector();
       assertEquals("doc " + i, 8, tfv.getTermFrequencies().length);
-      assertEquals("doc " + i, "c", tfv.getTerms()[2]);
+      assertEquals("doc " + i, "c", tfv.getTerms()[2].utf8ToString());
       assertEquals("doc " + i, 7, tfv.getTermFrequencies()[2]);
 
       mapper = new ParallelArrayTermVectorMapper();

Modified: lucene/dev/trunk/lucene/contrib/queries/src/java/org/apache/lucene/search/FuzzyLikeThisQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/queries/src/java/org/apache/lucene/search/FuzzyLikeThisQuery.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/queries/src/java/org/apache/lucene/search/FuzzyLikeThisQuery.java (original)
+++ lucene/dev/trunk/lucene/contrib/queries/src/java/org/apache/lucene/search/FuzzyLikeThisQuery.java Mon Jul  5 08:33:25 2010
@@ -213,7 +213,7 @@ public class FuzzyLikeThisQuery extends 
                         totalVariantDocFreqs+=fe.docFreq();
                         float score=boostAtt.getBoost();
                         if (variantsQ.size() < MAX_VARIANTS_PER_TERM || score > minScore){
-                          ScoreTerm st=new ScoreTerm(new Term(startTerm.field(), possibleMatch.utf8ToString()),score,startTerm);                    
+                          ScoreTerm st=new ScoreTerm(new Term(startTerm.field(), new BytesRef(possibleMatch)),score,startTerm);                    
                           variantsQ.insertWithOverflow(st);
                           minScore = variantsQ.top().score; // maintain minScore
                         }

Modified: lucene/dev/trunk/lucene/contrib/queries/src/java/org/apache/lucene/search/TermsFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/queries/src/java/org/apache/lucene/search/TermsFilter.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/queries/src/java/org/apache/lucene/search/TermsFilter.java (original)
+++ lucene/dev/trunk/lucene/contrib/queries/src/java/org/apache/lucene/search/TermsFilter.java Mon Jul  5 08:33:25 2010
@@ -77,7 +77,7 @@ public class TermsFilter extends Filter
         }
 
         if (terms != null) {
-          br.copy(term.text());
+          br.copy(term.bytes());
           if (termsEnum.seek(br) == TermsEnum.SeekStatus.FOUND) {
             docs = termsEnum.docs(delDocs, docs);
             while(docs.nextDoc() != DocsEnum.NO_MORE_DOCS) {

Modified: lucene/dev/trunk/lucene/contrib/queries/src/java/org/apache/lucene/search/similar/MoreLikeThis.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/queries/src/java/org/apache/lucene/search/similar/MoreLikeThis.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/queries/src/java/org/apache/lucene/search/similar/MoreLikeThis.java (original)
+++ lucene/dev/trunk/lucene/contrib/queries/src/java/org/apache/lucene/search/similar/MoreLikeThis.java Mon Jul  5 08:33:25 2010
@@ -47,6 +47,7 @@ import org.apache.lucene.search.Similari
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.PriorityQueue;
 
 
@@ -848,10 +849,10 @@ public final class MoreLikeThis {
 	 */
 	private void addTermFrequencies(Map<String,Int> termFreqMap, TermFreqVector vector)
 	{
-		String[] terms = vector.getTerms();
+		BytesRef[] terms = vector.getTerms();
 		int freqs[]=vector.getTermFrequencies();
 		for (int j = 0; j < terms.length; j++) {
-		    String term = terms[j];
+		    String term = terms[j].utf8ToString();
 		
 			if(isNoiseWord(term)){
 				continue;

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java Mon Jul  5 08:33:25 2010
@@ -1073,7 +1073,6 @@ final class DocumentsWriter {
       TermsEnum termsEnum = null;
         
       String currentField = null;
-      BytesRef termRef = new BytesRef();
       DocsEnum docs = null;
         
       for (Entry<Term, BufferedDeletes.Num> entry: deletesFlushed.terms.entrySet()) {
@@ -1097,9 +1096,7 @@ final class DocumentsWriter {
         }
         assert checkDeleteTerm(term);
           
-        termRef.copy(term.text());
-          
-        if (termsEnum.seek(termRef, false) == TermsEnum.SeekStatus.FOUND) {
+        if (termsEnum.seek(term.bytes(), false) == TermsEnum.SeekStatus.FOUND) {
           DocsEnum docsEnum = termsEnum.docs(reader.getDeletedDocs(), docs);
             
           if (docsEnum != null) {
@@ -1166,7 +1163,7 @@ final class DocumentsWriter {
       num.setNum(docIDUpto);
     deletesInRAM.numTerms++;
 
-    deletesInRAM.addBytesUsed(BYTES_PER_DEL_TERM + term.text.length()*CHAR_NUM_BYTE);
+    deletesInRAM.addBytesUsed(BYTES_PER_DEL_TERM + term.bytes.length);
   }
 
   // Buffer a specific docID for deletion.  Currently only

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldSortedTermVectorMapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldSortedTermVectorMapper.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldSortedTermVectorMapper.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldSortedTermVectorMapper.java Mon Jul  5 08:33:25 2010
@@ -2,6 +2,8 @@ package org.apache.lucene.index;
 
 import java.util.*;
 
+import org.apache.lucene.util.BytesRef;
+
 /**
  * Copyright 2007 The Apache Software Foundation
  * <p/>
@@ -44,7 +46,7 @@ public class FieldSortedTermVectorMapper
   }
 
   @Override
-  public void map(String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
+  public void map(BytesRef term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
     TermVectorEntry entry = new TermVectorEntry(currentField, term, frequency, offsets, positions);
     currentSet.add(entry);
   }

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexReader.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexReader.java Mon Jul  5 08:33:25 2010
@@ -883,7 +883,7 @@ public abstract class IndexReader implem
   public abstract Fields fields() throws IOException;
 
   public int docFreq(Term term) throws IOException {
-    return docFreq(term.field(), new BytesRef(term.text()));
+    return docFreq(term.field(), term.bytes());
   }
 
   /** Returns the number of documents containing the term
@@ -1000,7 +1000,7 @@ public abstract class IndexReader implem
     DocsEnum docs = MultiFields.getTermDocsEnum(this,
                                                 MultiFields.getDeletedDocs(this),
                                                 term.field(),
-                                                new BytesRef(term.text()));
+                                                term.bytes());
     if (docs == null) return 0;
     int n = 0;
     int doc;

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/PositionBasedTermVectorMapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/PositionBasedTermVectorMapper.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/PositionBasedTermVectorMapper.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/PositionBasedTermVectorMapper.java Mon Jul  5 08:33:25 2010
@@ -21,6 +21,8 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
+import org.apache.lucene.util.BytesRef;
+
 /**
  * For each Field, store position by position information.  It ignores frequency information
  * <p/>
@@ -69,7 +71,7 @@ public class PositionBasedTermVectorMapp
    * @param positions
    */
   @Override
-  public void map(String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
+  public void map(BytesRef term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
     for (int i = 0; i < positions.length; i++) {
       Integer posVal = Integer.valueOf(positions[i]);
       TVPositionInfo pos = currentPositions.get(posVal);
@@ -120,20 +122,20 @@ public class PositionBasedTermVectorMapp
   public static class TVPositionInfo{
     private int position;
 
-    private List<String> terms;
+    private List<BytesRef> terms;
 
     private List<TermVectorOffsetInfo> offsets;
 
 
     public TVPositionInfo(int position, boolean storeOffsets) {
       this.position = position;
-      terms = new ArrayList<String>();
+      terms = new ArrayList<BytesRef>();
       if (storeOffsets) {
         offsets = new ArrayList<TermVectorOffsetInfo>();
       }
     }
 
-    void addTerm(String term, TermVectorOffsetInfo info)
+    void addTerm(BytesRef term, TermVectorOffsetInfo info)
     {
       terms.add(term);
       if (offsets != null) {
@@ -151,9 +153,9 @@ public class PositionBasedTermVectorMapp
 
     /**
      * Note, there may be multiple terms at the same position
-     * @return A List of Strings
+     * @return A List of BytesRefs
      */
-    public List<String> getTerms() {
+    public List<BytesRef> getTerms() {
       return terms;
     }
 

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentTermPositionVector.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentTermPositionVector.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentTermPositionVector.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentTermPositionVector.java Mon Jul  5 08:33:25 2010
@@ -1,5 +1,7 @@
 package org.apache.lucene.index;
 
+import org.apache.lucene.util.BytesRef;
+
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -22,7 +24,7 @@ class SegmentTermPositionVector extends 
   protected TermVectorOffsetInfo[][] offsets;
   public static final int[] EMPTY_TERM_POS = new int[0];
   
-  public SegmentTermPositionVector(String field, String terms[], int termFreqs[], int[][] positions, TermVectorOffsetInfo[][] offsets) {
+  public SegmentTermPositionVector(String field, BytesRef terms[], int termFreqs[], int[][] positions, TermVectorOffsetInfo[][] offsets) {
     super(field, terms, termFreqs);
     this.offsets = offsets;
     this.positions = positions;

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentTermVector.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentTermVector.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentTermVector.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentTermVector.java Mon Jul  5 08:33:25 2010
@@ -19,13 +19,15 @@ package org.apache.lucene.index;
 
 import java.util.*;
 
+import org.apache.lucene.util.BytesRef;
+
 
 class SegmentTermVector implements TermFreqVector {
   private String field;
-  private String terms[];
+  private BytesRef terms[];
   private int termFreqs[];
   
-  SegmentTermVector(String field, String terms[], int termFreqs[]) {
+  SegmentTermVector(String field, BytesRef terms[], int termFreqs[]) {
     this.field = field;
     this.terms = terms;
     this.termFreqs = termFreqs;
@@ -59,7 +61,7 @@ class SegmentTermVector implements TermF
     return terms == null ? 0 : terms.length;
   }
 
-  public String [] getTerms() {
+  public BytesRef [] getTerms() {
     return terms;
   }
 
@@ -67,14 +69,14 @@ class SegmentTermVector implements TermF
     return termFreqs;
   }
 
-  public int indexOf(String termText) {
+  public int indexOf(BytesRef termBytes) {
     if(terms == null)
       return -1;
-    int res = Arrays.binarySearch(terms, termText);
+    int res = Arrays.binarySearch(terms, termBytes);
     return res >= 0 ? res : -1;
   }
 
-  public int[] indexesOf(String [] termNumbers, int start, int len) {
+  public int[] indexesOf(BytesRef [] termNumbers, int start, int len) {
     // TODO: there must be a more efficient way of doing this.
     //       At least, we could advance the lower bound of the terms array
     //       as we find valid indexes. Also, it might be possible to leverage

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SortedTermVectorMapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SortedTermVectorMapper.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SortedTermVectorMapper.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SortedTermVectorMapper.java Mon Jul  5 08:33:25 2010
@@ -17,6 +17,8 @@ package org.apache.lucene.index;
 
 import java.util.*;
 
+import org.apache.lucene.util.BytesRef;
+
 /**
  * Store a sorted collection of {@link org.apache.lucene.index.TermVectorEntry}s.  Collects all term information
  * into a single, SortedSet.
@@ -30,7 +32,7 @@ public class SortedTermVectorMapper exte
 
 
   private SortedSet<TermVectorEntry> currentSet;
-  private Map<String,TermVectorEntry> termToTVE = new HashMap<String,TermVectorEntry>();
+  private Map<BytesRef,TermVectorEntry> termToTVE = new HashMap<BytesRef,TermVectorEntry>();
   private boolean storeOffsets;
   private boolean storePositions;
   /**
@@ -61,7 +63,7 @@ public class SortedTermVectorMapper exte
    */
   //We need to combine any previous mentions of the term
   @Override
-  public void map(String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
+  public void map(BytesRef term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
     TermVectorEntry entry =  termToTVE.get(term);
     if (entry == null) {
       entry = new TermVectorEntry(ALL, term, frequency, 

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/Term.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/Term.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/Term.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/Term.java Mon Jul  5 08:33:25 2010
@@ -17,6 +17,9 @@ package org.apache.lucene.index;
  * limitations under the License.
  */
 
+import java.util.Comparator;
+
+import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.StringHelper;
 
 /**
@@ -29,14 +32,26 @@ import org.apache.lucene.util.StringHelp
 
 public final class Term implements Comparable<Term>, java.io.Serializable {
   String field;
-  String text;
+  BytesRef bytes;
 
+  /** Constructs a Term with the given field and bytes.
+   * <p>Note that a null field or null bytes value results in undefined
+   * behavior for most Lucene APIs that accept a Term parameter. 
+   * <p>WARNING: the provided BytesRef is not copied, but used directly.
+   * Therefore the bytes should not be modified after construction, for
+   * example, you should clone a copy rather than pass reused bytes from
+   * a TermsEnum.
+   */
+  public Term(String fld, BytesRef bytes) {
+    field = fld == null ? null : StringHelper.intern(fld);
+    this.bytes = bytes;
+  }
+  
   /** Constructs a Term with the given field and text.
    * <p>Note that a null field or null text value results in undefined
    * behavior for most Lucene APIs that accept a Term parameter. */
-  public Term(String fld, String txt) {
-    field = fld == null ? null : StringHelper.intern(fld);
-    text = txt;
+  public Term(String fld, String text) {
+    this(fld, new BytesRef(text));
   }
 
   /** Constructs a Term with the given field and empty text.
@@ -46,15 +61,27 @@ public final class Term implements Compa
    * @param fld
    */
   public Term(String fld) {
-    this(fld, "", true);
+    this(fld, new BytesRef(), true);
   }
 
-  /** @lucene.experimental */
-  public Term(String fld, String txt, boolean intern) {
+  /** 
+   * WARNING: the provided BytesRef is not copied, but used directly.
+   * Therefore the bytes should not be modified after construction, for
+   * example, you should clone a copy rather than pass reused bytes from
+   * a TermsEnum.
+   * 
+   * @lucene.experimental 
+   */
+  public Term(String fld, BytesRef bytes, boolean intern) {
     field = intern ? StringHelper.intern(fld) : fld;	  // field names are interned
-    text = txt;					          // unless already known to be
+    this.bytes = bytes;					          // unless already known to be
   }
 
+  /** @lucene.experimental */
+  public Term(String fld, String text, boolean intern) {
+    this(fld, new BytesRef(text), intern);
+  }
+  
   /** Returns the field of this term, an interned string.   The field indicates
     the part of a document which this term came from. */
   public final String field() { return field; }
@@ -62,8 +89,26 @@ public final class Term implements Compa
   /** Returns the text of this term.  In the case of words, this is simply the
     text of the word.  In the case of dates and other types, this is an
     encoding of the object as a string.  */
-  public final String text() { return text; }
-  
+  public final String text() { return bytes.utf8ToString(); }
+
+  /** Returns the bytes of this term. */
+  public final BytesRef bytes() { return bytes; }
+
+  /**
+   * Optimized construction of new Terms by reusing same field as this Term
+   * - avoids field.intern() overhead 
+   * <p>WARNING: the provided BytesRef is not copied, but used directly.
+   * Therefore the bytes should not be modified after construction, for
+   * example, you should clone a copy rather than pass reused bytes from
+   * a TermsEnum.
+   * @param bytes The bytes of the new term (field is implicitly same as this Term instance)
+   * @return A new Term
+   */
+  public Term createTerm(BytesRef bytes)
+  {
+      return new Term(field,bytes,false);
+  }
+
   /**
    * Optimized construction of new Terms by reusing same field as this Term
    * - avoids field.intern() overhead 
@@ -89,10 +134,10 @@ public final class Term implements Compa
         return false;
     } else if (!field.equals(other.field))
       return false;
-    if (text == null) {
-      if (other.text != null)
+    if (bytes == null) {
+      if (other.bytes != null)
         return false;
-    } else if (!text.equals(other.text))
+    } else if (!bytes.equals(other.bytes))
       return false;
     return true;
   }
@@ -102,7 +147,7 @@ public final class Term implements Compa
     final int prime = 31;
     int result = 1;
     result = prime * result + ((field == null) ? 0 : field.hashCode());
-    result = prime * result + ((text == null) ? 0 : text.hashCode());
+    result = prime * result + ((bytes == null) ? 0 : bytes.hashCode());
     return result;
   }
 
@@ -113,19 +158,47 @@ public final class Term implements Compa
     The ordering of terms is first by field, then by text.*/
   public final int compareTo(Term other) {
     if (field == other.field)			  // fields are interned
-      return text.compareTo(other.text);
+      return bytes.compareTo(other.bytes);
+    else
+      return field.compareTo(other.field);
+  }
+  
+  @Deprecated
+  private static final Comparator<BytesRef> legacyComparator = 
+    BytesRef.getUTF8SortedAsUTF16Comparator();
+
+  /** 
+   * @deprecated For internal backwards compatibility use only
+   * @lucene.internal
+   */
+  @Deprecated
+  public final int compareToUTF16(Term other) {
+    if (field == other.field) // fields are interned
+      return legacyComparator.compare(this.bytes, other.bytes);
     else
       return field.compareTo(other.field);
   }
 
+  /** 
+   * Resets the field and bytes of a Term. 
+   * <p>WARNING: the provided BytesRef is not copied, but used directly.
+   * Therefore the bytes should not be modified after this call, for
+   * example, you should clone a copy rather than pass reused bytes from
+   * a TermsEnum.
+   */
+  final void set(String fld, BytesRef bytes) {
+    field = fld;
+    this.bytes = bytes;
+  }
+
   /** Resets the field and text of a Term. */
   final void set(String fld, String txt) {
     field = fld;
-    text = txt;
+    this.bytes = new BytesRef(txt);
   }
 
   @Override
-  public final String toString() { return field + ":" + text; }
+  public final String toString() { return field + ":" + bytes.utf8ToString(); }
 
   private void readObject(java.io.ObjectInputStream in)
     throws java.io.IOException, ClassNotFoundException

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermFreqVector.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermFreqVector.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermFreqVector.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermFreqVector.java Mon Jul  5 08:33:25 2010
@@ -1,5 +1,7 @@
 package org.apache.lucene.index;
 
+import org.apache.lucene.util.BytesRef;
+
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -38,7 +40,7 @@ public interface TermFreqVector {
   /** 
    * @return An Array of term texts in ascending order.
    */
-  public String[] getTerms();
+  public BytesRef[] getTerms();
 
 
   /** Array of term frequencies. Locations of the array correspond one to one
@@ -54,7 +56,7 @@ public interface TermFreqVector {
    *  <code>term</code> appears. If this term does not appear in the array,
    *  return -1.
    */
-  public int indexOf(String term);
+  public int indexOf(BytesRef term);
 
 
   /** Just like <code>indexOf(int)</code> but searches for a number of terms
@@ -66,6 +68,6 @@ public interface TermFreqVector {
    *  @param start index in the array where the list of terms starts
    *  @param len the number of terms in the list
    */
-  public int[] indexesOf(String[] terms, int start, int len);
+  public int[] indexesOf(BytesRef[] terms, int start, int len);
 
 }

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermVectorEntry.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermVectorEntry.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermVectorEntry.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermVectorEntry.java Mon Jul  5 08:33:25 2010
@@ -1,5 +1,7 @@
 package org.apache.lucene.index;
 
+import org.apache.lucene.util.BytesRef;
+
 /**
  * Copyright 2007 The Apache Software Foundation
  * <p/>
@@ -21,7 +23,7 @@ package org.apache.lucene.index;
  */
 public class TermVectorEntry {
   private String field;
-  private String term;
+  private BytesRef term;
   private int frequency;
   private TermVectorOffsetInfo [] offsets;
   int [] positions;
@@ -30,7 +32,7 @@ public class TermVectorEntry {
   public TermVectorEntry() {
   }
 
-  public TermVectorEntry(String field, String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
+  public TermVectorEntry(String field, BytesRef term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
     this.field = field;
     this.term = term;
     this.frequency = frequency;
@@ -55,7 +57,7 @@ public class TermVectorEntry {
     return positions;
   }
 
-  public String getTerm() {
+  public BytesRef getTerm() {
     return term;
   }
 

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermVectorMapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermVectorMapper.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermVectorMapper.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermVectorMapper.java Mon Jul  5 08:33:25 2010
@@ -1,4 +1,7 @@
 package org.apache.lucene.index;
+
+import org.apache.lucene.util.BytesRef;
+
 /**
  * Copyright 2007 The Apache Software Foundation
  *
@@ -62,7 +65,7 @@ public abstract class TermVectorMapper {
    * @param offsets null if the offset is not specified, otherwise the offset into the field of the term
    * @param positions null if the position is not specified, otherwise the position in the field of the term
    */
-  public abstract void map(String term, int frequency, TermVectorOffsetInfo [] offsets, int [] positions);
+  public abstract void map(BytesRef term, int frequency, TermVectorOffsetInfo [] offsets, int [] positions);
 
   /**
    * Indicate to Lucene that even if there are positions stored, this mapper is not interested in them and they

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermVectorsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermVectorsReader.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermVectorsReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermVectorsReader.java Mon Jul  5 08:33:25 2010
@@ -21,6 +21,7 @@ import org.apache.lucene.store.BufferedI
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
 
 import java.io.IOException;
 import java.util.Arrays;
@@ -415,14 +416,15 @@ class TermVectorsReader implements Clone
       deltaLength = tvf.readVInt();
       totalLength = start + deltaLength;
 
-      final String term;
+      final BytesRef term = new BytesRef(totalLength);
       
       // Term stored as utf8 bytes
       if (byteBuffer.length < totalLength) {
         byteBuffer = ArrayUtil.grow(byteBuffer, totalLength);
       }
       tvf.readBytes(byteBuffer, start, deltaLength);
-      term = new String(byteBuffer, 0, totalLength, "UTF-8");
+      System.arraycopy(byteBuffer, 0, term.bytes, 0, totalLength);
+      term.length = totalLength;
       int freq = tvf.readVInt();
       int [] positions = null;
       if (storePositions) { //read in the positions
@@ -491,7 +493,7 @@ class TermVectorsReader implements Clone
 class ParallelArrayTermVectorMapper extends TermVectorMapper
 {
 
-  private String[] terms;
+  private BytesRef[] terms;
   private int[] termFreqs;
   private int positions[][];
   private TermVectorOffsetInfo offsets[][];
@@ -503,7 +505,7 @@ class ParallelArrayTermVectorMapper exte
   @Override
   public void setExpectations(String field, int numTerms, boolean storeOffsets, boolean storePositions) {
     this.field = field;
-    terms = new String[numTerms];
+    terms = new BytesRef[numTerms];
     termFreqs = new int[numTerms];
     this.storingOffsets = storeOffsets;
     this.storingPositions = storePositions;
@@ -514,7 +516,7 @@ class ParallelArrayTermVectorMapper exte
   }
 
   @Override
-  public void map(String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
+  public void map(BytesRef term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
     terms[currentPosition] = term;
     termFreqs[currentPosition] = frequency;
     if (storingOffsets)

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermVectorsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermVectorsWriter.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermVectorsWriter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TermVectorsWriter.java Mon Jul  5 08:33:25 2010
@@ -21,7 +21,6 @@ import org.apache.lucene.store.Directory
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.StringHelper;
-import org.apache.lucene.util.UnicodeUtil;
 
 import java.io.IOException;
 
@@ -29,7 +28,6 @@ final class TermVectorsWriter {
   
   private IndexOutput tvx = null, tvd = null, tvf = null;
   private FieldInfos fieldInfos;
-  final BytesRef[] utf8Results = new BytesRef[] {new BytesRef(10), new BytesRef(10)};
 
   public TermVectorsWriter(Directory directory, String segment,
                            FieldInfos fieldInfos)
@@ -97,25 +95,19 @@ final class TermVectorsWriter {
 
         tvf.writeVInt(bits);
 
-        final String[] terms = vectors[i].getTerms();
+        final BytesRef[] terms = vectors[i].getTerms();
         final int[] freqs = vectors[i].getTermFrequencies();
 
-        int utf8Upto = 0;
-        utf8Results[1].length = 0;
-
         for (int j=0; j<numTerms; j++) {
-
-          UnicodeUtil.UTF16toUTF8(terms[j], 0, terms[j].length(), utf8Results[utf8Upto]);
           
-          int start = StringHelper.bytesDifference(utf8Results[1-utf8Upto].bytes,
-                                                   utf8Results[1-utf8Upto].length,
-                                                   utf8Results[utf8Upto].bytes,
-                                                   utf8Results[utf8Upto].length);
-          int length = utf8Results[utf8Upto].length - start;
+          int start = j == 0 ? 0 : StringHelper.bytesDifference(terms[j-1].bytes,
+                                                   terms[j-1].length,
+                                                   terms[j].bytes,
+                                                   terms[j].length);
+          int length = terms[j].length - start;
           tvf.writeVInt(start);       // write shared prefix length
           tvf.writeVInt(length);        // write delta length
-          tvf.writeBytes(utf8Results[utf8Upto].bytes, start, length);  // write delta bytes
-          utf8Upto = 1-utf8Upto;
+          tvf.writeBytes(terms[j].bytes, start, length);  // write delta bytes
 
           final int termFreq = freqs[j];
 

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java Mon Jul  5 08:33:25 2010
@@ -237,7 +237,6 @@ public class PreFlexFields extends Field
     private FieldInfo fieldInfo;
     private boolean skipNext;
     private BytesRef current;
-    private final BytesRef scratchBytesRef = new BytesRef();
 
     private int[] surrogateSeekPending = new int[1];
     private boolean[] surrogateDidSeekBack = new boolean[1];
@@ -319,7 +318,8 @@ public class PreFlexFields extends Field
           assert pendingPrefix != null;
           assert pendingPrefix.length > seekPrefix;
           pendingPrefix[seekPrefix] = UnicodeUtil.UNI_SUR_HIGH_START;
-          Term t2 = protoTerm.createTerm(new String(pendingPrefix, 0, 1+seekPrefix));
+          pendingPrefix[1+seekPrefix] = UnicodeUtil.UNI_SUR_LOW_START;
+          Term t2 = protoTerm.createTerm(new BytesRef(pendingPrefix, 0, 2+seekPrefix));
           if (DEBUG_SURROGATES) {
             System.out.println("    do pop; seek back to " + UnicodeUtil.toHexString(t2.text()));
           }
@@ -334,7 +334,7 @@ public class PreFlexFields extends Field
           assert pendingPrefix != null;
           assert pendingPrefix.length > seekPrefix;
           pendingPrefix[seekPrefix] = 0xffff;
-          Term t2 = protoTerm.createTerm(new String(pendingPrefix, 0, 1+seekPrefix));
+          Term t2 = protoTerm.createTerm(new BytesRef(pendingPrefix, 0, 1+seekPrefix));
           if (DEBUG_SURROGATES) {
             System.out.println("    finish pop; seek fwd to " + UnicodeUtil.toHexString(t2.text()));
           }
@@ -358,6 +358,9 @@ public class PreFlexFields extends Field
       return false;
     }
 
+    private UnicodeUtil.UTF16Result termBuffer = new UnicodeUtil.UTF16Result();
+    private UnicodeUtil.UTF16Result seekBuffer = new UnicodeUtil.UTF16Result();
+    
     private boolean pushNewSurrogate() throws IOException {
       if (DEBUG_SURROGATES) {
         System.out.println("  check push newSuffix=" + newSuffixStart + " stack=" + getStack());
@@ -366,11 +369,12 @@ public class PreFlexFields extends Field
       if (t == null || t.field() != fieldInfo.name) {
         return false;
       }
-      final String text = t.text();
-      final int textLen = text.length();
 
-      for(int i=Math.max(0,newSuffixStart);i<textLen;i++) {
-        final char ch = text.charAt(i);
+      final BytesRef bytes = t.bytes();
+      UnicodeUtil.UTF8toUTF16(bytes.bytes, bytes.offset, bytes.length, termBuffer);
+
+      for(int i=Math.max(0,newSuffixStart);i<termBuffer.length;i++) {
+        final char ch = termBuffer.result[i];
         if (ch >= UnicodeUtil.UNI_SUR_HIGH_START && ch <= UnicodeUtil.UNI_SUR_HIGH_END && (surrogateSeekUpto == 0 || i > surrogateSeekPending[surrogateSeekUpto-1])) {
 
           if (DEBUG_SURROGATES) {
@@ -385,24 +389,27 @@ public class PreFlexFields extends Field
           // surrogate range; if so, we must first iterate
           // them, then seek back to the surrogates
 
-          char[] testPrefix = new char[i+1];
+          char[] testPrefix = new char[i+2];
           for(int j=0;j<i;j++) {
-            testPrefix[j] = text.charAt(j);
+            testPrefix[j] = termBuffer.result[j];
           }
           testPrefix[i] = 1+UnicodeUtil.UNI_SUR_LOW_END;
 
-          getTermsDict().seekEnum(seekTermEnum, protoTerm.createTerm(new String(testPrefix)));
+          getTermsDict().seekEnum(seekTermEnum, protoTerm.createTerm(new BytesRef(testPrefix, 0, i+1)));
 
           Term t2 = seekTermEnum.term();
           boolean isPrefix;
           if (t2 != null && t2.field() == fieldInfo.name) {
-            String seekText = t2.text();
+
+            final BytesRef seekBytes = t2.bytes();
+            UnicodeUtil.UTF8toUTF16(seekBytes.bytes, seekBytes.offset, seekBytes.length, seekBuffer);
+
             isPrefix = true;
             if (DEBUG_SURROGATES) {
-              System.out.println("      seek found " + UnicodeUtil.toHexString(seekText));
+              System.out.println("      seek found " + UnicodeUtil.toHexString(t2.text()));
             }
             for(int j=0;j<i;j++) {
-              if (testPrefix[j] != seekText.charAt(j)) {
+              if (testPrefix[j] != seekBuffer.result[j]) {
                 isPrefix = false;
                 break;
               }
@@ -481,7 +488,7 @@ public class PreFlexFields extends Field
       }
       skipNext = false;
       final TermInfosReader tis = getTermsDict();
-      final Term t0 = protoTerm.createTerm(term.utf8ToString());
+      final Term t0 = protoTerm.createTerm(term);
 
       assert termEnum != null;
 
@@ -496,13 +503,7 @@ public class PreFlexFields extends Field
 
       final Term t = termEnum.term();
 
-      final BytesRef tr;
-      if (t != null) {
-        tr = scratchBytesRef;
-        scratchBytesRef.copy(t.text());
-      } else {
-        tr = null;
-      }
+      final BytesRef tr = t == null ? null : t.bytes();
 
       if (t != null && t.field() == fieldInfo.name && term.bytesEquals(tr)) {
         current = tr;
@@ -526,8 +527,7 @@ public class PreFlexFields extends Field
         if (termEnum.term() == null) {
           return null;
         } else {
-          scratchBytesRef.copy(termEnum.term().text());
-          return current = scratchBytesRef;
+          return current = termEnum.term().bytes();
         }
       }
       if (termEnum.next() && termEnum.term().field() == fieldInfo.name) {
@@ -541,8 +541,7 @@ public class PreFlexFields extends Field
           assert t == null || !t.field().equals(fieldInfo.name); // make sure fields are in fact interned
           current = null;
         } else {
-          scratchBytesRef.copy(t.text());
-          current = scratchBytesRef;
+          current = t.bytes();
         }
         return current;
       } else {
@@ -557,8 +556,7 @@ public class PreFlexFields extends Field
           assert t == null || !t.field().equals(fieldInfo.name); // make sure fields are in fact interned
           return null;
         } else {
-          scratchBytesRef.copy(t.text());
-          current = scratchBytesRef;
+          current = t.bytes();
           return current;
         }
       }

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/preflex/TermBuffer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/preflex/TermBuffer.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/preflex/TermBuffer.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/preflex/TermBuffer.java Mon Jul  5 08:33:25 2010
@@ -96,10 +96,9 @@ final class TermBuffer implements Clonea
       reset();
       return;
     }
-    final String termText = term.text();
-    final int termLen = termText.length();
-    text.setLength(termLen);
-    termText.getChars(0, termLen, text.result, 0);
+    
+    final BytesRef termBytes = term.bytes();
+    UnicodeUtil.UTF8toUTF16(termBytes.bytes, termBytes.offset, termBytes.length, text);
     dirty = true;
     field = term.field();
     this.term = term;
@@ -124,7 +123,7 @@ final class TermBuffer implements Clonea
       return null;
 
     if (term == null)
-      term = new Term(field, new String(text.result, 0, text.length), false);
+      term = new Term(field, new BytesRef(text.result, 0, text.length), false);
 
     return term;
   }

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/preflex/TermInfosReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/preflex/TermInfosReader.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/preflex/TermInfosReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/preflex/TermInfosReader.java Mon Jul  5 08:33:25 2010
@@ -189,7 +189,7 @@ public final class TermInfosReader {
 
     while (hi >= lo) {
       int mid = (lo + hi) >>> 1;
-      int delta = term.compareTo(indexTerms[mid]);
+      int delta = term.compareToUTF16(indexTerms[mid]);
       if (delta < 0)
 	hi = mid - 1;
       else if (delta > 0)
@@ -234,17 +234,17 @@ public final class TermInfosReader {
 
     // optimize sequential access: first try scanning cached enum w/o seeking
     if (enumerator.term() != null                 // term is at or past current
-	&& ((enumerator.prev() != null && term.compareTo(enumerator.prev())> 0)
-	    || term.compareTo(enumerator.term()) >= 0)) {
+	&& ((enumerator.prev() != null && term.compareToUTF16(enumerator.prev())> 0)
+	    || term.compareToUTF16(enumerator.term()) >= 0)) {
       int enumOffset = (int)(enumerator.position/totalIndexInterval)+1;
       if (indexTerms.length == enumOffset	  // but before end of block
-    || term.compareTo(indexTerms[enumOffset]) < 0) {
+    || term.compareToUTF16(indexTerms[enumOffset]) < 0) {
        // no need to seek
 
         final TermInfo ti;
 
         int numScans = enumerator.scanTo(term);
-        if (enumerator.term() != null && term.compareTo(enumerator.term()) == 0) {
+        if (enumerator.term() != null && term.compareToUTF16(enumerator.term()) == 0) {
           ti = enumerator.termInfo();
           if (numScans > 1) {
             // we only  want to put this TermInfo into the cache if
@@ -279,7 +279,7 @@ public final class TermInfosReader {
     seekEnum(enumerator, indexPos);
     enumerator.scanTo(term);
     final TermInfo ti;
-    if (enumerator.term() != null && term.compareTo(enumerator.term()) == 0) {
+    if (enumerator.term() != null && term.compareToUTF16(enumerator.term()) == 0) {
       ti = enumerator.termInfo();
       if (tiOrd == null) {
         termsCache.put(new CloneableTerm(term), new TermInfoAndOrd(ti, (int) enumerator.position));
@@ -328,9 +328,9 @@ public final class TermInfosReader {
     SegmentTermEnum enumerator = getThreadResources().termEnum;
     seekEnum(enumerator, indexOffset);
 
-    while(term.compareTo(enumerator.term()) > 0 && enumerator.next()) {}
+    while(term.compareToUTF16(enumerator.term()) > 0 && enumerator.next()) {}
 
-    if (term.compareTo(enumerator.term()) == 0)
+    if (term.compareToUTF16(enumerator.term()) == 0)
       return enumerator.position;
     else
       return -1;

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCacheTermsFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCacheTermsFilter.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCacheTermsFilter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/FieldCacheTermsFilter.java Mon Jul  5 08:33:25 2010
@@ -95,13 +95,20 @@ import org.apache.lucene.util.BytesRef;
 
 public class FieldCacheTermsFilter extends Filter {
   private String field;
-  private String[] terms;
+  private BytesRef[] terms;
 
-  public FieldCacheTermsFilter(String field, String... terms) {
+  public FieldCacheTermsFilter(String field, BytesRef... terms) {
     this.field = field;
     this.terms = terms;
   }
 
+  public FieldCacheTermsFilter(String field, String... terms) {
+    this.field = field;
+    this.terms = new BytesRef[terms.length];
+    for (int i = 0; i < terms.length; i++)
+      this.terms[i] = new BytesRef(terms[i]);
+  }
+
   public FieldCache getFieldCache() {
     return FieldCache.DEFAULT;
   }
@@ -121,7 +128,7 @@ public class FieldCacheTermsFilter exten
       openBitSet = new OpenBitSet(this.fcsi.size());
       final BytesRef spare = new BytesRef();
       for (int i=0;i<terms.length;i++) {
-        int termNumber = this.fcsi.binarySearchLookup(new BytesRef(terms[i]), spare);
+        int termNumber = this.fcsi.binarySearchLookup(terms[i], spare);
         if (termNumber > 0) {
           openBitSet.fastSet(termNumber);
         }

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java?rev=960484&r1=960483&r2=960484&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java Mon Jul  5 08:33:25 2010
@@ -499,14 +499,13 @@ class UnionDocsAndPositionsEnum extends 
     List<DocsAndPositionsEnum> docsEnums = new LinkedList<DocsAndPositionsEnum>();
     final Bits delDocs = MultiFields.getDeletedDocs(indexReader);
     for (int i = 0; i < terms.length; i++) {
-      final BytesRef text = new BytesRef(terms[i].text());
       DocsAndPositionsEnum postings = indexReader.termPositionsEnum(delDocs,
                                                                     terms[i].field(),
-                                                                    text);
+                                                                    terms[i].bytes());
       if (postings != null) {
         docsEnums.add(postings);
       } else {
-        if (MultiFields.getTermDocsEnum(indexReader, delDocs, terms[i].field(), text) != null) {
+        if (MultiFields.getTermDocsEnum(indexReader, delDocs, terms[i].field(), terms[i].bytes()) != null) {
           // term does exist, but has no positions
           throw new IllegalStateException("field \"" + terms[i].field() + "\" was indexed with Field.omitTermFreqAndPositions=true; cannot run PhraseQuery (term=" + terms[i].text() + ")");
         }



Mime
View raw message