lucene-java-commits mailing list archives

From mikemcc...@apache.org
Subject svn commit: r824918 [4/11] - in /lucene/java/branches/flex_1458: contrib/analyzers/common/src/java/org/apache/lucene/analysis/query/ contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/ contrib/benchmark/src/test/org/apache/lucene/benc...
Date Tue, 13 Oct 2009 20:44:59 GMT
Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/SegmentMerger.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/SegmentMerger.java?rev=824918&r1=824917&r2=824918&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/SegmentMerger.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/SegmentMerger.java Tue Oct 13 20:44:51 2009
@@ -26,9 +26,16 @@
 import org.apache.lucene.document.Document;
 import org.apache.lucene.index.IndexReader.FieldOption;
 import org.apache.lucene.index.MergePolicy.MergeAbortedException;
+import org.apache.lucene.index.codecs.Codecs;
+import org.apache.lucene.index.codecs.Codec;
+import org.apache.lucene.index.codecs.FieldsConsumer;
+import org.apache.lucene.index.codecs.TermsConsumer;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.UnicodeUtil;
+import org.apache.lucene.index.codecs.DocsConsumer;
+import org.apache.lucene.index.codecs.PositionsConsumer;
 
 /**
  * The SegmentMerger class combines two or more Segments, represented by an IndexReader ({@link #add},
@@ -66,6 +73,9 @@
   /** Maximum number of contiguous documents to bulk-copy
       when merging stored fields */
   private final static int MAX_RAW_MERGE_DOCS = 4192;
+  
+  private final Codecs codecs;
+  private Codec codec;
 
   /** This ctor used only by test code.
    * 
@@ -75,6 +85,7 @@
   SegmentMerger(Directory dir, String name) {
     directory = dir;
     segment = name;
+    codecs = Codecs.getDefault();
     checkAbort = new CheckAbort(null, null) {
       public void work(double units) throws MergeAbortedException {
         // do nothing
@@ -82,8 +93,9 @@
     };
   }
 
-  SegmentMerger(IndexWriter writer, String name, MergePolicy.OneMerge merge) {
+  SegmentMerger(IndexWriter writer, String name, MergePolicy.OneMerge merge, Codecs codecs) {
     directory = writer.getDirectory();
+    this.codecs = codecs;
     segment = name;
     if (merge != null) {
       checkAbort = new CheckAbort(merge, directory);
@@ -169,26 +181,37 @@
     }
   }
 
-  final List createCompoundFile(String fileName)
+  final List createCompoundFile(String fileName) throws IOException {
+    // nocommit -- messy!
+    final SegmentWriteState state = new SegmentWriteState(null, directory, segment, fieldInfos, null, mergedDocs, 0, 0, Codecs.getDefault());
+    return createCompoundFile(fileName, new SegmentInfo(segment, mergedDocs, directory,
+                                                        Codecs.getDefault().getWriter(state)));
+  }
+
+  final List createCompoundFile(String fileName, final SegmentInfo info)
           throws IOException {
-    CompoundFileWriter cfsWriter =
-      new CompoundFileWriter(directory, fileName, checkAbort);
+    CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, fileName, checkAbort);
 
-    List files =
-      new ArrayList(IndexFileNames.COMPOUND_EXTENSIONS.length + 1);    
-    
-    // Basic files
-    for (int i = 0; i < IndexFileNames.COMPOUND_EXTENSIONS.length; i++) {
-      String ext = IndexFileNames.COMPOUND_EXTENSIONS[i];
+    List files = new ArrayList();
 
+    // Basic files
+    for (int i = 0; i < IndexFileNames.COMPOUND_EXTENSIONS_NOT_CODEC.length; i++) {
+      String ext = IndexFileNames.COMPOUND_EXTENSIONS_NOT_CODEC[i];
+       
+      // nocommit
+      /*
       if (ext.equals(IndexFileNames.PROX_EXTENSION) && !hasProx())
         continue;
+        
+      */
 
       if (mergeDocStores || (!ext.equals(IndexFileNames.FIELDS_EXTENSION) &&
-                            !ext.equals(IndexFileNames.FIELDS_INDEX_EXTENSION)))
+                             !ext.equals(IndexFileNames.FIELDS_INDEX_EXTENSION)))
         files.add(segment + "." + ext);
     }
 
+    codec.files(directory, info, files);
+    
     // Fieldable norm files
     for (int i = 0; i < fieldInfos.size(); i++) {
       FieldInfo fi = fieldInfos.fieldInfo(i);
@@ -557,34 +580,40 @@
     }
   }
 
-  private SegmentMergeQueue queue = null;
+  private SegmentFieldMergeQueue fieldsQueue;
+  private SegmentMergeQueue termsQueue;
+  
+  Codec getCodec() {
+    return codec;
+  }
 
   private final void mergeTerms() throws CorruptIndexException, IOException {
 
-    SegmentWriteState state = new SegmentWriteState(null, directory, segment, null, mergedDocs, 0, termIndexInterval);
+    SegmentWriteState state = new SegmentWriteState(null, directory, segment, fieldInfos, null, mergedDocs, 0, termIndexInterval, codecs);
 
-    final FormatPostingsFieldsConsumer consumer = new FormatPostingsFieldsWriter(state, fieldInfos);
+    // Let Codecs decide which codec will be used to write
+    // this segment:
+    codec = codecs.getWriter(state);
+    
+    final FieldsConsumer consumer = codec.fieldsConsumer(state);
 
     try {
-      queue = new SegmentMergeQueue(readers.size());
-
+      fieldsQueue = new SegmentFieldMergeQueue(readers.size());
+      termsQueue = new SegmentMergeQueue(readers.size());
       mergeTermInfos(consumer);
-
     } finally {
-      consumer.finish();
-      if (queue != null) queue.close();
+      consumer.close();
     }
   }
 
   boolean omitTermFreqAndPositions;
 
-  private final void mergeTermInfos(final FormatPostingsFieldsConsumer consumer) throws CorruptIndexException, IOException {
+  private final void mergeTermInfos(final FieldsConsumer consumer) throws CorruptIndexException, IOException {
     int base = 0;
     final int readerCount = readers.size();
     for (int i = 0; i < readerCount; i++) {
       IndexReader reader = (IndexReader) readers.get(i);
-      TermEnum termEnum = reader.terms();
-      SegmentMergeInfo smi = new SegmentMergeInfo(base, termEnum, reader);
+      SegmentMergeInfo smi = new SegmentMergeInfo(base, reader);
       int[] docMap  = smi.getDocMap();
       if (docMap != null) {
         if (docMaps == null) {
@@ -599,47 +628,76 @@
 
       assert reader.numDocs() == reader.maxDoc() - smi.delCount;
 
-      if (smi.next())
-        queue.add(smi);				  // initialize queue
-      else
-        smi.close();
+      if (smi.nextField()) {
+        fieldsQueue.add(smi);				  // initialize queue
+      } else {
+        // segment is done: it has no fields
+      }
     }
 
     SegmentMergeInfo[] match = new SegmentMergeInfo[readers.size()];
 
-    String currentField = null;
-    FormatPostingsTermsConsumer termsConsumer = null;
+    while (fieldsQueue.size() > 0) {
 
-    while (queue.size() > 0) {
-      int matchSize = 0;			  // pop matching terms
-      match[matchSize++] = (SegmentMergeInfo) queue.pop();
-      Term term = match[0].term;
-      SegmentMergeInfo top = (SegmentMergeInfo) queue.top();
-
-      while (top != null && term.compareTo(top.term) == 0) {
-        match[matchSize++] = (SegmentMergeInfo) queue.pop();
-        top = (SegmentMergeInfo) queue.top();
-      }
-
-      if (currentField != term.field) {
-        currentField = term.field;
-        if (termsConsumer != null)
-          termsConsumer.finish();
-        final FieldInfo fieldInfo = fieldInfos.fieldInfo(currentField);
-        termsConsumer = consumer.addField(fieldInfo);
-        omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions;
+      while(true) {
+        SegmentMergeInfo smi = (SegmentMergeInfo) fieldsQueue.pop();
+        if (smi.nextTerm()) {
+          termsQueue.add(smi);
+        } else if (smi.nextField()) {
+          // field had no terms
+          fieldsQueue.add(smi);
+        } else {
+          // done with a segment
+        }
+        SegmentMergeInfo top = (SegmentMergeInfo) fieldsQueue.top();
+        if (top == null || (termsQueue.size() > 0 && ((SegmentMergeInfo) termsQueue.top()).field != top.field)) {
+          break;
+        }
       }
+        
+      if (termsQueue.size() > 0) {          
+        // merge one field
+
+        final String field  = ((SegmentMergeInfo) termsQueue.top()).field;
+        if (Codec.DEBUG) {
+          System.out.println("merge field=" + field + " segCount=" + termsQueue.size());
+        }
+        final FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
+        final TermsConsumer termsConsumer = consumer.addField(fieldInfo);
+        omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions;
 
-      int df = appendPostings(termsConsumer, match, matchSize);		  // add new TermInfo
+        while(termsQueue.size() > 0) {
+          // pop matching terms
+          int matchSize = 0;
+          while(true) {
+            match[matchSize++] = (SegmentMergeInfo) termsQueue.pop();
+            SegmentMergeInfo top = (SegmentMergeInfo) termsQueue.top();
+            if (top == null || !top.term.termEquals(match[0].term)) {
+              break;
+            }
+          }
+
+          if (Codec.DEBUG) {
+            System.out.println("merge field=" + field + " term=" + match[0].term + " numReaders=" + matchSize);
+          }
+
+          int df = appendPostings(termsConsumer, match, matchSize);
 
-      checkAbort.work(df/3.0);
+          checkAbort.work(df/3.0);
 
-      while (matchSize > 0) {
-        SegmentMergeInfo smi = match[--matchSize];
-        if (smi.next())
-          queue.add(smi);			  // restore queue
-        else
-          smi.close();				  // done with a segment
+          // put SegmentMergeInfos back into respective queues
+          while (matchSize > 0) {
+            SegmentMergeInfo smi = match[--matchSize];
+            if (smi.nextTerm()) {
+              termsQueue.add(smi);
+            } else if (smi.nextField()) {
+              fieldsQueue.add(smi);
+            } else {
+              // done with a segment
+            }
+          }
+        }
+        termsConsumer.finish();
       }
     }
   }
@@ -653,6 +711,8 @@
   int[] getDelCounts() {
     return delCounts;
   }
+  
+  private final UnicodeUtil.UTF16Result termBuffer = new UnicodeUtil.UTF16Result();
 
   /** Process postings from multiple segments all positioned on the
    *  same term. Writes out merged entries into freqOutput and
@@ -664,45 +724,80 @@
    * @throws CorruptIndexException if the index is corrupt
    * @throws IOException if there is a low-level IO error
    */
-  private final int appendPostings(final FormatPostingsTermsConsumer termsConsumer, SegmentMergeInfo[] smis, int n)
+  private final int appendPostings(final TermsConsumer termsConsumer, SegmentMergeInfo[] smis, int n)
         throws CorruptIndexException, IOException {
 
-    final FormatPostingsDocsConsumer docConsumer = termsConsumer.addTerm(smis[0].term.text);
+    // nocommit -- maybe cutover TermsConsumer API to
+    // TermRef as well?
+    final TermRef text = smis[0].term;
+    UnicodeUtil.UTF8toUTF16(text.bytes, text.offset, text.length, termBuffer);
+
+    // Make space for terminator
+    final int length = termBuffer.length;
+    termBuffer.setLength(1+termBuffer.length);
+
+    // nocommit -- make this a static final constant somewhere:
+    termBuffer.result[length] = 0xffff;
+
+    final DocsConsumer docConsumer = termsConsumer.startTerm(termBuffer.result, 0);
+
     int df = 0;
     for (int i = 0; i < n; i++) {
+      if (Codec.DEBUG) {
+        System.out.println("    merge reader " + (i+1) + " of " + n + ": term=" + text);
+      }
+
       SegmentMergeInfo smi = smis[i];
-      TermPositions postings = smi.getPositions();
-      assert postings != null;
+      DocsEnum docs = smi.terms.docs(smi.reader.getDeletedDocs());
       int base = smi.base;
       int[] docMap = smi.getDocMap();
-      postings.seek(smi.termEnum);
 
-      while (postings.next()) {
+      while (true) {
+        int startDoc = docs.next();
+        if (startDoc == DocsEnum.NO_MORE_DOCS) {
+          break;
+        }
+        if (Codec.DEBUG) {
+          System.out.println("      merge read doc=" + startDoc);
+        }
+
         df++;
-        int doc = postings.doc();
-        if (docMap != null)
-          doc = docMap[doc];                      // map around deletions
-        doc += base;                              // convert to merged space
+        int doc;
+        if (docMap != null) {
+          // map around deletions
+          doc = docMap[startDoc];
+          assert doc != -1: "postings enum returned deleted docID " + startDoc + " freq=" + docs.freq() + " df=" + df;
+        } else {
+          doc = startDoc;
+        }
 
-        final int freq = postings.freq();
-        final FormatPostingsPositionsConsumer posConsumer = docConsumer.addDoc(doc, freq);
+        doc += base;                              // convert to merged space
+        assert doc < mergedDocs: "doc=" + doc + " maxDoc=" + mergedDocs;
 
+        final int freq = docs.freq();
+        final PositionsConsumer posConsumer = docConsumer.addDoc(doc, freq);
+        final PositionsEnum positions = docs.positions();
+
+        // nocommit -- omitTF should be "private", and this
+        // code (and FreqProxTermsWriter) should instead
+        // check if posConsumer is null?
+        
         if (!omitTermFreqAndPositions) {
           for (int j = 0; j < freq; j++) {
-            final int position = postings.nextPosition();
-            final int payloadLength = postings.getPayloadLength();
+            final int position = positions.next();
+            final int payloadLength = positions.getPayloadLength();
             if (payloadLength > 0) {
               if (payloadBuffer == null || payloadBuffer.length < payloadLength)
                 payloadBuffer = new byte[payloadLength];
-              postings.getPayload(payloadBuffer, 0);
+              positions.getPayload(payloadBuffer, 0);
             }
             posConsumer.addPosition(position, payloadBuffer, 0, payloadLength);
           }
-          posConsumer.finish();
+          posConsumer.finishDoc();
         }
       }
     }
-    docConsumer.finish();
+    termsConsumer.finishTerm(termBuffer.result, 0, df);
 
     return df;
   }

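To summarize the write path the hunks above wire up, here is a minimal sketch (not code from this patch; the SegmentWriteState, FieldInfo, and term/posting values are assumed, and the identifiers mirror the diff):

    // Codecs selects which codec writes this segment:
    Codec codec = codecs.getWriter(state);
    FieldsConsumer fields = codec.fieldsConsumer(state);
    // Once per merged field:
    TermsConsumer terms = fields.addField(fieldInfo);
    // Once per merged term (text is a terminated char[], as in appendPostings):
    DocsConsumer docs = terms.startTerm(termText, 0);
    // Once per surviving doc, in merged docID space:
    PositionsConsumer positions = docs.addDoc(docID, freq);
    positions.addPosition(position, payload, 0, payloadLength);
    positions.finishDoc();
    terms.finishTerm(termText, 0, docFreq);
    terms.finish();
    fields.close();
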
Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/SegmentReader.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/SegmentReader.java?rev=824918&r1=824917&r2=824918&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/SegmentReader.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/SegmentReader.java Tue Oct 13 20:44:51 2009
@@ -36,7 +36,16 @@
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.util.BitVector;
+import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.CloseableThreadLocal;
+import org.apache.lucene.util.cache.Cache;
+import org.apache.lucene.util.cache.SimpleLRUCache;
+import org.apache.lucene.index.codecs.Codecs;
+import org.apache.lucene.index.codecs.Codec;
+import org.apache.lucene.index.codecs.preflex.PreFlexFields;
+import org.apache.lucene.index.codecs.preflex.SegmentTermDocs;
+import org.apache.lucene.index.codecs.preflex.SegmentTermPositions;
+import org.apache.lucene.index.codecs.FieldsProducer;
 
 /** @version $Id */
 /**
@@ -48,6 +57,7 @@
 
   private SegmentInfo si;
   private int readBufferSize;
+  boolean isPreFlex;
 
   CloseableThreadLocal fieldsReaderLocal = new FieldsReaderLocal();
   CloseableThreadLocal termVectorsLocal = new CloseableThreadLocal();
@@ -83,23 +93,35 @@
 
     final String segment;
     final FieldInfos fieldInfos;
-    final IndexInput freqStream;
-    final IndexInput proxStream;
-    final TermInfosReader tisNoIndex;
 
+    final FieldsProducer fields;
+    final boolean isPreFlex;
+    final Codecs codecs;
+    
     final Directory dir;
     final Directory cfsDir;
     final int readBufferSize;
     final int termsIndexDivisor;
 
-    TermInfosReader tis;
     FieldsReader fieldsReaderOrig;
     TermVectorsReader termVectorsReaderOrig;
     CompoundFileReader cfsReader;
     CompoundFileReader storeCFSReader;
 
-    CoreReaders(Directory dir, SegmentInfo si, int readBufferSize, int termsIndexDivisor) throws IOException {
+    CoreReaders(Directory dir, SegmentInfo si, int readBufferSize, int termsIndexDivisor, Codecs codecs) throws IOException {
+
+      if (termsIndexDivisor < 1 && termsIndexDivisor != -1) {
+        throw new IllegalArgumentException("indexDivisor must be -1 (don't load terms index) or greater than 0: got " + termsIndexDivisor);
+      }
+
       segment = si.name;
+      if (Codec.DEBUG) {
+        System.out.println("sr: init core for segment=" + segment);
+      }
+      if (codecs == null) {
+        codecs = Codecs.getDefault();
+      }
+      this.codecs = codecs;      
       this.readBufferSize = readBufferSize;
       this.dir = dir;
 
@@ -116,23 +138,15 @@
         fieldInfos = new FieldInfos(cfsDir, segment + "." + IndexFileNames.FIELD_INFOS_EXTENSION);
 
         this.termsIndexDivisor = termsIndexDivisor;
-        TermInfosReader reader = new TermInfosReader(cfsDir, segment, fieldInfos, readBufferSize, termsIndexDivisor);
-        if (termsIndexDivisor == -1) {
-          tisNoIndex = reader;
-        } else {
-          tis = reader;
-          tisNoIndex = null;
-        }
-
-        // make sure that all index files have been read or are kept open
-        // so that if an index update removes them we'll still have them
-        freqStream = cfsDir.openInput(segment + "." + IndexFileNames.FREQ_EXTENSION, readBufferSize);
 
-        if (fieldInfos.hasProx()) {
-          proxStream = cfsDir.openInput(segment + "." + IndexFileNames.PROX_EXTENSION, readBufferSize);
-        } else {
-          proxStream = null;
+        // Ask codec for its Fields
+        if (Codec.DEBUG) {
+          System.out.println("sr.core.init: seg=" + si.name + " codec=" + si.getCodec());
         }
+        fields = si.getCodec().fieldsProducer(cfsDir, fieldInfos, si, readBufferSize, termsIndexDivisor);
+        assert fields != null;
+
+        isPreFlex = fields instanceof PreFlexFields;
         success = true;
       } finally {
         if (!success) {
@@ -157,64 +171,12 @@
       return cfsReader;
     }
 
-    synchronized TermInfosReader getTermsReader() {
-      if (tis != null) {
-        return tis;
-      } else {
-        return tisNoIndex;
-      }
-    }      
-
-    synchronized boolean termsIndexIsLoaded() {
-      return tis != null;
-    }      
-
-    // NOTE: only called from IndexWriter when a near
-    // real-time reader is opened, or applyDeletes is run,
-    // sharing a segment that's still being merged.  This
-    // method is not fully thread safe, and relies on the
-    // synchronization in IndexWriter
-    synchronized void loadTermsIndex(SegmentInfo si, int termsIndexDivisor) throws IOException {
-      if (tis == null) {
-        Directory dir0;
-        if (si.getUseCompoundFile()) {
-          // In some cases, we were originally opened when CFS
-          // was not used, but then we are asked to open the
-          // terms reader with index, the segment has switched
-          // to CFS
-          if (cfsReader == null) {
-            cfsReader = new CompoundFileReader(dir, segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize);
-          }
-          dir0 = cfsReader;
-        } else {
-          dir0 = dir;
-        }
-
-        tis = new TermInfosReader(dir0, segment, fieldInfos, readBufferSize, termsIndexDivisor);
-      }
-    }
-
     synchronized void decRef() throws IOException {
 
       if (ref.decRef() == 0) {
 
-        // close everything, nothing is shared anymore with other readers
-        if (tis != null) {
-          tis.close();
-          // null so if an app hangs on to us we still free most ram
-          tis = null;
-        }
-        
-        if (tisNoIndex != null) {
-          tisNoIndex.close();
-        }
-        
-        if (freqStream != null) {
-          freqStream.close();
-        }
-
-        if (proxStream != null) {
-          proxStream.close();
+        if (fields != null) {
+          fields.close();
         }
 
         if (termVectorsReaderOrig != null) {
@@ -588,7 +550,7 @@
    * @deprecated
    */
   public static SegmentReader get(SegmentInfo si) throws CorruptIndexException, IOException {
-    return get(false, si.dir, si, BufferedIndexInput.BUFFER_SIZE, true, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);
+    return get(false, si.dir, si, BufferedIndexInput.BUFFER_SIZE, true, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR, null);
   }
 
   /**
@@ -596,7 +558,7 @@
    * @throws IOException if there is a low-level IO error
    */
   public static SegmentReader get(boolean readOnly, SegmentInfo si, int termInfosIndexDivisor) throws CorruptIndexException, IOException {
-    return get(readOnly, si.dir, si, BufferedIndexInput.BUFFER_SIZE, true, termInfosIndexDivisor);
+    return get(readOnly, si.dir, si, BufferedIndexInput.BUFFER_SIZE, true, termInfosIndexDivisor, null);
   }
 
   /**
@@ -605,7 +567,7 @@
    * @deprecated
    */
   static SegmentReader get(SegmentInfo si, int readBufferSize, boolean doOpenStores, int termInfosIndexDivisor) throws CorruptIndexException, IOException {
-    return get(false, si.dir, si, readBufferSize, doOpenStores, termInfosIndexDivisor);
+    return get(false, si.dir, si, readBufferSize, doOpenStores, termInfosIndexDivisor, null);
   }
 
   /**
@@ -617,8 +579,13 @@
                                   SegmentInfo si,
                                   int readBufferSize,
                                   boolean doOpenStores,
-                                  int termInfosIndexDivisor)
+                                  int termInfosIndexDivisor,
+                                  Codecs codecs)
     throws CorruptIndexException, IOException {
+    if (codecs == null)  {
+      codecs = Codecs.getDefault();
+    }
+    
     SegmentReader instance;
     try {
       if (readOnly)
@@ -635,7 +602,7 @@
     boolean success = false;
 
     try {
-      instance.core = new CoreReaders(dir, si, readBufferSize, termInfosIndexDivisor);
+      instance.core = new CoreReaders(dir, si, readBufferSize, termInfosIndexDivisor, codecs);
       if (doOpenStores) {
         instance.core.openDocStores(si);
       }
@@ -660,6 +627,10 @@
     core.openDocStores(si);
   }
 
+  public synchronized Bits getDeletedDocs() {
+    return deletedDocs;
+  }
+
   private void loadDeletedDocs() throws IOException {
     // NOTE: the bitvector is stored using the regular directory, not cfs
     if (hasDeletions(si)) {
@@ -929,14 +900,32 @@
     return new ArrayList(si.files());
   }
 
-  public TermEnum terms() {
+  public TermEnum terms() throws IOException {
     ensureOpen();
-    return core.getTermsReader().terms();
+    if (isPreFlex) {
+      // For old API on an old segment, instead of
+      // converting old API -> new API -> old API, just give
+      // direct access to old:
+      return ((PreFlexFields) core.fields).tis.terms();
+    } else {
+      // Emulate old API on top of new index
+      return new LegacyTermEnum(null);
+    }
   }
 
+  /** @deprecated Please switch to the flex API ({@link
+   * #fields}) instead. */
   public TermEnum terms(Term t) throws IOException {
     ensureOpen();
-    return core.getTermsReader().terms(t);
+    if (isPreFlex) {
+      // For old API on an old segment, instead of
+      // converting old API -> new API -> old API, just give
+      // direct access to old:
+      return ((PreFlexFields) core.fields).tis.terms(t);
+    } else {
+      // Emulate old API on top of new index
+      return new LegacyTermEnum(t);
+    }
   }
 
   FieldInfos fieldInfos() {
@@ -952,6 +941,8 @@
     return (deletedDocs != null && deletedDocs.get(n));
   }
 
+  /** @deprecated Switch to the flex API ({@link
+   * IndexReader#termDocsEnum}) instead. */
   public TermDocs termDocs(Term term) throws IOException {
     if (term == null) {
       return new AllTermDocs(this);
@@ -959,24 +950,86 @@
       return super.termDocs(term);
     }
   }
+  
+  public Fields fields() throws IOException {
+    return core.fields;
+  }
 
+  /** @deprecated Switch to the flex API {@link
+   *  IndexReader#termDocsEnum} instead. */
   public TermDocs termDocs() throws IOException {
     ensureOpen();
-    return new SegmentTermDocs(this);
+    if (isPreFlex) {
+      // For old API on an old segment, instead of
+      // converting old API -> new API -> old API, just give
+      // direct access to old:
+      final PreFlexFields pre = (PreFlexFields) core.fields;
+      return new SegmentTermDocs(pre.freqStream, deletedDocs, pre.tis, core.fieldInfos);
+    } else {
+      // Emulate old API
+      return new LegacyTermDocs();
+    }
   }
 
+  /** @deprecated Switch to the flex API {@link
+   *  IndexReader#termDocsEnum} instead */
   public TermPositions termPositions() throws IOException {
     ensureOpen();
-    return new SegmentTermPositions(this);
+    if (isPreFlex) {
+      // For old API on an old segment, instead of
+      // converting old API -> new API -> old API, just give
+      // direct access to old:
+      final PreFlexFields pre = (PreFlexFields) core.fields;
+      return new SegmentTermPositions(pre.freqStream, pre.proxStream, deletedDocs, pre.tis, core.fieldInfos);
+    } else
+      // Emulate old API
+      return new LegacyTermPositions();
+  }
+
+  private final CloseableThreadLocal perThread = new CloseableThreadLocal();
+
+  // nocommit -- move term vectors under here
+  private static final class PerThread {
+    LegacyTermEnum terms;
+    
+    // Used for caching the least recently looked-up Terms
+    Cache termsCache;
+  }
+
+  private final static int DEFAULT_TERMS_CACHE_SIZE = 1024;
+
+  private PerThread getPerThread() throws IOException {
+    PerThread resources = (PerThread) perThread.get();
+    if (resources == null) {
+      resources = new PerThread();
+      resources.terms = new LegacyTermEnum(null);
+      // Cache does not have to be thread-safe; it is only used by one thread at a time
+      resources.termsCache = new SimpleLRUCache(DEFAULT_TERMS_CACHE_SIZE);
+      perThread.set(resources);
+    }
+    return resources;
   }
 
+  
   public int docFreq(Term t) throws IOException {
     ensureOpen();
-    TermInfo ti = core.getTermsReader().get(t);
-    if (ti != null)
-      return ti.docFreq;
-    else
+    Terms terms = core.fields.terms(t.field);
+    if (terms != null) {
+      return terms.docFreq(new TermRef(t.text));
+    } else {
+      return 0;
+    }
+  }
+
+  public int docFreq(String field, TermRef term) throws IOException {
+    ensureOpen();
+
+    Terms terms = core.fields.terms(field);
+    if (terms != null) {
+      return terms.docFreq(term);
+    } else {
       return 0;
+    }
   }
 
   public int numDocs() {
@@ -1146,17 +1199,13 @@
     }
   }
 
-  boolean termsIndexLoaded() {
-    return core.termsIndexIsLoaded();
-  }
-
   // NOTE: only called from IndexWriter when a near
   // real-time reader is opened, or applyDeletes is run,
   // sharing a segment that's still being merged.  This
   // method is not thread safe, and relies on the
   // synchronization in IndexWriter
-  void loadTermsIndex(int termsIndexDivisor) throws IOException {
-    core.loadTermsIndex(si, termsIndexDivisor);
+  void loadTermsIndex() throws IOException {
+    core.fields.loadTermsIndex();
   }
 
   // for testing only
@@ -1323,12 +1372,9 @@
   // This is necessary so that cloned SegmentReaders (which
   // share the underlying postings data) will map to the
   // same entry in the FieldCache.  See LUCENE-1579.
+  // nocommit - what to return here?
   public final Object getFieldCacheKey() {
-    return core.freqStream;
-  }
-
-  public long getUniqueTermCount() {
-    return core.getTermsReader().size();
+    return core;
   }
 
   /**
@@ -1339,7 +1385,7 @@
    * @deprecated Remove this when tests are fixed!
    */
   static SegmentReader getOnlySegmentReader(Directory dir) throws IOException {
-    return getOnlySegmentReader(IndexReader.open(dir,false));
+    return getOnlySegmentReader(IndexReader.open(dir, false));
   }
 
   static SegmentReader getOnlySegmentReader(IndexReader reader) {
@@ -1360,4 +1406,254 @@
   public int getTermInfosIndexDivisor() {
     return core.termsIndexDivisor;
   }
+  
+  // Back compat: legacy TermEnum API over flex API
+  final private class LegacyTermEnum extends TermEnum {
+    FieldsEnum fields;
+    TermsEnum terms;
+    boolean done;
+    String currentField;
+    TermRef currentTerm;
+
+    public LegacyTermEnum(Term t) throws IOException {
+      //System.out.println("sr.lte.init: term=" + t);
+      fields = core.fields.iterator();
+      currentField = fields.next();
+      if (currentField == null) {
+        done = true;
+      } else if (t != null) {
+        // Pre-seek
+
+        // nocommit -- inefficient; do we need
+        // FieldsEnum.seek? (but this is slow only for
+        // legacy API, and, when field count is high)
+        while(currentField.compareTo(t.field) < 0) {
+          currentField = fields.next();
+          if (currentField == null) {
+            // Didn't find the field
+            done = true;
+            break;
+          }
+        }
+
+        if (!done) {
+          if (currentField == t.field) {
+            // Field matches -- get terms
+            terms = fields.terms();
+            TermRef tr = new TermRef(t.text());
+            TermsEnum.SeekStatus status = terms.seek(tr);
+            if (status == TermsEnum.SeekStatus.END) {
+              // leave currentTerm null
+            } else if (status == TermsEnum.SeekStatus.FOUND) {
+              currentTerm = tr;
+            } else {
+              currentTerm = terms.term();
+            }
+          }
+        }
+      } else {
+        terms = fields.terms();
+      }
+    }
+
+    public boolean next() throws IOException {
+
+      if (Codec.DEBUG) {
+        System.out.println("tdte.next done=" + done + " seg=" + core.segment);
+      }
+
+      if (done) {
+        return false;
+      }
+
+      while(true) {
+        if (terms == null) {
+          // Advance to the next field
+          currentField = fields.next();
+          if (currentField == null) {
+            if (Codec.DEBUG)
+              System.out.println("  fields.next returned false");
+            done = true;
+            return false;
+          }
+          terms = fields.terms();
+        }
+        currentTerm = terms.next();
+        if (currentTerm != null) {
+          // This field still has terms
+          return true;
+        } else {
+          // Done producing terms from this field
+          terms = null;
+        }
+      }
+    }
+
+    public Term term() {
+      if (terms != null && !done) {
+        if (currentTerm != null) {
+          return new Term(currentField, currentTerm.toString());
+        }
+      }
+      return null;
+    }
+
+    public int docFreq() {
+      return terms == null ? 0 : terms.docFreq();
+    }
+
+    public void close() {}
+  }
+
+  // Back compat: emulates legacy TermDocs API on top of
+  // flex API
+  private class LegacyTermDocs implements TermDocs {
+
+    String currentField;
+    final Fields fields;
+    TermsEnum terms;
+    DocsEnum docs;
+    int doc;
+
+    LegacyTermDocs() throws IOException {
+      fields = core.fields;
+    }
+
+    public void close() {}
+
+    public void seek(TermEnum termEnum) throws IOException {
+      // nocommit -- optimize for the special cases here
+      seek(termEnum.term());
+    }
+
+    public boolean skipTo(int target) throws IOException {
+      if (docs == null) return false;
+      doc = docs.advance(target);
+      return doc != DocsEnum.NO_MORE_DOCS;
+    }
+
+    public int read(int[] docs, int[] freqs) throws IOException {
+      if (this.docs == null) {
+        return 0;
+      }
+      return this.docs.read(docs, freqs);
+    }
+
+    public void seek(Term term) throws IOException {
+
+      if (Codec.DEBUG) {
+        System.out.println("\nwrapper termdocs.seek term=" + term);
+      }
+
+      docs = null;
+
+      if (terms != null && !term.field.equals(currentField)) {
+        if (Codec.DEBUG) {
+          System.out.println("  switch field");
+        }
+        if (terms != null) {
+          terms = null;
+        }
+      }
+
+      if (terms == null) {
+        currentField = term.field;
+        Terms terms1 = fields.terms(term.field);
+        if (terms1 == null) {
+          // no such field
+          return;
+        } else {
+          terms = terms1.iterator();
+        }
+      }
+
+      if (terms.seek(new TermRef(term.text)) == TermsEnum.SeekStatus.FOUND) {
+        // Term exists
+        docs = terms.docs(deletedDocs);
+        if (Codec.DEBUG) {
+          System.out.println("  init docs enum");
+        }
+      } else {
+        docs = null;
+        if (Codec.DEBUG) {
+          System.out.println("  clear docs enum");
+        }
+      }
+    }
+
+    public int doc() {
+      if (docs == null) return 0;
+      else return doc;
+    }
+
+    public int freq() {
+      if (docs == null) return 0;
+      return docs.freq();
+    }
+
+    public boolean next() throws IOException {
+      if (docs == null) return false;
+      doc = docs.next();
+      return doc != DocsEnum.NO_MORE_DOCS;
+    }
+  }
+
+  // Back compat: implements legacy TermPositions API on top
+  // of flex API
+  final private class LegacyTermPositions extends LegacyTermDocs implements TermPositions {
+
+    PositionsEnum positions;
+
+    LegacyTermPositions() throws IOException {
+      super();
+    }
+
+    public void seek(TermEnum termEnum) throws IOException {
+      super.seek(termEnum);
+      if (docs != null)
+        positions = docs.positions();
+    }
+
+    public boolean skipTo(int target) throws IOException {
+      boolean result = super.skipTo(target);
+      positions = null;
+      return result;
+    }
+
+    public int read(int[] docs, int[] freqs) throws IOException {
+      throw new UnsupportedOperationException("TermPositions does not support processing multiple documents in one call. Use TermDocs instead.");
+    }
+
+    public void seek(Term term) throws IOException {
+      super.seek(term);
+      positions = null;
+    }
+
+    public boolean next() throws IOException {
+      boolean result = super.next();
+      positions = null;
+      return result;
+    }
+
+    public int nextPosition() throws IOException {     
+      if (positions == null) {
+        positions = docs.positions();
+      }
+      return positions.next();
+    }
+
+    public int getPayloadLength() {
+      return positions.getPayloadLength();
+    }
+
+    public byte[] getPayload(byte[] data, int offset) throws IOException {
+      return positions.getPayload(data, offset);
+    }
+
+    public boolean isPayloadAvailable() {
+      return positions.hasPayload();
+    }
+  }
+
+
 }

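For orientation, a sketch of how the flex and legacy read APIs relate after this change (field and term values are illustrative; the entry points are the ones added above):

    // Flex API: Fields -> Terms -> TermsEnum/DocsEnum
    Fields fields = segmentReader.fields();
    Terms terms = fields.terms("body");            // null if the field is absent
    if (terms != null) {
      TermRef text = new TermRef("lucene");
      int df = terms.docFreq(text);                // seeks a TermsEnum internally
      DocsEnum docsEnum = terms.docs(segmentReader.getDeletedDocs(), text);
      // ... iterate docsEnum ...
    }

    // Legacy API, emulated on top of flex for non-preflex segments:
    TermDocs td = segmentReader.termDocs();        // returns LegacyTermDocs
    td.seek(new Term("body", "lucene"));
    while (td.next()) {
      int doc = td.doc();
      int freq = td.freq();
    }
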
Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/SegmentWriteState.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/SegmentWriteState.java?rev=824918&r1=824917&r2=824918&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/SegmentWriteState.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/SegmentWriteState.java Tue Oct 13 20:44:51 2009
@@ -21,26 +21,67 @@
 import java.util.Collection;
 
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.index.codecs.Codec;
+import org.apache.lucene.index.codecs.Codecs;
 
-class SegmentWriteState {
+/**
+ * This class is not meant for public usage; it's only
+ * public in order to expose access across packages.  It's
+ * used internally when updating the index.
+ */
+public class SegmentWriteState {
   DocumentsWriter docWriter;
-  Directory directory;
-  String segmentName;
+  // nocommit -- made public
+  public Directory directory;
+  // nocommit -- made public
+  public String segmentName;
+  // nocommit -- made public
+  public FieldInfos fieldInfos;
   String docStoreSegmentName;
-  int numDocs;
-  int termIndexInterval;
+  // nocommit -- made public
+  public int numDocs;
   int numDocsInStore;
-  Collection flushedFiles;
+  // nocommit -- made public
+  public Collection flushedFiles;
 
-  public SegmentWriteState(DocumentsWriter docWriter, Directory directory, String segmentName, String docStoreSegmentName, int numDocs,
-                           int numDocsInStore, int termIndexInterval) {
+  // Actual codec used
+  Codec codec;
+
+  /** Expert: The fraction of terms in the "dictionary" which should be stored
+   * in RAM.  Smaller values use more memory, but make searching slightly
+   * faster, while larger values use less memory and make searching slightly
+   * slower.  Searching is typically not dominated by dictionary lookup, so
+   * tweaking this is rarely useful.*/
+  // nocommit -- made public
+  public int termIndexInterval;
+
+  /** Expert: The fraction of {@link TermDocs} entries stored in skip tables,
+   * used to accelerate {@link TermDocs#skipTo(int)}.  Larger values result in
+   * smaller indexes, greater acceleration, but fewer accelerable cases, while
+   * smaller values result in bigger indexes, less acceleration and more
+   * accelerable cases. More detailed experiments would be useful here. */
+  // nocommit -- made public
+  public int skipInterval = 16;
+  
+  /** Expert: The maximum number of skip levels. Smaller values result in 
+   * slightly smaller indexes, but slower skipping in big posting lists.
+   */
+  // nocommit -- made public
+  public int maxSkipLevels = 10;
+
+  public SegmentWriteState(DocumentsWriter docWriter, Directory directory, String segmentName, FieldInfos fieldInfos,
+                           String docStoreSegmentName, int numDocs,
+                           int numDocsInStore, int termIndexInterval,
+                           Codecs codecs) {
     this.docWriter = docWriter;
     this.directory = directory;
     this.segmentName = segmentName;
+    this.fieldInfos = fieldInfos;
     this.docStoreSegmentName = docStoreSegmentName;
     this.numDocs = numDocs;
     this.numDocsInStore = numDocsInStore;
     this.termIndexInterval = termIndexInterval;
+    this.codec = codecs.getWriter(this);
     flushedFiles = new HashSet();
   }
 

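Rough arithmetic for the knobs documented above (illustrative numbers, not from this patch):

    int termIndexInterval = 128;     // the default of this era
    long numTerms = 10000000L;       // terms in a segment's dictionary
    long ramTerms = numTerms / termIndexInterval;    // ~78K terms held in RAM

    int skipInterval = 16;           // default above
    int docFreq = 100000;            // postings for one term
    int level0Skips = docFreq / skipInterval;        // 6250 level-0 skip entries
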
Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/Term.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/Term.java?rev=824918&r1=824917&r2=824918&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/Term.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/Term.java Tue Oct 13 20:44:51 2009
@@ -1,7 +1,5 @@
 package org.apache.lucene.index;
 
-import org.apache.lucene.util.StringHelper;
-
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -19,6 +17,8 @@
  * limitations under the License.
  */
 
+import org.apache.lucene.util.StringHelper;
+
 /**
   A Term represents a word from text.  This is the unit of search.  It is
   composed of two elements, the text of the word, as a string, and the name of
@@ -49,7 +49,8 @@
     this(fld, "", true);
   }
 
-  Term(String fld, String txt, boolean intern) {
+  // nocommit -- made public
+  public Term(String fld, String txt, boolean intern) {
     field = intern ? StringHelper.intern(fld) : fld;	  // field names are interned
     text = txt;					          // unless already known to be
   }

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermDocs.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermDocs.java?rev=824918&r1=824917&r2=824918&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermDocs.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermDocs.java Tue Oct 13 20:44:51 2009
@@ -26,7 +26,8 @@
  ordered by document number.
 
  @see IndexReader#termDocs()
- */
+ @deprecated Use {@link DocsEnum} instead
+*/
 
 public interface TermDocs {
   /** Sets this to the data for a term.

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermEnum.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermEnum.java?rev=824918&r1=824917&r2=824918&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermEnum.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermEnum.java Tue Oct 13 20:44:51 2009
@@ -22,7 +22,8 @@
 /** Abstract class for enumerating terms.
 
   <p>Term enumerations are always ordered by Term.compareTo().  Each term in
-  the enumeration is greater than all that precede it.  */
+  the enumeration is greater than all that precede it.
+  @deprecated Use TermsEnum instead */
 
 public abstract class TermEnum {
   /** Increments the enumeration to the next element.  True if one exists.*/

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermPositions.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermPositions.java?rev=824918&r1=824917&r2=824918&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermPositions.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermPositions.java Tue Oct 13 20:44:51 2009
@@ -26,6 +26,7 @@
  * positions of each occurrence of a term in a document.
  *
  * @see IndexReader#termPositions()
+ * @deprecated Use {@link PositionsEnum} instead 
  */
 
 public interface TermPositions

Added: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermRef.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermRef.java?rev=824918&view=auto
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermRef.java (added)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermRef.java Tue Oct 13 20:44:51 2009
@@ -0,0 +1,170 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.util.ArrayUtil;
+import java.io.UnsupportedEncodingException;
+
+/** Represents the UTF8 bytes[] for a term's text.  This is
+ *  used when reading with the flex API, to avoid having to
+ *  materialize full char[]. */
+public class TermRef {
+
+  public byte[] bytes;
+  public int offset;
+  public int length;
+
+  public TermRef() {
+  }
+
+  public TermRef(String text) {
+    copy(text);
+  }
+
+  public void copy(String text) {
+    try {
+      bytes = text.getBytes("UTF-8");
+    } catch (UnsupportedEncodingException uee) {
+      // should not happen:
+      throw new RuntimeException("unable to encode to UTF-8");
+    }
+    offset = 0;
+    length = bytes.length;
+  }
+
+  public int compareTerm(TermRef other) {
+    final int minLength;
+    if (length < other.length) {
+      minLength = length;
+    } else {
+      minLength = other.length;
+    }
+    int upto = offset;
+    int otherUpto = other.offset;
+    final byte[] otherBytes = other.bytes;
+    for(int i=0;i<minLength;i++) {
+      // compare bytes as unsigned
+      final int b1 = bytes[upto++]&0xff;
+      final int b2 = otherBytes[otherUpto++]&0xff;
+      final int diff =  b1-b2;
+      if (diff != 0) {
+        return diff;
+      }
+    }
+    return length - other.length;
+  }
+
+  public boolean termEquals(TermRef other) {
+    if (length == other.length) {
+      int upto = offset;
+      int otherUpto = other.offset;
+      final byte[] otherBytes = other.bytes;
+      for(int i=0;i<length;i++) {
+        if (bytes[upto++] != otherBytes[otherUpto++]) {
+          return false;
+        }
+      }
+      return true;
+    } else {
+      return false;
+    }
+  }
+
+  public Object clone() {
+    TermRef other = new TermRef();
+    other.bytes = new byte[length];
+    System.arraycopy(bytes, offset, other.bytes, 0, length);
+    other.length = length;
+    return other;
+  }
+
+  public boolean startsWith(TermRef other) {
+    // nocommit: is this correct?
+    if (length < other.length) {
+      return false;
+    }
+    for(int i=0;i<other.length;i++) {
+      if (bytes[offset+i] != other.bytes[other.offset+i]) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  @Override
+  public int hashCode() {
+    final int prime = 31;
+    int result = 1;
+    result = prime * result + hash(bytes);
+    return result;
+  }
+  
+  private int hash(byte a[]) {
+    if (a == null)
+        return 0;
+    int result = 1;
+    int upTo = offset;
+    for(int i = 0; i < length; i++)
+        result = 31 * result + bytes[upTo++];
+    return result;
+  }
+
+  @Override
+  public boolean equals(Object other) {
+
+    return this.termEquals((TermRef) other);
+  }
+
+  public String toString() {
+    try {
+      return new String(bytes, offset, length, "UTF-8");
+    } catch (UnsupportedEncodingException uee) {
+      // should not happen
+      throw new RuntimeException(uee);
+    }
+  }
+
+  public String toBytesString() {
+    StringBuilder sb = new StringBuilder();
+    sb.append('[');
+    final int end = offset + length;
+    for(int i=offset;i<end;i++) {
+      if (i > offset) {
+        sb.append(' ');
+      }
+      sb.append(""+bytes[i]);
+    }
+    sb.append(']');
+    return sb.toString();
+  }
+
+  public void copy(TermRef other) {
+    if (bytes == null) {
+      bytes = new byte[other.length];
+    } else {
+      bytes = ArrayUtil.grow(bytes, other.length);
+    }
+    System.arraycopy(other.bytes, other.offset, bytes, 0, other.length);
+    length = other.length;
+    offset = 0;
+  }
+
+  public void grow(int newLength) {
+    bytes = ArrayUtil.grow(bytes, newLength);
+  }
+}
\ No newline at end of file

Propchange: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermRef.java
------------------------------------------------------------------------------
    svn:eol-style = native

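A small usage sketch for TermRef (values illustrative):

    TermRef a = new TermRef("app");
    TermRef b = new TermRef("apple");
    assert a.compareTerm(b) < 0;     // unsigned byte order, i.e. UTF-8 code point order
    assert b.startsWith(a);
    assert !a.termEquals(b);

    TermRef reused = new TermRef();
    reused.copy(b);                  // fills/grows its internal byte[] across terms
    assert reused.termEquals(b);
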
Added: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/Terms.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/Terms.java?rev=824918&view=auto
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/Terms.java (added)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/Terms.java Tue Oct 13 20:44:51 2009
@@ -0,0 +1,60 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import org.apache.lucene.util.Bits;
+
+/**
+ * NOTE: this API is experimental and will likely change
+ */
+
+public abstract class Terms {
+
+  // nocommit -- char[] or byte[] version?
+  /** Returns an iterator that will step through all terms */
+  public abstract TermsEnum iterator() throws IOException;
+  
+  /** Returns the docFreq of the specified term text. */
+  public int docFreq(TermRef text) throws IOException {
+    // nocommit -- make thread private cache so we share
+    // single enum
+    // NOTE: subclasses may have more efficient impl
+    final TermsEnum terms = iterator();
+    if (terms.seek(text) == TermsEnum.SeekStatus.FOUND) {
+      return terms.docFreq();
+    } else {
+      return 0;
+    }
+  }
+
+  /** Get DocsEnum for the specified term. */
+  public DocsEnum docs(Bits skipDocs, TermRef text) throws IOException {
+    // NOTE: subclasses may have more efficient impl
+    final TermsEnum terms = iterator();
+    if (terms.seek(text) == TermsEnum.SeekStatus.FOUND) {
+      return terms.docs(skipDocs);
+    } else {
+      return null;
+    }
+  }
+
+  public long getUniqueTermCount() throws IOException {
+    throw new UnsupportedOperationException("this reader does not implement getUniqueTermCount()");
+  }
+}

Propchange: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/Terms.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermsEnum.java?rev=824918&view=auto
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermsEnum.java (added)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermsEnum.java Tue Oct 13 20:44:51 2009
@@ -0,0 +1,84 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.Bits;
+
+/**
+ * NOTE: this API is experimental and will likely change
+ */
+
+/** Iterator to seek ({@link #seek}) or step through ({@link
+ * #next}) terms, obtain frequency information ({@link
+ * #docFreq}), and obtain a {@link DocsEnum} for the current
+ * term ({@link #docs}).
+ * 
+ * <p>On obtaining a TermsEnum, you must first call
+ * {@link #next} or {@link #seek}. */
+public abstract class TermsEnum extends AttributeSource {
+
+  /** Represents the returned result from {@link TermsEnum#seek}.
+   *  If status is FOUND, then the precise term was found.
+   *  If status is NOT_FOUND, then a different term was
+   *  found.  If the status is END, the end of the iteration
+   *  was hit. */
+  public static enum SeekStatus {END, FOUND, NOT_FOUND};
+
+  /** Seeks to the specified term.  Returns a SeekStatus to
+   *  indicate whether the exact term was found, a different
+   *  term was found, or EOF was hit. */
+  public abstract SeekStatus seek(TermRef text) throws IOException;
+
+  /** Seeks to the specified term by ordinal (position) as
+   *  previously returned by {@link #ord}.  See {@link
+   *  #seek(TermRef)}. */
+  public abstract SeekStatus seek(long ord) throws IOException;
+  
+  /** Increments the enumeration to the next element.
+   *  Returns the resulting TermRef, or null if the end was
+   *  hit.  The returned TermRef may be re-used across calls
+   *  to next. */
+  public abstract TermRef next() throws IOException;
+
+  /** Returns current term.  This is undefined after next()
+   *  returns null or seek returns {@link SeekStatus#END}. */
+  public abstract TermRef term() throws IOException;
+
+  /** Returns ordinal position for current term.  Not all
+   *  codecs implement this, so be prepared to catch an
+   *  {@link UnsupportedOperationException}.  This is
+   *  undefined after next() returns null or seek returns
+   *  {@link SeekStatus#END}. */
+  public abstract long ord() throws IOException;
+
+  /** Returns the docFreq of the current term.  This is
+   *  undefined after next() returns null or seek returns
+   *  {@link SeekStatus#END}.*/
+  public abstract int docFreq();
+
+  /** Get {@link DocsEnum} for the current term.  The
+   *  returned {@link DocsEnum} may share state with this
+   *  TermsEnum instance, so you should not call this
+   *  TermsEnum's {@link #seek} or {@link #next} until you
+   *  are done using the DocsEnum. */
+  public abstract DocsEnum docs(Bits skipDocs) throws IOException;
+}
+

Propchange: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermsEnum.java
------------------------------------------------------------------------------
    svn:eol-style = native

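The intended call pattern, per the javadocs above (a sketch; terms comes from Terms#iterator, and skipDocs is the reader's deleted docs, possibly null):

    TermsEnum termsEnum = terms.iterator();
    TermRef term;
    while ((term = termsEnum.next()) != null) {      // null means iteration is done
      int df = termsEnum.docFreq();
      DocsEnum docsEnum = termsEnum.docs(skipDocs);  // may share state with termsEnum
      int doc;
      while ((doc = docsEnum.next()) != DocsEnum.NO_MORE_DOCS) {
        int freq = docsEnum.freq();
        // positions via docsEnum.positions(), when indexed
      }
    }
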
Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermsHashPerField.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermsHashPerField.java?rev=824918&r1=824917&r2=824918&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermsHashPerField.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermsHashPerField.java Tue Oct 13 20:44:51 2009
@@ -350,6 +350,8 @@
     final char[] tokenText = termAtt.termBuffer();;
     final int tokenTextLen = termAtt.termLength();
 
+    // System.out.println("thpf.add: field=" + fieldInfo.name + " text=" + new String(tokenText, 0, tokenTextLen) + " c0=" + ((int) tokenText[0]) );
+
     // Compute hashcode & replace any invalid UTF16 sequences
     int downto = tokenTextLen;
     int code = 0;

Added: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/Codec.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/Codec.java?rev=824918&view=auto
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/Codec.java (added)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/Codec.java Tue Oct 13 20:44:51 2009
@@ -0,0 +1,96 @@
+package org.apache.lucene.index.codecs;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Collection;
+
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+
+public abstract class Codec {
+
+  public static boolean DEBUG = false;
+
+  private static final int CODEC_HEADER = 0x1af65;
+
+  /** Unique name that's used to retrieve this codec when
+   *  reading the index */
+  public String name;
+
+  /** Writes a new segment */
+  public abstract FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException;
+
+  /** Reads a segment.  NOTE: by the time this call
+   *  returns, it must hold open any files it will need to
+   *  use; else, those files may be deleted. */
+  // nocommit -- add a "required capabilities" here; this
+  // way merging could say only "TERMS_LINEAR_SCAN" but
+  // searching would say "TERMS_RANDOM_ACCESS"?
+  public abstract FieldsProducer fieldsProducer(Directory dir, FieldInfos fieldInfos, SegmentInfo si, int readBufferSize, int indexDivisor) throws IOException;
+
+  /** Gathers files associated with this segment */
+  public abstract void files(Directory dir, SegmentInfo segmentInfo, Collection files) throws IOException;
+
+  /** Records all file extensions this codec uses */
+  public abstract void getExtensions(Collection extensions);
+
+  /** @return Actual version of the file */
+  public static int checkHeader(IndexInput in, String codec, int version) throws IOException {
+
+    // Safety to guard against reading a bogus string:
+    int header = in.readInt();
+    if (header != CODEC_HEADER) {
+      throw new CorruptIndexException("codec header mismatch: " + header + " vs " + CODEC_HEADER);
+    }
+
+    final String actualCodec = in.readString();
+    if (!codec.equals(actualCodec)) {
+      throw new CorruptIndexException("codec mismatch: expected '" + codec + "' but got '" + actualCodec + "'");
+    }
+
+    int actualVersion = in.readInt();
+    if (actualVersion > version) {
+      throw new CorruptIndexException("version '" + actualVersion + "' is too new (expected <= '" + version + "')");
+    }
+
+    return actualVersion;
+  }
+
+  public static void writeHeader(IndexOutput out, String codec, int version) throws IOException {
+    final long start = out.getFilePointer();
+    out.writeInt(CODEC_HEADER);
+    out.writeString(codec);
+    out.writeInt(version);
+
+    // So we can easily compute headerSize (below)
+    if (out.getFilePointer()-start != codec.length() + 9) {
+      System.out.println(out.getFilePointer()-start + " vs " + (codec.length() + 9));
+      throw new IllegalArgumentException("codec must be simple ASCII, less than 128 characters in length [got " + codec + "]");
+    }
+  }
+
+  public static int headerSize(String codec) {
+    return 9 + codec.length();
+  }
+}

Propchange: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/Codec.java
------------------------------------------------------------------------------
    svn:eol-style = native
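
For illustration, the header written above is: the fixed int sentinel
(4 bytes), the codec name via writeString (a one-byte VInt length for
names under 128 chars, plus the ASCII bytes), then an int version
(4 bytes) -- 9 + codec.length() bytes total, matching headerSize().
A hedged round-trip sketch (directory and file name are made up):

    IndexOutput out = dir.createOutput("_0.hdr");
    Codec.writeHeader(out, "Standard", 1);   // 4 + (1 + 8) + 4 = 17 bytes
    out.close();

    IndexInput in = dir.openInput("_0.hdr");
    int actualVersion = Codec.checkHeader(in, "Standard", 1);  // throws on mismatch
    in.close();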

Added: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/Codecs.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/Codecs.java?rev=824918&view=auto
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/Codecs.java (added)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/Codecs.java Tue Oct 13 20:44:51 2009
@@ -0,0 +1,91 @@
+package org.apache.lucene.index.codecs;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.index.codecs.intblock.IntBlockCodec;
+import org.apache.lucene.index.codecs.preflex.PreFlexCodec;
+import org.apache.lucene.index.codecs.pulsing.PulsingCodec;
+import org.apache.lucene.index.codecs.sep.SepCodec;
+import org.apache.lucene.index.codecs.standard.StandardCodec;
+
+/** Holds a set of codecs, keyed by name.  You subclass
+ *  this, instantiate it, and register your codecs, then
+ *  pass this instance to IndexReader/IndexWriter (via
+ *  package private APIs) to use different codecs when
+ *  reading & writing segments. */
+
+public abstract class Codecs {
+
+  private final HashMap codecs = new HashMap();
+
+  private final Collection knownExtensions = new HashSet();
+
+  public void register(Codec codec) {
+    if (codec.name == null) {
+      throw new IllegalArgumentException("codec.name is null");
+    }
+
+    if (!codecs.containsKey(codec.name)) {
+      codecs.put(codec.name, codec);
+      codec.getExtensions(knownExtensions);
+    } else if (codecs.get(codec.name) != codec) {
+      throw new IllegalArgumentException("codec '" + codec.name + "' is already registered as a different codec instance");
+    }
+  }
+
+  public Collection getAllExtensions() {
+    return knownExtensions;
+  }
+
+  public Codec lookup(String name) {
+    final Codec codec = (Codec) codecs.get(name);
+    if (codec == null)
+      throw new IllegalArgumentException("required codec '" + name + "' not found");
+    return codec;
+  }
+
+  public abstract Codec getWriter(SegmentWriteState state);
+
+  static private final Codecs defaultCodecs = new DefaultCodecs();
+
+  public static Codecs getDefault() {
+    return defaultCodecs;
+  }
+}
+
+class DefaultCodecs extends Codecs {
+  DefaultCodecs() {
+    register(new StandardCodec());
+    register(new IntBlockCodec());
+    register(new PreFlexCodec());
+    register(new PulsingCodec());
+    register(new SepCodec());
+  }
+
+  public Codec getWriter(SegmentWriteState state) {
+    return lookup("Standard");
+    //return lookup("Pulsing");
+    //return lookup("Sep");
+    //return lookup("IntBlock");
+  }
+}
\ No newline at end of file

Propchange: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/Codecs.java
------------------------------------------------------------------------------
    svn:eol-style = native
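
Since Codecs is abstract, opting into a different codec amounts to
subclassing it, exactly as DefaultCodecs does above.  A hedged sketch
(MyCodecs is hypothetical; the instance would be handed to
IndexReader/IndexWriter via the package-private APIs mentioned in the
javadoc):

    class MyCodecs extends Codecs {
      MyCodecs() {
        register(new StandardCodec());
        register(new PulsingCodec());
      }
      public Codec getWriter(SegmentWriteState state) {
        // write all new segments with Pulsing instead of Standard
        return lookup("Pulsing");
      }
    }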

Added: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/DocsConsumer.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/DocsConsumer.java?rev=824918&view=auto
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/DocsConsumer.java (added)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/DocsConsumer.java Tue Oct 13 20:44:51 2009
@@ -0,0 +1,57 @@
+package org.apache.lucene.index.codecs;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.index.FieldInfo;
+
+/**
+ * NOTE: this API is experimental and will likely change
+ */
+
+// nocommit -- name this "StandardDocsConsumer"?  eg the
+// RAMCodec doesn't need most of these methods...
+public abstract class DocsConsumer {
+
+  // nocommit
+  public String desc;
+  /*
+  public boolean setDesc(String desc) {
+    this.desc = desc;
+    return true;
+  }
+  */
+
+  public abstract void start(IndexOutput termsOut) throws IOException;
+
+  public abstract void startTerm() throws IOException;
+
+  /** Adds a new doc in this term.  Return null if this
+   *  consumer doesn't need to see the positions for this
+   *  doc. */
+  public abstract PositionsConsumer addDoc(int docID, int termDocFreq) throws IOException;
+
+  /** Finishes the current term */
+  public abstract void finishTerm(int numDocs, boolean isIndexTerm) throws IOException;
+
+  public abstract void setField(FieldInfo fieldInfo);
+
+  public abstract void close() throws IOException;
+}

Propchange: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/DocsConsumer.java
------------------------------------------------------------------------------
    svn:eol-style = native
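
The call sequence this API implies, per field, is roughly the
following (a hedged sketch; a concrete terms dict writer drives the
loop, and the loop structure here is illustrative):

    consumer.start(termsOut);
    consumer.setField(fieldInfo);
    consumer.startTerm();                 // once per term
    PositionsConsumer positions = consumer.addDoc(docID, termDocFreq);
    if (positions != null) {
      // feed this doc's positions/payloads
    }
    consumer.finishTerm(numDocs, isIndexTerm);
    consumer.close();                     // after all terms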

Added: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/DocsProducer.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/DocsProducer.java?rev=824918&view=auto
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/DocsProducer.java (added)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/DocsProducer.java Tue Oct 13 20:44:51 2009
@@ -0,0 +1,65 @@
+package org.apache.lucene.index.codecs;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.Bits;
+
+
+// nocommit -- this is tied to StandardTermsDictWriter;
+// shouldn't it be named StandardDocsProducer?  hmm, though,
+// its API is fairly generic in that any other terms dict
+// codec could re-use it
+
+/** StandardTermsDictReader interacts with a single instance
+ *  of this class to manage creation of multiple docs enum
+ *  instances.  It provides an IndexInput (termsIn) from
+ *  which this class may read any data previously written
+ *  by its corresponding DocsConsumer. */
+public abstract class DocsProducer {
+  
+  public abstract class Reader {
+    public class State {}
+    
+    public abstract void readTerm(int docFreq, boolean isIndexTerm) throws IOException;
+
+    /** Returns a docs enum for the last term read */
+    public abstract DocsEnum docs(Bits deletedDocs) throws IOException;
+    
+    // nocommit: fooling around with reusable
+    public abstract State captureState(State reusableState);
+    
+    public abstract void setState(State state) throws IOException;
+    
+    public boolean canCaptureState() {
+      return false;
+    }
+  }
+
+  public abstract void start(IndexInput termsIn) throws IOException;
+
+  /** Returns a new private reader for stepping through
+   *  terms, getting DocsEnum. */
+  public abstract Reader reader(FieldInfo fieldInfo, IndexInput termsIn) throws IOException;
+
+  public abstract void close() throws IOException;
+}

Propchange: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/DocsProducer.java
------------------------------------------------------------------------------
    svn:eol-style = native
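
The read path mirrors the consumer's lifecycle.  A hedged sketch of
how a terms dict reader might drive it (variable names are
illustrative):

    producer.start(termsIn);
    DocsProducer.Reader reader = producer.reader(fieldInfo, termsIn);
    // after the terms dict decodes each term:
    reader.readTerm(docFreq, isIndexTerm);
    DocsEnum docsEnum = reader.docs(deletedDocs);  // docs for that term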

Added: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/FieldsConsumer.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/FieldsConsumer.java?rev=824918&view=auto
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/FieldsConsumer.java (added)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/FieldsConsumer.java Tue Oct 13 20:44:51 2009
@@ -0,0 +1,38 @@
+package org.apache.lucene.index.codecs;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.FieldInfo;
+
+import java.io.IOException;
+
+/** Abstract API that consumes terms, doc, freq, prox and
+ *  payloads postings.  Concrete implementations of this
+ *  actually do "something" with the postings (e.g. write
+ *  them into the index in a specific format).
+ *
+ * NOTE: this API is experimental and will likely change
+ */
+public abstract class FieldsConsumer {
+
+  /** Add a new field */
+  public abstract TermsConsumer addField(FieldInfo field) throws IOException;
+
+  /** Called when we are done adding everything. */
+  public abstract void close() throws IOException;
+}

Propchange: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/FieldsConsumer.java
------------------------------------------------------------------------------
    svn:eol-style = native
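
FieldsConsumer is the top of the write-side hierarchy: one
TermsConsumer per field, which in turn hands out the per-term doc and
position consumers.  A hedged sketch:

    FieldsConsumer fields = codec.fieldsConsumer(state);
    TermsConsumer terms = fields.addField(fieldInfo);
    // ... feed this field's terms, docs and positions ...
    fields.close();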

Added: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/FieldsProducer.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/FieldsProducer.java?rev=824918&view=auto
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/FieldsProducer.java (added)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/FieldsProducer.java Tue Oct 13 20:44:51 2009
@@ -0,0 +1,34 @@
+package org.apache.lucene.index.codecs;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.Fields;
+
+import java.io.IOException;
+
+/** Abstract API that produces terms, doc, freq, prox and
+ *  payloads postings.  Concrete implementations of this
+ *  actually read the postings from the index (in a specific
+ *  format).
+ *
+ * NOTE: this API is experimental and will likely change
+ */
+public abstract class FieldsProducer extends Fields {
+  public abstract void close() throws IOException;
+  public abstract void loadTermsIndex() throws IOException;
+}

Propchange: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/FieldsProducer.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/MultiLevelSkipListReader.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/MultiLevelSkipListReader.java?rev=824918&view=auto
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/MultiLevelSkipListReader.java (added)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/MultiLevelSkipListReader.java Tue Oct 13 20:44:51 2009
@@ -0,0 +1,279 @@
+package org.apache.lucene.index.codecs;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.lucene.store.BufferedIndexInput;
+import org.apache.lucene.store.IndexInput;
+
+/**
+ * This abstract class reads skip lists with multiple levels.
+ * 
+ * See {@link MultiLevelSkipListWriter} for information about the encoding
+ * of the multi-level skip lists.
+ * 
+ * Subclasses must implement the abstract method {@link #readSkipData(int, IndexInput)}
+ * which defines the actual format of the skip data.
+ */
+
+// nocommit -- made public
+public abstract class MultiLevelSkipListReader {
+  // the maximum number of skip levels possible for this index
+  protected int maxNumberOfSkipLevels; 
+  
+  // number of levels in this skip list
+  private int numberOfSkipLevels;
+  
+  // Expert: defines the number of top skip levels to buffer in memory.
+  // Reducing this number results in less memory usage, but possibly
+  // slower performance due to more random I/Os.
+  // Note that the space each level occupies is limited by
+  // the skipInterval: the top level cannot contain more than
+  // skipInterval entries, the second-highest level cannot contain
+  // more than skipInterval^2 entries, and so forth.
+  private int numberOfLevelsToBuffer = 1;
+  
+  private int docCount;
+  private boolean haveSkipped;
+  
+  private IndexInput[] skipStream;    // skipStream for each level
+  private long skipPointer[];         // the start pointer of each skip level
+  private int skipInterval[];         // skipInterval of each level
+  private int[] numSkipped;           // number of docs skipped per level
+    
+  private int[] skipDoc;              // doc id of current skip entry per level 
+  private int lastDoc;                // doc id of last read skip entry with docId <= target
+  private long[] childPointer;        // child pointer of current skip entry per level
+  private long lastChildPointer;      // childPointer of last read skip entry with docId <= target
+  
+  private boolean inputIsBuffered;
+  
+  public MultiLevelSkipListReader(IndexInput skipStream, int maxSkipLevels, int skipInterval) {
+    this.skipStream = new IndexInput[maxSkipLevels];
+    this.skipPointer = new long[maxSkipLevels];
+    this.childPointer = new long[maxSkipLevels];
+    this.numSkipped = new int[maxSkipLevels];
+    this.maxNumberOfSkipLevels = maxSkipLevels;
+    this.skipInterval = new int[maxSkipLevels];
+    this.skipStream[0] = skipStream;
+    this.inputIsBuffered = (skipStream instanceof BufferedIndexInput);
+    this.skipInterval[0] = skipInterval;
+    for (int i = 1; i < maxSkipLevels; i++) {
+      // cache skip intervals
+      this.skipInterval[i] = this.skipInterval[i - 1] * skipInterval;
+    }
+    skipDoc = new int[maxSkipLevels];
+  }
+
+  
+  /** Returns the id of the doc to which the last call of {@link #skipTo(int)}
+   *  has skipped.  */
+  // nocommit made public
+  public int getDoc() {
+    return lastDoc;
+  }
+  
+  
+  /** Skips entries to the first beyond the current whose document number is
+   *  greater than or equal to <i>target</i>. Returns the current doc count. 
+   */
+  // nocommit made public
+  public int skipTo(int target) throws IOException {
+    if (!haveSkipped) {
+      // first time, load skip levels
+      loadSkipLevels();
+      haveSkipped = true;
+    }
+  
+    // walk up the levels until highest level is found that has a skip
+    // for this target
+    int level = 0;
+    while (level < numberOfSkipLevels - 1 && target > skipDoc[level + 1]) {
+      level++;
+    }    
+
+    while (level >= 0) {
+      if (target > skipDoc[level]) {
+        if (!loadNextSkip(level)) {
+          continue;
+        }
+      } else {
+        // no more skips on this level, go down one level
+        if (level > 0 && lastChildPointer > skipStream[level - 1].getFilePointer()) {
+          seekChild(level - 1);
+        } 
+        level--;
+      }
+    }
+    
+    return numSkipped[0] - skipInterval[0] - 1;
+  }
+  
+  private boolean loadNextSkip(int level) throws IOException {
+    // we have to skip, the target document is greater than the current
+    // skip list entry        
+    setLastSkipData(level);
+      
+    numSkipped[level] += skipInterval[level];
+      
+    if (numSkipped[level] > docCount) {
+      // this skip list is exhausted
+      skipDoc[level] = Integer.MAX_VALUE;
+      if (numberOfSkipLevels > level) numberOfSkipLevels = level; 
+      return false;
+    }
+
+    // read next skip entry
+    skipDoc[level] += readSkipData(level, skipStream[level]);
+    
+    if (level != 0) {
+      // read the child pointer if we are not on the leaf level
+      childPointer[level] = skipStream[level].readVLong() + skipPointer[level - 1];
+    }
+    
+    return true;
+  }
+  
+  /** Seeks the skip entry on the given level */
+  protected void seekChild(int level) throws IOException {
+    skipStream[level].seek(lastChildPointer);
+    numSkipped[level] = numSkipped[level + 1] - skipInterval[level + 1];
+    skipDoc[level] = lastDoc;
+    if (level > 0) {
+        childPointer[level] = skipStream[level].readVLong() + skipPointer[level - 1];
+    }
+  }
+
+  // nocommit -- made public
+  public void close() throws IOException {
+    for (int i = 1; i < skipStream.length; i++) {
+      if (skipStream[i] != null) {
+        skipStream[i].close();
+      }
+    }
+  }
+
+  /** initializes the reader */
+  // nocommit -- made public
+  public void init(long skipPointer, int df) {
+    this.skipPointer[0] = skipPointer;
+    this.docCount = df;
+    Arrays.fill(skipDoc, 0);
+    Arrays.fill(numSkipped, 0);
+    Arrays.fill(childPointer, 0);
+    
+    haveSkipped = false;
+    for (int i = 1; i < numberOfSkipLevels; i++) {
+      skipStream[i] = null;
+    }
+  }
+  
+  /** Loads the skip levels  */
+  private void loadSkipLevels() throws IOException {
+    numberOfSkipLevels = docCount == 0 ? 0 : (int) Math.floor(Math.log(docCount) / Math.log(skipInterval[0]));
+    if (numberOfSkipLevels > maxNumberOfSkipLevels) {
+      numberOfSkipLevels = maxNumberOfSkipLevels;
+    }
+
+    skipStream[0].seek(skipPointer[0]);
+    
+    int toBuffer = numberOfLevelsToBuffer;
+    
+    for (int i = numberOfSkipLevels - 1; i > 0; i--) {
+      // the length of the current level
+      long length = skipStream[0].readVLong();
+      
+      // the start pointer of the current level
+      skipPointer[i] = skipStream[0].getFilePointer();
+      if (toBuffer > 0) {
+        // buffer this level
+        skipStream[i] = new SkipBuffer(skipStream[0], (int) length);
+        toBuffer--;
+      } else {
+        // clone this stream, it is already at the start of the current level
+        skipStream[i] = (IndexInput) skipStream[0].clone();
+        if (inputIsBuffered && length < BufferedIndexInput.BUFFER_SIZE) {
+          ((BufferedIndexInput) skipStream[i]).setBufferSize((int) length);
+        }
+        
+        // move base stream beyond the current level
+        skipStream[0].seek(skipStream[0].getFilePointer() + length);
+      }
+    }
+   
+    // use base stream for the lowest level
+    skipPointer[0] = skipStream[0].getFilePointer();
+  }
+  
+  /**
+   * Subclasses must implement the actual skip data encoding in this method.
+   *  
+   * @param level the level skip data shall be read from
+   * @param skipStream the skip stream to read from
+   */  
+  protected abstract int readSkipData(int level, IndexInput skipStream) throws IOException;
+  
+  /** Copies the values of the last read skip entry on this level */
+  protected void setLastSkipData(int level) {
+    lastDoc = skipDoc[level];
+    lastChildPointer = childPointer[level];
+  }
+
+  
+  /** used to buffer the top skip levels */
+  private final static class SkipBuffer extends IndexInput {
+    private byte[] data;
+    private long pointer;
+    private int pos;
+    
+    SkipBuffer(IndexInput input, int length) throws IOException {
+      data = new byte[length];
+      pointer = input.getFilePointer();
+      input.readBytes(data, 0, length);
+    }
+    
+    public void close() throws IOException {
+      data = null;
+    }
+
+    public long getFilePointer() {
+      return pointer + pos;
+    }
+
+    public long length() {
+      return data.length;
+    }
+
+    public byte readByte() throws IOException {
+      return data[pos++];
+    }
+
+    public void readBytes(byte[] b, int offset, int len) throws IOException {
+      System.arraycopy(data, pos, b, offset, len);
+      pos += len;
+    }
+
+    public void seek(long pos) throws IOException {
+      this.pos = (int) (pos - pointer);
+    }
+    
+  }
+}

Propchange: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/MultiLevelSkipListReader.java
------------------------------------------------------------------------------
    svn:eol-style = native
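
Subclasses only decode a single skip entry; the base class handles
level navigation and buffering.  A minimal hedged sketch (a
doc-delta-only format; real codecs also store freq/prox file pointers
per entry):

    class DocOnlySkipListReader extends MultiLevelSkipListReader {
      DocOnlySkipListReader(IndexInput skipStream, int maxSkipLevels, int skipInterval) {
        super(skipStream, maxSkipLevels, skipInterval);
      }
      protected int readSkipData(int level, IndexInput skipStream) throws IOException {
        // return the doc id delta; the caller accumulates it into
        // skipDoc[level]
        return skipStream.readVInt();
      }
    }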


