lucene-commits mailing list archives

From: rm...@apache.org
Subject: svn commit: r1058390 [7/16] - in /lucene/dev/branches/bulkpostings: ./ dev-tools/ dev-tools/eclipse/ dev-tools/idea/ dev-tools/idea/.idea/ dev-tools/idea/.idea/libraries/ dev-tools/idea/lucene/ dev-tools/idea/lucene/contrib/ dev-tools/idea/lucene/contr...
Date: Thu, 13 Jan 2011 02:09:56 GMT
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java Thu Jan 13 02:09:33 2011
@@ -18,20 +18,13 @@ package org.apache.lucene.util.automaton
  */
 
 import java.io.IOException;
-import java.io.PrintStream;
-import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
 
 import org.apache.lucene.store.DataInput;
 import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CodecUtil;
-import org.apache.lucene.util.IntsRef;
 
 /** Represents an FST using a compact byte[] format.
  *  <p> The format is similar to what's used by Morfologik
@@ -40,7 +33,7 @@ import org.apache.lucene.util.IntsRef;
  */
 public class FST<T> {
   public static enum INPUT_TYPE {BYTE1, BYTE2, BYTE4};
-  private final INPUT_TYPE inputType;
+  public final INPUT_TYPE inputType;
 
   private final static int BIT_FINAL_ARC = 1 << 0;
   private final static int BIT_LAST_ARC = 1 << 1;
@@ -76,7 +69,7 @@ public class FST<T> {
 
   // if non-null, this FST accepts the empty string and
   // produces this output
-  private T emptyOutput;
+  T emptyOutput;
   private byte[] emptyOutputBytes;
 
   private byte[] bytes;
@@ -94,11 +87,16 @@ public class FST<T> {
   public int arcCount;
   public int arcWithOutputCount;
 
+  // If arc has this label then that arc is final/accepted
+  public static int END_LABEL = -1;
+
   public final static class Arc<T> {
-    int label;  // really a "unsigned" byte
+    public int label;
+    public T output;
+
     int target;
+
     byte flags;
-    T output;
     T nextFinalOutput;
     int nextArc;
 
@@ -108,13 +106,26 @@ public class FST<T> {
     int arcIdx;
     int numArcs;
 
-    // Must call this before re-using an Arc instance on a
-    // new node
-    public void reset() {
-      bytesPerArc = 0;
+    /** Returns this */
+    public Arc<T> copyFrom(Arc<T> other) {
+      label = other.label;
+      target = other.target;
+      flags = other.flags;
+      output = other.output;
+      nextFinalOutput = other.nextFinalOutput;
+      nextArc = other.nextArc;
+      if (other.bytesPerArc != 0) {
+        bytesPerArc = other.bytesPerArc;
+        posArcsStart = other.posArcsStart;
+        arcIdx = other.arcIdx;
+        numArcs = other.numArcs;
+      } else {
+        bytesPerArc = 0;
+      }
+      return this;
     }
 
-    public boolean flag(int flag) {
+    boolean flag(int flag) {
       return FST.flag(flags, flag);
     }
 
@@ -122,7 +133,7 @@ public class FST<T> {
       return flag(BIT_LAST_ARC);
     }
 
-    public boolean isFinal() {
+    boolean isFinal() {
       return flag(BIT_FINAL_ARC);
     }
   };
@@ -156,7 +167,7 @@ public class FST<T> {
       // messy
       bytes = new byte[numBytes];
       in.readBytes(bytes, 0, numBytes);
-      emptyOutput = outputs.read(new BytesReader(numBytes-1));
+      emptyOutput = outputs.read(getBytesReader(numBytes-1));
     } else {
       emptyOutput = null;
     }
@@ -203,9 +214,9 @@ public class FST<T> {
     this.startNode = startNode;
   }
 
-  public void setEmptyOutput(T v) throws IOException {
-    if (emptyOutput != null) {
-      throw new IllegalStateException("empty output is already set");
+  void setEmptyOutput(T v) throws IOException {
+    if (emptyOutput != null && !emptyOutput.equals(v)) {
+      throw new IllegalStateException("empty output is already set: " + outputs.outputToString(emptyOutput) + " vs " + outputs.outputToString(v));
     }
     emptyOutput = v;
 
@@ -271,7 +282,7 @@ public class FST<T> {
     }
   }
 
-  private int readLabel(DataInput in) throws IOException {
+  int readLabel(DataInput in) throws IOException {
     final int v;
     if (inputType == INPUT_TYPE.BYTE1) {
       v = in.readByte()&0xFF;
@@ -285,21 +296,8 @@ public class FST<T> {
 
   // returns true if the node at this address has any
   // outgoing arcs
-  public boolean hasArcs(int address) {
-    return address != FINAL_END_NODE && address != NON_FINAL_END_NODE;
-  }
-
-  public int getStartNode() {
-    if (startNode == -1) {
-      throw new IllegalStateException("call finish first");
-    }
-    return startNode;
-  }
-
-  // returns null if this FST does not accept the empty
-  // string, else, the output for the empty string
-  public T getEmptyOutput() {
-    return emptyOutput;
+  public boolean targetHasArcs(Arc<T> arc) {
+    return arc.target > 0;
   }
 
   // serializes new node by appending its bytes to the end
@@ -364,7 +362,7 @@ public class FST<T> {
         assert arc.nextFinalOutput == NO_OUTPUT;
       }
 
-      boolean targetHasArcs = hasArcs(target.address);
+      boolean targetHasArcs = target.address > 0;
 
       if (!targetHasArcs) {
         flags += BIT_STOP_NODE;
@@ -453,10 +451,49 @@ public class FST<T> {
     return endAddress-1;
   }
 
-  public Arc<T> readFirstArc(int address, Arc<T> arc) throws IOException {
-    //System.out.println("readFirstArc addr=" + address);
+  /** Fills virtual 'start' arc, ie, an empty incoming arc to
+   *  the FST's start node */
+  public Arc<T> getFirstArc(Arc<T> arc) {
+    if (emptyOutput != null) {
+      arc.flags = BIT_FINAL_ARC | BIT_LAST_ARC;
+      arc.nextFinalOutput = emptyOutput;
+    } else {
+      arc.flags = BIT_LAST_ARC;
+    }
+
+    // If there are no nodes, ie, the FST only accepts the empty string,
+    // then startNode is 0 and readFirstTargetArc returns just the final arc.
+    arc.target = startNode;
+    return arc;
+  }
+
+  /** Follow the follow arc and read the first arc of its
+   *  target; this changes the provided arc (2nd arg) in-place
+   *  and returns it. */
+  public Arc<T> readFirstTargetArc(Arc<T> follow, Arc<T> arc) throws IOException {
     //int pos = address;
-    final BytesReader in = new BytesReader(address);
+    //System.out.println("    readFirstTarget follow.target=" + follow.target + " isFinal=" + follow.isFinal());
+    if (follow.isFinal()) {
+      // Insert "fake" final first arc:
+      arc.label = -1;
+      arc.output = follow.nextFinalOutput;
+      if (follow.target <= 0) {
+        arc.flags = BIT_LAST_ARC;
+      } else {
+        arc.flags = 0;
+        arc.nextArc = follow.target;
+      }
+      //System.out.println("    insert isFinal; nextArc=" + follow.target + " isLast=" + arc.isLast() + " output=" + outputs.outputToString(arc.output));
+      return arc;
+    } else {
+      return readFirstRealArc(follow.target, arc);
+    }
+  }
+
+  // Not private because NodeHash needs access:
+  Arc<T> readFirstRealArc(int address, Arc<T> arc) throws IOException {
+
+    final BytesReader in = getBytesReader(address);
 
     arc.flags = in.readByte();
 
@@ -473,19 +510,66 @@ public class FST<T> {
       arc.bytesPerArc = 0;
     }
     arc.nextArc = in.pos;
+    arc.label = 0;
     return readNextArc(arc);
   }
 
+  /** In-place read; returns the arc. */
   public Arc<T> readNextArc(Arc<T> arc) throws IOException {
+    if (arc.label == -1) {
+      // This was a fake inserted "final" arc
+      if (arc.nextArc <= 0) {
+        // This arc went to virtual final node, ie has no outgoing arcs
+        return null;
+      }
+      return readFirstRealArc(arc.nextArc, arc);
+    } else {
+      return readNextRealArc(arc);
+    }
+  }
+
+  /** Peeks at next arc's label; does not alter arc.  Do
+   *  not call this if arc.isLast()! */
+  public int readNextArcLabel(Arc<T> arc) throws IOException {
+    assert !arc.isLast();
+
+    final BytesReader in;
+    if (arc.label == END_LABEL) {
+      //System.out.println("    nextArc fake " + arc.nextArc);
+      in = getBytesReader(arc.nextArc);
+      byte flags = bytes[in.pos];
+      if (flag(flags, BIT_ARCS_AS_FIXED_ARRAY)) {
+        //System.out.println("    nextArc fake array");
+        in.pos--;
+        in.readVInt();
+        in.readByte();
+      }
+    } else {
+      if (arc.bytesPerArc != 0) {
+        //System.out.println("    nextArc real array");
+        // arcs are at fixed entries
+        in = getBytesReader(arc.posArcsStart - (1+arc.arcIdx)*arc.bytesPerArc);
+      } else {
+        // arcs are packed
+        //System.out.println("    nextArc real packed");
+        in = getBytesReader(arc.nextArc);
+      }
+    }
+    // skip flags
+    in.readByte();
+    return readLabel(in);
+  }
+
+  Arc<T> readNextRealArc(Arc<T> arc) throws IOException {
     // this is a continuing arc in a fixed array
     final BytesReader in;
     if (arc.bytesPerArc != 0) {
       // arcs are at fixed entries
       arc.arcIdx++;
-      in = new BytesReader(arc.posArcsStart - arc.arcIdx*arc.bytesPerArc);
+      in = getBytesReader(arc.posArcsStart - arc.arcIdx*arc.bytesPerArc);
     } else {
       // arcs are packed
-      in = new BytesReader(arc.nextArc);
+      in = getBytesReader(arc.nextArc);
     }
     arc.flags = in.readByte();
     arc.label = readLabel(in);
@@ -504,6 +588,7 @@ public class FST<T> {
 
     if (arc.flag(BIT_STOP_NODE)) {
       arc.target = FINAL_END_NODE;
+      arc.flags |= BIT_FINAL_ARC;
       arc.nextArc = in.pos;
     } else if (arc.flag(BIT_TARGET_NEXT)) {
       arc.nextArc = in.pos;
@@ -524,14 +609,30 @@ public class FST<T> {
     return arc;
   }
 
-  public Arc<T> findArc(int address, int labelToMatch, Arc<T> arc) throws IOException {
+  /** Finds an arc leaving the incoming arc, replacing the arc in place.
+   *  This returns null if the arc was not found, else the incoming arc. */
+  public Arc<T> findTargetArc(int labelToMatch, Arc<T> follow, Arc<T> arc) throws IOException {
+
+    if (labelToMatch == END_LABEL) {
+      if (follow.isFinal()) {
+        arc.output = follow.nextFinalOutput;
+        arc.label = END_LABEL;
+        return arc;
+      } else {
+        return null;
+      }
+    }
+
+    if (!targetHasArcs(follow)) {
+      return null;
+    }
+
     // TODO: maybe make an explicit thread state that holds
     // reusable stuff eg BytesReader:
-    final BytesReader in = new BytesReader(address);
+    final BytesReader in = getBytesReader(follow.target);
 
     if ((in.readByte() & BIT_ARCS_AS_FIXED_ARRAY) != 0) {
       // Arcs are full array; do binary search:
-      //System.out.println("findArc: array label=" + labelToMatch);
       arc.numArcs = in.readVInt();
       arc.bytesPerArc = in.readByte() & 0xFF;
       arc.posArcsStart = in.pos;
@@ -548,19 +649,20 @@ public class FST<T> {
           high = mid - 1;
         else {
           arc.arcIdx = mid-1;
-          return readNextArc(arc);
+          return readNextRealArc(arc);
         }
       }
 
       return null;
     }
-    //System.out.println("findArc: scan");
-
-    readFirstArc(address, arc);
 
+    // Linear scan
+    readFirstTargetArc(follow, arc);
     while(true) {
       if (arc.label == labelToMatch) {
         return arc;
+      } else if (arc.label > labelToMatch) {
+        return null;
       } else if (arc.isLast()) {
         return null;
       } else {
@@ -569,191 +671,6 @@ public class FST<T> {
     }
   }
 
-  /** Looks up the output for this input, or null if the
-   *  input is not accepted. FST must be
-   *  INPUT_TYPE.BYTE4. */
-  public T get(IntsRef input) throws IOException {
-    assert inputType == INPUT_TYPE.BYTE4;
-
-    if (input.length == 0) {
-      return getEmptyOutput();
-    }
-
-    // TODO: would be nice not to alloc this on every lookup
-    final FST.Arc<T> arc = new FST.Arc<T>();
-    int node = getStartNode();
-    T output = NO_OUTPUT;
-    for(int i=0;i<input.length;i++) {
-      if (!hasArcs(node)) {
-        // hit end of FST before input end
-        return null;
-      }
-
-      if (findArc(node, input.ints[input.offset + i], arc) != null) {
-        node = arc.target;
-        if (arc.output != NO_OUTPUT) {
-          output = outputs.add(output, arc.output);
-        }
-      } else {
-        return null;
-      }
-    }
-
-    if (!arc.isFinal()) {
-      // hit input's end before end node
-      return null;
-    }
-
-    if (arc.nextFinalOutput != NO_OUTPUT) {
-      output = outputs.add(output, arc.nextFinalOutput);
-    }
-
-    return output;
-  }
-
-  /** Logically casts input to UTF32 ints then looks up the output
-   *  or null if the input is not accepted.  FST must be
-   *  INPUT_TYPE.BYTE4.  */
-  public T get(char[] input, int offset, int length) throws IOException {
-    assert inputType == INPUT_TYPE.BYTE4;
-
-    if (length == 0) {
-      return getEmptyOutput();
-    }
-
-    // TODO: would be nice not to alloc this on every lookup
-    final FST.Arc<T> arc = new FST.Arc<T>();
-    int node = getStartNode();
-    int charIdx = offset;
-    final int charLimit = offset + length;
-    T output = NO_OUTPUT;
-    while(charIdx < charLimit) {
-      if (!hasArcs(node)) {
-        // hit end of FST before input end
-        return null;
-      }
-
-      final int utf32 = Character.codePointAt(input, charIdx);
-      charIdx += Character.charCount(utf32);
-
-      if (findArc(node, utf32, arc) != null) {
-        node = arc.target;
-        if (arc.output != NO_OUTPUT) {
-          output = outputs.add(output, arc.output);
-        }
-      } else {
-        return null;
-      }
-    }
-
-    if (!arc.isFinal()) {
-      // hit input's end before end node
-      return null;
-    }
-
-    if (arc.nextFinalOutput != NO_OUTPUT) {
-      output = outputs.add(output, arc.nextFinalOutput);
-    }
-
-    return output;
-  }
-
-
-  /** Logically casts input to UTF32 ints then looks up the output
-   *  or null if the input is not accepted.  FST must be
-   *  INPUT_TYPE.BYTE4.  */
-  public T get(CharSequence input) throws IOException {
-    assert inputType == INPUT_TYPE.BYTE4;
-
-    final int len = input.length();
-    if (len == 0) {
-      return getEmptyOutput();
-    }
-
-    // TODO: would be nice not to alloc this on every lookup
-    final FST.Arc<T> arc = new FST.Arc<T>();
-    int node = getStartNode();
-    int charIdx = 0;
-    final int charLimit = input.length();
-    T output = NO_OUTPUT;
-    while(charIdx < charLimit) {
-      if (!hasArcs(node)) {
-        // hit end of FST before input end
-        return null;
-      }
-
-      final int utf32 = Character.codePointAt(input, charIdx);
-      charIdx += Character.charCount(utf32);
-
-      if (findArc(node, utf32, arc) != null) {
-        node = arc.target;
-        if (arc.output != NO_OUTPUT) {
-          output = outputs.add(output, arc.output);
-        }
-      } else {
-        return null;
-      }
-    }
-
-    if (!arc.isFinal()) {
-      // hit input's end before end node
-      return null;
-    }
-
-    if (arc.nextFinalOutput != NO_OUTPUT) {
-      output = outputs.add(output, arc.nextFinalOutput);
-    }
-
-    return output;
-  }
-
-  /** Looks up the output for this input, or null if the
-   *  input is not accepted */
-  public T get(BytesRef input) throws IOException {
-    assert inputType == INPUT_TYPE.BYTE1;
-
-    if (input.length == 0) {
-      return getEmptyOutput();
-    }
-
-    // TODO: would be nice not to alloc this on every lookup
-    final FST.Arc<T> arc = new FST.Arc<T>();
-    int node = getStartNode();
-    T output = NO_OUTPUT;
-    for(int i=0;i<input.length;i++) {
-      if (!hasArcs(node)) {
-        // hit end of FST before input end
-        return null;
-      }
-
-      if (findArc(node, input.bytes[i+input.offset], arc) != null) {
-        node = arc.target;
-        if (arc.output != NO_OUTPUT) {
-          output = outputs.add(output, arc.output);
-        }
-      } else {
-        return null;
-      }
-    }
-
-    if (!arc.isFinal()) {
-      // hit input's end before end node
-      return null;
-    }
-
-    if (arc.nextFinalOutput != NO_OUTPUT) {
-      output = outputs.add(output, arc.nextFinalOutput);
-    }
-
-    return output;
-  }
-
-  /** Returns true if this FST has no nodes */
-  public boolean noNodes() {
-    //System.out.println("isempty startNode=" + startNode);
-    return startNode == 0;
-  }
-
   private void seekToNextNode(BytesReader in) throws IOException {
 
     while(true) {
@@ -779,85 +696,6 @@ public class FST<T> {
     }
   }
 
-  // NOTE: this consumes alot of RAM!
-  // final arcs have a flat end (not arrow)
-  // arcs w/ NEXT opto are in blue
-  /*
-    eg:
-      PrintStream ps = new PrintStream("out.dot");
-      fst.toDot(ps);
-      ps.close();
-      System.out.println("SAVED out.dot");
-      
-    then dot -Tpng out.dot > /x/tmp/out.png
-  */
-  public void toDot(PrintStream out) throws IOException {
-
-    final List<Integer> queue = new ArrayList<Integer>();
-    queue.add(startNode);
-
-    final Set<Integer> seen = new HashSet<Integer>();
-    seen.add(startNode);
-    
-    out.println("digraph FST {");
-    out.println("  rankdir = LR;");
-    //out.println("  " + startNode + " [shape=circle label=" + startNode + "];");
-    out.println("  " + startNode + " [label=\"\" shape=circle];");
-    out.println("  initial [shape=point color=white label=\"\"];");
-    if (emptyOutput != null) {
-      out.println("  initial -> " + startNode + " [arrowhead=tee label=\"(" + outputs.outputToString(emptyOutput) + ")\"];");
-    } else {
-      out.println("  initial -> " + startNode);
-    }
-
-    final Arc<T> arc = new Arc<T>();
-
-    while(queue.size() != 0) {
-      Integer node = queue.get(queue.size()-1);
-      queue.remove(queue.size()-1);
-
-      if (node == FINAL_END_NODE || node == NON_FINAL_END_NODE) {
-        continue;
-      }
-
-      // scan all arcs
-      readFirstArc(node, arc);
-      while(true) {
-
-        if (!seen.contains(arc.target)) {
-          //out.println("  " + arc.target + " [label=" + arc.target + "];");
-          out.println("  " + arc.target + " [label=\"\" shape=circle];");
-          seen.add(arc.target);
-          queue.add(arc.target);
-        }
-        String outs;
-        if (arc.output != NO_OUTPUT) {
-          outs = "/" + outputs.outputToString(arc.output);
-        } else {
-          outs = "";
-        }
-        if (arc.isFinal() && arc.nextFinalOutput != NO_OUTPUT) {
-          outs += " (" + outputs.outputToString(arc.nextFinalOutput) + ")";
-        }
-        out.print("  " + node + " -> " + arc.target + " [label=\"" + arc.label + outs + "\"");
-        if (arc.isFinal()) {
-          out.print(" arrowhead=tee");
-        }
-        if (arc.flag(BIT_TARGET_NEXT)) {
-          out.print(" color=blue");
-        }
-        out.println("];");
-        
-        if (arc.isLast()) {
-          break;
-        } else {
-          readNextArc(arc);
-        }
-      }
-    }
-    out.println("}");
-  }
-  
   public int getNodeCount() {
     // 1+ in order to count the -1 implicit final node
     return 1+nodeCount;
@@ -872,7 +710,7 @@ public class FST<T> {
   }
 
   // Non-static: writes to FST's byte[]
-  private class BytesWriter extends DataOutput {
+  class BytesWriter extends DataOutput {
     int posWrite;
 
     public BytesWriter() {
@@ -899,8 +737,13 @@ public class FST<T> {
     }
   }
 
+  final BytesReader getBytesReader(int pos) {
+    // TODO: maybe re-use via ThreadLocal?
+    return new BytesReader(pos);
+  }
+
   // Non-static: reads byte[] from FST
-  private class BytesReader extends DataInput {
+  class BytesReader extends DataInput {
     int pos;
 
     public BytesReader(int pos) {
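
For readers following the FST.java change above: the removed FST.get() variants give way to arc-at-a-time traversal (getFirstArc / findTargetArc / readNextArc plus END_LABEL). Below is a minimal exact-match lookup sketch over the new API, roughly what the removed get() methods did. The FSTLookupSketch class and its lookup() helper are illustrative names only and are not part of this commit; the sketch assumes it lives in the fst package so package-private members (outputs, isFinal(), nextFinalOutput) are reachable.

// Sketch only (not part of this commit): exact-match lookup over the new
// arc-based API, roughly what the removed FST.get() variants did.
package org.apache.lucene.util.automaton.fst;

import java.io.IOException;

final class FSTLookupSketch {
  /** Returns the output for the given input labels, or null if the FST
   *  does not accept the input.  Hypothetical helper for illustration. */
  static <T> T lookup(FST<T> fst, int[] input) throws IOException {
    final FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<T>());
    T output = fst.outputs.getNoOutput();
    for (int label : input) {
      // findTargetArc replaces arc in place; null means no arc with this label:
      if (fst.findTargetArc(label, arc, arc) == null) {
        return null;
      }
      output = fst.outputs.add(output, arc.output);
    }
    if (arc.isFinal()) {
      // Accepted: fold in the final output carried on the last arc:
      return fst.outputs.add(output, arc.nextFinalOutput);
    } else {
      // Input is only a prefix of accepted inputs:
      return null;
    }
  }
}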

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/util/automaton/fst/IntsRefFSTEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/util/automaton/fst/IntsRefFSTEnum.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/util/automaton/fst/IntsRefFSTEnum.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/util/automaton/fst/IntsRefFSTEnum.java Thu Jan 13 02:09:33 2011
@@ -17,9 +17,7 @@ package org.apache.lucene.util.automaton
  * limitations under the License.
  */
 
-import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.IntsRef;
-import org.apache.lucene.util.RamUsageEstimator;
 
 import java.io.IOException;
 
@@ -27,163 +25,23 @@ import java.io.IOException;
   * @lucene.experimental
 */
 
-public class IntsRefFSTEnum<T> {
-  private final FST<T> fst;
-
-  private IntsRef current = new IntsRef(10);
-  @SuppressWarnings("unchecked") private FST.Arc<T>[] arcs = new FST.Arc[10];
-  // outputs are cumulative
-  @SuppressWarnings("unchecked") private T[] output = (T[]) new Object[10];
-
-  private boolean lastFinal;
-  private boolean didEmpty;
-  private final T NO_OUTPUT;
+public final class IntsRefFSTEnum<T> extends FSTEnum<T> {
+  private final IntsRef current = new IntsRef(10);
   private final InputOutput<T> result = new InputOutput<T>();
+  private IntsRef target;
 
   public static class InputOutput<T> {
     public IntsRef input;
     public T output;
   }
-  
+
+  /** doFloor controls the behavior of advance: if
+   *  doFloor is true, advance positions to the biggest
+   *  term before target.  */
   public IntsRefFSTEnum(FST<T> fst) {
-    this.fst = fst;
+    super(fst);
     result.input = current;
-    NO_OUTPUT = fst.outputs.getNoOutput();
-  }
-
-  public void reset() {
-    lastFinal = false;
-    didEmpty = false;
-    current.length = 0;
-    result.output = NO_OUTPUT;
-  }
-
-  /** NOTE: target must be >= where we are already
-   *  positioned */
-  public InputOutput<T> advance(IntsRef target) throws IOException {
-
-    assert target.compareTo(current) >= 0;
-
-    //System.out.println("    advance len=" + target.length + " curlen=" + current.length);
-
-    // special case empty string
-    if (current.length == 0) {
-      if (target.length == 0) {
-        final T output = fst.getEmptyOutput();      
-        if (output != null) {
-          if (!didEmpty) {
-            current.length = 0;
-            lastFinal = true;
-            result.output = output;
-            didEmpty = true;
-          }
-          return result;
-        } else {
-          return next();
-        }
-      }
-      
-      if (fst.noNodes()) {
-        return null;
-      }
-    }
-
-    // TODO: possibly caller could/should provide common
-    // prefix length?  ie this work may be redundant if
-    // caller is in fact intersecting against its own
-    // automaton
-
-    // what prefix does target share w/ current
-    int idx = 0;
-    while (idx < current.length && idx < target.length) {
-      if (current.ints[idx] != target.ints[target.offset + idx]) {
-        break;
-      }
-      idx++;
-    }
-
-    //System.out.println("  shared " + idx);
-
-    FST.Arc<T> arc;
-    if (current.length == 0) {
-      // new enum (no seek/next yet)
-      arc = fst.readFirstArc(fst.getStartNode(), getArc(0));
-      //System.out.println("  new enum");
-    } else if (idx < current.length) {
-      // roll back to shared point
-      lastFinal = false;
-      current.length = idx;
-      arc = arcs[idx];
-      if (arc.isLast()) {
-        if (idx == 0) {
-          return null;
-        } else {
-          return next();
-        }
-      }
-      arc = fst.readNextArc(arc);
-    } else if (idx == target.length) {
-      // degenerate case -- seek to term we are already on
-      assert target.equals(current);
-      return result;
-    } else {
-      // current is a full prefix of target
-      if (lastFinal) {
-        arc = fst.readFirstArc(arcs[current.length-1].target, getArc(current.length));
-      } else {
-        return next();
-      }
-    }
-
-    lastFinal = false;
-
-    assert arc == arcs[current.length];
-    int targetLabel = target.ints[target.offset+current.length];
-
-    while(true) {
-      //System.out.println("    cycle len=" + current.length + " target=" + ((char) targetLabel) + " vs " + ((char) arc.label));
-      if (arc.label == targetLabel) {
-        grow();
-        current.ints[current.length] = arc.label;
-        appendOutput(arc.output);
-        current.length++;
-        grow();
-        if (current.length == target.length) {
-          result.output = output[current.length-1];
-          if (arc.isFinal()) {
-            // target is exact match
-            if (fst.hasArcs(arc.target)) {
-              // target is also a proper prefix of other terms
-              lastFinal = true;
-              appendFinalOutput(arc.nextFinalOutput);
-            }
-          } else {
-            // target is not a match but is a prefix of
-            // other terms
-            current.length--;
-            push();
-          }
-          return result;
-        } else if (!fst.hasArcs(arc.target)) {
-          // we only match a prefix of the target
-          return next();
-        } else {
-          targetLabel = target.ints[target.offset+current.length];
-          arc = fst.readFirstArc(arc.target, getArc(current.length));
-        }
-      } else if (arc.label > targetLabel) {
-        // we are now past the target
-        push();
-        return result;
-      } else if (arc.isLast()) {
-        if (current.length == 0) {
-          return null;
-        }
-        return next();
-      } else {
-        arc = fst.readNextArc(getArc(current.length));
-      }
-    }
+    current.offset = 1;
   }
 
   public InputOutput<T> current() {
@@ -192,124 +50,58 @@ public class IntsRefFSTEnum<T> {
 
   public InputOutput<T> next() throws IOException {
     //System.out.println("  enum.next");
-
-    if (current.length == 0) {
-      final T output = fst.getEmptyOutput();
-      if (output != null) {
-        if (!didEmpty) {
-          current.length = 0;
-          lastFinal = true;
-          result.output = output;
-          didEmpty = true;
-          return result;
-        } else {
-          lastFinal = false;
-        }
-      }
-      if (fst.noNodes()) {
-        return null;
-      }
-      fst.readFirstArc(fst.getStartNode(), getArc(0));
-      push();
-    } else if (lastFinal) {
-      lastFinal = false;
-      assert current.length > 0;
-      // resume pushing
-      fst.readFirstArc(arcs[current.length-1].target, getArc(current.length));
-      push();
-    } else {
-      //System.out.println("    pop/push");
-      pop();
-      if (current.length == 0) {
-        // enum done
-        return null;
-      } else {
-        current.length--;
-        fst.readNextArc(arcs[current.length]);
-        push();
-      }
-    }
-
-    return result;
+    doNext();
+    return setResult();
   }
 
-  private void grow() {
-    final int l = current.length + 1;
-    current.grow(l);
-    if (arcs.length < l) {
-      @SuppressWarnings("unchecked") final FST.Arc<T>[] newArcs =
-        new FST.Arc[ArrayUtil.oversize(l, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
-      System.arraycopy(arcs, 0, newArcs, 0, arcs.length);
-      arcs = newArcs;
-    }
-    if (output.length < l) {
-      @SuppressWarnings("unchecked") final T[] newOutput =
-        (T[]) new Object[ArrayUtil.oversize(l, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
-      System.arraycopy(output, 0, newOutput, 0, output.length);
-      output = newOutput;
-    }
-  }
-
-  private void appendOutput(T addedOutput) {
-    T newOutput;
-    if (current.length == 0) {
-      newOutput = addedOutput;
-    } else if (addedOutput == NO_OUTPUT) {
-      output[current.length] = output[current.length-1];
-      return;
+  /** Seeks to smallest term that's >= target. */
+  public InputOutput<T> seekCeil(IntsRef target) throws IOException {
+    this.target = target;
+    targetLength = target.length;
+    super.doSeekCeil();
+    return setResult();
+  }
+
+  /** Seeks to biggest term that's <= target. */
+  public InputOutput<T> seekFloor(IntsRef target) throws IOException {
+    this.target = target;
+    targetLength = target.length;
+    super.doSeekFloor();
+    return setResult();
+  }
+
+  @Override
+  protected int getTargetLabel() {
+    if (upto-1 == target.length) {
+      return FST.END_LABEL;
     } else {
-      newOutput = fst.outputs.add(output[current.length-1], addedOutput);
+      return target.ints[target.offset + upto - 1];
     }
-    output[current.length] = newOutput;
   }
 
-  private void appendFinalOutput(T addedOutput) {
-    if (current.length == 0) {
-      result.output = addedOutput;
-    } else {
-      result.output = fst.outputs.add(output[current.length-1], addedOutput);
-    }
+  @Override
+  protected int getCurrentLabel() {
+    // current.offset fixed at 1
+    return current.ints[upto];
   }
 
-  private void push() throws IOException {
-
-    FST.Arc<T> arc = arcs[current.length];
-    assert arc != null;
-
-    while(true) {
-      grow();
-      
-      current.ints[current.length] = arc.label;
-      appendOutput(arc.output);
-      //System.out.println("    push: append label=" + ((char) arc.label) + " output=" + fst.outputs.outputToString(arc.output));
-      current.length++;
-      grow();
-
-      if (!fst.hasArcs(arc.target)) {
-        break;
-      }
-
-      if (arc.isFinal()) {
-        appendFinalOutput(arc.nextFinalOutput);
-        lastFinal = true;
-        return;
-      }
-
-      arc = fst.readFirstArc(arc.target, getArc(current.length));
-    }
-    result.output = output[current.length-1];
+  @Override
+  protected void setCurrentLabel(int label) {
+    current.ints[upto] = label;
   }
 
-  private void pop() {
-    while (current.length > 0 && arcs[current.length-1].isLast()) {
-      current.length--;
-    }
+  @Override
+  protected void grow() {
+    current.grow(upto+1);
   }
 
-  private FST.Arc<T> getArc(int idx) {
-    if (arcs[idx] == null) {
-      arcs[idx] = new FST.Arc<T>();
+  private InputOutput<T> setResult() {
+    if (upto == 0) {
+      return null;
+    } else {
+      current.length = upto-1;
+      result.output = output[upto];
+      return result;
     }
-    return arcs[idx];
   }
 }
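
With the traversal now factored into the shared FSTEnum base class (added elsewhere in this commit), callers drive the enum through next(), seekCeil() and seekFloor() instead of the removed advance(). A minimal usage sketch follows, assuming an already-built FST<T>; the EnumSketch class, its method, and the example target labels are illustrative only, not part of this commit.

// Sketch only (not part of this commit): enumerate every accepted input of an
// FST, then position a fresh enum with seekCeil.
import java.io.IOException;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.automaton.fst.FST;
import org.apache.lucene.util.automaton.fst.IntsRefFSTEnum;

final class EnumSketch {
  static <T> void dumpAndSeek(FST<T> fst) throws IOException {
    IntsRefFSTEnum<T> fstEnum = new IntsRefFSTEnum<T>(fst);
    IntsRefFSTEnum.InputOutput<T> pair;
    while ((pair = fstEnum.next()) != null) {
      // pair.input/pair.output are only valid until the next call:
      System.out.println(pair.input + " -> " + pair.output);
    }

    // Seek on a fresh enum: smallest entry >= target (seekFloor would give
    // the biggest entry <= target instead).
    IntsRef target = new IntsRef(3);
    target.ints[0] = 102; target.ints[1] = 111; target.ints[2] = 111; // 'f','o','o'
    target.length = 3;
    IntsRefFSTEnum.InputOutput<T> ceil = new IntsRefFSTEnum<T>(fst).seekCeil(target);
    System.out.println("ceil = " + (ceil == null ? "none" : ceil.input + " -> " + ceil.output));
  }
}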

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/util/automaton/fst/NodeHash.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/util/automaton/fst/NodeHash.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/util/automaton/fst/NodeHash.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/util/automaton/fst/NodeHash.java Thu Jan 13 02:09:33 2011
@@ -35,7 +35,7 @@ final class NodeHash<T> {
   }
 
   private boolean nodesEqual(Builder.UnCompiledNode<T> node, int address) throws IOException {
-    fst.readFirstArc(address, scratchArc);
+    fst.readFirstRealArc(address, scratchArc);
     if (scratchArc.bytesPerArc != 0 && node.numArcs != scratchArc.numArcs) {
       return false;
     }
@@ -56,7 +56,7 @@ final class NodeHash<T> {
           return false;
         }
       }
-      fst.readNextArc(scratchArc);
+      fst.readNextRealArc(scratchArc);
     }
 
     return false;
@@ -89,7 +89,7 @@ final class NodeHash<T> {
     final int PRIME = 31;
     //System.out.println("hash frozen");
     int h = 0;
-    fst.readFirstArc(node, scratchArc);
+    fst.readFirstRealArc(node, scratchArc);
     while(true) {
       //System.out.println("  label=" + scratchArc.label + " target=" + scratchArc.target + " h=" + h + " output=" + fst.outputs.outputToString(scratchArc.output) + " next?=" + scratchArc.flag(4) + " final?=" + scratchArc.isFinal());
       h = PRIME * h + scratchArc.label;
@@ -102,7 +102,7 @@ final class NodeHash<T> {
       if (scratchArc.isLast()) {
         break;
       }
-      fst.readNextArc(scratchArc);
+      fst.readNextRealArc(scratchArc);
     }
     //System.out.println("  ret " + (h&Integer.MAX_VALUE));
     return h & Integer.MAX_VALUE;

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestSearch.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestSearch.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestSearch.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestSearch.java Thu Jan 13 02:09:33 2011
@@ -94,7 +94,7 @@ public class TestSearch extends LuceneTe
       }
       writer.close();
 
-      Searcher searcher = new IndexSearcher(directory, true);
+      IndexSearcher searcher = new IndexSearcher(directory, true);
 
       String[] queries = {
         "a b",

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestSearchForDuplicates.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestSearchForDuplicates.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestSearchForDuplicates.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestSearchForDuplicates.java Thu Jan 13 02:09:33 2011
@@ -102,7 +102,7 @@ public class TestSearchForDuplicates ext
       writer.close();
 
       // try a search without OR
-      Searcher searcher = new IndexSearcher(directory, true);
+      IndexSearcher searcher = new IndexSearcher(directory, true);
 
       QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, PRIORITY_FIELD, analyzer);
 
@@ -133,7 +133,7 @@ public class TestSearchForDuplicates ext
   }
 
 
-  private void printHits(PrintWriter out, ScoreDoc[] hits, Searcher searcher ) throws IOException {
+  private void printHits(PrintWriter out, ScoreDoc[] hits, IndexSearcher searcher) throws IOException {
     out.println(hits.length + " total results\n");
     for (int i = 0 ; i < hits.length; i++) {
       if ( i < 10 || (i > 94 && i < 105) ) {
@@ -143,11 +143,11 @@ public class TestSearchForDuplicates ext
     }
   }
 
-  private void checkHits(ScoreDoc[] hits, int expectedCount, Searcher searcher) throws IOException {
+  private void checkHits(ScoreDoc[] hits, int expectedCount, IndexSearcher searcher) throws IOException {
     assertEquals("total results", expectedCount, hits.length);
     for (int i = 0 ; i < hits.length; i++) {
       if ( i < 10 || (i > 94 && i < 105) ) {
-      Document d = searcher.doc(hits[i].doc);
+        Document d = searcher.doc(hits[i].doc);
         assertEquals("check " + i, String.valueOf(i), d.get(ID_FIELD));
       }
     }

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/document/TestDocument.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/document/TestDocument.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/document/TestDocument.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/document/TestDocument.java Thu Jan 13 02:09:33 2011
@@ -6,7 +6,6 @@ import org.apache.lucene.index.Term;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.ScoreDoc;
-import org.apache.lucene.search.Searcher;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;
@@ -157,7 +156,7 @@ public class TestDocument extends Lucene
     writer.addDocument(makeDocumentWithFields());
     IndexReader reader = writer.getReader();
     
-    Searcher searcher = new IndexSearcher(reader);
+    IndexSearcher searcher = new IndexSearcher(reader);
     
     // search for something that does exists
     Query query = new TermQuery(new Term("keyword", "test1"));
@@ -239,7 +238,7 @@ public class TestDocument extends Lucene
     writer.addDocument(doc);
     
     IndexReader reader = writer.getReader();
-    Searcher searcher = new IndexSearcher(reader);
+    IndexSearcher searcher = new IndexSearcher(reader);
     
     Query query = new TermQuery(new Term("keyword", "test"));
     

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/RandomIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/RandomIndexWriter.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/RandomIndexWriter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/RandomIndexWriter.java Thu Jan 13 02:09:33 2011
@@ -87,6 +87,7 @@ public class RandomIndexWriter implement
     if (LuceneTestCase.VERBOSE) {
       System.out.println("RIW config=" + w.getConfig());
       System.out.println("codec default=" + w.getConfig().getCodecProvider().getDefaultFieldCodec());
+      w.setInfoStream(System.out);
     }
   } 
 

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/Test2BTerms.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/Test2BTerms.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/Test2BTerms.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/Test2BTerms.java Thu Jan 13 02:09:33 2011
@@ -143,6 +143,12 @@ public class Test2BTerms extends LuceneT
             setMergePolicy(newLogMergePolicy(false, 10))
     );
 
+    MergePolicy mp = w.getConfig().getMergePolicy();
+    if (mp instanceof LogByteSizeMergePolicy) {
+      // 1 petabyte:
+      ((LogByteSizeMergePolicy) mp).setMaxMergeMB(1024*1024*1024);
+    }
+
     Document doc = new Document();
     Field field = new Field("field", new MyTokenStream(TERMS_PER_DOC));
     field.setOmitTermFreqAndPositions(true);

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java Thu Jan 13 02:09:33 2011
@@ -360,7 +360,7 @@ public class TestBackwardsCompatibility 
 
     // First document should be #21 since it's norm was
     // increased:
-    Document d = searcher.doc(hits[0].doc);
+    Document d = searcher.getIndexReader().document(hits[0].doc);
     assertEquals("didn't get the right document first", "21", d.get("id"));
 
     doTestHits(hits, 34, searcher.getIndexReader());
@@ -408,7 +408,7 @@ public class TestBackwardsCompatibility 
     // make sure searching sees right # hits
     IndexSearcher searcher = new IndexSearcher(dir, true);
     ScoreDoc[] hits = searcher.search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs;
-    Document d = searcher.doc(hits[0].doc);
+    Document d = searcher.getIndexReader().document(hits[0].doc);
     assertEquals("wrong first document", "21", d.get("id"));
     doTestHits(hits, 44, searcher.getIndexReader());
     searcher.close();

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestCodecs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestCodecs.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestCodecs.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestCodecs.java Thu Jan 13 02:09:33 2011
@@ -23,14 +23,15 @@ import java.util.HashSet;
 
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
 import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.Field;
 import org.apache.lucene.index.codecs.CodecProvider;
 import org.apache.lucene.index.codecs.FieldsConsumer;
 import org.apache.lucene.index.codecs.FieldsProducer;
 import org.apache.lucene.index.codecs.PostingsConsumer;
 import org.apache.lucene.index.codecs.TermsConsumer;
 import org.apache.lucene.index.codecs.mocksep.MockSepCodec;
+import org.apache.lucene.index.codecs.preflex.PreFlexCodec;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.PhraseQuery;
@@ -40,6 +41,7 @@ import org.apache.lucene.store.Directory
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.Version;
+import org.apache.lucene.util._TestUtil;
 
 // TODO: test multiple codecs here?
 
@@ -68,43 +70,6 @@ public class TestCodecs extends LuceneTe
   private final static int DOC_FREQ_RAND = 500; // must be > 16 to test skipping
   private final static int TERM_DOC_FREQ_RAND = 20;
 
-  // start is inclusive and end is exclusive
-  public int nextInt(final int start, final int end) {
-    return start + random.nextInt(end-start);
-  }
-
-  private int nextInt(final int lim) {
-    return random.nextInt(lim);
-  }
-
-  char[] getRandomText() {
-
-    final int len = 1+this.nextInt(10);
-    final char[] buffer = new char[len+1];
-    for(int i=0;i<len;i++) {
-      buffer[i] = (char) this.nextInt(97, 123);
-      /*
-      final int t = nextInt(5);
-      if (0 == t && i < len-1) {
-        // Make a surrogate pair
-        // High surrogate
-        buffer[i++] = (char) nextInt(0xd800, 0xdc00);
-        // Low surrogate
-        buffer[i] = (char) nextInt(0xdc00, 0xe000);
-      } else if (t <= 1)
-        buffer[i] = (char) nextInt(0x80);
-      else if (2 == t)
-        buffer[i] = (char) nextInt(0x80, 0x800);
-      else if (3 == t)
-        buffer[i] = (char) nextInt(0x800, 0xd800);
-      else
-        buffer[i] = (char) nextInt(0xe000, 0xffff);
-    */
-    }
-    buffer[len] = 0xffff;
-    return buffer;
-  }
-
   class FieldData implements Comparable {
     final FieldInfo fieldInfo;
     final TermData[] terms;
@@ -163,7 +128,7 @@ public class TestCodecs extends LuceneTe
     }
 
     public int compareTo(final Object o) {
-      return text2.compareTo(((TermData) o).text2);
+      return text.compareTo(((TermData) o).text);
     }
 
     public void write(final TermsConsumer termsConsumer) throws Throwable {
@@ -191,7 +156,7 @@ public class TestCodecs extends LuceneTe
   final private static String SEGMENT = "0";
 
   TermData[] makeRandomTerms(final boolean omitTF, final boolean storePayloads) {
-    final int numTerms = 1+this.nextInt(NUM_TERMS_RAND);
+    final int numTerms = 1+random.nextInt(NUM_TERMS_RAND);
     //final int numTerms = 2;
     final TermData[] terms = new TermData[numTerms];
 
@@ -200,18 +165,16 @@ public class TestCodecs extends LuceneTe
     for(int i=0;i<numTerms;i++) {
 
       // Make term text
-      char[] text;
       String text2;
       while(true) {
-        text = this.getRandomText();
-        text2 = new String(text, 0, text.length-1);
-        if (!termsSeen.contains(text2)) {
+        text2 = _TestUtil.randomUnicodeString(random);
+        if (!termsSeen.contains(text2) && !text2.endsWith(".")) {
           termsSeen.add(text2);
           break;
         }
       }
 
-      final int docFreq = 1+this.nextInt(DOC_FREQ_RAND);
+      final int docFreq = 1+random.nextInt(DOC_FREQ_RAND);
       final int[] docs = new int[docFreq];
       PositionData[][] positions;
 
@@ -222,21 +185,21 @@ public class TestCodecs extends LuceneTe
 
       int docID = 0;
       for(int j=0;j<docFreq;j++) {
-        docID += this.nextInt(1, 10);
+        docID += _TestUtil.nextInt(random, 1, 10);
         docs[j] = docID;
 
         if (!omitTF) {
-          final int termFreq = 1+this.nextInt(TERM_DOC_FREQ_RAND);
+          final int termFreq = 1+random.nextInt(TERM_DOC_FREQ_RAND);
           positions[j] = new PositionData[termFreq];
           int position = 0;
           for(int k=0;k<termFreq;k++) {
-            position += this.nextInt(1, 10);
+            position += _TestUtil.nextInt(random, 1, 10);
 
             final BytesRef payload;
-            if (storePayloads && this.nextInt(4) == 0) {
-              final byte[] bytes = new byte[1+this.nextInt(5)];
+            if (storePayloads && random.nextInt(4) == 0) {
+              final byte[] bytes = new byte[1+random.nextInt(5)];
               for(int l=0;l<bytes.length;l++) {
-                bytes[l] = (byte) this.nextInt(255);
+                bytes[l] = (byte) random.nextInt(255);
               }
               payload = new BytesRef(bytes);
             } else {
@@ -269,7 +232,7 @@ public class TestCodecs extends LuceneTe
     final FieldData[] fields = new FieldData[] {field};
 
     final Directory dir = newDirectory();
-    this.write(fieldInfos, dir, fields);
+    this.write(fieldInfos, dir, fields, true);
     final SegmentInfo si = new SegmentInfo(SEGMENT, 10000, dir, false, true, SegmentCodecs.build(fieldInfos, CodecProvider.getDefault()), fieldInfos.hasVectors());
     si.setHasProx(false);
 
@@ -317,19 +280,25 @@ public class TestCodecs extends LuceneTe
 
     final Directory dir = newDirectory();
 
-    this.write(fieldInfos, dir, fields);
+    if (VERBOSE) {
+      System.out.println("TEST: now write postings");
+    }
+    this.write(fieldInfos, dir, fields, false);
     final SegmentInfo si = new SegmentInfo(SEGMENT, 10000, dir, false, true, SegmentCodecs.build(fieldInfos, CodecProvider.getDefault()), fieldInfos.hasVectors());
 
+    if (VERBOSE) {
+      System.out.println("TEST: now read postings");
+    }
     final FieldsProducer terms = si.getSegmentCodecs().codec().fieldsProducer(new SegmentReadState(dir, si, fieldInfos, 1024, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR));
 
     final Verify[] threads = new Verify[NUM_TEST_THREADS-1];
     for(int i=0;i<NUM_TEST_THREADS-1;i++) {
-      threads[i] = new Verify(fields, terms);
+      threads[i] = new Verify(si, fields, terms);
       threads[i].setDaemon(true);
       threads[i].start();
     }
 
-    new Verify(fields, terms).run();
+    new Verify(si, fields, terms).run();
 
     for(int i=0;i<NUM_TEST_THREADS-1;i++) {
       threads[i].join();
@@ -409,11 +378,13 @@ public class TestCodecs extends LuceneTe
   private class Verify extends Thread {
     final Fields termsDict;
     final FieldData[] fields;
+    final SegmentInfo si;
     volatile boolean failed;
 
-    Verify(final FieldData[] fields, final Fields termsDict) {
+    Verify(final SegmentInfo si, final FieldData[] fields, final Fields termsDict) {
       this.fields = fields;
       this.termsDict = termsDict;
+      this.si = si;
     }
 
     @Override
@@ -446,7 +417,7 @@ public class TestCodecs extends LuceneTe
         assertEquals(positions[i].pos, pos);
         if (positions[i].payload != null) {
           assertTrue(posEnum.hasPayload());
-          if (TestCodecs.this.nextInt(3) < 2) {
+          if (TestCodecs.random.nextInt(3) < 2) {
             // Verify the payload bytes
             final BytesRef otherPayload = posEnum.getPayload();
             assertTrue("expected=" + positions[i].payload.toString() + " got=" + otherPayload.toString(), positions[i].payload.equals(otherPayload));
@@ -460,22 +431,28 @@ public class TestCodecs extends LuceneTe
     public void _run() throws Throwable {
 
       for(int iter=0;iter<NUM_TEST_ITER;iter++) {
-        final FieldData field = fields[TestCodecs.this.nextInt(fields.length)];
+        final FieldData field = fields[TestCodecs.random.nextInt(fields.length)];
         final TermsEnum termsEnum = termsDict.terms(field.fieldInfo.name).iterator();
 
-        // Test straight enum of the terms:
+        if (si.getSegmentCodecs().codecs[field.fieldInfo.codecId] instanceof PreFlexCodec) {
+          // code below expects unicode sort order
+          continue;
+        }
+
         int upto = 0;
+        // Test straight enum of the terms:
         while(true) {
           final BytesRef term = termsEnum.next();
           if (term == null) {
             break;
           }
-          assertTrue(new BytesRef(field.terms[upto++].text2).bytesEquals(term));
+          final BytesRef expected = new BytesRef(field.terms[upto++].text2);
+          assertTrue("expected=" + expected + " vs actual " + term, expected.bytesEquals(term));
         }
         assertEquals(upto, field.terms.length);
 
         // Test random seek:
-        TermData term = field.terms[TestCodecs.this.nextInt(field.terms.length)];
+        TermData term = field.terms[TestCodecs.random.nextInt(field.terms.length)];
         TermsEnum.SeekStatus status = termsEnum.seek(new BytesRef(term.text2));
         assertEquals(status, TermsEnum.SeekStatus.FOUND);
         assertEquals(term.docs.length, termsEnum.docFreq());
@@ -486,7 +463,7 @@ public class TestCodecs extends LuceneTe
         }
 
         // Test random seek by ord:
-        final int idx = TestCodecs.this.nextInt(field.terms.length);
+        final int idx = TestCodecs.random.nextInt(field.terms.length);
         term = field.terms[idx];
         try {
           status = termsEnum.seek(idx);
@@ -507,8 +484,7 @@ public class TestCodecs extends LuceneTe
 
         // Test seek to non-existent terms:
         for(int i=0;i<100;i++) {
-          final char[] text = TestCodecs.this.getRandomText();
-          final String text2 = new String(text, 0, text.length-1) + ".";
+          final String text2 = _TestUtil.randomUnicodeString(random) + ".";
           status = termsEnum.seek(new BytesRef(text2));
           assertTrue(status == TermsEnum.SeekStatus.NOT_FOUND ||
                      status == TermsEnum.SeekStatus.END);
@@ -533,7 +509,7 @@ public class TestCodecs extends LuceneTe
         // Seek to non-existent empty-string term
         status = termsEnum.seek(new BytesRef(""));
         assertNotNull(status);
-        assertEquals(status, TermsEnum.SeekStatus.NOT_FOUND);
+        //assertEquals(TermsEnum.SeekStatus.NOT_FOUND, status);
 
         // Make sure we're now pointing to first term
         assertTrue(termsEnum.term().bytesEquals(new BytesRef(field.terms[0].text2)));
@@ -543,7 +519,7 @@ public class TestCodecs extends LuceneTe
         upto = 0;
         do {
           term = field.terms[upto];
-          if (TestCodecs.this.nextInt(3) == 1) {
+          if (TestCodecs.random.nextInt(3) == 1) {
             final DocsEnum docs = termsEnum.docs(null, null);
             final DocsAndPositionsEnum postings = termsEnum.docsAndPositions(null, null);
 
@@ -558,10 +534,10 @@ public class TestCodecs extends LuceneTe
               // Maybe skip:
               final int left = term.docs.length-upto2;
               int doc;
-              if (TestCodecs.this.nextInt(3) == 1 && left >= 1) {
-                final int inc = 1+TestCodecs.this.nextInt(left-1);
+              if (TestCodecs.random.nextInt(3) == 1 && left >= 1) {
+                final int inc = 1+TestCodecs.random.nextInt(left-1);
                 upto2 += inc;
-                if (TestCodecs.this.nextInt(2) == 1) {
+                if (TestCodecs.random.nextInt(2) == 1) {
                   doc = docsEnum.advance(term.docs[upto2]);
                   assertEquals(term.docs[upto2], doc);
                 } else {
@@ -586,7 +562,7 @@ public class TestCodecs extends LuceneTe
               assertEquals(term.docs[upto2], doc);
               if (!field.omitTF) {
                 assertEquals(term.positions[upto2].length, docsEnum.freq());
-                if (TestCodecs.this.nextInt(2) == 1) {
+                if (TestCodecs.random.nextInt(2) == 1) {
                   this.verifyPositions(term.positions[upto2], postings);
                 }
               }
@@ -603,15 +579,19 @@ public class TestCodecs extends LuceneTe
     }
   }
 
-  private void write(final FieldInfos fieldInfos, final Directory dir, final FieldData[] fields) throws Throwable {
+  private void write(final FieldInfos fieldInfos, final Directory dir, final FieldData[] fields, boolean allowPreFlex) throws Throwable {
 
-    final int termIndexInterval = this.nextInt(13, 27);
+    final int termIndexInterval = _TestUtil.nextInt(random, 13, 27);
     final SegmentCodecs codecInfo = SegmentCodecs.build(fieldInfos, CodecProvider.getDefault());
     final SegmentWriteState state = new SegmentWriteState(null, dir, SEGMENT, fieldInfos, 10000, termIndexInterval, codecInfo);
 
     final FieldsConsumer consumer = state.segmentCodecs.codec().fieldsConsumer(state);
     Arrays.sort(fields);
     for (final FieldData field : fields) {
+      if (!allowPreFlex && codecInfo.codecs[field.fieldInfo.codecId] instanceof PreFlexCodec) {
+        // code below expects unicode sort order
+        continue;
+      }
       field.write(consumer);
     }
     consumer.close();

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestDoc.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestDoc.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestDoc.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestDoc.java Thu Jan 13 02:09:33 2011
@@ -201,11 +201,12 @@ public class TestDoc extends LuceneTestC
       r2.close();
       
       final SegmentInfo info = new SegmentInfo(merged, si1.docCount + si2.docCount, si1.dir,
-                                               useCompoundFile, merger.fieldInfos().hasProx(), merger.getSegmentCodecs(),
+                                               false, merger.fieldInfos().hasProx(), merger.getSegmentCodecs(),
                                                merger.fieldInfos().hasVectors());
       
       if (useCompoundFile) {
         Collection<String> filesToDelete = merger.createCompoundFile(merged + ".cfs", info);
+        info.setUseCompoundFile(true);
         for (final String fileToDelete : filesToDelete) 
           si1.dir.deleteFile(fileToDelete);
       }

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestFlex.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestFlex.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestFlex.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestFlex.java Thu Jan 13 02:09:33 2011
@@ -71,7 +71,11 @@ public class TestFlex extends LuceneTest
     IndexReader r = w.getReader();
     TermsEnum terms = r.getSequentialSubReaders()[0].fields().terms("f").iterator();
     assertTrue(terms.next() != null);
-    assertEquals(0, terms.ord());
+    try {
+      assertEquals(0, terms.ord());
+    } catch (UnsupportedOperationException uoe) {
+      // ok -- codec is not required to support this op
+    }
     r.close();
     w.close();
     d.close();
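
The hunk above wraps terms.ord() in a try/catch because a codec is not required to support term ordinals. A minimal sketch of the same guard, assuming the TermsEnum API used in this branch; the helper class and method name below are hypothetical:

    import java.io.IOException;

    import org.apache.lucene.index.TermsEnum;

    // Hypothetical helper, not part of the patch: returns the current term's ord,
    // or -1 when the codec does not support ordinals.
    final class TermOrdHelper {
      static long ordOrMinusOne(TermsEnum terms) throws IOException {
        try {
          return terms.ord();                  // may throw for ord-less codecs
        } catch (UnsupportedOperationException uoe) {
          return -1;                           // ok -- codec need not support ord()
        }
      }
    }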

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexReader.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexReader.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexReader.java Thu Jan 13 02:09:33 2011
@@ -981,6 +981,7 @@ public class TestIndexReader extends Luc
           // new IndexFileDeleter, have it delete
           // unreferenced files, then verify that in fact
           // no files were deleted:
+          IndexWriter.unlock(dir);
           TestIndexWriter.assertNoUnreferencedFiles(dir, "reader.close() failed to delete unreferenced files");
 
           // Finally, verify index is not corrupt, and, if
@@ -1333,8 +1334,8 @@ public class TestIndexReader extends Luc
       it1 = fields1.iterator();
       while (it1.hasNext()) {
         String curField = it1.next();
-        byte[] norms1 = index1.norms(curField);
-        byte[] norms2 = index2.norms(curField);
+        byte[] norms1 = MultiNorms.norms(index1, curField);
+        byte[] norms2 = MultiNorms.norms(index2, curField);
         if (norms1 != null && norms2 != null)
         {
           assertEquals(norms1.length, norms2.length);
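
These hunks fetch norms through MultiNorms so the lookups keep working on composite (multi-segment) readers. A minimal sketch of the same pattern, assuming the MultiNorms.norms(IndexReader, String) helper used throughout this patch; the comparison class is hypothetical:

    import java.io.IOException;
    import java.util.Arrays;

    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.MultiNorms;

    // Hypothetical helper, not part of the patch: compares a field's norms
    // across two readers via MultiNorms instead of IndexReader.norms().
    final class NormsComparison {
      static boolean sameNorms(IndexReader r1, IndexReader r2, String field) throws IOException {
        byte[] n1 = MultiNorms.norms(r1, field);
        byte[] n2 = MultiNorms.norms(r2, field);
        if (n1 == null || n2 == null) {
          return n1 == n2;                     // both absent counts as equal
        }
        return Arrays.equals(n1, n2);
      }
    }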

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexReaderClone.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexReaderClone.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexReaderClone.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexReaderClone.java Thu Jan 13 02:09:33 2011
@@ -272,13 +272,13 @@ public class TestIndexReaderClone extend
    * @throws Exception
    */
   private void performDefaultTests(IndexReader r1) throws Exception {
-    float norm1 = Similarity.getDefault().decodeNormValue(r1.norms("field1")[4]);
+    float norm1 = Similarity.getDefault().decodeNormValue(MultiNorms.norms(r1, "field1")[4]);
 
     IndexReader pr1Clone = (IndexReader) r1.clone();
     pr1Clone.deleteDocument(10);
     pr1Clone.setNorm(4, "field1", 0.5f);
-    assertTrue(Similarity.getDefault().decodeNormValue(r1.norms("field1")[4]) == norm1);
-    assertTrue(Similarity.getDefault().decodeNormValue(pr1Clone.norms("field1")[4]) != norm1);
+    assertTrue(Similarity.getDefault().decodeNormValue(MultiNorms.norms(r1, "field1")[4]) == norm1);
+    assertTrue(Similarity.getDefault().decodeNormValue(MultiNorms.norms(pr1Clone, "field1")[4]) != norm1);
 
     final Bits delDocs = MultiFields.getDeletedDocs(r1);
     assertTrue(delDocs == null || !delDocs.get(10));
@@ -428,7 +428,7 @@ public class TestIndexReaderClone extend
     IndexReader orig = IndexReader.open(dir1, false);
     orig.setNorm(1, "field1", 17.0f);
     final byte encoded = Similarity.getDefault().encodeNormValue(17.0f);
-    assertEquals(encoded, orig.norms("field1")[1]);
+    assertEquals(encoded, MultiNorms.norms(orig, "field1")[1]);
 
     // the cloned segmentreader should have 2 references, 1 to itself, and 1 to
     // the original segmentreader
@@ -437,7 +437,7 @@ public class TestIndexReaderClone extend
     clonedReader.close();
 
     IndexReader r = IndexReader.open(dir1, false);
-    assertEquals(encoded, r.norms("field1")[1]);
+    assertEquals(encoded, MultiNorms.norms(r, "field1")[1]);
     r.close();
     dir1.close();
   }

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexReaderCloneNorms.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexReaderCloneNorms.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexReaderCloneNorms.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexReaderCloneNorms.java Thu Jan 13 02:09:33 2011
@@ -42,8 +42,9 @@ public class TestIndexReaderCloneNorms e
 
   private class SimilarityOne extends DefaultSimilarity {
     @Override
-    public float lengthNorm(String fieldName, int numTerms) {
-      return 1;
+    public float computeNorm(String fieldName, FieldInvertState state) {
+      // Disable length norm
+      return state.getBoost();
     }
   }
 
@@ -272,7 +273,7 @@ public class TestIndexReaderCloneNorms e
   private void verifyIndex(IndexReader ir) throws IOException {
     for (int i = 0; i < NUM_FIELDS; i++) {
       String field = "f" + i;
-      byte b[] = ir.norms(field);
+      byte b[] = MultiNorms.norms(ir, field);
       assertEquals("number of norms mismatches", numDocNorms, b.length);
       ArrayList<Float> storedNorms = (i == 1 ? modifiedNorms : norms);
       for (int j = 0; j < b.length; j++) {
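
The SimilarityOne change above (and the matching ones in TestNorms and TestOmitTf further down) replaces lengthNorm(String, int) with computeNorm(String, FieldInvertState). A minimal sketch of a Similarity that disables length normalization under the new signature; returning only the field boost reproduces the old "return 1" behavior when no boost is set. The class name is hypothetical:

    import org.apache.lucene.index.FieldInvertState;
    import org.apache.lucene.search.DefaultSimilarity;

    // Sketch only: ignore the field length entirely and keep just the boost.
    class NoLengthNormSimilarity extends DefaultSimilarity {
      @Override
      public float computeNorm(String field, FieldInvertState state) {
        return state.getBoost();               // no dependence on state.getLength()
      }
    }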

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java Thu Jan 13 02:09:33 2011
@@ -43,11 +43,11 @@ import org.apache.lucene.analysis.tokena
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.Fieldable;
-import org.apache.lucene.index.codecs.CodecProvider;
 import org.apache.lucene.document.Field.Index;
 import org.apache.lucene.document.Field.Store;
 import org.apache.lucene.document.Field.TermVector;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
+import org.apache.lucene.index.codecs.CodecProvider;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.search.IndexSearcher;
@@ -157,7 +157,7 @@ public class TestIndexWriter extends Luc
       String[] startFiles = dir.listAll();
       SegmentInfos infos = new SegmentInfos();
       infos.read(dir);
-      new IndexFileDeleter(dir, new KeepOnlyLastCommitDeletionPolicy(), infos, null, CodecProvider.getDefault());
+      new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())).rollback();
       String[] endFiles = dir.listAll();
 
       Arrays.sort(startFiles);

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexWriterDelete.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexWriterDelete.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexWriterDelete.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexWriterDelete.java Thu Jan 13 02:09:33 2011
@@ -832,8 +832,8 @@ public class TestIndexWriterDelete exten
       }
     }
 
-    TestIndexWriter.assertNoUnreferencedFiles(dir, "docsWriter.abort() failed to delete unreferenced files");
     modifier.close();
+    TestIndexWriter.assertNoUnreferencedFiles(dir, "docsWriter.abort() failed to delete unreferenced files");
     dir.close();
   }
 

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestLazyProxSkipping.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestLazyProxSkipping.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestLazyProxSkipping.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestLazyProxSkipping.java Thu Jan 13 02:09:33 2011
@@ -27,7 +27,6 @@ import org.apache.lucene.document.Field;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.PhraseQuery;
 import org.apache.lucene.search.ScoreDoc;
-import org.apache.lucene.search.Searcher;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.MockDirectoryWrapper;
@@ -40,7 +39,7 @@ import org.apache.lucene.util.BytesRef;
  *
  */
 public class TestLazyProxSkipping extends LuceneTestCase {
-    private Searcher searcher;
+    private IndexSearcher searcher;
     private int seeksCounter = 0;
     
     private String field = "tokens";

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestNRTThreads.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestNRTThreads.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestNRTThreads.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestNRTThreads.java Thu Jan 13 02:09:33 2011
@@ -25,6 +25,9 @@ import java.util.List;
 import java.util.Set;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.TimeUnit;
 
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
@@ -38,6 +41,7 @@ import org.apache.lucene.search.TermQuer
 import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.store.FSDirectory;
 import org.apache.lucene.store.MockDirectoryWrapper;
+import org.apache.lucene.util.NamedThreadFactory;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LineFileDocs;
@@ -61,7 +65,7 @@ public class TestNRTThreads extends Luce
       CodecProvider.getDefault().setDefaultFieldCodec("Standard");
     }
 
-    final LineFileDocs docs = new LineFileDocs(true);
+    final LineFileDocs docs = new LineFileDocs(random);
     final File tempDir = _TestUtil.getTempDir("nrtopenfiles");
     final MockDirectoryWrapper dir = new MockDirectoryWrapper(random, FSDirectory.open(tempDir));
     final IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer());
@@ -191,6 +195,8 @@ public class TestNRTThreads extends Luce
     // silly starting guess:
     final AtomicInteger totTermCount = new AtomicInteger(100);
 
+    final ExecutorService es = Executors.newCachedThreadPool(new NamedThreadFactory("NRT search threads"));
+
     while(System.currentTimeMillis() < stopTime && !failed.get()) {
       if (random.nextBoolean()) {
         if (VERBOSE) {
@@ -228,7 +234,7 @@ public class TestNRTThreads extends Luce
 
       if (r.numDocs() > 0) {
 
-        final IndexSearcher s = new IndexSearcher(r);
+        final IndexSearcher s = new IndexSearcher(r, es);
 
         // run search threads
         final long searchStopTime = System.currentTimeMillis() + 500;
@@ -302,6 +308,9 @@ public class TestNRTThreads extends Luce
       }
     }
 
+    es.shutdown();
+    es.awaitTermination(1, TimeUnit.SECONDS);
+
     if (VERBOSE) {
       System.out.println("TEST: all searching done [" + (System.currentTimeMillis()-t0) + " ms]");
     }
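
The additions above give the NRT searches a shared thread pool and drain it once searching is done. A minimal sketch of that lifecycle, assuming the IndexSearcher(IndexReader, ExecutorService) constructor and NamedThreadFactory used in the patch; the wrapper class is hypothetical:

    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Executors;
    import java.util.concurrent.TimeUnit;

    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.util.NamedThreadFactory;

    // Hypothetical wrapper, not part of the patch: search with a shared pool,
    // then shut the pool down before returning.
    final class ParallelSearchSketch {
      static void searchThenShutdown(IndexReader r) throws InterruptedException {
        ExecutorService es = Executors.newCachedThreadPool(new NamedThreadFactory("NRT search threads"));
        try {
          IndexSearcher s = new IndexSearcher(r, es);   // per-segment searches may run on the pool
          // ... run queries against s here ...
        } finally {
          es.shutdown();
          es.awaitTermination(1, TimeUnit.SECONDS);
        }
      }
    }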

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestNorms.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestNorms.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestNorms.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestNorms.java Thu Jan 13 02:09:33 2011
@@ -41,8 +41,9 @@ public class TestNorms extends LuceneTes
 
   private class SimilarityOne extends DefaultSimilarity {
     @Override
-    public float lengthNorm(String fieldName, int numTerms) {
-      return 1;
+    public float computeNorm(String fieldName, FieldInvertState state) {
+      // Disable length norm
+      return state.getBoost();
     }
   }
 
@@ -179,7 +180,7 @@ public class TestNorms extends LuceneTes
     IndexReader ir = IndexReader.open(dir, false);
     for (int i = 0; i < NUM_FIELDS; i++) {
       String field = "f"+i;
-      byte b[] = ir.norms(field);
+      byte b[] = MultiNorms.norms(ir, field);
       assertEquals("number of norms mismatches",numDocNorms,b.length);
       ArrayList<Float> storedNorms = (i==1 ? modifiedNorms : norms);
       for (int j = 0; j < b.length; j++) {
@@ -236,4 +237,52 @@ public class TestNorms extends LuceneTes
     return norm;
   }
   
+  class CustomNormEncodingSimilarity extends DefaultSimilarity {
+    @Override
+    public byte encodeNormValue(float f) {
+      return (byte) f;
+    }
+    
+    @Override
+    public float decodeNormValue(byte b) {
+      return (float) b;
+    }
+
+    @Override
+    public float computeNorm(String field, FieldInvertState state) {
+      return (float) state.getLength();
+    }
+  }
+  
+  // LUCENE-1260
+  public void testCustomEncoder() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer());
+    config.setSimilarity(new CustomNormEncodingSimilarity());
+    RandomIndexWriter writer = new RandomIndexWriter(random, dir, config);
+    Document doc = new Document();
+    Field foo = newField("foo", "", Field.Store.NO, Field.Index.ANALYZED);
+    Field bar = newField("bar", "", Field.Store.NO, Field.Index.ANALYZED);
+    doc.add(foo);
+    doc.add(bar);
+    
+    for (int i = 0; i < 100; i++) {
+      bar.setValue("singleton");
+      writer.addDocument(doc);
+    }
+    
+    IndexReader reader = writer.getReader();
+    writer.close();
+    
+    byte fooNorms[] = MultiNorms.norms(reader, "foo");
+    for (int i = 0; i < reader.maxDoc(); i++)
+      assertEquals(0, fooNorms[i]);
+    
+    byte barNorms[] = MultiNorms.norms(reader, "bar");
+    for (int i = 0; i < reader.maxDoc(); i++)
+      assertEquals(1, barNorms[i]);
+    
+    reader.close();
+    dir.close();
+  }
 }

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestOmitTf.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestOmitTf.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestOmitTf.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestOmitTf.java Thu Jan 13 02:09:33 2011
@@ -35,13 +35,13 @@ import org.apache.lucene.search.Explanat
 public class TestOmitTf extends LuceneTestCase {
   
   public static class SimpleSimilarity extends Similarity {
-    @Override public float lengthNorm(String field, int numTerms) { return 1.0f; }
+    @Override public float computeNorm(String field, FieldInvertState state) { return state.getBoost(); }
     @Override public float queryNorm(float sumOfSquaredWeights) { return 1.0f; }
     @Override public float tf(float freq) { return freq; }
     @Override public float sloppyFreq(int distance) { return 2.0f; }
     @Override public float idf(int docFreq, int numDocs) { return 1.0f; }
     @Override public float coord(int overlap, int maxOverlap) { return 1.0f; }
-    @Override public IDFExplanation idfExplain(Collection<Term> terms, Searcher searcher) throws IOException {
+    @Override public IDFExplanation idfExplain(Collection<Term> terms, IndexSearcher searcher) throws IOException {
       return new IDFExplanation() {
         @Override
         public float getIdf() {
@@ -279,7 +279,7 @@ public class TestOmitTf extends LuceneTe
     /*
      * Verify the index
      */         
-    Searcher searcher = new IndexSearcher(dir, true);
+    IndexSearcher searcher = new IndexSearcher(dir, true);
     searcher.setSimilarity(new SimpleSimilarity());
         
     Term a = new Term("noTf", term);

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestPerFieldCodecSupport.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestPerFieldCodecSupport.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestPerFieldCodecSupport.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestPerFieldCodecSupport.java Thu Jan 13 02:09:33 2011
@@ -56,6 +56,7 @@ public class TestPerFieldCodecSupport ex
     conf.setMergePolicy(logByteSizeMergePolicy);
 
     final IndexWriter writer = new IndexWriter(dir, conf);
+    writer.setInfoStream(VERBOSE ? System.out : null);
     return writer;
   }
 
@@ -110,12 +111,15 @@ public class TestPerFieldCodecSupport ex
   }
 
   /*
-   * Test is hetrogenous index segements are merge sucessfully
+   * Test that heterogeneous index segments are merged successfully
    */
   @Test
   public void testChangeCodecAndMerge() throws IOException {
     Directory dir = newDirectory();
     CodecProvider provider = new MockCodecProvider();
+    if (VERBOSE) {
+      System.out.println("TEST: make new index");
+    }
     IndexWriterConfig iwconf = newIndexWriterConfig(TEST_VERSION_CURRENT,
              new MockAnalyzer()).setOpenMode(OpenMode.CREATE).setCodecProvider(provider);
     iwconf.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
@@ -125,6 +129,9 @@ public class TestPerFieldCodecSupport ex
     addDocs(writer, 10);
     writer.commit();
     assertQuery(new Term("content", "aaa"), dir, 10, provider);
+    if (VERBOSE) {
+      System.out.println("TEST: addDocs3");
+    }
     addDocs3(writer, 10);
     writer.commit();
     writer.close();
@@ -144,6 +151,9 @@ public class TestPerFieldCodecSupport ex
     iwconf.setCodecProvider(provider);
     writer = newWriter(dir, iwconf);
     // swap in new codec for currently written segments
+    if (VERBOSE) {
+      System.out.println("TEST: add docs w/ Standard codec for content field");
+    }
     addDocs2(writer, 10);
     writer.commit();
     Codec origContentCodec = provider.lookup("MockSep");
@@ -152,9 +162,12 @@ public class TestPerFieldCodecSupport ex
         origContentCodec, origContentCodec, newContentCodec);
     assertEquals(30, writer.maxDoc());
     assertQuery(new Term("content", "bbb"), dir, 10, provider);
-    assertQuery(new Term("content", "ccc"), dir, 10, provider);
+    assertQuery(new Term("content", "ccc"), dir, 10, provider);
     assertQuery(new Term("content", "aaa"), dir, 10, provider);
 
+    if (VERBOSE) {
+      System.out.println("TEST: add more docs w/ new codec");
+    }
     addDocs2(writer, 10);
     writer.commit();
     assertQuery(new Term("content", "ccc"), dir, 10, provider);
@@ -162,6 +175,9 @@ public class TestPerFieldCodecSupport ex
     assertQuery(new Term("content", "aaa"), dir, 10, provider);
     assertEquals(40, writer.maxDoc());
 
+    if (VERBOSE) {
+      System.out.println("TEST: now optimize");
+    }
     writer.optimize();
     assertEquals(40, writer.maxDoc());
     writer.close();
@@ -206,6 +222,9 @@ public class TestPerFieldCodecSupport ex
 
   public void assertQuery(Term t, Directory dir, int num, CodecProvider codecs)
       throws CorruptIndexException, IOException {
+    if (VERBOSE) {
+      System.out.println("\nTEST: assertQuery " + t);
+    }
     IndexReader reader = IndexReader.open(dir, null, true,
         IndexReader.DEFAULT_TERMS_INDEX_DIVISOR, codecs);
     IndexSearcher searcher = new IndexSearcher(reader);

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestSegmentReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestSegmentReader.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestSegmentReader.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestSegmentReader.java Thu Jan 13 02:09:33 2011
@@ -181,11 +181,11 @@ public class TestSegmentReader extends L
         assertEquals(reader.hasNorms(f.name()), !DocHelper.noNorms.containsKey(f.name()));
         if (!reader.hasNorms(f.name())) {
           // test for fake norms of 1.0 or null depending on the flag
-          byte [] norms = reader.norms(f.name());
+          byte [] norms = MultiNorms.norms(reader, f.name());
           byte norm1 = Similarity.getDefault().encodeNormValue(1.0f);
           assertNull(norms);
           norms = new byte[reader.maxDoc()];
-          reader.norms(f.name(),norms, 0);
+          MultiNorms.norms(reader, f.name(),norms, 0);
           for (int j=0; j<reader.maxDoc(); j++) {
             assertEquals(norms[j], norm1);
           }

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestSegmentTermEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestSegmentTermEnum.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestSegmentTermEnum.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestSegmentTermEnum.java Thu Jan 13 02:09:33 2011
@@ -74,7 +74,7 @@ public class TestSegmentTermEnum extends
 
   public void testPrevTermAtEnd() throws IOException
   {
-    IndexWriter writer  = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()).setCodecProvider(_TestUtil.alwaysCodec("Standard")));
+    IndexWriter writer  = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setCodecProvider(_TestUtil.alwaysCodec("Standard")));
     addDoc(writer, "aaa bbb");
     writer.close();
     SegmentReader reader = getOnlySegmentReader(IndexReader.open(dir, false));
@@ -82,7 +82,14 @@ public class TestSegmentTermEnum extends
     assertNotNull(terms.next());
     assertEquals("aaa", terms.term().utf8ToString());
     assertNotNull(terms.next());
-    long ordB = terms.ord();
+    long ordB;
+    try {
+      ordB = terms.ord();
+    } catch (UnsupportedOperationException uoe) {
+      // ok -- codec is not required to support ord
+      reader.close();
+      return;
+    }
     assertEquals("bbb", terms.term().utf8ToString());
     assertNull(terms.next());
 

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/mockintblock/MockFixedIntBlockCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/mockintblock/MockFixedIntBlockCodec.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/mockintblock/MockFixedIntBlockCodec.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/mockintblock/MockFixedIntBlockCodec.java Thu Jan 13 02:09:33 2011
@@ -67,10 +67,15 @@ public class MockFixedIntBlockCodec exte
 
   // only for testing
   public IntStreamFactory getIntFactory() {
-    return new MockIntFactory();
+    return new MockIntFactory(blockSize);
   }
 
-  private class MockIntFactory extends IntStreamFactory {
+  public static class MockIntFactory extends IntStreamFactory {
+    private final int blockSize;
+
+    public MockIntFactory(int blockSize) {
+      this.blockSize = blockSize;
+    }
 
     @Override
     public IntIndexInput openInput(Directory dir, String fileName, int readBufferSize) throws IOException {
@@ -96,6 +101,7 @@ public class MockFixedIntBlockCodec exte
         @Override
         protected void flushBlock() throws IOException {
           for(int i=0;i<buffer.length;i++) {
+            assert buffer[i] >= 0;
             out.writeVInt(buffer[i]);
           }
         }
@@ -105,7 +111,7 @@ public class MockFixedIntBlockCodec exte
 
   @Override
   public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
-    PostingsWriterBase postingsWriter = new SepPostingsWriterImpl(state, new MockIntFactory());
+    PostingsWriterBase postingsWriter = new SepPostingsWriterImpl(state, new MockIntFactory(blockSize));
 
     boolean success = false;
     TermsIndexWriterBase indexWriter;
@@ -139,7 +145,7 @@ public class MockFixedIntBlockCodec exte
     PostingsReaderBase postingsReader = new SepPostingsReaderImpl(state.dir,
                                                                       state.segmentInfo,
                                                                       state.readBufferSize,
-                                                                      new MockIntFactory(), state.codecId);
+                                                                      new MockIntFactory(blockSize), state.codecId);
 
     TermsIndexReaderBase indexReader;
     boolean success = false;
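
MockIntFactory is now a public static class that carries its block size, so other tests can build the int-stream factory without instantiating the whole codec. A minimal usage sketch; the wrapper class and the block size of 128 are only illustrative:

    import org.apache.lucene.index.codecs.mockintblock.MockFixedIntBlockCodec;
    import org.apache.lucene.index.codecs.sep.IntStreamFactory;

    // Sketch only: construct the factory directly with an explicit block size.
    final class MockIntFactoryUsage {
      static IntStreamFactory newFixedBlockFactory() {
        return new MockFixedIntBlockCodec.MockIntFactory(128);
      }
    }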

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/mockintblock/MockVariableIntBlockCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/mockintblock/MockVariableIntBlockCodec.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/mockintblock/MockVariableIntBlockCodec.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/mockintblock/MockVariableIntBlockCodec.java Thu Jan 13 02:09:33 2011
@@ -67,7 +67,13 @@ public class MockVariableIntBlockCodec e
     return name + "(baseBlockSize="+ baseBlockSize + ")";
   }
 
-  private class MockIntFactory extends IntStreamFactory {
+  public static class MockIntFactory extends IntStreamFactory {
+
+    private final int baseBlockSize;
+
+    public MockIntFactory(int baseBlockSize) {
+      this.baseBlockSize = baseBlockSize;
+    }
 
     @Override
     public IntIndexInput openInput(Directory dir, String fileName, int readBufferSize) throws IOException {
@@ -104,6 +110,7 @@ public class MockVariableIntBlockCodec e
 
         @Override
         protected int add(int value) throws IOException {
+          assert value >= 0;
           buffer[pendingCount++] = value;
           // silly variable block length int encoder: if
           // first value <= 3, we write N vints at once;
@@ -128,7 +135,7 @@ public class MockVariableIntBlockCodec e
 
   @Override
   public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
-    PostingsWriterBase postingsWriter = new SepPostingsWriterImpl(state, new MockIntFactory());
+    PostingsWriterBase postingsWriter = new SepPostingsWriterImpl(state, new MockIntFactory(baseBlockSize));
 
     boolean success = false;
     TermsIndexWriterBase indexWriter;
@@ -162,7 +169,7 @@ public class MockVariableIntBlockCodec e
     PostingsReaderBase postingsReader = new SepPostingsReaderImpl(state.dir,
                                                                       state.segmentInfo,
                                                                       state.readBufferSize,
-                                                                      new MockIntFactory(), state.codecId);
+                                                                      new MockIntFactory(baseBlockSize), state.codecId);
 
     TermsIndexReaderBase indexReader;
     boolean success = false;

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/mocksep/MockSingleIntIndexOutput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/mocksep/MockSingleIntIndexOutput.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/mocksep/MockSingleIntIndexOutput.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/mocksep/MockSingleIntIndexOutput.java Thu Jan 13 02:09:33 2011
@@ -42,6 +42,7 @@ public class MockSingleIntIndexOutput ex
   /** Write an int to the primary file */
   @Override
   public void write(int v) throws IOException {
+    assert v >= 0;
     out.writeVInt(v);
   }
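
The added assert documents an invariant of these mock int streams: values go through writeVInt, so they must be non-negative (a negative int would take the maximum five vint bytes and is never expected here). A minimal sketch of the same guard with hypothetical names:

    import java.io.IOException;

    import org.apache.lucene.store.IndexOutput;

    // Hypothetical helper, not part of the patch: refuse negative values before
    // vint-encoding them.
    final class NonNegativeVIntWriter {
      static void writeNonNegativeVInt(IndexOutput out, int v) throws IOException {
        assert v >= 0 : "vint-coded values must be non-negative: " + v;
        out.writeVInt(v);
      }
    }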
 

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/preflexrw/PreFlexFieldsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/preflexrw/PreFlexFieldsWriter.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/preflexrw/PreFlexFieldsWriter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/preflexrw/PreFlexFieldsWriter.java Thu Jan 13 02:09:33 2011
@@ -46,18 +46,14 @@ class PreFlexFieldsWriter extends Fields
                                    state.segmentName,
                                    state.fieldInfos,
                                    state.termIndexInterval);
-    state.flushedFiles.add(IndexFileNames.segmentFileName(state.segmentName, "", PreFlexCodec.TERMS_EXTENSION));
-    state.flushedFiles.add(IndexFileNames.segmentFileName(state.segmentName, "", PreFlexCodec.TERMS_INDEX_EXTENSION));
 
     final String freqFile = IndexFileNames.segmentFileName(state.segmentName, "", PreFlexCodec.FREQ_EXTENSION);
     freqOut = state.directory.createOutput(freqFile);
-    state.flushedFiles.add(freqFile);
     totalNumDocs = state.numDocs;
 
     if (state.fieldInfos.hasProx()) {
       final String proxFile = IndexFileNames.segmentFileName(state.segmentName, "", PreFlexCodec.PROX_EXTENSION);
       proxOut = state.directory.createOutput(proxFile);
-      state.flushedFiles.add(proxFile);
     } else {
       proxOut = null;
     }

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/CachingWrapperFilterHelper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/CachingWrapperFilterHelper.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/CachingWrapperFilterHelper.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/CachingWrapperFilterHelper.java Thu Jan 13 02:09:33 2011
@@ -20,7 +20,8 @@ package org.apache.lucene.search;
 import java.io.IOException;
 
 import junit.framework.Assert;
-import org.apache.lucene.index.IndexReader;
+
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 
 /**
  * A unit test helper class to test when the filter is getting cached and when it is not.
@@ -41,10 +42,10 @@ public class CachingWrapperFilterHelper 
   }
   
   @Override
-  public synchronized DocIdSet getDocIdSet(IndexReader reader) throws IOException {
+  public synchronized DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException {
 
     final int saveMissCount = missCount;
-    DocIdSet docIdSet = super.getDocIdSet(reader);
+    DocIdSet docIdSet = super.getDocIdSet(context);
 
     if (shouldHaveCache) {
       Assert.assertEquals("Cache should have data ", saveMissCount, missCount);
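
The helper now follows the per-segment Filter API: getDocIdSet receives an AtomicReaderContext instead of a raw IndexReader, and the segment's reader stays reachable through the context. A minimal sketch of a Filter under that signature; the filter class is hypothetical, and returning null means no documents match:

    import java.io.IOException;

    import org.apache.lucene.index.IndexReader.AtomicReaderContext;
    import org.apache.lucene.search.DocIdSet;
    import org.apache.lucene.search.Filter;

    // Hypothetical filter, not part of the patch: matches nothing, but shows the
    // per-segment entry point introduced by this change.
    class MatchNothingFilter extends Filter {
      @Override
      public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException {
        return null;                           // a null DocIdSet means no matches
      }
    }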


