Return-Path: Delivered-To: apmail-lucene-commits-archive@www.apache.org Received: (qmail 88174 invoked from network); 13 Jan 2011 02:10:59 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.3) by minotaur.apache.org with SMTP; 13 Jan 2011 02:10:59 -0000 Received: (qmail 12423 invoked by uid 500); 13 Jan 2011 02:10:59 -0000 Mailing-List: contact commits-help@lucene.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@lucene.apache.org Delivered-To: mailing list commits@lucene.apache.org Received: (qmail 12416 invoked by uid 99); 13 Jan 2011 02:10:58 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 13 Jan 2011 02:10:58 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=10.0 tests=ALL_TRUSTED,T_FRT_PROFILE2 X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 13 Jan 2011 02:10:55 +0000 Received: by eris.apache.org (Postfix, from userid 65534) id 945772388BEF; Thu, 13 Jan 2011 02:10:03 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1058390 [7/16] - in /lucene/dev/branches/bulkpostings: ./ dev-tools/ dev-tools/eclipse/ dev-tools/idea/ dev-tools/idea/.idea/ dev-tools/idea/.idea/libraries/ dev-tools/idea/lucene/ dev-tools/idea/lucene/contrib/ dev-tools/idea/lucene/contr... Date: Thu, 13 Jan 2011 02:09:56 -0000 To: commits@lucene.apache.org From: rmuir@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20110113021003.945772388BEF@eris.apache.org> Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java?rev=1058390&r1=1058389&r2=1058390&view=diff ============================================================================== --- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java (original) +++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java Thu Jan 13 02:09:33 2011 @@ -18,20 +18,13 @@ package org.apache.lucene.util.automaton */ import java.io.IOException; -import java.io.PrintStream; -import java.util.ArrayList; -import java.util.HashSet; -import java.util.List; -import java.util.Set; import org.apache.lucene.store.DataInput; import org.apache.lucene.store.DataOutput; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; import org.apache.lucene.util.ArrayUtil; -import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CodecUtil; -import org.apache.lucene.util.IntsRef; /** Represents an FST using a compact byte[] format. *

The format is similar to what's used by Morfologik @@ -40,7 +33,7 @@ import org.apache.lucene.util.IntsRef; */ public class FST { public static enum INPUT_TYPE {BYTE1, BYTE2, BYTE4}; - private final INPUT_TYPE inputType; + public final INPUT_TYPE inputType; private final static int BIT_FINAL_ARC = 1 << 0; private final static int BIT_LAST_ARC = 1 << 1; @@ -76,7 +69,7 @@ public class FST { // if non-null, this FST accepts the empty string and // produces this output - private T emptyOutput; + T emptyOutput; private byte[] emptyOutputBytes; private byte[] bytes; @@ -94,11 +87,16 @@ public class FST { public int arcCount; public int arcWithOutputCount; + // If arc has this label then that arc is final/accepted + public static int END_LABEL = -1; + public final static class Arc { - int label; // really a "unsigned" byte + public int label; + public T output; + int target; + byte flags; - T output; T nextFinalOutput; int nextArc; @@ -108,13 +106,26 @@ public class FST { int arcIdx; int numArcs; - // Must call this before re-using an Arc instance on a - // new node - public void reset() { - bytesPerArc = 0; + /** Returns this */ + public Arc copyFrom(Arc other) { + label = other.label; + target = other.target; + flags = other.flags; + output = other.output; + nextFinalOutput = other.nextFinalOutput; + nextArc = other.nextArc; + if (other.bytesPerArc != 0) { + bytesPerArc = other.bytesPerArc; + posArcsStart = other.posArcsStart; + arcIdx = other.arcIdx; + numArcs = other.numArcs; + } else { + bytesPerArc = 0; + } + return this; } - public boolean flag(int flag) { + boolean flag(int flag) { return FST.flag(flags, flag); } @@ -122,7 +133,7 @@ public class FST { return flag(BIT_LAST_ARC); } - public boolean isFinal() { + boolean isFinal() { return flag(BIT_FINAL_ARC); } }; @@ -156,7 +167,7 @@ public class FST { // messy bytes = new byte[numBytes]; in.readBytes(bytes, 0, numBytes); - emptyOutput = outputs.read(new BytesReader(numBytes-1)); + emptyOutput = outputs.read(getBytesReader(numBytes-1)); } else { emptyOutput = null; } @@ -203,9 +214,9 @@ public class FST { this.startNode = startNode; } - public void setEmptyOutput(T v) throws IOException { - if (emptyOutput != null) { - throw new IllegalStateException("empty output is already set"); + void setEmptyOutput(T v) throws IOException { + if (emptyOutput != null && !emptyOutput.equals(v)) { + throw new IllegalStateException("empty output is already set: " + outputs.outputToString(emptyOutput) + " vs " + outputs.outputToString(v)); } emptyOutput = v; @@ -271,7 +282,7 @@ public class FST { } } - private int readLabel(DataInput in) throws IOException { + int readLabel(DataInput in) throws IOException { final int v; if (inputType == INPUT_TYPE.BYTE1) { v = in.readByte()&0xFF; @@ -285,21 +296,8 @@ public class FST { // returns true if the node at this address has any // outgoing arcs - public boolean hasArcs(int address) { - return address != FINAL_END_NODE && address != NON_FINAL_END_NODE; - } - - public int getStartNode() { - if (startNode == -1) { - throw new IllegalStateException("call finish first"); - } - return startNode; - } - - // returns null if this FST does not accept the empty - // string, else, the output for the empty string - public T getEmptyOutput() { - return emptyOutput; + public boolean targetHasArcs(Arc arc) { + return arc.target > 0; } // serializes new node by appending its bytes to the end @@ -364,7 +362,7 @@ public class FST { assert arc.nextFinalOutput == NO_OUTPUT; } - boolean targetHasArcs = hasArcs(target.address); + 
boolean targetHasArcs = target.address > 0; if (!targetHasArcs) { flags += BIT_STOP_NODE; @@ -453,10 +451,49 @@ public class FST { return endAddress-1; } - public Arc readFirstArc(int address, Arc arc) throws IOException { - //System.out.println("readFirstArc addr=" + address); + /** Fills virtual 'start' arc, ie, an empty incoming arc to + * the FST's start node */ + public Arc getFirstArc(Arc arc) { + if (emptyOutput != null) { + arc.flags = BIT_FINAL_ARC | BIT_LAST_ARC; + arc.nextFinalOutput = emptyOutput; + } else { + arc.flags = BIT_LAST_ARC; + } + + // If there are no nodes, ie, the FST only accepts the + // empty string, then startNode is 0, and then readFirstTargetArc + arc.target = startNode; + return arc; + } + + /** Follow the follow arc and read the first arc of its + * target; this changes the provide arc (2nd arg) in-place + * and returns it. */ + public Arc readFirstTargetArc(Arc follow, Arc arc) throws IOException { //int pos = address; - final BytesReader in = new BytesReader(address); + //System.out.println(" readFirstTarget follow.target=" + follow.target + " isFinal=" + follow.isFinal()); + if (follow.isFinal()) { + // Insert "fake" final first arc: + arc.label = -1; + arc.output = follow.nextFinalOutput; + if (follow.target <= 0) { + arc.flags = BIT_LAST_ARC; + } else { + arc.flags = 0; + arc.nextArc = follow.target; + } + //System.out.println(" insert isFinal; nextArc=" + follow.target + " isLast=" + arc.isLast() + " output=" + outputs.outputToString(arc.output)); + return arc; + } else { + return readFirstRealArc(follow.target, arc); + } + } + + // Not private beacaus NodeHash needs access: + Arc readFirstRealArc(int address, Arc arc) throws IOException { + + final BytesReader in = getBytesReader(address); arc.flags = in.readByte(); @@ -473,19 +510,66 @@ public class FST { arc.bytesPerArc = 0; } arc.nextArc = in.pos; + arc.label = 0; return readNextArc(arc); } + /** In-place read; returns the arc. */ public Arc readNextArc(Arc arc) throws IOException { + if (arc.label == -1) { + // This was a fake inserted "final" arc + if (arc.nextArc <= 0) { + // This arc went to virtual final node, ie has no outgoing arcs + return null; + } + return readFirstRealArc(arc.nextArc, arc); + } else { + return readNextRealArc(arc); + } + } + + /** Peeks at next arc's label; does not alter arc. Do + * not call this if arc.isLast()! 
*/ + public int readNextArcLabel(Arc arc) throws IOException { + assert !arc.isLast(); + + final BytesReader in; + if (arc.label == END_LABEL) { + //System.out.println(" nextArc fake " + arc.nextArc); + in = getBytesReader(arc.nextArc); + byte flags = bytes[in.pos]; + if (flag(flags, BIT_ARCS_AS_FIXED_ARRAY)) { + //System.out.println(" nextArc fake array"); + in.pos--; + in.readVInt(); + in.readByte(); + } + } else { + if (arc.bytesPerArc != 0) { + //System.out.println(" nextArc real array"); + // arcs are at fixed entries + in = getBytesReader(arc.posArcsStart - (1+arc.arcIdx)*arc.bytesPerArc); + } else { + // arcs are packed + //System.out.println(" nextArc real packed"); + in = getBytesReader(arc.nextArc); + } + } + // skip flags + in.readByte(); + return readLabel(in); + } + + Arc readNextRealArc(Arc arc) throws IOException { // this is a continuing arc in a fixed array final BytesReader in; if (arc.bytesPerArc != 0) { // arcs are at fixed entries arc.arcIdx++; - in = new BytesReader(arc.posArcsStart - arc.arcIdx*arc.bytesPerArc); + in = getBytesReader(arc.posArcsStart - arc.arcIdx*arc.bytesPerArc); } else { // arcs are packed - in = new BytesReader(arc.nextArc); + in = getBytesReader(arc.nextArc); } arc.flags = in.readByte(); arc.label = readLabel(in); @@ -504,6 +588,7 @@ public class FST { if (arc.flag(BIT_STOP_NODE)) { arc.target = FINAL_END_NODE; + arc.flags |= BIT_FINAL_ARC; arc.nextArc = in.pos; } else if (arc.flag(BIT_TARGET_NEXT)) { arc.nextArc = in.pos; @@ -524,14 +609,30 @@ public class FST { return arc; } - public Arc findArc(int address, int labelToMatch, Arc arc) throws IOException { + /** Finds an arc leaving the incoming arc, replacing the arc in place. + * This returns null if the arc was not found, else the incoming arc. */ + public Arc findTargetArc(int labelToMatch, Arc follow, Arc arc) throws IOException { + + if (labelToMatch == END_LABEL) { + if (follow.isFinal()) { + arc.output = follow.nextFinalOutput; + arc.label = END_LABEL; + return arc; + } else { + return null; + } + } + + if (!targetHasArcs(follow)) { + return null; + } + // TODO: maybe make an explicit thread state that holds // reusable stuff eg BytesReader: - final BytesReader in = new BytesReader(address); + final BytesReader in = getBytesReader(follow.target); if ((in.readByte() & BIT_ARCS_AS_FIXED_ARRAY) != 0) { // Arcs are full array; do binary search: - //System.out.println("findArc: array label=" + labelToMatch); arc.numArcs = in.readVInt(); arc.bytesPerArc = in.readByte() & 0xFF; arc.posArcsStart = in.pos; @@ -548,19 +649,20 @@ public class FST { high = mid - 1; else { arc.arcIdx = mid-1; - return readNextArc(arc); + return readNextRealArc(arc); } } return null; } - //System.out.println("findArc: scan"); - - readFirstArc(address, arc); + // Linear scan + readFirstTargetArc(follow, arc); while(true) { if (arc.label == labelToMatch) { return arc; + } else if (arc.label > labelToMatch) { + return null; } else if (arc.isLast()) { return null; } else { @@ -569,191 +671,6 @@ public class FST { } } - /** Looks up the output for this input, or null if the - * input is not accepted. FST must be - * INPUT_TYPE.BYTE4. 
*/ - public T get(IntsRef input) throws IOException { - assert inputType == INPUT_TYPE.BYTE4; - - if (input.length == 0) { - return getEmptyOutput(); - } - - // TODO: would be nice not to alloc this on every lookup - final FST.Arc arc = new FST.Arc(); - int node = getStartNode(); - T output = NO_OUTPUT; - for(int i=0;i arc = new FST.Arc(); - int node = getStartNode(); - int charIdx = offset; - final int charLimit = offset + length; - T output = NO_OUTPUT; - while(charIdx < charLimit) { - if (!hasArcs(node)) { - // hit end of FST before input end - return null; - } - - final int utf32 = Character.codePointAt(input, charIdx); - charIdx += Character.charCount(utf32); - - if (findArc(node, utf32, arc) != null) { - node = arc.target; - if (arc.output != NO_OUTPUT) { - output = outputs.add(output, arc.output); - } - } else { - return null; - } - } - - if (!arc.isFinal()) { - // hit input's end before end node - return null; - } - - if (arc.nextFinalOutput != NO_OUTPUT) { - output = outputs.add(output, arc.nextFinalOutput); - } - - return output; - } - - - /** Logically casts input to UTF32 ints then looks up the output - * or null if the input is not accepted. FST must be - * INPUT_TYPE.BYTE4. */ - public T get(CharSequence input) throws IOException { - assert inputType == INPUT_TYPE.BYTE4; - - final int len = input.length(); - if (len == 0) { - return getEmptyOutput(); - } - - // TODO: would be nice not to alloc this on every lookup - final FST.Arc arc = new FST.Arc(); - int node = getStartNode(); - int charIdx = 0; - final int charLimit = input.length(); - T output = NO_OUTPUT; - while(charIdx < charLimit) { - if (!hasArcs(node)) { - // hit end of FST before input end - return null; - } - - final int utf32 = Character.codePointAt(input, charIdx); - charIdx += Character.charCount(utf32); - - if (findArc(node, utf32, arc) != null) { - node = arc.target; - if (arc.output != NO_OUTPUT) { - output = outputs.add(output, arc.output); - } - } else { - return null; - } - } - - if (!arc.isFinal()) { - // hit input's end before end node - return null; - } - - if (arc.nextFinalOutput != NO_OUTPUT) { - output = outputs.add(output, arc.nextFinalOutput); - } - - return output; - } - - /** Looks up the output for this input, or null if the - * input is not accepted */ - public T get(BytesRef input) throws IOException { - assert inputType == INPUT_TYPE.BYTE1; - - if (input.length == 0) { - return getEmptyOutput(); - } - - // TODO: would be nice not to alloc this on every lookup - final FST.Arc arc = new FST.Arc(); - int node = getStartNode(); - T output = NO_OUTPUT; - for(int i=0;i /x/tmp/out.png - */ - public void toDot(PrintStream out) throws IOException { - - final List queue = new ArrayList(); - queue.add(startNode); - - final Set seen = new HashSet(); - seen.add(startNode); - - out.println("digraph FST {"); - out.println(" rankdir = LR;"); - //out.println(" " + startNode + " [shape=circle label=" + startNode + "];"); - out.println(" " + startNode + " [label=\"\" shape=circle];"); - out.println(" initial [shape=point color=white label=\"\"];"); - if (emptyOutput != null) { - out.println(" initial -> " + startNode + " [arrowhead=tee label=\"(" + outputs.outputToString(emptyOutput) + ")\"];"); - } else { - out.println(" initial -> " + startNode); - } - - final Arc arc = new Arc(); - - while(queue.size() != 0) { - Integer node = queue.get(queue.size()-1); - queue.remove(queue.size()-1); - - if (node == FINAL_END_NODE || node == NON_FINAL_END_NODE) { - continue; - } - - // scan all arcs - readFirstArc(node, 
arc); - while(true) { - - if (!seen.contains(arc.target)) { - //out.println(" " + arc.target + " [label=" + arc.target + "];"); - out.println(" " + arc.target + " [label=\"\" shape=circle];"); - seen.add(arc.target); - queue.add(arc.target); - } - String outs; - if (arc.output != NO_OUTPUT) { - outs = "/" + outputs.outputToString(arc.output); - } else { - outs = ""; - } - if (arc.isFinal() && arc.nextFinalOutput != NO_OUTPUT) { - outs += " (" + outputs.outputToString(arc.nextFinalOutput) + ")"; - } - out.print(" " + node + " -> " + arc.target + " [label=\"" + arc.label + outs + "\""); - if (arc.isFinal()) { - out.print(" arrowhead=tee"); - } - if (arc.flag(BIT_TARGET_NEXT)) { - out.print(" color=blue"); - } - out.println("];"); - - if (arc.isLast()) { - break; - } else { - readNextArc(arc); - } - } - } - out.println("}"); - } - public int getNodeCount() { // 1+ in order to count the -1 implicit final node return 1+nodeCount; @@ -872,7 +710,7 @@ public class FST { } // Non-static: writes to FST's byte[] - private class BytesWriter extends DataOutput { + class BytesWriter extends DataOutput { int posWrite; public BytesWriter() { @@ -899,8 +737,13 @@ public class FST { } } + final BytesReader getBytesReader(int pos) { + // TODO: maybe re-use via ThreadLocal? + return new BytesReader(pos); + } + // Non-static: reads byte[] from FST - private class BytesReader extends DataInput { + class BytesReader extends DataInput { int pos; public BytesReader(int pos) { Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/util/automaton/fst/IntsRefFSTEnum.java URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/util/automaton/fst/IntsRefFSTEnum.java?rev=1058390&r1=1058389&r2=1058390&view=diff ============================================================================== --- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/util/automaton/fst/IntsRefFSTEnum.java (original) +++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/util/automaton/fst/IntsRefFSTEnum.java Thu Jan 13 02:09:33 2011 @@ -17,9 +17,7 @@ package org.apache.lucene.util.automaton * limitations under the License. */ -import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.IntsRef; -import org.apache.lucene.util.RamUsageEstimator; import java.io.IOException; @@ -27,163 +25,23 @@ import java.io.IOException; * @lucene.experimental */ -public class IntsRefFSTEnum { - private final FST fst; - - private IntsRef current = new IntsRef(10); - @SuppressWarnings("unchecked") private FST.Arc[] arcs = new FST.Arc[10]; - // outputs are cumulative - @SuppressWarnings("unchecked") private T[] output = (T[]) new Object[10]; - - private boolean lastFinal; - private boolean didEmpty; - private final T NO_OUTPUT; +public final class IntsRefFSTEnum extends FSTEnum { + private final IntsRef current = new IntsRef(10); private final InputOutput result = new InputOutput(); + private IntsRef target; public static class InputOutput { public IntsRef input; public T output; } - + + /** doFloor controls the behavior of advance: if it's true + * doFloor is true, advance positions to the biggest + * term before target. 
*/ public IntsRefFSTEnum(FST fst) { - this.fst = fst; + super(fst); result.input = current; - NO_OUTPUT = fst.outputs.getNoOutput(); - } - - public void reset() { - lastFinal = false; - didEmpty = false; - current.length = 0; - result.output = NO_OUTPUT; - } - - /** NOTE: target must be >= where we are already - * positioned */ - public InputOutput advance(IntsRef target) throws IOException { - - assert target.compareTo(current) >= 0; - - //System.out.println(" advance len=" + target.length + " curlen=" + current.length); - - // special case empty string - if (current.length == 0) { - if (target.length == 0) { - final T output = fst.getEmptyOutput(); - if (output != null) { - if (!didEmpty) { - current.length = 0; - lastFinal = true; - result.output = output; - didEmpty = true; - } - return result; - } else { - return next(); - } - } - - if (fst.noNodes()) { - return null; - } - } - - // TODO: possibly caller could/should provide common - // prefix length? ie this work may be redundant if - // caller is in fact intersecting against its own - // automaton - - // what prefix does target share w/ current - int idx = 0; - while (idx < current.length && idx < target.length) { - if (current.ints[idx] != target.ints[target.offset + idx]) { - break; - } - idx++; - } - - //System.out.println(" shared " + idx); - - FST.Arc arc; - if (current.length == 0) { - // new enum (no seek/next yet) - arc = fst.readFirstArc(fst.getStartNode(), getArc(0)); - //System.out.println(" new enum"); - } else if (idx < current.length) { - // roll back to shared point - lastFinal = false; - current.length = idx; - arc = arcs[idx]; - if (arc.isLast()) { - if (idx == 0) { - return null; - } else { - return next(); - } - } - arc = fst.readNextArc(arc); - } else if (idx == target.length) { - // degenerate case -- seek to term we are already on - assert target.equals(current); - return result; - } else { - // current is a full prefix of target - if (lastFinal) { - arc = fst.readFirstArc(arcs[current.length-1].target, getArc(current.length)); - } else { - return next(); - } - } - - lastFinal = false; - - assert arc == arcs[current.length]; - int targetLabel = target.ints[target.offset+current.length]; - - while(true) { - //System.out.println(" cycle len=" + current.length + " target=" + ((char) targetLabel) + " vs " + ((char) arc.label)); - if (arc.label == targetLabel) { - grow(); - current.ints[current.length] = arc.label; - appendOutput(arc.output); - current.length++; - grow(); - if (current.length == target.length) { - result.output = output[current.length-1]; - if (arc.isFinal()) { - // target is exact match - if (fst.hasArcs(arc.target)) { - // target is also a proper prefix of other terms - lastFinal = true; - appendFinalOutput(arc.nextFinalOutput); - } - } else { - // target is not a match but is a prefix of - // other terms - current.length--; - push(); - } - return result; - } else if (!fst.hasArcs(arc.target)) { - // we only match a prefix of the target - return next(); - } else { - targetLabel = target.ints[target.offset+current.length]; - arc = fst.readFirstArc(arc.target, getArc(current.length)); - } - } else if (arc.label > targetLabel) { - // we are now past the target - push(); - return result; - } else if (arc.isLast()) { - if (current.length == 0) { - return null; - } - return next(); - } else { - arc = fst.readNextArc(getArc(current.length)); - } - } + current.offset = 1; } public InputOutput current() { @@ -192,124 +50,58 @@ public class IntsRefFSTEnum { public InputOutput next() throws IOException { 
//System.out.println(" enum.next"); - - if (current.length == 0) { - final T output = fst.getEmptyOutput(); - if (output != null) { - if (!didEmpty) { - current.length = 0; - lastFinal = true; - result.output = output; - didEmpty = true; - return result; - } else { - lastFinal = false; - } - } - if (fst.noNodes()) { - return null; - } - fst.readFirstArc(fst.getStartNode(), getArc(0)); - push(); - } else if (lastFinal) { - lastFinal = false; - assert current.length > 0; - // resume pushing - fst.readFirstArc(arcs[current.length-1].target, getArc(current.length)); - push(); - } else { - //System.out.println(" pop/push"); - pop(); - if (current.length == 0) { - // enum done - return null; - } else { - current.length--; - fst.readNextArc(arcs[current.length]); - push(); - } - } - - return result; + doNext(); + return setResult(); } - private void grow() { - final int l = current.length + 1; - current.grow(l); - if (arcs.length < l) { - @SuppressWarnings("unchecked") final FST.Arc[] newArcs = - new FST.Arc[ArrayUtil.oversize(l, RamUsageEstimator.NUM_BYTES_OBJECT_REF)]; - System.arraycopy(arcs, 0, newArcs, 0, arcs.length); - arcs = newArcs; - } - if (output.length < l) { - @SuppressWarnings("unchecked") final T[] newOutput = - (T[]) new Object[ArrayUtil.oversize(l, RamUsageEstimator.NUM_BYTES_OBJECT_REF)]; - System.arraycopy(output, 0, newOutput, 0, output.length); - output = newOutput; - } - } - - private void appendOutput(T addedOutput) { - T newOutput; - if (current.length == 0) { - newOutput = addedOutput; - } else if (addedOutput == NO_OUTPUT) { - output[current.length] = output[current.length-1]; - return; + /** Seeks to smallest term that's >= target. */ + public InputOutput seekCeil(IntsRef target) throws IOException { + this.target = target; + targetLength = target.length; + super.doSeekCeil(); + return setResult(); + } + + /** Seeks to biggest term that's <= target. 
*/ + public InputOutput seekFloor(IntsRef target) throws IOException { + this.target = target; + targetLength = target.length; + super.doSeekFloor(); + return setResult(); + } + + @Override + protected int getTargetLabel() { + if (upto-1 == target.length) { + return FST.END_LABEL; } else { - newOutput = fst.outputs.add(output[current.length-1], addedOutput); + return target.ints[target.offset + upto - 1]; } - output[current.length] = newOutput; } - private void appendFinalOutput(T addedOutput) { - if (current.length == 0) { - result.output = addedOutput; - } else { - result.output = fst.outputs.add(output[current.length-1], addedOutput); - } + @Override + protected int getCurrentLabel() { + // current.offset fixed at 1 + return current.ints[upto]; } - private void push() throws IOException { - - FST.Arc arc = arcs[current.length]; - assert arc != null; - - while(true) { - grow(); - - current.ints[current.length] = arc.label; - appendOutput(arc.output); - //System.out.println(" push: append label=" + ((char) arc.label) + " output=" + fst.outputs.outputToString(arc.output)); - current.length++; - grow(); - - if (!fst.hasArcs(arc.target)) { - break; - } - - if (arc.isFinal()) { - appendFinalOutput(arc.nextFinalOutput); - lastFinal = true; - return; - } - - arc = fst.readFirstArc(arc.target, getArc(current.length)); - } - result.output = output[current.length-1]; + @Override + protected void setCurrentLabel(int label) { + current.ints[upto] = label; } - private void pop() { - while (current.length > 0 && arcs[current.length-1].isLast()) { - current.length--; - } + @Override + protected void grow() { + current.grow(upto+1); } - private FST.Arc getArc(int idx) { - if (arcs[idx] == null) { - arcs[idx] = new FST.Arc(); + private InputOutput setResult() { + if (upto == 0) { + return null; + } else { + current.length = upto-1; + result.output = output[upto]; + return result; } - return arcs[idx]; } } Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/util/automaton/fst/NodeHash.java URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/util/automaton/fst/NodeHash.java?rev=1058390&r1=1058389&r2=1058390&view=diff ============================================================================== --- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/util/automaton/fst/NodeHash.java (original) +++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/util/automaton/fst/NodeHash.java Thu Jan 13 02:09:33 2011 @@ -35,7 +35,7 @@ final class NodeHash { } private boolean nodesEqual(Builder.UnCompiledNode node, int address) throws IOException { - fst.readFirstArc(address, scratchArc); + fst.readFirstRealArc(address, scratchArc); if (scratchArc.bytesPerArc != 0 && node.numArcs != scratchArc.numArcs) { return false; } @@ -56,7 +56,7 @@ final class NodeHash { return false; } } - fst.readNextArc(scratchArc); + fst.readNextRealArc(scratchArc); } return false; @@ -89,7 +89,7 @@ final class NodeHash { final int PRIME = 31; //System.out.println("hash frozen"); int h = 0; - fst.readFirstArc(node, scratchArc); + fst.readFirstRealArc(node, scratchArc); while(true) { //System.out.println(" label=" + scratchArc.label + " target=" + scratchArc.target + " h=" + h + " output=" + fst.outputs.outputToString(scratchArc.output) + " next?=" + scratchArc.flag(4) + " final?=" + scratchArc.isFinal()); h = PRIME * h + scratchArc.label; @@ -102,7 +102,7 @@ final class NodeHash { if (scratchArc.isLast()) { break; } - 
fst.readNextArc(scratchArc); + fst.readNextRealArc(scratchArc); } //System.out.println(" ret " + (h&Integer.MAX_VALUE)); return h & Integer.MAX_VALUE; Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestSearch.java URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestSearch.java?rev=1058390&r1=1058389&r2=1058390&view=diff ============================================================================== --- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestSearch.java (original) +++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestSearch.java Thu Jan 13 02:09:33 2011 @@ -94,7 +94,7 @@ public class TestSearch extends LuceneTe } writer.close(); - Searcher searcher = new IndexSearcher(directory, true); + IndexSearcher searcher = new IndexSearcher(directory, true); String[] queries = { "a b", Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestSearchForDuplicates.java URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestSearchForDuplicates.java?rev=1058390&r1=1058389&r2=1058390&view=diff ============================================================================== --- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestSearchForDuplicates.java (original) +++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestSearchForDuplicates.java Thu Jan 13 02:09:33 2011 @@ -102,7 +102,7 @@ public class TestSearchForDuplicates ext writer.close(); // try a search without OR - Searcher searcher = new IndexSearcher(directory, true); + IndexSearcher searcher = new IndexSearcher(directory, true); QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, PRIORITY_FIELD, analyzer); @@ -133,7 +133,7 @@ public class TestSearchForDuplicates ext } - private void printHits(PrintWriter out, ScoreDoc[] hits, Searcher searcher ) throws IOException { + private void printHits(PrintWriter out, ScoreDoc[] hits, IndexSearcher searcher) throws IOException { out.println(hits.length + " total results\n"); for (int i = 0 ; i < hits.length; i++) { if ( i < 10 || (i > 94 && i < 105) ) { @@ -143,11 +143,11 @@ public class TestSearchForDuplicates ext } } - private void checkHits(ScoreDoc[] hits, int expectedCount, Searcher searcher) throws IOException { + private void checkHits(ScoreDoc[] hits, int expectedCount, IndexSearcher searcher) throws IOException { assertEquals("total results", expectedCount, hits.length); for (int i = 0 ; i < hits.length; i++) { if ( i < 10 || (i > 94 && i < 105) ) { - Document d = searcher.doc(hits[i].doc); + Document d = searcher.doc(hits[i].doc); assertEquals("check " + i, String.valueOf(i), d.get(ID_FIELD)); } } Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/document/TestDocument.java URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/document/TestDocument.java?rev=1058390&r1=1058389&r2=1058390&view=diff ============================================================================== --- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/document/TestDocument.java (original) +++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/document/TestDocument.java Thu Jan 13 02:09:33 2011 @@ -6,7 +6,6 @@ import org.apache.lucene.index.Term; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; 
-import org.apache.lucene.search.Searcher; import org.apache.lucene.search.TermQuery; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; @@ -157,7 +156,7 @@ public class TestDocument extends Lucene writer.addDocument(makeDocumentWithFields()); IndexReader reader = writer.getReader(); - Searcher searcher = new IndexSearcher(reader); + IndexSearcher searcher = new IndexSearcher(reader); // search for something that does exists Query query = new TermQuery(new Term("keyword", "test1")); @@ -239,7 +238,7 @@ public class TestDocument extends Lucene writer.addDocument(doc); IndexReader reader = writer.getReader(); - Searcher searcher = new IndexSearcher(reader); + IndexSearcher searcher = new IndexSearcher(reader); Query query = new TermQuery(new Term("keyword", "test")); Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/RandomIndexWriter.java URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/RandomIndexWriter.java?rev=1058390&r1=1058389&r2=1058390&view=diff ============================================================================== --- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/RandomIndexWriter.java (original) +++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/RandomIndexWriter.java Thu Jan 13 02:09:33 2011 @@ -87,6 +87,7 @@ public class RandomIndexWriter implement if (LuceneTestCase.VERBOSE) { System.out.println("RIW config=" + w.getConfig()); System.out.println("codec default=" + w.getConfig().getCodecProvider().getDefaultFieldCodec()); + w.setInfoStream(System.out); } } Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/Test2BTerms.java URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/Test2BTerms.java?rev=1058390&r1=1058389&r2=1058390&view=diff ============================================================================== --- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/Test2BTerms.java (original) +++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/Test2BTerms.java Thu Jan 13 02:09:33 2011 @@ -143,6 +143,12 @@ public class Test2BTerms extends LuceneT setMergePolicy(newLogMergePolicy(false, 10)) ); + MergePolicy mp = w.getConfig().getMergePolicy(); + if (mp instanceof LogByteSizeMergePolicy) { + // 1 petabyte: + ((LogByteSizeMergePolicy) mp).setMaxMergeMB(1024*1024*1024); + } + Document doc = new Document(); Field field = new Field("field", new MyTokenStream(TERMS_PER_DOC)); field.setOmitTermFreqAndPositions(true); Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java?rev=1058390&r1=1058389&r2=1058390&view=diff ============================================================================== --- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java (original) +++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java Thu Jan 13 02:09:33 2011 @@ -360,7 +360,7 @@ public class TestBackwardsCompatibility // First document should be #21 since it's norm was // increased: - Document d = searcher.doc(hits[0].doc); + Document d = searcher.getIndexReader().document(hits[0].doc); 
assertEquals("didn't get the right document first", "21", d.get("id")); doTestHits(hits, 34, searcher.getIndexReader()); @@ -408,7 +408,7 @@ public class TestBackwardsCompatibility // make sure searching sees right # hits IndexSearcher searcher = new IndexSearcher(dir, true); ScoreDoc[] hits = searcher.search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs; - Document d = searcher.doc(hits[0].doc); + Document d = searcher.getIndexReader().document(hits[0].doc); assertEquals("wrong first document", "21", d.get("id")); doTestHits(hits, 44, searcher.getIndexReader()); searcher.close(); Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestCodecs.java URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestCodecs.java?rev=1058390&r1=1058389&r2=1058390&view=diff ============================================================================== --- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestCodecs.java (original) +++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestCodecs.java Thu Jan 13 02:09:33 2011 @@ -23,14 +23,15 @@ import java.util.HashSet; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.Field; import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.index.codecs.FieldsConsumer; import org.apache.lucene.index.codecs.FieldsProducer; import org.apache.lucene.index.codecs.PostingsConsumer; import org.apache.lucene.index.codecs.TermsConsumer; import org.apache.lucene.index.codecs.mocksep.MockSepCodec; +import org.apache.lucene.index.codecs.preflex.PreFlexCodec; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.PhraseQuery; @@ -40,6 +41,7 @@ import org.apache.lucene.store.Directory import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.Version; +import org.apache.lucene.util._TestUtil; // TODO: test multiple codecs here? 
@@ -68,43 +70,6 @@ public class TestCodecs extends LuceneTe private final static int DOC_FREQ_RAND = 500; // must be > 16 to test skipping private final static int TERM_DOC_FREQ_RAND = 20; - // start is inclusive and end is exclusive - public int nextInt(final int start, final int end) { - return start + random.nextInt(end-start); - } - - private int nextInt(final int lim) { - return random.nextInt(lim); - } - - char[] getRandomText() { - - final int len = 1+this.nextInt(10); - final char[] buffer = new char[len+1]; - for(int i=0;i= 1) { - final int inc = 1+TestCodecs.this.nextInt(left-1); + if (TestCodecs.random.nextInt(3) == 1 && left >= 1) { + final int inc = 1+TestCodecs.random.nextInt(left-1); upto2 += inc; - if (TestCodecs.this.nextInt(2) == 1) { + if (TestCodecs.random.nextInt(2) == 1) { doc = docsEnum.advance(term.docs[upto2]); assertEquals(term.docs[upto2], doc); } else { @@ -586,7 +562,7 @@ public class TestCodecs extends LuceneTe assertEquals(term.docs[upto2], doc); if (!field.omitTF) { assertEquals(term.positions[upto2].length, docsEnum.freq()); - if (TestCodecs.this.nextInt(2) == 1) { + if (TestCodecs.random.nextInt(2) == 1) { this.verifyPositions(term.positions[upto2], postings); } } @@ -603,15 +579,19 @@ public class TestCodecs extends LuceneTe } } - private void write(final FieldInfos fieldInfos, final Directory dir, final FieldData[] fields) throws Throwable { + private void write(final FieldInfos fieldInfos, final Directory dir, final FieldData[] fields, boolean allowPreFlex) throws Throwable { - final int termIndexInterval = this.nextInt(13, 27); + final int termIndexInterval = _TestUtil.nextInt(random, 13, 27); final SegmentCodecs codecInfo = SegmentCodecs.build(fieldInfos, CodecProvider.getDefault()); final SegmentWriteState state = new SegmentWriteState(null, dir, SEGMENT, fieldInfos, 10000, termIndexInterval, codecInfo); final FieldsConsumer consumer = state.segmentCodecs.codec().fieldsConsumer(state); Arrays.sort(fields); for (final FieldData field : fields) { + if (!allowPreFlex && codecInfo.codecs[field.fieldInfo.codecId] instanceof PreFlexCodec) { + // code below expects unicode sort order + continue; + } field.write(consumer); } consumer.close(); Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestDoc.java URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestDoc.java?rev=1058390&r1=1058389&r2=1058390&view=diff ============================================================================== --- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestDoc.java (original) +++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestDoc.java Thu Jan 13 02:09:33 2011 @@ -201,11 +201,12 @@ public class TestDoc extends LuceneTestC r2.close(); final SegmentInfo info = new SegmentInfo(merged, si1.docCount + si2.docCount, si1.dir, - useCompoundFile, merger.fieldInfos().hasProx(), merger.getSegmentCodecs(), + false, merger.fieldInfos().hasProx(), merger.getSegmentCodecs(), merger.fieldInfos().hasVectors()); if (useCompoundFile) { Collection filesToDelete = merger.createCompoundFile(merged + ".cfs", info); + info.setUseCompoundFile(true); for (final String fileToDelete : filesToDelete) si1.dir.deleteFile(fileToDelete); } Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestFlex.java URL: 
http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestFlex.java?rev=1058390&r1=1058389&r2=1058390&view=diff ============================================================================== --- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestFlex.java (original) +++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestFlex.java Thu Jan 13 02:09:33 2011 @@ -71,7 +71,11 @@ public class TestFlex extends LuceneTest IndexReader r = w.getReader(); TermsEnum terms = r.getSequentialSubReaders()[0].fields().terms("f").iterator(); assertTrue(terms.next() != null); - assertEquals(0, terms.ord()); + try { + assertEquals(0, terms.ord()); + } catch (UnsupportedOperationException uoe) { + // ok -- codec is not required to support this op + } r.close(); w.close(); d.close(); Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexReader.java URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexReader.java?rev=1058390&r1=1058389&r2=1058390&view=diff ============================================================================== --- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexReader.java (original) +++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexReader.java Thu Jan 13 02:09:33 2011 @@ -981,6 +981,7 @@ public class TestIndexReader extends Luc // new IndexFileDeleter, have it delete // unreferenced files, then verify that in fact // no files were deleted: + IndexWriter.unlock(dir); TestIndexWriter.assertNoUnreferencedFiles(dir, "reader.close() failed to delete unreferenced files"); // Finally, verify index is not corrupt, and, if @@ -1333,8 +1334,8 @@ public class TestIndexReader extends Luc it1 = fields1.iterator(); while (it1.hasNext()) { String curField = it1.next(); - byte[] norms1 = index1.norms(curField); - byte[] norms2 = index2.norms(curField); + byte[] norms1 = MultiNorms.norms(index1, curField); + byte[] norms2 = MultiNorms.norms(index2, curField); if (norms1 != null && norms2 != null) { assertEquals(norms1.length, norms2.length); Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexReaderClone.java URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexReaderClone.java?rev=1058390&r1=1058389&r2=1058390&view=diff ============================================================================== --- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexReaderClone.java (original) +++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexReaderClone.java Thu Jan 13 02:09:33 2011 @@ -272,13 +272,13 @@ public class TestIndexReaderClone extend * @throws Exception */ private void performDefaultTests(IndexReader r1) throws Exception { - float norm1 = Similarity.getDefault().decodeNormValue(r1.norms("field1")[4]); + float norm1 = Similarity.getDefault().decodeNormValue(MultiNorms.norms(r1, "field1")[4]); IndexReader pr1Clone = (IndexReader) r1.clone(); pr1Clone.deleteDocument(10); pr1Clone.setNorm(4, "field1", 0.5f); - assertTrue(Similarity.getDefault().decodeNormValue(r1.norms("field1")[4]) == norm1); - assertTrue(Similarity.getDefault().decodeNormValue(pr1Clone.norms("field1")[4]) != norm1); + assertTrue(Similarity.getDefault().decodeNormValue(MultiNorms.norms(r1, "field1")[4]) == 
norm1); + assertTrue(Similarity.getDefault().decodeNormValue(MultiNorms.norms(pr1Clone, "field1")[4]) != norm1); final Bits delDocs = MultiFields.getDeletedDocs(r1); assertTrue(delDocs == null || !delDocs.get(10)); @@ -428,7 +428,7 @@ public class TestIndexReaderClone extend IndexReader orig = IndexReader.open(dir1, false); orig.setNorm(1, "field1", 17.0f); final byte encoded = Similarity.getDefault().encodeNormValue(17.0f); - assertEquals(encoded, orig.norms("field1")[1]); + assertEquals(encoded, MultiNorms.norms(orig, "field1")[1]); // the cloned segmentreader should have 2 references, 1 to itself, and 1 to // the original segmentreader @@ -437,7 +437,7 @@ public class TestIndexReaderClone extend clonedReader.close(); IndexReader r = IndexReader.open(dir1, false); - assertEquals(encoded, r.norms("field1")[1]); + assertEquals(encoded, MultiNorms.norms(r, "field1")[1]); r.close(); dir1.close(); } Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexReaderCloneNorms.java URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexReaderCloneNorms.java?rev=1058390&r1=1058389&r2=1058390&view=diff ============================================================================== --- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexReaderCloneNorms.java (original) +++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexReaderCloneNorms.java Thu Jan 13 02:09:33 2011 @@ -42,8 +42,9 @@ public class TestIndexReaderCloneNorms e private class SimilarityOne extends DefaultSimilarity { @Override - public float lengthNorm(String fieldName, int numTerms) { - return 1; + public float computeNorm(String fieldName, FieldInvertState state) { + // diable length norm + return state.getBoost(); } } @@ -272,7 +273,7 @@ public class TestIndexReaderCloneNorms e private void verifyIndex(IndexReader ir) throws IOException { for (int i = 0; i < NUM_FIELDS; i++) { String field = "f" + i; - byte b[] = ir.norms(field); + byte b[] = MultiNorms.norms(ir, field); assertEquals("number of norms mismatches", numDocNorms, b.length); ArrayList storedNorms = (i == 1 ? 
modifiedNorms : norms); for (int j = 0; j < b.length; j++) { Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java?rev=1058390&r1=1058389&r2=1058390&view=diff ============================================================================== --- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java (original) +++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java Thu Jan 13 02:09:33 2011 @@ -43,11 +43,11 @@ import org.apache.lucene.analysis.tokena import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.Fieldable; -import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.document.Field.Index; import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.Field.TermVector; import org.apache.lucene.index.IndexWriterConfig.OpenMode; +import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.IndexSearcher; @@ -157,7 +157,7 @@ public class TestIndexWriter extends Luc String[] startFiles = dir.listAll(); SegmentInfos infos = new SegmentInfos(); infos.read(dir); - new IndexFileDeleter(dir, new KeepOnlyLastCommitDeletionPolicy(), infos, null, CodecProvider.getDefault()); + new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())).rollback(); String[] endFiles = dir.listAll(); Arrays.sort(startFiles); Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexWriterDelete.java URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexWriterDelete.java?rev=1058390&r1=1058389&r2=1058390&view=diff ============================================================================== --- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexWriterDelete.java (original) +++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexWriterDelete.java Thu Jan 13 02:09:33 2011 @@ -832,8 +832,8 @@ public class TestIndexWriterDelete exten } } - TestIndexWriter.assertNoUnreferencedFiles(dir, "docsWriter.abort() failed to delete unreferenced files"); modifier.close(); + TestIndexWriter.assertNoUnreferencedFiles(dir, "docsWriter.abort() failed to delete unreferenced files"); dir.close(); } Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestLazyProxSkipping.java URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestLazyProxSkipping.java?rev=1058390&r1=1058389&r2=1058390&view=diff ============================================================================== --- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestLazyProxSkipping.java (original) +++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestLazyProxSkipping.java Thu Jan 13 02:09:33 2011 @@ -27,7 +27,6 @@ import org.apache.lucene.document.Field; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.ScoreDoc; -import org.apache.lucene.search.Searcher; import org.apache.lucene.store.Directory; import 
org.apache.lucene.store.IndexInput; import org.apache.lucene.store.MockDirectoryWrapper; @@ -40,7 +39,7 @@ import org.apache.lucene.util.BytesRef; * */ public class TestLazyProxSkipping extends LuceneTestCase { - private Searcher searcher; + private IndexSearcher searcher; private int seeksCounter = 0; private String field = "tokens"; Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestNRTThreads.java URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestNRTThreads.java?rev=1058390&r1=1058389&r2=1058390&view=diff ============================================================================== --- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestNRTThreads.java (original) +++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestNRTThreads.java Thu Jan 13 02:09:33 2011 @@ -25,6 +25,9 @@ import java.util.List; import java.util.Set; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.Executors; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.TimeUnit; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; @@ -38,6 +41,7 @@ import org.apache.lucene.search.TermQuer import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.MockDirectoryWrapper; +import org.apache.lucene.util.NamedThreadFactory; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LineFileDocs; @@ -61,7 +65,7 @@ public class TestNRTThreads extends Luce CodecProvider.getDefault().setDefaultFieldCodec("Standard"); } - final LineFileDocs docs = new LineFileDocs(true); + final LineFileDocs docs = new LineFileDocs(random); final File tempDir = _TestUtil.getTempDir("nrtopenfiles"); final MockDirectoryWrapper dir = new MockDirectoryWrapper(random, FSDirectory.open(tempDir)); final IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()); @@ -191,6 +195,8 @@ public class TestNRTThreads extends Luce // silly starting guess: final AtomicInteger totTermCount = new AtomicInteger(100); + final ExecutorService es = Executors.newCachedThreadPool(new NamedThreadFactory("NRT search threads")); + while(System.currentTimeMillis() < stopTime && !failed.get()) { if (random.nextBoolean()) { if (VERBOSE) { @@ -228,7 +234,7 @@ public class TestNRTThreads extends Luce if (r.numDocs() > 0) { - final IndexSearcher s = new IndexSearcher(r); + final IndexSearcher s = new IndexSearcher(r, es); // run search threads final long searchStopTime = System.currentTimeMillis() + 500; @@ -302,6 +308,9 @@ public class TestNRTThreads extends Luce } } + es.shutdown(); + es.awaitTermination(1, TimeUnit.SECONDS); + if (VERBOSE) { System.out.println("TEST: all searching done [" + (System.currentTimeMillis()-t0) + " ms]"); } Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestNorms.java URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestNorms.java?rev=1058390&r1=1058389&r2=1058390&view=diff ============================================================================== --- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestNorms.java (original) +++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestNorms.java Thu 
Jan 13 02:09:33 2011 @@ -41,8 +41,9 @@ public class TestNorms extends LuceneTes private class SimilarityOne extends DefaultSimilarity { @Override - public float lengthNorm(String fieldName, int numTerms) { - return 1; + public float computeNorm(String fieldName, FieldInvertState state) { + // Disable length norm + return state.getBoost(); } } @@ -179,7 +180,7 @@ public class TestNorms extends LuceneTes IndexReader ir = IndexReader.open(dir, false); for (int i = 0; i < NUM_FIELDS; i++) { String field = "f"+i; - byte b[] = ir.norms(field); + byte b[] = MultiNorms.norms(ir, field); assertEquals("number of norms mismatches",numDocNorms,b.length); ArrayList storedNorms = (i==1 ? modifiedNorms : norms); for (int j = 0; j < b.length; j++) { @@ -236,4 +237,52 @@ public class TestNorms extends LuceneTes return norm; } + class CustomNormEncodingSimilarity extends DefaultSimilarity { + @Override + public byte encodeNormValue(float f) { + return (byte) f; + } + + @Override + public float decodeNormValue(byte b) { + return (float) b; + } + + @Override + public float computeNorm(String field, FieldInvertState state) { + return (float) state.getLength(); + } + } + + // LUCENE-1260 + public void testCustomEncoder() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()); + config.setSimilarity(new CustomNormEncodingSimilarity()); + RandomIndexWriter writer = new RandomIndexWriter(random, dir, config); + Document doc = new Document(); + Field foo = newField("foo", "", Field.Store.NO, Field.Index.ANALYZED); + Field bar = newField("bar", "", Field.Store.NO, Field.Index.ANALYZED); + doc.add(foo); + doc.add(bar); + + for (int i = 0; i < 100; i++) { + bar.setValue("singleton"); + writer.addDocument(doc); + } + + IndexReader reader = writer.getReader(); + writer.close(); + + byte fooNorms[] = MultiNorms.norms(reader, "foo"); + for (int i = 0; i < reader.maxDoc(); i++) + assertEquals(0, fooNorms[i]); + + byte barNorms[] = MultiNorms.norms(reader, "bar"); + for (int i = 0; i < reader.maxDoc(); i++) + assertEquals(1, barNorms[i]); + + reader.close(); + dir.close(); + } } Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestOmitTf.java URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestOmitTf.java?rev=1058390&r1=1058389&r2=1058390&view=diff ============================================================================== --- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestOmitTf.java (original) +++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestOmitTf.java Thu Jan 13 02:09:33 2011 @@ -35,13 +35,13 @@ import org.apache.lucene.search.Explanat public class TestOmitTf extends LuceneTestCase { public static class SimpleSimilarity extends Similarity { - @Override public float lengthNorm(String field, int numTerms) { return 1.0f; } + @Override public float computeNorm(String field, FieldInvertState state) { return state.getBoost(); } @Override public float queryNorm(float sumOfSquaredWeights) { return 1.0f; } @Override public float tf(float freq) { return freq; } @Override public float sloppyFreq(int distance) { return 2.0f; } @Override public float idf(int docFreq, int numDocs) { return 1.0f; } @Override public float coord(int overlap, int maxOverlap) { return 1.0f; } - @Override public IDFExplanation idfExplain(Collection terms, Searcher searcher) throws IOException { + @Override public 
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestOmitTf.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestOmitTf.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestOmitTf.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestOmitTf.java Thu Jan 13 02:09:33 2011
@@ -35,13 +35,13 @@ import org.apache.lucene.search.Explanat
 public class TestOmitTf extends LuceneTestCase {
   public static class SimpleSimilarity extends Similarity {
-    @Override public float lengthNorm(String field, int numTerms) { return 1.0f; }
+    @Override public float computeNorm(String field, FieldInvertState state) { return state.getBoost(); }
     @Override public float queryNorm(float sumOfSquaredWeights) { return 1.0f; }
     @Override public float tf(float freq) { return freq; }
     @Override public float sloppyFreq(int distance) { return 2.0f; }
     @Override public float idf(int docFreq, int numDocs) { return 1.0f; }
     @Override public float coord(int overlap, int maxOverlap) { return 1.0f; }
-    @Override public IDFExplanation idfExplain(Collection<Term> terms, Searcher searcher) throws IOException {
+    @Override public IDFExplanation idfExplain(Collection<Term> terms, IndexSearcher searcher) throws IOException {
       return new IDFExplanation() {
         @Override
         public float getIdf() {
@@ -279,7 +279,7 @@ public class TestOmitTf extends LuceneTe
     /*
      * Verify the index
      */
-    Searcher searcher = new IndexSearcher(dir, true);
+    IndexSearcher searcher = new IndexSearcher(dir, true);
     searcher.setSimilarity(new SimpleSimilarity());
     Term a = new Term("noTf", term);

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestPerFieldCodecSupport.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestPerFieldCodecSupport.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestPerFieldCodecSupport.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestPerFieldCodecSupport.java Thu Jan 13 02:09:33 2011
@@ -56,6 +56,7 @@ public class TestPerFieldCodecSupport ex
     conf.setMergePolicy(logByteSizeMergePolicy);
     final IndexWriter writer = new IndexWriter(dir, conf);
+    writer.setInfoStream(VERBOSE ? System.out : null);
     return writer;
   }
@@ -110,12 +111,15 @@ public class TestPerFieldCodecSupport ex
   }
   /*
-   * Test is hetrogenous index segements are merge sucessfully
+   * Test that heterogeneous index segments are merged successfully
   */
  @Test
  public void testChangeCodecAndMerge() throws IOException {
    Directory dir = newDirectory();
    CodecProvider provider = new MockCodecProvider();
+    if (VERBOSE) {
+      System.out.println("TEST: make new index");
+    }
    IndexWriterConfig iwconf = newIndexWriterConfig(TEST_VERSION_CURRENT,
            new MockAnalyzer()).setOpenMode(OpenMode.CREATE).setCodecProvider(provider);
    iwconf.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
@@ -125,6 +129,9 @@ public class TestPerFieldCodecSupport ex
     addDocs(writer, 10);
     writer.commit();
     assertQuery(new Term("content", "aaa"), dir, 10, provider);
+    if (VERBOSE) {
+      System.out.println("TEST: addDocs3");
+    }
     addDocs3(writer, 10);
     writer.commit();
     writer.close();
@@ -144,6 +151,9 @@
     iwconf.setCodecProvider(provider);
     writer = newWriter(dir, iwconf);
     // swap in new codec for currently written segments
+    if (VERBOSE) {
+      System.out.println("TEST: add docs w/ Standard codec for content field");
+    }
     addDocs2(writer, 10);
     writer.commit();
     Codec origContentCodec = provider.lookup("MockSep");
@@ -152,9 +162,12 @@
        origContentCodec, origContentCodec, newContentCodec);
     assertEquals(30, writer.maxDoc());
     assertQuery(new Term("content", "bbb"), dir, 10, provider);
-    assertQuery(new Term("content", "ccc"), dir, 10, provider);
+    assertQuery(new Term("content", "ccc"), dir, 10, provider); ////
     assertQuery(new Term("content", "aaa"), dir, 10, provider);
+    if (VERBOSE) {
+      System.out.println("TEST: add more docs w/ new codec");
+    }
     addDocs2(writer, 10);
     writer.commit();
     assertQuery(new Term("content", "ccc"), dir, 10, provider);
@@ -162,6 +175,9 @@
     assertQuery(new Term("content", "aaa"), dir, 10, provider);
     assertEquals(40, writer.maxDoc());
+    if (VERBOSE) {
+      System.out.println("TEST: now optimize");
+    }
     writer.optimize();
     assertEquals(40, writer.maxDoc());
     writer.close();
@@ -206,6 +222,9 @@
   public void assertQuery(Term t, Directory dir, int num, CodecProvider
       codecs) throws CorruptIndexException, IOException {
+    if (VERBOSE) {
+      System.out.println("\nTEST: assertQuery " + t);
+    }
     IndexReader reader = IndexReader.open(dir, null, true, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR, codecs);
     IndexSearcher searcher = new IndexSearcher(reader);
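One practical note on the VERBOSE plumbing being added throughout TestPerFieldCodecSupport: LuceneTestCase exposes a VERBOSE flag (a system-property controlled switch), and wiring it into IndexWriter's infoStream is what surfaces the flush and merge logging when a failure needs to be reproduced. A minimal sketch of the pattern, assumed to live inside a LuceneTestCase subclass with an existing Directory dir and IndexWriterConfig conf:

// Sketch of the debugging pattern used above; dir and conf are assumed.
IndexWriter writer = new IndexWriter(dir, conf);
// Route IndexWriter's internal diagnostics to stdout only in verbose runs.
writer.setInfoStream(VERBOSE ? System.out : null);
if (VERBOSE) {
  System.out.println("TEST: make new index");
}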
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestSegmentReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestSegmentReader.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestSegmentReader.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestSegmentReader.java Thu Jan 13 02:09:33 2011
@@ -181,11 +181,11 @@ public class TestSegmentReader extends L
       assertEquals(reader.hasNorms(f.name()), !DocHelper.noNorms.containsKey(f.name()));
       if (!reader.hasNorms(f.name())) {
         // test for fake norms of 1.0 or null depending on the flag
-        byte [] norms = reader.norms(f.name());
+        byte [] norms = MultiNorms.norms(reader, f.name());
         byte norm1 = Similarity.getDefault().encodeNormValue(1.0f);
         assertNull(norms);
         norms = new byte[reader.maxDoc()];
-        reader.norms(f.name(),norms, 0);
+        MultiNorms.norms(reader, f.name(),norms, 0);
         for (int j=0; j= 0; out.writeVInt(buffer[i]); } }
@@ -105,7 +111,7 @@ public class MockFixedIntBlockCodec exte
   @Override
   public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
-    PostingsWriterBase postingsWriter = new SepPostingsWriterImpl(state, new MockIntFactory());
+    PostingsWriterBase postingsWriter = new SepPostingsWriterImpl(state, new MockIntFactory(blockSize));

     boolean success = false;
     TermsIndexWriterBase indexWriter;
@@ -139,7 +145,7 @@ public class MockFixedIntBlockCodec exte
     PostingsReaderBase postingsReader = new SepPostingsReaderImpl(state.dir,
                                                                   state.segmentInfo,
                                                                   state.readBufferSize,
-                                                                  new MockIntFactory(), state.codecId);
+                                                                  new MockIntFactory(blockSize), state.codecId);

     TermsIndexReaderBase indexReader;
     boolean success = false;
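Only part of the MockFixedIntBlockCodec flush loop is visible above, but the readable fragment (a new assert followed by out.writeVInt(buffer[i])) captures the whole point of the change: block values are written as vInts, so they must be non-negative, and the factory now carries its blockSize explicitly. A rough sketch of such a fixed-size block flush, under the assumption of an int[] buffer holding exactly blockSize pending values and an IndexOutput out; the helper class and method names here are illustrative, not the patch itself:

import java.io.IOException;
import org.apache.lucene.store.IndexOutput;

// Illustrative sketch of a fixed-size int block flush: every buffered
// value is asserted non-negative and then written as a vInt.
final class FixedBlockFlush {
  private FixedBlockFlush() {}

  static void flushBlock(int[] buffer, int blockSize, IndexOutput out) throws IOException {
    for (int i = 0; i < blockSize; i++) {
      assert buffer[i] >= 0;   // vInt encoding assumes unsigned values
      out.writeVInt(buffer[i]);
    }
  }
}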
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/mockintblock/MockVariableIntBlockCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/mockintblock/MockVariableIntBlockCodec.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/mockintblock/MockVariableIntBlockCodec.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/mockintblock/MockVariableIntBlockCodec.java Thu Jan 13 02:09:33 2011
@@ -67,7 +67,13 @@ public class MockVariableIntBlockCodec e
     return name + "(baseBlockSize="+ baseBlockSize + ")";
   }
-  private class MockIntFactory extends IntStreamFactory {
+  public static class MockIntFactory extends IntStreamFactory {
+
+    private final int baseBlockSize;
+
+    public MockIntFactory(int baseBlockSize) {
+      this.baseBlockSize = baseBlockSize;
+    }

     @Override
     public IntIndexInput openInput(Directory dir, String fileName, int readBufferSize) throws IOException {
@@ -104,6 +110,7 @@ public class MockVariableIntBlockCodec e
         @Override
         protected int add(int value) throws IOException {
+          assert value >= 0;
           buffer[pendingCount++] = value;
           // silly variable block length int encoder: if
           // first value <= 3, we write N vints at once;
@@ -128,7 +135,7 @@ public class MockVariableIntBlockCodec e
   @Override
   public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
-    PostingsWriterBase postingsWriter = new SepPostingsWriterImpl(state, new MockIntFactory());
+    PostingsWriterBase postingsWriter = new SepPostingsWriterImpl(state, new MockIntFactory(baseBlockSize));

     boolean success = false;
     TermsIndexWriterBase indexWriter;
@@ -162,7 +169,7 @@ public class MockVariableIntBlockCodec e
     PostingsReaderBase postingsReader = new SepPostingsReaderImpl(state.dir,
                                                                   state.segmentInfo,
                                                                   state.readBufferSize,
-                                                                  new MockIntFactory(), state.codecId);
+                                                                  new MockIntFactory(baseBlockSize), state.codecId);

     TermsIndexReaderBase indexReader;
     boolean success = false;
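For readers trying to picture the variable-block encoder these hunks touch: values are buffered and flushed a whole block at a time as vInts, and the block length is chosen from the first value of the block. The comment above shows only the "first value <= 3 means N vints" half; the rest of the policy is not visible in this hunk, so in the sketch below the 2*N branch is an assumption, and the class name is made up rather than taken from the patch:

import java.io.IOException;
import org.apache.lucene.store.IndexOutput;

// Illustrative sketch, not the class in the patch: buffer ints and flush a
// whole block of vInts, picking the block length from the first value.
final class VariableBlockVIntWriter {
  private final IndexOutput out;
  private final int baseBlockSize;   // "N" in the comment above
  private final int[] buffer;
  private int pendingCount;

  VariableBlockVIntWriter(IndexOutput out, int baseBlockSize) {
    this.out = out;
    this.baseBlockSize = baseBlockSize;
    this.buffer = new int[2 * baseBlockSize];
  }

  void add(int value) throws IOException {
    assert value >= 0;               // stored as unsigned vInts
    buffer[pendingCount++] = value;
    // Assumed policy: N vints when the first value is small, else 2*N.
    final int blockSize = buffer[0] <= 3 ? baseBlockSize : 2 * baseBlockSize;
    if (pendingCount == blockSize) {
      for (int i = 0; i < blockSize; i++) {
        out.writeVInt(buffer[i]);
      }
      pendingCount = 0;
    }
  }
}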
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/mocksep/MockSingleIntIndexOutput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/mocksep/MockSingleIntIndexOutput.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/mocksep/MockSingleIntIndexOutput.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/mocksep/MockSingleIntIndexOutput.java Thu Jan 13 02:09:33 2011
@@ -42,6 +42,7 @@ public class MockSingleIntIndexOutput ex
   /** Write an int to the primary file */
   @Override
   public void write(int v) throws IOException {
+    assert v >= 0;
     out.writeVInt(v);
   }

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/preflexrw/PreFlexFieldsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/preflexrw/PreFlexFieldsWriter.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/preflexrw/PreFlexFieldsWriter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/preflexrw/PreFlexFieldsWriter.java Thu Jan 13 02:09:33 2011
@@ -46,18 +46,14 @@ class PreFlexFieldsWriter extends Fields
                                        state.segmentName,
                                        state.fieldInfos,
                                        state.termIndexInterval);
-    state.flushedFiles.add(IndexFileNames.segmentFileName(state.segmentName, "", PreFlexCodec.TERMS_EXTENSION));
-    state.flushedFiles.add(IndexFileNames.segmentFileName(state.segmentName, "", PreFlexCodec.TERMS_INDEX_EXTENSION));

     final String freqFile = IndexFileNames.segmentFileName(state.segmentName, "", PreFlexCodec.FREQ_EXTENSION);
     freqOut = state.directory.createOutput(freqFile);
-    state.flushedFiles.add(freqFile);
     totalNumDocs = state.numDocs;

     if (state.fieldInfos.hasProx()) {
       final String proxFile = IndexFileNames.segmentFileName(state.segmentName, "", PreFlexCodec.PROX_EXTENSION);
       proxOut = state.directory.createOutput(proxFile);
-      state.flushedFiles.add(proxFile);
     } else {
       proxOut = null;
     }

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/CachingWrapperFilterHelper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/CachingWrapperFilterHelper.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/CachingWrapperFilterHelper.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/CachingWrapperFilterHelper.java Thu Jan 13 02:09:33 2011
@@ -20,7 +20,8 @@ package org.apache.lucene.search;
 import java.io.IOException;

 import junit.framework.Assert;
-import org.apache.lucene.index.IndexReader;
+
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;

 /**
  * A unit test helper class to test when the filter is getting cached and when it is not.
@@ -41,10 +42,10 @@ public class CachingWrapperFilterHelper
   }

   @Override
-  public synchronized DocIdSet getDocIdSet(IndexReader reader) throws IOException {
+  public synchronized DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException {
     final int saveMissCount = missCount;
-    DocIdSet docIdSet = super.getDocIdSet(reader);
+    DocIdSet docIdSet = super.getDocIdSet(context);
     if (shouldHaveCache) {
       Assert.assertEquals("Cache should have data ", saveMissCount, missCount);
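The CachingWrapperFilterHelper hunk is cut off above, but the visible part already shows the relevant API shift: Filter.getDocIdSet now takes an IndexReader.AtomicReaderContext, one per segment, instead of a top-level IndexReader. A minimal sketch of a wrapper filter written against the new signature follows; the class is illustrative only and not part of the patch:

import java.io.IOException;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Filter;

// Illustrative only: a pass-through wrapper on the per-segment Filter API.
public class CountingFilterWrapper extends Filter {
  private final Filter in;
  private int calls;

  public CountingFilterWrapper(Filter in) {
    this.in = in;
  }

  @Override
  public synchronized DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException {
    // With the new API this is invoked once per segment rather than once
    // per top-level reader, so the count reflects per-segment calls.
    calls++;
    return in.getDocIdSet(context);
  }

  public synchronized int getCallCount() {
    return calls;
  }
}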