Return-Path:
X-Original-To: apmail-lucene-commits-archive@www.apache.org
Delivered-To: apmail-lucene-commits-archive@www.apache.org
Received: from mail.apache.org (hermes.apache.org [140.211.11.3])
by minotaur.apache.org (Postfix) with SMTP id D659B17D28
for ;
Tue, 28 Oct 2014 09:20:17 +0000 (UTC)
Received: (qmail 94764 invoked by uid 500); 28 Oct 2014 09:20:17 -0000
Mailing-List: contact commits-help@lucene.apache.org; run by ezmlm
Precedence: bulk
List-Help:
List-Unsubscribe:
List-Post:
List-Id:
Reply-To: dev@lucene.apache.org
Delivered-To: mailing list commits@lucene.apache.org
Received: (qmail 94755 invoked by uid 99); 28 Oct 2014 09:20:17 -0000
Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230)
by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 28 Oct 2014 09:20:17 +0000
X-ASF-Spam-Status: No, hits=-2000.0 required=5.0
tests=ALL_TRUSTED
X-Spam-Check-By: apache.org
Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4)
by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 28 Oct 2014 09:19:45 +0000
Received: from eris.apache.org (localhost [127.0.0.1])
by eris.apache.org (Postfix) with ESMTP id 478F9238899C;
Tue, 28 Oct 2014 09:19:42 +0000 (UTC)
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Subject: svn commit: r1634823 [3/9] - in /lucene/dev/branches/lucene6005: ./
dev-tools/ lucene/ lucene/analysis/ lucene/analysis/common/
lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/
lucene/analysis/common/src/java/org/apache/lucene/analy...
Date: Tue, 28 Oct 2014 09:19:29 -0000
To: commits@lucene.apache.org
From: mikemccand@apache.org
X-Mailer: svnmailer-1.0.9
Message-Id: <20141028091942.478F9238899C@eris.apache.org>
X-Virus-Checked: Checked by ClamAV on apache.org
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsReader.java?rev=1634823&r1=1634822&r2=1634823&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsReader.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsReader.java Tue Oct 28 09:19:21 2014
@@ -30,17 +30,16 @@ import org.apache.lucene.codecs.Postings
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInfo;
-import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
-import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.Terms;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.fst.ByteSequenceOutputs;
+import org.apache.lucene.util.fst.Outputs;
/** A block-based terms index and dictionary that assigns
* terms to variable length blocks according to how they
@@ -74,8 +73,31 @@ import org.apache.lucene.util.IOUtils;
public final class BlockTreeTermsReader extends FieldsProducer {
+ static final Outputs FST_OUTPUTS = ByteSequenceOutputs.getSingleton();
+
+ static final BytesRef NO_OUTPUT = FST_OUTPUTS.getNoOutput();
+
+ static final int OUTPUT_FLAGS_NUM_BITS = 2;
+ static final int OUTPUT_FLAGS_MASK = 0x3;
+ static final int OUTPUT_FLAG_IS_FLOOR = 0x1;
+ static final int OUTPUT_FLAG_HAS_TERMS = 0x2;
+
+ /** Extension of terms file */
+ static final String TERMS_EXTENSION = "tim";
+ final static String TERMS_CODEC_NAME = "BlockTreeTermsDict";
+
+ /** Initial terms format. */
+ public static final int VERSION_START = 0;
+
+ /** Current terms format. */
+ public static final int VERSION_CURRENT = VERSION_START;
+
+ /** Extension of terms index file */
+ static final String TERMS_INDEX_EXTENSION = "tip";
+ final static String TERMS_INDEX_CODEC_NAME = "BlockTreeTermsIndex";
+
// Open input to the main terms dict file (_X.tim)
- final IndexInput in;
+ final IndexInput termsIn;
//private static final boolean DEBUG = BlockTreeTermsWriter.DEBUG;
@@ -96,105 +118,86 @@ public final class BlockTreeTermsReader
private final int version;
/** Sole constructor. */
- public BlockTreeTermsReader(Directory dir, FieldInfos fieldInfos, SegmentInfo info,
- PostingsReaderBase postingsReader, IOContext ioContext,
- String segmentSuffix)
- throws IOException {
-
- this.postingsReader = postingsReader;
-
- this.segment = info.name;
- in = dir.openInput(IndexFileNames.segmentFileName(segment, segmentSuffix, BlockTreeTermsWriter.TERMS_EXTENSION),
- ioContext);
-
+ public BlockTreeTermsReader(PostingsReaderBase postingsReader, SegmentReadState state) throws IOException {
boolean success = false;
IndexInput indexIn = null;
-
+
+ this.postingsReader = postingsReader;
+ this.segment = state.segmentInfo.name;
+
+ String termsName = IndexFileNames.segmentFileName(segment, state.segmentSuffix, TERMS_EXTENSION);
try {
- version = readHeader(in);
- indexIn = dir.openInput(IndexFileNames.segmentFileName(segment, segmentSuffix, BlockTreeTermsWriter.TERMS_INDEX_EXTENSION),
- ioContext);
- int indexVersion = readIndexHeader(indexIn);
- if (indexVersion != version) {
- throw new CorruptIndexException("mixmatched version files: " + in + "=" + version + "," + indexIn + "=" + indexVersion, indexIn);
- }
+ termsIn = state.directory.openInput(termsName, state.context);
+ version = CodecUtil.checkIndexHeader(termsIn, TERMS_CODEC_NAME, VERSION_START, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
- // verify
- if (version >= BlockTreeTermsWriter.VERSION_CHECKSUM) {
- CodecUtil.checksumEntireFile(indexIn);
- }
+ String indexName = IndexFileNames.segmentFileName(segment, state.segmentSuffix, TERMS_INDEX_EXTENSION);
+ indexIn = state.directory.openInput(indexName, state.context);
+ CodecUtil.checkIndexHeader(indexIn, TERMS_INDEX_CODEC_NAME, version, version, state.segmentInfo.getId(), state.segmentSuffix);
+ CodecUtil.checksumEntireFile(indexIn);
// Have PostingsReader init itself
- postingsReader.init(in);
-
+ postingsReader.init(termsIn, state);
// NOTE: data file is too costly to verify checksum against all the bytes on open,
// but for now we at least verify proper structure of the checksum footer: which looks
// for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
// such as file truncation.
- if (version >= BlockTreeTermsWriter.VERSION_CHECKSUM) {
- CodecUtil.retrieveChecksum(in);
- }
+ CodecUtil.retrieveChecksum(termsIn);
// Read per-field details
- seekDir(in, dirOffset);
+ seekDir(termsIn, dirOffset);
seekDir(indexIn, indexDirOffset);
- final int numFields = in.readVInt();
+ final int numFields = termsIn.readVInt();
if (numFields < 0) {
- throw new CorruptIndexException("invalid numFields: " + numFields, in);
+ throw new CorruptIndexException("invalid numFields: " + numFields, termsIn);
}
- for(int i=0;i= BlockTreeTermsWriter.VERSION_META_ARRAY ? in.readVInt() : 0;
+ final long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY ? -1 : termsIn.readVLong();
+ final long sumDocFreq = termsIn.readVLong();
+ final int docCount = termsIn.readVInt();
+ final int longsSize = termsIn.readVInt();
if (longsSize < 0) {
- throw new CorruptIndexException("invalid longsSize for field: " + fieldInfo.name + ", longsSize=" + longsSize, in);
+ throw new CorruptIndexException("invalid longsSize for field: " + fieldInfo.name + ", longsSize=" + longsSize, termsIn);
}
- BytesRef minTerm, maxTerm;
- if (version >= BlockTreeTermsWriter.VERSION_MIN_MAX_TERMS) {
- minTerm = readBytesRef(in);
- maxTerm = readBytesRef(in);
- } else {
- minTerm = maxTerm = null;
- }
- if (docCount < 0 || docCount > info.getDocCount()) { // #docs with field must be <= #docs
- throw new CorruptIndexException("invalid docCount: " + docCount + " maxDoc: " + info.getDocCount(), in);
+ BytesRef minTerm = readBytesRef(termsIn);
+ BytesRef maxTerm = readBytesRef(termsIn);
+ if (docCount < 0 || docCount > state.segmentInfo.getDocCount()) { // #docs with field must be <= #docs
+ throw new CorruptIndexException("invalid docCount: " + docCount + " maxDoc: " + state.segmentInfo.getDocCount(), termsIn);
}
if (sumDocFreq < docCount) { // #postings must be >= #docs with field
- throw new CorruptIndexException("invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount, in);
+ throw new CorruptIndexException("invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount, termsIn);
}
if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
- throw new CorruptIndexException("invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq, in);
+ throw new CorruptIndexException("invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq, termsIn);
}
final long indexStartFP = indexIn.readVLong();
FieldReader previous = fields.put(fieldInfo.name,
new FieldReader(this, fieldInfo, numTerms, rootCode, sumTotalTermFreq, sumDocFreq, docCount,
indexStartFP, longsSize, indexIn, minTerm, maxTerm));
if (previous != null) {
- throw new CorruptIndexException("duplicate field: " + fieldInfo.name, in);
+ throw new CorruptIndexException("duplicate field: " + fieldInfo.name, termsIn);
}
}
+
indexIn.close();
-
success = true;
} finally {
if (!success) {
@@ -212,38 +215,11 @@ public final class BlockTreeTermsReader
return bytes;
}
- /** Reads terms file header. */
- private int readHeader(IndexInput input) throws IOException {
- int version = CodecUtil.checkHeader(input, BlockTreeTermsWriter.TERMS_CODEC_NAME,
- BlockTreeTermsWriter.VERSION_START,
- BlockTreeTermsWriter.VERSION_CURRENT);
- if (version < BlockTreeTermsWriter.VERSION_APPEND_ONLY) {
- dirOffset = input.readLong();
- }
- return version;
- }
-
- /** Reads index file header. */
- private int readIndexHeader(IndexInput input) throws IOException {
- int version = CodecUtil.checkHeader(input, BlockTreeTermsWriter.TERMS_INDEX_CODEC_NAME,
- BlockTreeTermsWriter.VERSION_START,
- BlockTreeTermsWriter.VERSION_CURRENT);
- if (version < BlockTreeTermsWriter.VERSION_APPEND_ONLY) {
- indexDirOffset = input.readLong();
- }
- return version;
- }
-
/** Seek {@code input} to the directory offset. */
private void seekDir(IndexInput input, long dirOffset)
throws IOException {
- if (version >= BlockTreeTermsWriter.VERSION_CHECKSUM) {
- input.seek(input.length() - CodecUtil.footerLength() - 8);
- dirOffset = input.readLong();
- } else if (version >= BlockTreeTermsWriter.VERSION_APPEND_ONLY) {
- input.seek(input.length() - 8);
- dirOffset = input.readLong();
- }
+ input.seek(input.length() - CodecUtil.footerLength() - 8);
+ dirOffset = input.readLong();
input.seek(dirOffset);
}
@@ -255,7 +231,7 @@ public final class BlockTreeTermsReader
@Override
public void close() throws IOException {
try {
- IOUtils.close(in, postingsReader);
+ IOUtils.close(termsIn, postingsReader);
} finally {
// Clear so refs to terms index is GCable even if
// app hangs onto us:
@@ -313,14 +289,12 @@ public final class BlockTreeTermsReader
}
@Override
- public void checkIntegrity() throws IOException {
- if (version >= BlockTreeTermsWriter.VERSION_CHECKSUM) {
- // term dictionary
- CodecUtil.checksumEntireFile(in);
+ public void checkIntegrity() throws IOException {
+ // term dictionary
+ CodecUtil.checksumEntireFile(termsIn);
- // postings
- postingsReader.checkIntegrity();
- }
+ // postings
+ postingsReader.checkIntegrity();
}
@Override
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java?rev=1634823&r1=1634822&r2=1634823&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java Tue Oct 28 09:19:21 2014
@@ -41,14 +41,12 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.fst.Builder;
import org.apache.lucene.util.fst.ByteSequenceOutputs;
import org.apache.lucene.util.fst.BytesRefFSTEnum;
import org.apache.lucene.util.fst.FST;
-import org.apache.lucene.util.fst.Outputs;
import org.apache.lucene.util.fst.Util;
import org.apache.lucene.util.packed.PackedInts;
@@ -192,10 +190,6 @@ import org.apache.lucene.util.packed.Pac
*/
public final class BlockTreeTermsWriter extends FieldsConsumer {
- static final Outputs FST_OUTPUTS = ByteSequenceOutputs.getSingleton();
-
- static final BytesRef NO_OUTPUT = FST_OUTPUTS.getNoOutput();
-
/** Suggested default value for the {@code
* minItemsInBlock} parameter to {@link
* #BlockTreeTermsWriter(SegmentWriteState,PostingsWriterBase,int,int)}. */
@@ -209,38 +203,7 @@ public final class BlockTreeTermsWriter
// public final static boolean DEBUG = false;
//private final static boolean SAVE_DOT_FILES = false;
- static final int OUTPUT_FLAGS_NUM_BITS = 2;
- static final int OUTPUT_FLAGS_MASK = 0x3;
- static final int OUTPUT_FLAG_IS_FLOOR = 0x1;
- static final int OUTPUT_FLAG_HAS_TERMS = 0x2;
-
- /** Extension of terms file */
- static final String TERMS_EXTENSION = "tim";
- final static String TERMS_CODEC_NAME = "BLOCK_TREE_TERMS_DICT";
-
- /** Initial terms format. */
- public static final int VERSION_START = 0;
-
- /** Append-only */
- public static final int VERSION_APPEND_ONLY = 1;
-
- /** Meta data as array */
- public static final int VERSION_META_ARRAY = 2;
-
- /** checksums */
- public static final int VERSION_CHECKSUM = 3;
-
- /** min/max term */
- public static final int VERSION_MIN_MAX_TERMS = 4;
-
- /** Current terms format. */
- public static final int VERSION_CURRENT = VERSION_MIN_MAX_TERMS;
-
- /** Extension of terms index file */
- static final String TERMS_INDEX_EXTENSION = "tip";
- final static String TERMS_INDEX_CODEC_NAME = "BLOCK_TREE_TERMS_INDEX";
-
- private final IndexOutput out;
+ private final IndexOutput termsOut;
private final IndexOutput indexOut;
final int maxDoc;
final int minItemsInBlock;
@@ -286,8 +249,7 @@ public final class BlockTreeTermsWriter
* sub-blocks) per block will aim to be between
* minItemsPerBlock and maxItemsPerBlock, though in some
* cases the blocks may be smaller than the min. */
- public BlockTreeTermsWriter(
- SegmentWriteState state,
+ public BlockTreeTermsWriter(SegmentWriteState state,
PostingsWriterBase postingsWriter,
int minItemsInBlock,
int maxItemsInBlock)
@@ -295,47 +257,34 @@ public final class BlockTreeTermsWriter
{
validateSettings(minItemsInBlock, maxItemsInBlock);
- maxDoc = state.segmentInfo.getDocCount();
+ this.maxDoc = state.segmentInfo.getDocCount();
+ this.fieldInfos = state.fieldInfos;
+ this.minItemsInBlock = minItemsInBlock;
+ this.maxItemsInBlock = maxItemsInBlock;
+ this.postingsWriter = postingsWriter;
- final String termsFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TERMS_EXTENSION);
- out = state.directory.createOutput(termsFileName, state.context);
+ final String termsName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, BlockTreeTermsReader.TERMS_EXTENSION);
+ termsOut = state.directory.createOutput(termsName, state.context);
boolean success = false;
IndexOutput indexOut = null;
try {
- fieldInfos = state.fieldInfos;
- this.minItemsInBlock = minItemsInBlock;
- this.maxItemsInBlock = maxItemsInBlock;
- writeHeader(out);
-
- //DEBUG = state.segmentName.equals("_4a");
-
- final String termsIndexFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TERMS_INDEX_EXTENSION);
- indexOut = state.directory.createOutput(termsIndexFileName, state.context);
- writeIndexHeader(indexOut);
+ CodecUtil.writeIndexHeader(termsOut, BlockTreeTermsReader.TERMS_CODEC_NAME, BlockTreeTermsReader.VERSION_CURRENT,
+ state.segmentInfo.getId(), state.segmentSuffix);
- this.postingsWriter = postingsWriter;
- // segment = state.segmentInfo.name;
+ final String indexName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, BlockTreeTermsReader.TERMS_INDEX_EXTENSION);
+ indexOut = state.directory.createOutput(indexName, state.context);
+ CodecUtil.writeIndexHeader(indexOut, BlockTreeTermsReader.TERMS_INDEX_CODEC_NAME, BlockTreeTermsReader.VERSION_CURRENT,
+ state.segmentInfo.getId(), state.segmentSuffix);
- // System.out.println("BTW.init seg=" + state.segmentName);
-
- postingsWriter.init(out); // have consumer write its format/header
+ postingsWriter.init(termsOut, state); // have consumer write its format/header
+
+ this.indexOut = indexOut;
success = true;
} finally {
if (!success) {
- IOUtils.closeWhileHandlingException(out, indexOut);
+ IOUtils.closeWhileHandlingException(termsOut, indexOut);
}
}
- this.indexOut = indexOut;
- }
-
- /** Writes the terms file header. */
- private void writeHeader(IndexOutput out) throws IOException {
- CodecUtil.writeHeader(out, TERMS_CODEC_NAME, VERSION_CURRENT);
- }
-
- /** Writes the index file header. */
- private void writeIndexHeader(IndexOutput out) throws IOException {
- CodecUtil.writeHeader(out, TERMS_INDEX_CODEC_NAME, VERSION_CURRENT);
}
/** Writes the terms file trailer. */
@@ -395,7 +344,7 @@ public final class BlockTreeTermsWriter
static long encodeOutput(long fp, boolean hasTerms, boolean isFloor) {
assert fp < (1L << 62);
- return (fp << 2) | (hasTerms ? OUTPUT_FLAG_HAS_TERMS : 0) | (isFloor ? OUTPUT_FLAG_IS_FLOOR : 0);
+ return (fp << 2) | (hasTerms ? BlockTreeTermsReader.OUTPUT_FLAG_HAS_TERMS : 0) | (isFloor ? BlockTreeTermsReader.OUTPUT_FLAG_IS_FLOOR : 0);
}
private static class PendingEntry {
@@ -692,7 +641,7 @@ public final class BlockTreeTermsWriter
assert end > start;
- long startFP = out.getFilePointer();
+ long startFP = termsOut.getFilePointer();
boolean hasFloorLeadLabel = isFloor && floorLeadLabel != -1;
@@ -707,7 +656,7 @@ public final class BlockTreeTermsWriter
// Last block:
code |= 1;
}
- out.writeVInt(code);
+ termsOut.writeVInt(code);
/*
if (DEBUG) {
@@ -853,18 +802,18 @@ public final class BlockTreeTermsWriter
// search on lookup
// Write suffixes byte[] blob to terms dict output:
- out.writeVInt((int) (suffixWriter.getFilePointer() << 1) | (isLeafBlock ? 1:0));
- suffixWriter.writeTo(out);
+ termsOut.writeVInt((int) (suffixWriter.getFilePointer() << 1) | (isLeafBlock ? 1:0));
+ suffixWriter.writeTo(termsOut);
suffixWriter.reset();
// Write term stats byte[] blob
- out.writeVInt((int) statsWriter.getFilePointer());
- statsWriter.writeTo(out);
+ termsOut.writeVInt((int) statsWriter.getFilePointer());
+ statsWriter.writeTo(termsOut);
statsWriter.reset();
// Write term meta data byte[] blob
- out.writeVInt((int) metaWriter.getFilePointer());
- metaWriter.writeTo(out);
+ termsOut.writeVInt((int) metaWriter.getFilePointer());
+ metaWriter.writeTo(termsOut);
metaWriter.reset();
// if (DEBUG) {
@@ -1019,38 +968,38 @@ public final class BlockTreeTermsWriter
boolean success = false;
try {
- final long dirStart = out.getFilePointer();
+ final long dirStart = termsOut.getFilePointer();
final long indexDirStart = indexOut.getFilePointer();
- out.writeVInt(fields.size());
+ termsOut.writeVInt(fields.size());
for(FieldMetaData field : fields) {
//System.out.println(" field " + field.fieldInfo.name + " " + field.numTerms + " terms");
- out.writeVInt(field.fieldInfo.number);
+ termsOut.writeVInt(field.fieldInfo.number);
assert field.numTerms > 0;
- out.writeVLong(field.numTerms);
- out.writeVInt(field.rootCode.length);
- out.writeBytes(field.rootCode.bytes, field.rootCode.offset, field.rootCode.length);
+ termsOut.writeVLong(field.numTerms);
+ termsOut.writeVInt(field.rootCode.length);
+ termsOut.writeBytes(field.rootCode.bytes, field.rootCode.offset, field.rootCode.length);
if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
- out.writeVLong(field.sumTotalTermFreq);
+ termsOut.writeVLong(field.sumTotalTermFreq);
}
- out.writeVLong(field.sumDocFreq);
- out.writeVInt(field.docCount);
- out.writeVInt(field.longsSize);
+ termsOut.writeVLong(field.sumDocFreq);
+ termsOut.writeVInt(field.docCount);
+ termsOut.writeVInt(field.longsSize);
indexOut.writeVLong(field.indexStartFP);
- writeBytesRef(out, field.minTerm);
- writeBytesRef(out, field.maxTerm);
+ writeBytesRef(termsOut, field.minTerm);
+ writeBytesRef(termsOut, field.maxTerm);
}
- writeTrailer(out, dirStart);
- CodecUtil.writeFooter(out);
+ writeTrailer(termsOut, dirStart);
+ CodecUtil.writeFooter(termsOut);
writeIndexTrailer(indexOut, indexDirStart);
CodecUtil.writeFooter(indexOut);
success = true;
} finally {
if (success) {
- IOUtils.close(out, indexOut, postingsWriter);
+ IOUtils.close(termsOut, indexOut, postingsWriter);
} else {
- IOUtils.closeWhileHandlingException(out, indexOut, postingsWriter);
+ IOUtils.closeWhileHandlingException(termsOut, indexOut, postingsWriter);
}
}
}
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/blocktree/FieldReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/blocktree/FieldReader.java?rev=1634823&r1=1634822&r2=1634823&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/blocktree/FieldReader.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/blocktree/FieldReader.java Tue Oct 28 09:19:21 2014
@@ -34,8 +34,10 @@ import org.apache.lucene.util.automaton.
import org.apache.lucene.util.fst.ByteSequenceOutputs;
import org.apache.lucene.util.fst.FST;
-/** BlockTree's implementation of {@link Terms}. */
-// public for CheckIndex:
+/**
+ * BlockTree's implementation of {@link Terms}.
+ * @lucene.internal
+ */
public final class FieldReader extends Terms implements Accountable {
private static final long BASE_RAM_BYTES_USED =
@@ -77,7 +79,7 @@ public final class FieldReader extends T
// System.out.println("BTTR: seg=" + segment + " field=" + fieldInfo.name + " rootBlockCode=" + rootCode + " divisor=" + indexDivisor);
// }
- rootBlockFP = (new ByteArrayDataInput(rootCode.bytes, rootCode.offset, rootCode.length)).readVLong() >>> BlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS;
+ rootBlockFP = (new ByteArrayDataInput(rootCode.bytes, rootCode.offset, rootCode.length)).readVLong() >>> BlockTreeTermsReader.OUTPUT_FLAGS_NUM_BITS;
if (indexIn != null) {
final IndexInput clone = indexIn.clone();
@@ -120,8 +122,8 @@ public final class FieldReader extends T
}
/** For debugging -- used by CheckIndex too*/
- // TODO: maybe push this into Terms?
- public Stats computeStats() throws IOException {
+ @Override
+ public Stats getStats() throws IOException {
return new SegmentTermsEnum(this).computeBlockStats();
}
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnum.java?rev=1634823&r1=1634822&r2=1634823&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnum.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnum.java Tue Oct 28 09:19:21 2014
@@ -67,7 +67,7 @@ final class IntersectTermsEnum extends T
this.fr = fr;
runAutomaton = compiled.runAutomaton;
compiledAutomaton = compiled;
- in = fr.parent.in.clone();
+ in = fr.parent.termsIn.clone();
stack = new IntersectTermsEnumFrame[5];
for(int idx=0;idx
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsFormat.java?rev=1634823&r1=1634822&r2=1634823&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsFormat.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsFormat.java Tue Oct 28 09:19:21 2014
@@ -64,7 +64,7 @@ public class CompressingStoredFieldsForm
*
* <code>formatName</code> is the name of the format. This name will be used
* in the file formats to perform
- * {@link CodecUtil#checkSegmentHeader codec header checks}.
+ * {@link CodecUtil#checkIndexHeader codec header checks}.
*
* segmentSuffix
is the segment suffix. This suffix is added to
* the result file name only if it's not the empty string.
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java?rev=1634823&r1=1634822&r2=1634823&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java Tue Oct 28 09:19:21 2014
@@ -118,8 +118,8 @@ public final class CompressingStoredFiel
Throwable priorE = null;
try {
final String codecNameIdx = formatName + CODEC_SFX_IDX;
- version = CodecUtil.checkSegmentHeader(indexStream, codecNameIdx, VERSION_START, VERSION_CURRENT, si.getId(), segmentSuffix);
- assert CodecUtil.segmentHeaderLength(codecNameIdx, segmentSuffix) == indexStream.getFilePointer();
+ version = CodecUtil.checkIndexHeader(indexStream, codecNameIdx, VERSION_START, VERSION_CURRENT, si.getId(), segmentSuffix);
+ assert CodecUtil.indexHeaderLength(codecNameIdx, segmentSuffix) == indexStream.getFilePointer();
indexReader = new CompressingStoredFieldsIndexReader(indexStream, si);
maxPointer = indexStream.readVLong();
} catch (Throwable exception) {
@@ -141,11 +141,11 @@ public final class CompressingStoredFiel
throw new CorruptIndexException("Invalid fieldsStream maxPointer (file truncated?): maxPointer=" + maxPointer + ", length=" + fieldsStream.length(), fieldsStream);
}
final String codecNameDat = formatName + CODEC_SFX_DAT;
- final int fieldsVersion = CodecUtil.checkSegmentHeader(fieldsStream, codecNameDat, VERSION_START, VERSION_CURRENT, si.getId(), segmentSuffix);
+ final int fieldsVersion = CodecUtil.checkIndexHeader(fieldsStream, codecNameDat, VERSION_START, VERSION_CURRENT, si.getId(), segmentSuffix);
if (version != fieldsVersion) {
throw new CorruptIndexException("Version mismatch between stored fields index and data: " + version + " != " + fieldsVersion, fieldsStream);
}
- assert CodecUtil.segmentHeaderLength(codecNameDat, segmentSuffix) == fieldsStream.getFilePointer();
+ assert CodecUtil.indexHeaderLength(codecNameDat, segmentSuffix) == fieldsStream.getFilePointer();
chunkSize = fieldsStream.readVInt();
packedIntsVersion = fieldsStream.readVInt();
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java?rev=1634823&r1=1634822&r2=1634823&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java Tue Oct 28 09:19:21 2014
@@ -29,13 +29,9 @@ import org.apache.lucene.index.CorruptIn
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.SegmentInfo;
-import org.apache.lucene.index.SegmentReader;
import org.apache.lucene.index.StorableField;
-import org.apache.lucene.index.StoredDocument;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
@@ -118,10 +114,10 @@ public final class CompressingStoredFiel
final String codecNameIdx = formatName + CODEC_SFX_IDX;
final String codecNameDat = formatName + CODEC_SFX_DAT;
- CodecUtil.writeSegmentHeader(indexStream, codecNameIdx, VERSION_CURRENT, si.getId(), segmentSuffix);
- CodecUtil.writeSegmentHeader(fieldsStream, codecNameDat, VERSION_CURRENT, si.getId(), segmentSuffix);
- assert CodecUtil.segmentHeaderLength(codecNameDat, segmentSuffix) == fieldsStream.getFilePointer();
- assert CodecUtil.segmentHeaderLength(codecNameIdx, segmentSuffix) == indexStream.getFilePointer();
+ CodecUtil.writeIndexHeader(indexStream, codecNameIdx, VERSION_CURRENT, si.getId(), segmentSuffix);
+ CodecUtil.writeIndexHeader(fieldsStream, codecNameDat, VERSION_CURRENT, si.getId(), segmentSuffix);
+ assert CodecUtil.indexHeaderLength(codecNameDat, segmentSuffix) == fieldsStream.getFilePointer();
+ assert CodecUtil.indexHeaderLength(codecNameIdx, segmentSuffix) == indexStream.getFilePointer();
indexWriter = new CompressingStoredFieldsIndexWriter(indexStream);
indexStream = null;
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsFormat.java?rev=1634823&r1=1634822&r2=1634823&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsFormat.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsFormat.java Tue Oct 28 09:19:21 2014
@@ -46,7 +46,7 @@ public class CompressingTermVectorsForma
*
* <code>formatName</code> is the name of the format. This name will be used
* in the file formats to perform
- * {@link CodecUtil#checkSegmentHeader codec header checks}.
+ * {@link CodecUtil#checkIndexHeader codec header checks}.
*
* The <code>compressionMode</code> parameter allows you to choose between
* compression algorithms that have various compression and decompression
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java?rev=1634823&r1=1634822&r2=1634823&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java Tue Oct 28 09:19:21 2014
@@ -114,8 +114,8 @@ public final class CompressingTermVector
Throwable priorE = null;
try {
final String codecNameIdx = formatName + CODEC_SFX_IDX;
- version = CodecUtil.checkSegmentHeader(input, codecNameIdx, VERSION_START, VERSION_CURRENT, si.getId(), segmentSuffix);
- assert CodecUtil.segmentHeaderLength(codecNameIdx, segmentSuffix) == input.getFilePointer();
+ version = CodecUtil.checkIndexHeader(input, codecNameIdx, VERSION_START, VERSION_CURRENT, si.getId(), segmentSuffix);
+ assert CodecUtil.indexHeaderLength(codecNameIdx, segmentSuffix) == input.getFilePointer();
indexReader = new CompressingStoredFieldsIndexReader(input, si);
input.readVLong(); // the end of the data file
} catch (Throwable exception) {
@@ -133,11 +133,11 @@ public final class CompressingTermVector
final String vectorsStreamFN = IndexFileNames.segmentFileName(segment, segmentSuffix, VECTORS_EXTENSION);
vectorsStream = d.openInput(vectorsStreamFN, context);
final String codecNameDat = formatName + CODEC_SFX_DAT;
- int version2 = CodecUtil.checkSegmentHeader(vectorsStream, codecNameDat, VERSION_START, VERSION_CURRENT, si.getId(), segmentSuffix);
+ int version2 = CodecUtil.checkIndexHeader(vectorsStream, codecNameDat, VERSION_START, VERSION_CURRENT, si.getId(), segmentSuffix);
if (version != version2) {
throw new CorruptIndexException("Version mismatch between stored fields index and data: " + version + " != " + version2, vectorsStream);
}
- assert CodecUtil.segmentHeaderLength(codecNameDat, segmentSuffix) == vectorsStream.getFilePointer();
+ assert CodecUtil.indexHeaderLength(codecNameDat, segmentSuffix) == vectorsStream.getFilePointer();
long pos = vectorsStream.getFilePointer();
// NOTE: data file is too costly to verify checksum against all the bytes on open,
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsWriter.java?rev=1634823&r1=1634822&r2=1634823&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsWriter.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsWriter.java Tue Oct 28 09:19:21 2014
@@ -32,11 +32,8 @@ import org.apache.lucene.index.FieldInfo
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexFileNames;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.SegmentInfo;
-import org.apache.lucene.index.SegmentReader;
import org.apache.lucene.store.BufferedChecksumIndexInput;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.DataInput;
@@ -231,10 +228,10 @@ public final class CompressingTermVector
final String codecNameIdx = formatName + CODEC_SFX_IDX;
final String codecNameDat = formatName + CODEC_SFX_DAT;
- CodecUtil.writeSegmentHeader(indexStream, codecNameIdx, VERSION_CURRENT, si.getId(), segmentSuffix);
- CodecUtil.writeSegmentHeader(vectorsStream, codecNameDat, VERSION_CURRENT, si.getId(), segmentSuffix);
- assert CodecUtil.segmentHeaderLength(codecNameDat, segmentSuffix) == vectorsStream.getFilePointer();
- assert CodecUtil.segmentHeaderLength(codecNameIdx, segmentSuffix) == indexStream.getFilePointer();
+ CodecUtil.writeIndexHeader(indexStream, codecNameIdx, VERSION_CURRENT, si.getId(), segmentSuffix);
+ CodecUtil.writeIndexHeader(vectorsStream, codecNameDat, VERSION_CURRENT, si.getId(), segmentSuffix);
+ assert CodecUtil.indexHeaderLength(codecNameDat, segmentSuffix) == vectorsStream.getFilePointer();
+ assert CodecUtil.indexHeaderLength(codecNameIdx, segmentSuffix) == indexStream.getFilePointer();
indexWriter = new CompressingStoredFieldsIndexWriter(indexStream);
indexStream = null;
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50Codec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50Codec.java?rev=1634823&r1=1634822&r2=1634823&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50Codec.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50Codec.java Tue Oct 28 09:19:21 2014
@@ -106,7 +106,7 @@ public class Lucene50Codec extends Codec
/** Returns the postings format that should be used for writing
* new segments of field.
*
- * The default implementation always returns "Lucene41"
+ * The default implementation always returns "Lucene50"
*/
public PostingsFormat getPostingsFormatForField(String field) {
return defaultFormat;
@@ -115,7 +115,7 @@ public class Lucene50Codec extends Codec
/** Returns the docvalues format that should be used for writing
* new segments of field.
*
- * The default implementation always returns "Lucene410"
+ * The default implementation always returns "Lucene50"
*/
public DocValuesFormat getDocValuesFormatForField(String field) {
return defaultDVFormat;
@@ -126,8 +126,8 @@ public class Lucene50Codec extends Codec
return docValuesFormat;
}
- private final PostingsFormat defaultFormat = PostingsFormat.forName("Lucene41");
- private final DocValuesFormat defaultDVFormat = DocValuesFormat.forName("Lucene410");
+ private final PostingsFormat defaultFormat = PostingsFormat.forName("Lucene50");
+ private final DocValuesFormat defaultDVFormat = DocValuesFormat.forName("Lucene50");
private final NormsFormat normsFormat = new Lucene50NormsFormat();
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50CompoundFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50CompoundFormat.java?rev=1634823&r1=1634822&r2=1634823&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50CompoundFormat.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50CompoundFormat.java Tue Oct 28 09:19:21 2014
@@ -46,7 +46,7 @@ import org.apache.lucene.store.IndexOutp
* Compound (.cfs) --> Header, FileData FileCount, Footer
* Compound Entry Table (.cfe) --> Header, FileCount, <FileName,
* DataOffset, DataLength> FileCount
- * Header --> {@link CodecUtil#writeSegmentHeader SegmentHeader}
+ * Header --> {@link CodecUtil#writeIndexHeader IndexHeader}
* FileCount --> {@link DataOutput#writeVInt VInt}
* DataOffset,DataLength,Checksum --> {@link DataOutput#writeLong UInt64}
* FileName --> {@link DataOutput#writeString String}
@@ -79,8 +79,8 @@ public final class Lucene50CompoundForma
try (IndexOutput data = dir.createOutput(dataFile, context);
IndexOutput entries = dir.createOutput(entriesFile, context)) {
- CodecUtil.writeSegmentHeader(data, DATA_CODEC, VERSION_CURRENT, si.getId(), "");
- CodecUtil.writeSegmentHeader(entries, ENTRY_CODEC, VERSION_CURRENT, si.getId(), "");
+ CodecUtil.writeIndexHeader(data, DATA_CODEC, VERSION_CURRENT, si.getId(), "");
+ CodecUtil.writeIndexHeader(entries, ENTRY_CODEC, VERSION_CURRENT, si.getId(), "");
// write number of files
entries.writeVInt(files.size());
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50CompoundReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50CompoundReader.java?rev=1634823&r1=1634822&r2=1634823&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50CompoundReader.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50CompoundReader.java Tue Oct 28 09:19:21 2014
@@ -71,7 +71,7 @@ final class Lucene50CompoundReader exten
boolean success = false;
handle = directory.openInput(dataFileName, context);
try {
- CodecUtil.checkSegmentHeader(handle, Lucene50CompoundFormat.DATA_CODEC, version, version, si.getId(), "");
+ CodecUtil.checkIndexHeader(handle, Lucene50CompoundFormat.DATA_CODEC, version, version, si.getId(), "");
// NOTE: data file is too costly to verify checksum against all the bytes on open,
// but for now we at least verify proper structure of the checksum footer: which looks
@@ -93,7 +93,7 @@ final class Lucene50CompoundReader exten
try (ChecksumIndexInput entriesStream = dir.openChecksumInput(entriesFileName, IOContext.READONCE)) {
Throwable priorE = null;
try {
- version = CodecUtil.checkSegmentHeader(entriesStream, Lucene50CompoundFormat.ENTRY_CODEC,
+ version = CodecUtil.checkIndexHeader(entriesStream, Lucene50CompoundFormat.ENTRY_CODEC,
Lucene50CompoundFormat.VERSION_START,
Lucene50CompoundFormat.VERSION_CURRENT, segmentID, "");
final int numEntries = entriesStream.readVInt();
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50FieldInfosFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50FieldInfosFormat.java?rev=1634823&r1=1634822&r2=1634823&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50FieldInfosFormat.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50FieldInfosFormat.java Tue Oct 28 09:19:21 2014
@@ -46,7 +46,7 @@ import org.apache.lucene.store.IndexOutp
* FieldBits,DocValuesBits,DocValuesGen,Attributes> FieldsCount,Footer
* Data types:
*
- * - Header --> {@link CodecUtil#checkSegmentHeader SegmentHeader}
+ * - Header --> {@link CodecUtil#checkIndexHeader IndexHeader}
* - FieldsCount --> {@link DataOutput#writeVInt VInt}
* - FieldName --> {@link DataOutput#writeString String}
* - FieldBits, IndexOptions, DocValuesBits --> {@link DataOutput#writeByte Byte}
@@ -114,9 +114,9 @@ public final class Lucene50FieldInfosFor
Throwable priorE = null;
FieldInfo infos[] = null;
try {
- CodecUtil.checkSegmentHeader(input, CODEC_NAME,
- FORMAT_START,
- FORMAT_CURRENT,
+ CodecUtil.checkIndexHeader(input, Lucene50FieldInfosFormat.CODEC_NAME,
+ Lucene50FieldInfosFormat.FORMAT_START,
+ Lucene50FieldInfosFormat.FORMAT_CURRENT,
segmentInfo.getId(), segmentSuffix);
final int size = input.readVInt(); //read in the size
@@ -251,7 +251,7 @@ public final class Lucene50FieldInfosFor
public void write(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, FieldInfos infos, IOContext context) throws IOException {
final String fileName = IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, EXTENSION);
try (IndexOutput output = directory.createOutput(fileName, context)) {
- CodecUtil.writeSegmentHeader(output, CODEC_NAME, FORMAT_CURRENT, segmentInfo.getId(), segmentSuffix);
+ CodecUtil.writeIndexHeader(output, Lucene50FieldInfosFormat.CODEC_NAME, Lucene50FieldInfosFormat.FORMAT_CURRENT, segmentInfo.getId(), segmentSuffix);
output.writeVInt(infos.size());
for (FieldInfo fi : infos) {
fi.checkConsistency();
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50LiveDocsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50LiveDocsFormat.java?rev=1634823&r1=1634822&r2=1634823&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50LiveDocsFormat.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50LiveDocsFormat.java Tue Oct 28 09:19:21 2014
@@ -41,10 +41,9 @@ import org.apache.lucene.util.MutableBit
* deletions.
* Although per-segment, this file is maintained exterior to compound segment
* files.
- * Deletions (.liv) --> SegmentHeader,Generation,Bits
+ * Deletions (.liv) --> IndexHeader,Generation,Bits
*
- * - SegmentHeader --> {@link CodecUtil#writeSegmentHeader SegmentHeader}
- * - Generation --> {@link DataOutput#writeLong Int64}
+ * - SegmentHeader --> {@link CodecUtil#writeIndexHeader IndexHeader}
* - Bits --> <{@link DataOutput#writeLong Int64}> LongCount
*
*/
@@ -85,11 +84,8 @@ public final class Lucene50LiveDocsForma
try (ChecksumIndexInput input = dir.openChecksumInput(name, context)) {
Throwable priorE = null;
try {
- CodecUtil.checkSegmentHeader(input, CODEC_NAME, VERSION_START, VERSION_CURRENT, info.info.getId(), "");
- long filegen = input.readLong();
- if (gen != filegen) {
- throw new CorruptIndexException("file mismatch, expected generation=" + gen + ", got=" + filegen, input);
- }
+ CodecUtil.checkIndexHeader(input, CODEC_NAME, VERSION_START, VERSION_CURRENT,
+ info.info.getId(), Long.toString(gen, Character.MAX_RADIX));
long data[] = new long[FixedBitSet.bits2words(length)];
for (int i = 0; i < data.length; i++) {
data[i] = input.readLong();
@@ -120,8 +116,7 @@ public final class Lucene50LiveDocsForma
}
long data[] = fbs.getBits();
try (IndexOutput output = dir.createOutput(name, context)) {
- CodecUtil.writeSegmentHeader(output, CODEC_NAME, VERSION_CURRENT, info.info.getId(), "");
- output.writeLong(gen);
+ CodecUtil.writeIndexHeader(output, CODEC_NAME, VERSION_CURRENT, info.info.getId(), Long.toString(gen, Character.MAX_RADIX));
for (int i = 0; i < data.length; i++) {
output.writeLong(data[i]);
}
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50NormsConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50NormsConsumer.java?rev=1634823&r1=1634822&r2=1634823&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50NormsConsumer.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50NormsConsumer.java Tue Oct 28 09:19:21 2014
@@ -47,6 +47,7 @@ class Lucene50NormsConsumer extends Norm
static final byte CONST_COMPRESSED = 2;
static final byte UNCOMPRESSED = 3;
static final byte INDIRECT = 4;
+ static final byte PATCHED = 5;
static final int BLOCK_SIZE = 1 << 14;
// threshold for indirect encoding, computed as 1 - 1/log2(maxint)
@@ -61,10 +62,10 @@ class Lucene50NormsConsumer extends Norm
try {
String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension);
data = state.directory.createOutput(dataName, state.context);
- CodecUtil.writeSegmentHeader(data, dataCodec, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
+ CodecUtil.writeIndexHeader(data, dataCodec, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
meta = state.directory.createOutput(metaName, state.context);
- CodecUtil.writeSegmentHeader(meta, metaCodec, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
+ CodecUtil.writeIndexHeader(meta, metaCodec, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
success = true;
} finally {
if (!success) {
@@ -82,6 +83,11 @@ class Lucene50NormsConsumer extends Norm
@Override
public void addNormsField(FieldInfo field, Iterable values) throws IOException {
+ writeNormsField(field, values, 0);
+ }
+
+ private void writeNormsField(FieldInfo field, Iterable values, int level) throws IOException {
+ assert level <= 1; // we only "recurse" once in the indirect case
meta.writeVInt(field.number);
long minValue = Long.MAX_VALUE;
long maxValue = Long.MIN_VALUE;
@@ -89,16 +95,12 @@ class Lucene50NormsConsumer extends Norm
NormMap uniqueValues = new NormMap();
int count = 0;
- int missingCount = 0;
for (Number nv : values) {
if (nv == null) {
throw new IllegalStateException("illegal norms data for field " + field.name + ", got null for value: " + count);
}
final long v = nv.longValue();
- if (v == 0) {
- missingCount++;
- }
minValue = Math.min(minValue, v);
maxValue = Math.max(maxValue, v);
@@ -115,9 +117,15 @@ class Lucene50NormsConsumer extends Norm
if (uniqueValues != null && uniqueValues.size == 1) {
// 0 bpv
addConstant(minValue);
- } else if (count > 256 && missingCount > count * INDIRECT_THRESHOLD) {
- // sparse encoding
- addIndirect(field, values, count, missingCount);
+ } else if (level == 0 && count > 256 && uniqueValues != null && uniqueValues.maxFreq() > count * INDIRECT_THRESHOLD) {
+ long commonValue = uniqueValues.getDecodeTable()[uniqueValues.maxOrd()];
+ if (commonValue == 0) {
+ // if the common value is missing, don't waste RAM on a bitset, since we won't be searching those docs
+ addIndirect(field, values, count, uniqueValues);
+ } else {
+ // otherwise, write a sparse bitset, where 1 indicates 'uncommon value'.
+ addPatched(field, values, count, uniqueValues);
+ }
} else if (uniqueValues != null) {
// small number of unique values: this is the typical case:
FormatAndBits compression = fastestFormatAndBits(uniqueValues.size-1);
@@ -200,10 +208,65 @@ class Lucene50NormsConsumer extends Norm
writer.finish();
}
- private void addIndirect(FieldInfo field, final Iterable values, int count, int missingCount) throws IOException {
- meta.writeVInt(count - missingCount);
+ // encodes only uncommon values in a sparse bitset
+ // access is constant time, and the common case is predictable
+ // exceptions nest either to CONST (if there are only 2 values), or INDIRECT (if there are > 2 values)
+ private void addPatched(FieldInfo field, final Iterable values, int count, NormMap uniqueValues) throws IOException {
+ final long decodeTable[] = uniqueValues.getDecodeTable();
+ int commonCount = uniqueValues.maxFreq();
+ final long commonValue = decodeTable[uniqueValues.maxOrd()];
+
+ meta.writeVInt(count - commonCount);
+ meta.writeByte(PATCHED);
+ meta.writeLong(data.getFilePointer());
+
+ // write docs with value
+ writeDocsWithValue(values, commonValue);
+
+ // write exceptions: only two cases make sense
+ // bpv = 1 (folded into sparse bitset already)
+ // bpv > 1 (add indirect exception table)
+ meta.writeVInt(field.number);
+ if (uniqueValues.size == 2) {
+ // special case: implicit in bitset
+ int otherOrd = uniqueValues.maxOrd() == 0 ? 1 : 0;
+ addConstant(decodeTable[otherOrd]);
+ } else {
+ // exception table
+ addIndirect(field, values, count, uniqueValues);
+ }
+ }
+
+ // encodes values as sparse array: keys[] and values[]
+ // access is log(N) where N = keys.length (slow!)
+ // so this is only appropriate as an exception table for patched, or when common value is 0 (wont be accessed by searching)
+ private void addIndirect(FieldInfo field, final Iterable values, int count, NormMap uniqueValues) throws IOException {
+ int commonCount = uniqueValues.maxFreq();
+ final long commonValue = uniqueValues.getDecodeTable()[uniqueValues.maxOrd()];
+
+ meta.writeVInt(count - commonCount);
meta.writeByte(INDIRECT);
meta.writeLong(data.getFilePointer());
+
+ // write docs with value
+ writeDocsWithValue(values, commonValue);
+
+ // write actual values
+ writeNormsField(field, new Iterable() {
+ @Override
+ public Iterator iterator() {
+ return new FilterIterator(values.iterator()) {
+ @Override
+ protected boolean predicateFunction(Number value) {
+ return value.longValue() != commonValue;
+ }
+ };
+ }
+ }, 1);
+ }
+
+ private void writeDocsWithValue(final Iterable values, long commonValue) throws IOException {
+ data.writeLong(commonValue);
data.writeVInt(PackedInts.VERSION_CURRENT);
data.writeVInt(BLOCK_SIZE);
@@ -212,25 +275,12 @@ class Lucene50NormsConsumer extends Norm
int doc = 0;
for (Number n : values) {
long v = n.longValue();
- if (v != 0) {
+ if (v != commonValue) {
writer.add(doc);
}
doc++;
}
writer.finish();
-
- // write actual values
- addNormsField(field, new Iterable() {
- @Override
- public Iterator iterator() {
- return new FilterIterator(values.iterator()) {
- @Override
- protected boolean predicateFunction(Number value) {
- return value.longValue() != 0;
- }
- };
- }
- });
}
@Override
@@ -259,6 +309,7 @@ class Lucene50NormsConsumer extends Norm
static class NormMap {
// we use short: at most we will add 257 values to this map before its rejected as too big above.
final short[] singleByteRange = new short[256];
+ final int[] freqs = new int[257];
final Map other = new HashMap();
int size;
@@ -273,18 +324,24 @@ class Lucene50NormsConsumer extends Norm
int index = (int) (l + 128);
short previous = singleByteRange[index];
if (previous < 0) {
- singleByteRange[index] = (short) size;
+ short slot = (short) size;
+ singleByteRange[index] = slot;
+ freqs[slot]++;
size++;
return true;
} else {
+ freqs[previous]++;
return false;
}
} else {
- if (!other.containsKey(l)) {
+ Short previous = other.get(l);
+ if (previous == null) {
+ freqs[size]++;
other.put(l, (short)size);
size++;
return true;
} else {
+ freqs[previous]++;
return false;
}
}
@@ -315,5 +372,35 @@ class Lucene50NormsConsumer extends Norm
}
return decode;
}
+
+ // TODO: if we need more complicated frequency-driven optos, maybe add 'finish' to this api
+ // and sort all ords by frequency. we could then lower BPV and waste a value to represent 'patched',
+
+ /** retrieves frequency table for items (indexed by ordinal) */
+ public int[] getFreqs() {
+ return freqs;
+ }
+
+ /** sugar: returns max value over getFreqs() */
+ public int maxFreq() {
+ int max = 0;
+ for (int i = 0; i < size; i++) {
+ max = Math.max(max, freqs[i]);
+ }
+ return max;
+ }
+
+ /** sugar: returns ordinal with maxFreq() */
+ public int maxOrd() {
+ long max = 0;
+ int maxOrd = 0;
+ for (int i = 0; i < size; i++) {
+ if (freqs[i] > max) {
+ max = freqs[i];
+ maxOrd = i;
+ }
+ }
+ return maxOrd;
+ }
}
}
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50NormsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50NormsFormat.java?rev=1634823&r1=1634822&r2=1634823&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50NormsFormat.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50NormsFormat.java Tue Oct 28 09:19:21 2014
@@ -28,6 +28,7 @@ import org.apache.lucene.index.SegmentWr
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.SmallFloat;
import org.apache.lucene.util.packed.BlockPackedWriter;
+import org.apache.lucene.util.packed.MonotonicBlockPackedWriter;
import org.apache.lucene.util.packed.PackedInts;
/**
@@ -50,6 +51,9 @@ import org.apache.lucene.util.packed.Pac
* - Indirect: when norms are extremely sparse, missing values are omitted.
* Access to an individual value is slower, but missing norm values are never accessed
* by search code.
+ * - Patched: when a single norm value dominates, a sparse bitset encodes docs with exceptions,
+ * so that access to the common value is still very fast. outliers fall thru to an exception
+ * handling mechanism (Indirect or Constant).
*
*
* Files:
@@ -64,7 +68,7 @@ import org.apache.lucene.util.packed.Pac
* Norms data (.nvd)
* Norms metadata (.dvm) --> Header,<Entry>NumFields,Footer
*
- * - Header --> {@link CodecUtil#writeHeader CodecHeader}
+ * - Header --> {@link CodecUtil#writeIndexHeader IndexHeader}
* - Entry --> FieldNumber,Type,Offset
* - FieldNumber --> {@link DataOutput#writeVInt vInt}
* - Type --> {@link DataOutput#writeByte Byte}
@@ -81,20 +85,24 @@ import org.apache.lucene.util.packed.Pac
* a lookup table of unique values is written, followed by the ordinal for each document.
* - 2 --> constant. When there is a single value for the entire field.
* - 3 --> uncompressed: Values written as a simple byte[].
- * - 4 --> indirect. Only documents with a value are written with a sparse encoding.
+ * - 4 --> indirect. Only documents with a value are written with monotonic compression. a nested
+ * entry for the same field will follow for the exception handler.
+ * - 5 --> patched. Encoded the same as indirect.
*
*
* The Norms data or .nvd file.
* For each Norms field, this stores the actual per-document data (the heavy-lifting)
- * Norms data (.nvd) --> Header,<Uncompressed | TableCompressed | DeltaCompressed>NumFields,Footer
+ * Norms data (.nvd) --> Header,<Uncompressed | TableCompressed | DeltaCompressed | MonotonicCompressed >NumFields,Footer
*
- * - Header --> {@link CodecUtil#writeHeader CodecHeader}
+ * - Header --> {@link CodecUtil#writeIndexHeader IndexHeader}
* - Uncompressed --> {@link DataOutput#writeByte Byte}maxDoc
* - TableCompressed --> PackedIntsVersion,Table,BitPackedData
* - Table --> TableSize, {@link DataOutput#writeLong int64}TableSize
* - BitpackedData --> {@link PackedInts}
* - DeltaCompressed --> PackedIntsVersion,BlockSize,DeltaCompressedData
* - DeltaCompressedData --> {@link BlockPackedWriter BlockPackedWriter(blockSize=16k)}
+ * - MonotonicCompressed --> PackedIntsVersion,BlockSize,MonotonicCompressedData
+ * - MonotonicCompressedData --> {@link MonotonicBlockPackedWriter MonotonicBlockPackedWriter(blockSize=16k)}
* - PackedIntsVersion,BlockSize,TableSize --> {@link DataOutput#writeVInt vInt}
* - Footer --> {@link CodecUtil#writeFooter CodecFooter}
*
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50NormsProducer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50NormsProducer.java?rev=1634823&r1=1634822&r2=1634823&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50NormsProducer.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50NormsProducer.java Tue Oct 28 09:19:21 2014
@@ -37,6 +37,7 @@ import org.apache.lucene.util.Accountabl
import org.apache.lucene.util.Accountables;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.SparseFixedBitSet;
import org.apache.lucene.util.packed.BlockPackedReader;
import org.apache.lucene.util.packed.MonotonicBlockPackedReader;
import org.apache.lucene.util.packed.PackedInts;
@@ -48,6 +49,7 @@ import static org.apache.lucene.codecs.l
import static org.apache.lucene.codecs.lucene50.Lucene50NormsConsumer.TABLE_COMPRESSED;
import static org.apache.lucene.codecs.lucene50.Lucene50NormsConsumer.UNCOMPRESSED;
import static org.apache.lucene.codecs.lucene50.Lucene50NormsConsumer.INDIRECT;
+import static org.apache.lucene.codecs.lucene50.Lucene50NormsConsumer.PATCHED;
/**
* Reader for {@link Lucene50NormsFormat}
@@ -63,6 +65,7 @@ class Lucene50NormsProducer extends Norm
private final AtomicLong ramBytesUsed;
private final AtomicInteger activeCount = new AtomicInteger();
+ private final int maxDoc;
private final boolean merging;
@@ -75,11 +78,13 @@ class Lucene50NormsProducer extends Norm
instancesInfo.putAll(original.instancesInfo);
ramBytesUsed = new AtomicLong(original.ramBytesUsed.get());
activeCount.set(original.activeCount.get());
+ maxDoc = original.maxDoc;
merging = true;
}
Lucene50NormsProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
merging = false;
+ maxDoc = state.segmentInfo.getDocCount();
String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
ramBytesUsed = new AtomicLong(RamUsageEstimator.shallowSizeOfInstance(getClass()));
int version = -1;
@@ -88,7 +93,7 @@ class Lucene50NormsProducer extends Norm
try (ChecksumIndexInput in = state.directory.openChecksumInput(metaName, state.context)) {
Throwable priorE = null;
try {
- version = CodecUtil.checkSegmentHeader(in, metaCodec, VERSION_START, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
+ version = CodecUtil.checkIndexHeader(in, metaCodec, VERSION_START, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
readFields(in, state.fieldInfos);
} catch (Throwable exception) {
priorE = exception;
@@ -101,7 +106,7 @@ class Lucene50NormsProducer extends Norm
this.data = state.directory.openInput(dataName, state.context);
boolean success = false;
try {
- final int version2 = CodecUtil.checkSegmentHeader(data, dataCodec, VERSION_START, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
+ final int version2 = CodecUtil.checkIndexHeader(data, dataCodec, VERSION_START, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
if (version != version2) {
throw new CorruptIndexException("Format versions mismatch: meta=" + version + ",data=" + version2, data);
}
@@ -146,6 +151,7 @@ class Lucene50NormsProducer extends Norm
case TABLE_COMPRESSED:
case DELTA_COMPRESSED:
break;
+ case PATCHED:
case INDIRECT:
if (meta.readVInt() != info.number) {
throw new CorruptIndexException("indirect norms entry for field: " + info.name + " is corrupt", meta);
@@ -254,6 +260,7 @@ class Lucene50NormsProducer extends Norm
}
case INDIRECT: {
data.seek(entry.offset);
+ final long common = data.readLong();
int packedIntsVersion = data.readVInt();
int blockSize = data.readVInt();
final MonotonicBlockPackedReader live = MonotonicBlockPackedReader.of(data, packedIntsVersion, blockSize, entry.count, false);
@@ -279,7 +286,34 @@ class Lucene50NormsProducer extends Norm
return values.get(mid);
}
}
- return 0;
+ return common;
+ }
+ };
+ break;
+ }
+ case PATCHED: {
+ data.seek(entry.offset);
+ final long common = data.readLong();
+ int packedIntsVersion = data.readVInt();
+ int blockSize = data.readVInt();
+ MonotonicBlockPackedReader live = MonotonicBlockPackedReader.of(data, packedIntsVersion, blockSize, entry.count, true);
+ final SparseFixedBitSet set = new SparseFixedBitSet(maxDoc);
+ for (int i = 0; i < live.size(); i++) {
+ int doc = (int) live.get(i);
+ set.set(doc);
+ }
+ LoadedNorms nestedInstance = loadNorms(entry.nested);
+ instance.ramBytesUsed = set.ramBytesUsed() + nestedInstance.ramBytesUsed;
+ instance.info = Accountables.namedAccountable("patched -> " + nestedInstance.info, instance.ramBytesUsed);
+ final NumericDocValues values = nestedInstance.norms;
+ instance.norms = new NumericDocValues() {
+ @Override
+ public long get(int docID) {
+ if (set.get(docID)) {
+ return values.get(docID);
+ } else {
+ return common;
+ }
}
};
break;