Return-Path: Delivered-To: apmail-lucene-java-commits-archive@www.apache.org Received: (qmail 87675 invoked from network); 8 Mar 2010 16:11:27 -0000 Received: from unknown (HELO mail.apache.org) (140.211.11.3) by 140.211.11.9 with SMTP; 8 Mar 2010 16:11:27 -0000 Received: (qmail 75206 invoked by uid 500); 8 Mar 2010 16:11:03 -0000 Delivered-To: apmail-lucene-java-commits-archive@lucene.apache.org Received: (qmail 75165 invoked by uid 500); 8 Mar 2010 16:11:03 -0000 Mailing-List: contact java-commits-help@lucene.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: java-dev@lucene.apache.org Delivered-To: mailing list java-commits@lucene.apache.org Received: (qmail 75157 invoked by uid 99); 8 Mar 2010 16:11:03 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 08 Mar 2010 16:11:03 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=10.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 08 Mar 2010 16:10:55 +0000 Received: by eris.apache.org (Postfix, from userid 65534) id 1F85A238888A; Mon, 8 Mar 2010 16:10:35 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r920378 [1/3] - in /lucene/java/branches/flex_1458: contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/ contrib/misc/src/java/org/apache/lucene/index/ contrib/misc/src/test/org/apache/lucene/index/ contrib/queries/src/java/org/ap... 
Date: Mon, 08 Mar 2010 16:10:33 -0000 To: java-commits@lucene.apache.org From: mikemccand@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20100308161035.1F85A238888A@eris.apache.org> Author: mikemccand Date: Mon Mar 8 16:10:31 2010 New Revision: 920378 URL: http://svn.apache.org/viewvc?rev=920378&view=rev Log: LUCENE-2111 (on flex branch): clean up many nocommits; some renaming/refactoring; clarifying null/EMPTY semantics; move delDocs out of Multi/DirReader Modified: lucene/java/branches/flex_1458/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java lucene/java/branches/flex_1458/contrib/misc/src/java/org/apache/lucene/index/MultiPassIndexSplitter.java lucene/java/branches/flex_1458/contrib/misc/src/java/org/apache/lucene/index/TermVectorAccessor.java lucene/java/branches/flex_1458/contrib/misc/src/test/org/apache/lucene/index/TestTermVectorAccessor.java lucene/java/branches/flex_1458/contrib/queries/src/java/org/apache/lucene/search/DuplicateFilter.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/CheckIndex.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/DirectoryReader.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/DocsEnum.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/DocumentsWriter.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/Fields.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/FieldsEnum.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/FreqProxFieldMergeState.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/FreqProxTermsWriter.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/IndexReader.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/IndexWriter.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/LegacyFieldsEnum.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/LegacyTerms.java 
lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/MultiFields.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/MultiFieldsEnum.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/MultiReader.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/MultiTerms.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/MultiTermsEnum.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/ParallelReader.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/SegmentInfo.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/SegmentInfos.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/SegmentMerger.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/SegmentReader.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/SegmentWriteState.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerThread.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/Terms.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermsEnum.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermsHash.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermsHashPerField.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermsHashPerThread.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/Codec.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/Codecs.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/FieldsConsumer.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/PostingsConsumer.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/TermsConsumer.java 
lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/intblock/IntBlockCodec.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/preflex/PreFlexCodec.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermDocs.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/preflex/TermInfosReader.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/pulsing/PulsingCodec.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/IntIndexInput.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/IntIndexOutput.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepCodec.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/DefaultSkipListWriter.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexReader.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardCodec.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReaderImpl.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriterImpl.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictWriter.java 
lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/AutomatonQuery.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/FieldCache.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/FieldCacheImpl.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/FilteredTermsEnum.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/MatchAllDocsQuery.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/MultiPhraseQuery.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/MultiTermQuery.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/NumericRangeQuery.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/PhraseQuery.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/PrefixQuery.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/Similarity.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/TermQuery.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/TermRangeQuery.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/TermRangeTermsEnum.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/function/ValueSourceQuery.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/search/spans/SpanTermQuery.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/util/Bits.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/util/BytesRef.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/util/MultiBits.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/util/ReaderUtil.java lucene/java/branches/flex_1458/src/test/org/apache/lucene/TestExternalCodecs.java 
lucene/java/branches/flex_1458/src/test/org/apache/lucene/index/FlexTestUtil.java lucene/java/branches/flex_1458/src/test/org/apache/lucene/index/TestCodecs.java lucene/java/branches/flex_1458/src/test/org/apache/lucene/index/TestIndexWriter.java lucene/java/branches/flex_1458/src/test/org/apache/lucene/index/TestLazyProxSkipping.java lucene/java/branches/flex_1458/src/test/org/apache/lucene/index/TestOmitTf.java lucene/java/branches/flex_1458/src/test/org/apache/lucene/index/TestSegmentTermEnum.java lucene/java/branches/flex_1458/src/test/org/apache/lucene/search/TestBoolean2.java Modified: lucene/java/branches/flex_1458/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java?rev=920378&r1=920377&r2=920378&view=diff ============================================================================== --- lucene/java/branches/flex_1458/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java (original) +++ lucene/java/branches/flex_1458/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java Mon Mar 8 16:10:31 2010 @@ -478,7 +478,7 @@ TermsEnum terms = fields.terms(); DocsEnum docs = null; while(terms.next() != null) { - docs = terms.docs(reader.getDeletedDocs(), docs); + docs = terms.docs(MultiFields.getDeletedDocs(reader), docs); while(docs.nextDoc() != docs.NO_MORE_DOCS) { totalTokenCount2 += docs.freq(); } Modified: lucene/java/branches/flex_1458/contrib/misc/src/java/org/apache/lucene/index/MultiPassIndexSplitter.java URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/misc/src/java/org/apache/lucene/index/MultiPassIndexSplitter.java?rev=920378&r1=920377&r2=920378&view=diff ============================================================================== --- 
lucene/java/branches/flex_1458/contrib/misc/src/java/org/apache/lucene/index/MultiPassIndexSplitter.java (original) +++ lucene/java/branches/flex_1458/contrib/misc/src/java/org/apache/lucene/index/MultiPassIndexSplitter.java Mon Mar 8 16:10:31 2010 @@ -202,6 +202,8 @@ if (oldDels != null) { dels.or(oldDels); } + // nocommit -- not good that this class has to do this... + storeDelDocs(null); } @Override Modified: lucene/java/branches/flex_1458/contrib/misc/src/java/org/apache/lucene/index/TermVectorAccessor.java URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/misc/src/java/org/apache/lucene/index/TermVectorAccessor.java?rev=920378&r1=920377&r2=920378&view=diff ============================================================================== --- lucene/java/branches/flex_1458/contrib/misc/src/java/org/apache/lucene/index/TermVectorAccessor.java (original) +++ lucene/java/branches/flex_1458/contrib/misc/src/java/org/apache/lucene/index/TermVectorAccessor.java Mon Mar 8 16:10:31 2010 @@ -100,7 +100,7 @@ positions.clear(); } - final Bits delDocs = indexReader.getDeletedDocs(); + final Bits delDocs = MultiFields.getDeletedDocs(indexReader); Terms terms = MultiFields.getTerms(indexReader, field); boolean anyTerms = false; Modified: lucene/java/branches/flex_1458/contrib/misc/src/test/org/apache/lucene/index/TestTermVectorAccessor.java URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/misc/src/test/org/apache/lucene/index/TestTermVectorAccessor.java?rev=920378&r1=920377&r2=920378&view=diff ============================================================================== --- lucene/java/branches/flex_1458/contrib/misc/src/test/org/apache/lucene/index/TestTermVectorAccessor.java (original) +++ lucene/java/branches/flex_1458/contrib/misc/src/test/org/apache/lucene/index/TestTermVectorAccessor.java Mon Mar 8 16:10:31 2010 @@ -73,8 +73,6 @@ for (int i = 0; i < ir.maxDoc(); i++) { - // nocommit - /* mapper = new 
ParallelArrayTermVectorMapper(); accessor.accept(ir, i, "a", mapper); tfv = mapper.materializeVector(); @@ -94,7 +92,6 @@ assertEquals("doc " + i, 8, tfv.getTermFrequencies().length); assertEquals("doc " + i, "c", tfv.getTerms()[2]); assertEquals("doc " + i, 7, tfv.getTermFrequencies()[2]); - */ mapper = new ParallelArrayTermVectorMapper(); accessor.accept(ir, i, "q", mapper); Modified: lucene/java/branches/flex_1458/contrib/queries/src/java/org/apache/lucene/search/DuplicateFilter.java URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/queries/src/java/org/apache/lucene/search/DuplicateFilter.java?rev=920378&r1=920377&r2=920378&view=diff ============================================================================== --- lucene/java/branches/flex_1458/contrib/queries/src/java/org/apache/lucene/search/DuplicateFilter.java (original) +++ lucene/java/branches/flex_1458/contrib/queries/src/java/org/apache/lucene/search/DuplicateFilter.java Mon Mar 8 16:10:31 2010 @@ -22,6 +22,7 @@ import org.apache.lucene.index.Terms; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.index.MultiFields; import org.apache.lucene.util.OpenBitSet; import org.apache.lucene.util.Bits; @@ -83,7 +84,7 @@ private OpenBitSet correctBits(IndexReader reader) throws IOException { OpenBitSet bits = new OpenBitSet(reader.maxDoc()); //assume all are INvalid - final Bits delDocs = reader.getDeletedDocs(); + final Bits delDocs = MultiFields.getDeletedDocs(reader); Terms terms = reader.fields().terms(fieldName); if (terms != null) { TermsEnum termsEnum = terms.iterator(); @@ -121,7 +122,7 @@ OpenBitSet bits=new OpenBitSet(reader.maxDoc()); bits.set(0,reader.maxDoc()); //assume all are valid - final Bits delDocs = reader.getDeletedDocs(); + final Bits delDocs = MultiFields.getDeletedDocs(reader); Terms terms = reader.fields().terms(fieldName); if (terms != null) { TermsEnum termsEnum = terms.iterator(); Modified: 
lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/CheckIndex.java URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/CheckIndex.java?rev=920378&r1=920377&r2=920378&view=diff ============================================================================== --- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/CheckIndex.java (original) +++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/CheckIndex.java Mon Mar 8 16:10:31 2010 @@ -492,7 +492,7 @@ segInfoStat.fieldNormStatus = testFieldNorms(fieldNames, reader); // Test the Term Index - segInfoStat.termIndexStatus = testTermIndex(info, reader); + segInfoStat.termIndexStatus = testTermIndex(reader); // Test Stored Fields segInfoStat.storedFieldStatus = testStoredFields(info, reader, nf); @@ -575,7 +575,7 @@ /** * Test the term index. */ - private Status.TermIndexStatus testTermIndex(SegmentInfo info, SegmentReader reader) { + private Status.TermIndexStatus testTermIndex(SegmentReader reader) { final Status.TermIndexStatus status = new Status.TermIndexStatus(); final int maxDoc = reader.maxDoc(); Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/DirectoryReader.java URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/DirectoryReader.java?rev=920378&r1=920377&r2=920378&view=diff ============================================================================== --- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/DirectoryReader.java (original) +++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/DirectoryReader.java Mon Mar 8 16:10:31 2010 @@ -39,7 +39,6 @@ import org.apache.lucene.util.Bits; import org.apache.lucene.util.ReaderUtil; import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.MultiBits; import org.apache.lucene.search.FieldCache; // not great (circular); used only to purge FieldCache entry on close @@ -355,6 
+354,7 @@ buffer.append(' '); } buffer.append(subReaders[i]); + buffer.append(' '); } buffer.append(')'); return buffer.toString(); @@ -363,7 +363,6 @@ private void initialize(SegmentReader[] subReaders) throws IOException { this.subReaders = subReaders; starts = new int[subReaders.length + 1]; // build starts array - Bits[] subs = new Bits[subReaders.length]; final List subFields = new ArrayList(); final List fieldSlices = new ArrayList(); @@ -374,7 +373,6 @@ if (subReaders[i].hasDeletions()) { hasDeletions = true; - subs[i] = subReaders[i].getDeletedDocs(); } final ReaderUtil.Slice slice = new ReaderUtil.Slice(starts[i], subReaders[i].maxDoc(), i); @@ -387,20 +385,11 @@ } } starts[subReaders.length] = maxDoc; - - if (hasDeletions) { - deletedDocs = new MultiBits(subs, starts); - } else { - deletedDocs = null; - } } - private Bits deletedDocs; - @Override public Bits getDeletedDocs() { - // nocommit -- maybe not supported? - return deletedDocs; + throw new UnsupportedOperationException("please use MultiFields.getDeletedDocs if you really need a top level Bits deletedDocs (NOTE that it's usually better to work per segment instead)"); } @Override @@ -725,7 +714,6 @@ @Override public TermEnum terms() throws IOException { ensureOpen(); - //nocommit: investigate this opto if (subReaders.length == 1) { // Optimize single segment case: return subReaders[0].terms(); @@ -788,11 +776,7 @@ @Override public Fields fields() throws IOException { - if (subReaders.length == 0) { - return null; - } else { - throw new UnsupportedOperationException("please use MultiFields.getFields if you really need a top level Fields for this reader"); - } + throw new UnsupportedOperationException("please use MultiFields.getFields if you really need a top level Fields (NOTE that it's usually better to work per segment instead)"); } @Override @@ -949,6 +933,11 @@ } @Override + public long getUniqueTermCount() throws IOException { + throw new UnsupportedOperationException(""); + } + + @Override 
public Map getCommitUserData() { ensureOpen(); return segmentInfos.getUserData(); Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/DocsEnum.java URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/DocsEnum.java?rev=920378&r1=920377&r2=920378&view=diff ============================================================================== --- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/DocsEnum.java (original) +++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/DocsEnum.java Mon Mar 8 16:10:31 2010 @@ -30,7 +30,7 @@ private AttributeSource atts = null; - // nocommit + // nocommit -- debugging public String desc; /** Returns term frequency in the current document. Do @@ -44,11 +44,10 @@ return atts; } - // nocommit -- state in API that doc/freq are undefined - // (defined?) after this? // nocommit -- fix this API so that intblock codecs are // able to return their own int arrays, to save a copy... IntsRef? - /** Bulk read: returns number of docs read. + /** Bulk read: returns number of docs read. After this is + * called, {@link #doc} and {@link #freq} are undefined. * * <p>
NOTE: the default impl simply delegates to {@link * #nextDoc}, but subclasses may do this more Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/DocumentsWriter.java URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/DocumentsWriter.java?rev=920378&r1=920377&r2=920378&view=diff ============================================================================== --- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/DocumentsWriter.java (original) +++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/DocumentsWriter.java Mon Mar 8 16:10:31 2010 @@ -1057,8 +1057,7 @@ // by re-using the same TermsEnum and seeking only // forwards if (term.field() != currentField) { - // nocommit -- once we sync up branch again, add - // assert that this field is always > last one + assert currentField == null || currentField.compareTo(term.field()) < 0; currentField = term.field(); Terms terms = fields.terms(currentField); if (terms != null) { @@ -1071,8 +1070,10 @@ if (termsEnum == null) { continue; } + assert checkDeleteTerm(term); termRef.copy(term.text()); + if (termsEnum.seek(termRef) == TermsEnum.SeekStatus.FOUND) { DocsEnum docsEnum = termsEnum.docs(reader.getDeletedDocs(), docs); Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/Fields.java URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/Fields.java?rev=920378&r1=920377&r2=920378&view=diff ============================================================================== --- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/Fields.java (original) +++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/Fields.java Mon Mar 8 16:10:31 2010 @@ -19,18 +19,18 @@ import java.io.IOException; -/** Access to fields and terms +/** Flex API for access to fields and terms * @lucene.experimental */ public abstract class Fields { - public final static 
Fields[] EMPTY_ARRAY = new Fields[0]; - /** Returns an iterator that will step through all fields - * names */ + * names. This will not return null. */ public abstract FieldsEnum iterator() throws IOException; - /** Get the {@link Terms} for this field */ + /** Get the {@link Terms} for this field. This may return + * null if the field does not exist. */ public abstract Terms terms(String field) throws IOException; -} + public final static Fields[] EMPTY_ARRAY = new Fields[0]; +} Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/FieldsEnum.java URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/FieldsEnum.java?rev=920378&r1=920377&r2=920378&view=diff ============================================================================== --- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/FieldsEnum.java (original) +++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/FieldsEnum.java Mon Mar 8 16:10:31 2010 @@ -28,7 +28,8 @@ public abstract class FieldsEnum { - public final static FieldsEnum[] EMPTY_ARRAY = new FieldsEnum[0]; + // TODO: maybe allow retrieving FieldInfo for current + // field, as optional method? private AttributeSource atts = null; @@ -36,25 +37,38 @@ * Returns the related attributes. */ public AttributeSource attributes() { - if (atts == null) atts = new AttributeSource(); + if (atts == null) { + atts = new AttributeSource(); + } return atts; } - // nocommit -- do we need seek? - // nocommit -- should this return FieldInfo? /** Increments the enumeration to the next field. The * returned field is always interned, so simple == * comparison is allowed. Returns null when there are no * more fields.*/ public abstract String next() throws IOException; - // nocommit should we add a field()? fieldInfo()? - // mirrors TermsEnum - /** Get {@link TermsEnum} for the current field. 
You - * should not call {@link #next()} until you're done - * using this {@link TermsEnum}. After {@link #next} - * returns null, this method should not be called. */ + * should not call {@link #next} until you're done using + * this {@link TermsEnum}. After {@link #next} returns + * null this method should not be called. This method + * will not return null. */ public abstract TermsEnum terms() throws IOException; -} + public final static FieldsEnum[] EMPTY_ARRAY = new FieldsEnum[0]; + + /** Provides zero fields */ + public final static FieldsEnum EMPTY = new FieldsEnum() { + + @Override + public String next() { + return null; + } + + @Override + public TermsEnum terms() { + throw new IllegalStateException("this method should never be called"); + } + }; +} Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/FreqProxFieldMergeState.java URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/FreqProxFieldMergeState.java?rev=920378&r1=920377&r2=920378&view=diff ============================================================================== --- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/FreqProxFieldMergeState.java (original) +++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/FreqProxFieldMergeState.java Mon Mar 8 16:10:31 2010 @@ -18,6 +18,7 @@ */ import java.io.IOException; +import java.util.Comparator; import org.apache.lucene.util.BytesRef; // TODO FI: some of this is "generic" to TermsHash* so we @@ -44,7 +45,7 @@ int docID; int termFreq; - public FreqProxFieldMergeState(FreqProxTermsWriterPerField field, BytesRef.Comparator termComp) { + public FreqProxFieldMergeState(FreqProxTermsWriterPerField field, Comparator termComp) { this.field = field; this.numPostings = field.termsHashPerField.numPostings; this.postings = field.termsHashPerField.sortPostings(termComp); Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/FreqProxTermsWriter.java 
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/FreqProxTermsWriter.java?rev=920378&r1=920377&r2=920378&view=diff ============================================================================== --- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/FreqProxTermsWriter.java (original) +++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/FreqProxTermsWriter.java Mon Mar 8 16:10:31 2010 @@ -24,11 +24,11 @@ import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Comparator; import org.apache.lucene.index.codecs.PostingsConsumer; import org.apache.lucene.index.codecs.FieldsConsumer; import org.apache.lucene.index.codecs.TermsConsumer; -import org.apache.lucene.util.UnicodeUtil; import org.apache.lucene.util.BytesRef; final class FreqProxTermsWriter extends TermsHashConsumer { @@ -159,7 +159,7 @@ final FreqProxFieldMergeState[] mergeStates = new FreqProxFieldMergeState[numFields]; final TermsConsumer termsConsumer = consumer.addField(fields[0].fieldInfo); - final BytesRef.Comparator termComp = termsConsumer.getComparator(); + final Comparator termComp = termsConsumer.getComparator(); for(int i=0;i getComparator() { // Pre-flex indexes always sorted in UTF16 order return BytesRef.getUTF8SortedAsUTF16Comparator(); } @@ -197,11 +198,11 @@ this.term = term; td.seek(term); - if (skipDocs != r.getDeletedDocs()) { + if (skipDocs != MultiFields.getDeletedDocs(r)) { // An external reader's TermDocs/Positions will // silently skip deleted docs, so, we can't allow // arbitrary skipDocs here: - throw new IllegalStateException("external IndexReader requires skipDocs == IndexReader.getDeletedDocs()"); + throw new IllegalStateException("external IndexReader requires skipDocs == MultiFields.getDeletedDocs()"); } return this; @@ -256,11 +257,11 @@ this.term = term; tp.seek(term); - if (skipDocs != r.getDeletedDocs()) { + if (skipDocs != MultiFields.getDeletedDocs(r)) { // An 
external reader's TermDocs/Positions will // silently skip deleted docs, so, we can't allow // arbitrary skipDocs here: - throw new IllegalStateException("external IndexReader requires skipDocs == IndexReader.getDeletedDocs()"); + throw new IllegalStateException("external IndexReader requires skipDocs == MultiFields.getDeletedDocs() skipDocs=" + skipDocs + " MultiFields.getDeletedDocs=" + MultiFields.getDeletedDocs(r) + " r=" + r); } return this; Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/LegacyTerms.java URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/LegacyTerms.java?rev=920378&r1=920377&r2=920378&view=diff ============================================================================== --- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/LegacyTerms.java (original) +++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/LegacyTerms.java Mon Mar 8 16:10:31 2010 @@ -18,6 +18,7 @@ */ import java.io.IOException; +import java.util.Comparator; import org.apache.lucene.util.StringHelper; import org.apache.lucene.util.BytesRef; @@ -41,7 +42,7 @@ } @Override - public BytesRef.Comparator getComparator() { + public Comparator getComparator() { // Pre-flex indexes always sorted in UTF16 order return BytesRef.getUTF8SortedAsUTF16Comparator(); } Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/MultiFields.java URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/MultiFields.java?rev=920378&r1=920377&r2=920378&view=diff ============================================================================== --- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/MultiFields.java (original) +++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/MultiFields.java Mon Mar 8 16:10:31 2010 @@ -25,7 +25,7 @@ import org.apache.lucene.util.ReaderUtil; import org.apache.lucene.util.Bits; import 
org.apache.lucene.util.BytesRef; -import org.apache.lucene.index.codecs.Codec; +import org.apache.lucene.util.MultiBits; /** * Exposes flex API, merged from flex API of sub-segments. @@ -45,13 +45,24 @@ public final class MultiFields extends Fields { private final Fields[] subs; private final ReaderUtil.Slice[] subSlices; - private final Map terms = new HashMap(); + private final Map terms = new HashMap(); + /** Returns a single {@link Fields} instance for this + * reader, merging fields/terms/docs/positions on the + * fly. This method will not return null. + * + * <p><b>NOTE</b>
: this is a slow way to access postings. + * It's better to get the sub-readers (using {@link + * ReaderUtil#Gather}) and iterate through them + * yourself. */ public static Fields getFields(IndexReader r) throws IOException { final IndexReader[] subs = r.getSequentialSubReaders(); if (subs == null) { // already an atomic reader return r.fields(); + } else if (subs.length == 0) { + // no fields + return null; } else if (subs.length == 1) { return getFields(subs[0]); } else { @@ -61,7 +72,14 @@ final List fields = new ArrayList(); final List slices = new ArrayList(); - ReaderUtil.gatherSubFields(null, fields, slices, r, 0); + + new ReaderUtil.Gather(r) { + @Override + protected void add(int base, IndexReader r) throws IOException { + fields.add(r.fields()); + slices.add(new ReaderUtil.Slice(base, r.maxDoc(), fields.size()-1)); + } + }.run(); if (fields.size() == 0) { return null; @@ -77,66 +95,80 @@ } } + public static Bits getDeletedDocs(IndexReader r) throws IOException { + Bits result; + if (r.hasDeletions()) { + + result = r.retrieveDelDocs(); + if (result == null) { + + final List bits = new ArrayList(); + final List starts = new ArrayList(); + + final int maxDoc = new ReaderUtil.Gather(r) { + @Override + protected void add(int base, IndexReader r) throws IOException { + // record all delDocs, even if they are null + bits.add(r.getDeletedDocs()); + starts.add(base); + } + }.run(); + starts.add(maxDoc); + + assert bits.size() > 0; + if (bits.size() == 1) { + // Only one actual sub reader -- optimize this case + result = bits.get(0); + } else { + result = new MultiBits(bits, starts); + } + r.storeDelDocs(result); + } + } else { + result = null; + } + + return result; + } + + /** This method may return null if the field does not exist.*/ public static Terms getTerms(IndexReader r, String field) throws IOException { final Fields fields = getFields(r); - if (fields != null) { - return fields.terms(field); - } else { + if (fields == null) { return null; + } else { + 
return fields.terms(field); } } /** Returns {@link DocsEnum} for the specified field & - * term. This may return null, for example if either the - * field or term does not exist. */ + * term. This may return null if the term does not + * exist. */ public static DocsEnum getTermDocsEnum(IndexReader r, Bits skipDocs, String field, BytesRef term) throws IOException { - assert field != null; assert term != null; - final Fields fields = getFields(r); - if (fields != null) { - final Terms terms = fields.terms(field); - if (terms != null) { - if (Codec.DEBUG) { - System.out.println("mf.termDocsEnum field=" + field + " term=" + term + " terms=" + terms); - } - final DocsEnum docs = terms.docs(skipDocs, term, null); - if (Codec.DEBUG) { - System.out.println("mf.termDocsEnum field=" + field + " docs=" +docs); - } - return docs; - } + final Terms terms = getTerms(r, field); + if (terms != null) { + return terms.docs(skipDocs, term, null); + } else { + return null; } - - return null; } /** Returns {@link DocsAndPositionsEnum} for the specified - * field & term. This may return null, for example if - * either the field or term does not exist. */ + * field & term. This may return null if the term does + * not exist or positions were not indexed. 
*/ public static DocsAndPositionsEnum getTermPositionsEnum(IndexReader r, Bits skipDocs, String field, BytesRef term) throws IOException { assert field != null; assert term != null; - - final Fields fields = getFields(r); - if (fields != null) { - final Terms terms = fields.terms(field); - if (terms != null) { - if (Codec.DEBUG) { - System.out.println("mf.termPositionsEnum field=" + field + " term=" + term + " terms=" + terms); - } - final DocsAndPositionsEnum postings = terms.docsAndPositions(skipDocs, term, null); - if (Codec.DEBUG) { - System.out.println("mf.termPositionsEnum field=" + field + " postings=" +postings); - } - return postings; - } + final Terms terms = getTerms(r, field); + if (terms != null) { + return terms.docsAndPositions(skipDocs, term, null); + } else { + return null; } - - return null; } - public MultiFields(Fields[] subs, ReaderUtil.Slice[] subSlices) { this.subs = subs; this.subSlices = subSlices; @@ -148,14 +180,11 @@ final List fieldsEnums = new ArrayList(); final List fieldsSlices = new ArrayList(); for(int i=0;iConstruct a MultiReader aggregating the named set of (sub)readers. 
@@ -76,7 +73,6 @@ this.subReaders = subReaders.clone(); starts = new int[subReaders.length + 1]; // build starts array decrefOnClose = new boolean[subReaders.length]; - Bits[] subs = new Bits[subReaders.length]; for (int i = 0; i < subReaders.length; i++) { starts[i] = maxDoc; @@ -92,7 +88,6 @@ if (subReaders[i].hasDeletions()) { hasDeletions = true; } - subs[i] = subReaders[i].getDeletedDocs(); final ReaderUtil.Slice slice = new ReaderUtil.Slice(starts[i], subReaders[i].maxDoc(), @@ -101,11 +96,11 @@ } starts[subReaders.length] = maxDoc; - if (hasDeletions) { - deletedDocs = new MultiBits(subs, starts); - } else { - deletedDocs = null; - } + } + + @Override + public long getUniqueTermCount() throws IOException { + throw new UnsupportedOperationException(""); } @Override @@ -115,7 +110,7 @@ @Override public Fields fields() throws IOException { - throw new UnsupportedOperationException("please use MultiFields.getFields if you really need a top level Fields for this reader"); + throw new UnsupportedOperationException("please use MultiFields.getFields if you really need a top level Fields (NOTE that it's usually better to work per segment instead)"); } /** @@ -162,11 +157,7 @@ @Override public Bits getDeletedDocs() throws IOException { - if (subReaders.length == 1) { - return subReaders[0].getDeletedDocs(); - } else { - return deletedDocs; - } + throw new UnsupportedOperationException("please use MultiFields.getDeletedDocs if you really need a top level Bits deletedDocs (NOTE that it's usually better to work per segment instead)"); } /** Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/MultiTerms.java URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/MultiTerms.java?rev=920378&r1=920377&r2=920378&view=diff ============================================================================== --- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/MultiTerms.java (original) +++ 
lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/MultiTerms.java Mon Mar 8 16:10:31 2010 @@ -1,6 +1,5 @@ package org.apache.lucene.index; - /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -24,6 +23,7 @@ import java.io.IOException; import java.util.List; import java.util.ArrayList; +import java.util.Comparator; /** * Exposes flex API, merged from flex API of @@ -35,20 +35,20 @@ public final class MultiTerms extends Terms { private final Terms[] subs; private final ReaderUtil.Slice[] subSlices; - private final BytesRef.Comparator termComp; + private final Comparator termComp; public MultiTerms(Terms[] subs, ReaderUtil.Slice[] subSlices) throws IOException { this.subs = subs; this.subSlices = subSlices; - BytesRef.Comparator _termComp = null; + Comparator _termComp = null; for(int i=0;i subTermComp = subs[i].getComparator(); if (subTermComp != null && !subTermComp.equals(_termComp)) { throw new IllegalStateException("sub-readers have different BytesRef.Comparators; cannot merge"); } @@ -72,12 +72,12 @@ if (termsEnums.size() > 0) { return new MultiTermsEnum(subSlices).reset(termsEnums.toArray(MultiTermsEnum.TermsEnumIndex.EMPTY_ARRAY)); } else { - return null; + return TermsEnum.EMPTY; } } @Override - public BytesRef.Comparator getComparator() { + public Comparator getComparator() { return termComp; } } Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/MultiTermsEnum.java URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/MultiTermsEnum.java?rev=920378&r1=920377&r2=920378&view=diff ============================================================================== --- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/MultiTermsEnum.java (original) +++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/MultiTermsEnum.java Mon Mar 8 16:10:31 2010 @@ -25,6 +25,7 @@ 
import org.apache.lucene.util.ReaderUtil; import java.io.IOException; +import java.util.Comparator; /** * Exposes flex API, merged from flex API of sub-segments. @@ -35,8 +36,8 @@ public final class MultiTermsEnum extends TermsEnum { private final TermMergeQueue queue; - private final TermsEnumWithSlice[] subs; - private final TermsEnumWithSlice[] currentSubs; + private final TermsEnumWithSlice[] subs; // all of our subs (one per sub-reader) + private final TermsEnumWithSlice[] currentSubs; // current subs that have at least one term for this field private final TermsEnumWithSlice[] top; private final MultiDocsEnum.EnumWithSlice[] subDocs; private final MultiDocsAndPositionsEnum.EnumWithSlice[] subDocsAndPositions; @@ -44,7 +45,7 @@ private int numTop; private int numSubs; private BytesRef current; - private BytesRef.Comparator termComp; + private Comparator termComp; public static class TermsEnumIndex { public final static TermsEnumIndex[] EMPTY_ARRAY = new TermsEnumIndex[0]; @@ -87,13 +88,13 @@ } @Override - public BytesRef.Comparator getComparator() { + public Comparator getComparator() { return termComp; } /** The terms array must be newly created TermsEnum, ie * {@link TermsEnum#next} has not yet been called. 
*/ - public MultiTermsEnum reset(TermsEnumIndex[] termsEnumsIndex) throws IOException { + public TermsEnum reset(TermsEnumIndex[] termsEnumsIndex) throws IOException { assert termsEnumsIndex.length <= top.length; numSubs = 0; numTop = 0; @@ -110,7 +111,7 @@ } else { // We cannot merge sub-readers that have // different TermComps - final BytesRef.Comparator subTermComp = termsEnumIndex.termsEnum.getComparator(); + final Comparator subTermComp = termsEnumIndex.termsEnum.getComparator(); if (subTermComp != null && !subTermComp.equals(termComp)) { throw new IllegalStateException("sub-readers have different BytesRef.Comparators; cannot merge"); } @@ -128,7 +129,7 @@ } if (queue.size() == 0) { - return null; + return TermsEnum.EMPTY; } else { return this; } @@ -141,12 +142,12 @@ for(int i=0;i { - BytesRef.Comparator termComp; + Comparator termComp; TermMergeQueue(int size) { initialize(size); } Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/ParallelReader.java URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/ParallelReader.java?rev=920378&r1=920377&r2=920378&view=diff ============================================================================== --- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/ParallelReader.java (original) +++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/ParallelReader.java Mon Mar 8 16:10:31 2010 @@ -171,7 +171,7 @@ if (terms != null) { return terms.iterator(); } else { - return null; + return TermsEnum.EMPTY; } } } @@ -196,7 +196,7 @@ @Override public Bits getDeletedDocs() throws IOException { - return ((IndexReader) readers.get(0)).getDeletedDocs(); + return MultiFields.getDeletedDocs(readers.get(0)); } @Override Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/SegmentInfo.java URL: 
http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/SegmentInfo.java?rev=920378&r1=920377&r2=920378&view=diff ============================================================================== --- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/SegmentInfo.java (original) +++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/SegmentInfo.java Mon Mar 8 16:10:31 2010 @@ -26,6 +26,8 @@ import java.io.IOException; import java.util.List; import java.util.Map; +import java.util.Set; +import java.util.HashSet; import java.util.HashMap; import java.util.ArrayList; import java.util.Collections; @@ -90,8 +92,6 @@ private boolean hasProx; // True if this segment has any fields with omitTermFreqAndPositions==false - // nocommit: unread field - private boolean flexPostings; // True if postings were written with new flex format private Codec codec; @@ -110,17 +110,9 @@ docStoreIsCompoundFile = false; delCount = 0; hasProx = true; - flexPostings = true; this.codec = codec; } - // nocommit -- this ctor is only used by back-compat tests - public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile, boolean hasSingleNormFile) { - this(name, docCount, dir, isCompoundFile, hasSingleNormFile, -1, null, false, true, null); - SegmentWriteState state = new SegmentWriteState(null, dir, name, null, null, docCount, docCount, -1, Codecs.getDefault()); - codec = state.codec = Codecs.getDefault().getWriter(state); - } - public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile, boolean hasSingleNormFile, int docStoreOffset, String docStoreSegment, boolean docStoreIsCompoundFile, boolean hasProx, Codec codec) { @@ -602,7 +594,7 @@ return codec; } - private void addIfExists(List files, String fileName) throws IOException { + private void addIfExists(Set files, String fileName) throws IOException { if (dir.fileExists(fileName)) files.add(fileName); } @@ -620,19 +612,17 @@ return 
files; } - files = new ArrayList(); + Set fileSet = new HashSet(); boolean useCompoundFile = getUseCompoundFile(); if (useCompoundFile) { - files.add(IndexFileNames.segmentFileName(name, IndexFileNames.COMPOUND_FILE_EXTENSION)); + fileSet.add(IndexFileNames.segmentFileName(name, IndexFileNames.COMPOUND_FILE_EXTENSION)); } else { - final String[] exts = IndexFileNames.NON_STORE_INDEX_EXTENSIONS; for(String ext : IndexFileNames.NON_STORE_INDEX_EXTENSIONS) { - addIfExists(files, IndexFileNames.segmentFileName(name, ext)); + addIfExists(fileSet, IndexFileNames.segmentFileName(name, ext)); } - // nocommit -- only does ifExists on prx for standard codec - codec.files(dir, this, files); + codec.files(dir, this, fileSet); } if (docStoreOffset != -1) { @@ -640,19 +630,19 @@ // vectors) with other segments assert docStoreSegment != null; if (docStoreIsCompoundFile) { - files.add(IndexFileNames.segmentFileName(docStoreSegment, IndexFileNames.COMPOUND_FILE_STORE_EXTENSION)); + fileSet.add(IndexFileNames.segmentFileName(docStoreSegment, IndexFileNames.COMPOUND_FILE_STORE_EXTENSION)); } else { for (String ext : IndexFileNames.STORE_INDEX_EXTENSIONS) - addIfExists(files, IndexFileNames.segmentFileName(docStoreSegment, ext)); + addIfExists(fileSet, IndexFileNames.segmentFileName(docStoreSegment, ext)); } } else if (!useCompoundFile) { for (String ext : IndexFileNames.STORE_INDEX_EXTENSIONS) - addIfExists(files, IndexFileNames.segmentFileName(name, ext)); + addIfExists(fileSet, IndexFileNames.segmentFileName(name, ext)); } String delFileName = IndexFileNames.fileNameFromGeneration(name, IndexFileNames.DELETES_EXTENSION, delGen); if (delFileName != null && (delGen >= YES || dir.fileExists(delFileName))) { - files.add(delFileName); + fileSet.add(delFileName); } // Careful logic for norms files @@ -661,14 +651,14 @@ long gen = normGen[i]; if (gen >= YES) { // Definitely a separate norm file, with generation: - files.add(IndexFileNames.fileNameFromGeneration(name, 
IndexFileNames.SEPARATE_NORMS_EXTENSION + i, gen)); + fileSet.add(IndexFileNames.fileNameFromGeneration(name, IndexFileNames.SEPARATE_NORMS_EXTENSION + i, gen)); } else if (NO == gen) { // No separate norms but maybe plain norms // in the non compound file case: if (!hasSingleNormFile && !useCompoundFile) { String fileName = IndexFileNames.segmentFileName(name, IndexFileNames.PLAIN_NORMS_EXTENSION + i); if (dir.fileExists(fileName)) { - files.add(fileName); + fileSet.add(fileName); } } } else if (CHECK_DIR == gen) { @@ -680,7 +670,7 @@ fileName = IndexFileNames.segmentFileName(name, IndexFileNames.PLAIN_NORMS_EXTENSION + i); } if (fileName != null && dir.fileExists(fileName)) { - files.add(fileName); + fileSet.add(fileName); } } } @@ -699,10 +689,13 @@ for(int i=0;i(userData); return sis; Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/SegmentMerger.java URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/SegmentMerger.java?rev=920378&r1=920377&r2=920378&view=diff ============================================================================== --- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/SegmentMerger.java (original) +++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/SegmentMerger.java Mon Mar 8 16:10:31 2010 @@ -20,7 +20,8 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Collection; - +import java.util.Set; +import java.util.HashSet; import java.util.List; import org.apache.lucene.document.Document; @@ -171,31 +172,23 @@ throws IOException { CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, fileName, checkAbort); - List files = new ArrayList(); + Set fileSet = new HashSet(); // Basic files for (String ext : IndexFileNames.COMPOUND_EXTENSIONS_NOT_CODEC) { - - // nocommit - /* - if (ext.equals(IndexFileNames.PROX_EXTENSION) && !hasProx()) - continue; - - */ - if (mergeDocStores || 
(!ext.equals(IndexFileNames.FIELDS_EXTENSION) && !ext.equals(IndexFileNames.FIELDS_INDEX_EXTENSION))) - files.add(IndexFileNames.segmentFileName(segment, ext)); + fileSet.add(IndexFileNames.segmentFileName(segment, ext)); } - codec.files(directory, info, files); + codec.files(directory, info, fileSet); // Fieldable norm files int numFIs = fieldInfos.size(); for (int i = 0; i < numFIs; i++) { FieldInfo fi = fieldInfos.fieldInfo(i); if (fi.isIndexed && !fi.omitNorms) { - files.add(IndexFileNames.segmentFileName(segment, IndexFileNames.NORMS_EXTENSION)); + fileSet.add(IndexFileNames.segmentFileName(segment, IndexFileNames.NORMS_EXTENSION)); break; } } @@ -203,19 +196,19 @@ // Vector files if (fieldInfos.hasVectors() && mergeDocStores) { for (String ext : IndexFileNames.VECTOR_EXTENSIONS) { - files.add(IndexFileNames.segmentFileName(segment, ext)); + fileSet.add(IndexFileNames.segmentFileName(segment, ext)); } } // Now merge all added files - for (String file : files) { + for (String file : fileSet) { cfsWriter.addFile(file); } // Perform the merge cfsWriter.close(); - return files; + return new ArrayList(fileSet); } private void addIndexed(IndexReader reader, FieldInfos fInfos, @@ -571,12 +564,25 @@ final List fields = new ArrayList(); final List subReaders = new ArrayList(); final List slices = new ArrayList(); + final List bits = new ArrayList(); + final List bitsStarts = new ArrayList(); final int numReaders = readers.size(); for(int i=0;i fieldsReaderLocal = new FieldsReaderLocal(); CloseableThreadLocal termVectorsLocal = new CloseableThreadLocal(); @@ -835,7 +834,7 @@ @Override public TermEnum terms() throws IOException { ensureOpen(); - if (isPreFlex) { + if (core.isPreFlex) { // For old API on an old segment, instead of // converting old API -> new API -> old API, just give // direct access to old: @@ -852,7 +851,7 @@ @Override public TermEnum terms(Term t) throws IOException { ensureOpen(); - if (isPreFlex) { + if (core.isPreFlex) { // For old API on an old 
segment, instead of // converting old API -> new API -> old API, just give // direct access to old: @@ -901,7 +900,7 @@ @Override public TermDocs termDocs() throws IOException { ensureOpen(); - if (isPreFlex) { + if (core.isPreFlex) { // For old API on an old segment, instead of // converting old API -> new API -> old API, just give // direct access to old: @@ -921,7 +920,7 @@ @Override public TermPositions termPositions() throws IOException { ensureOpen(); - if (isPreFlex) { + if (core.isPreFlex) { // For old API on an old segment, instead of // converting old API -> new API -> old API, just give // direct access to old: @@ -1300,19 +1299,11 @@ // This is necessary so that cloned SegmentReaders (which // share the underlying postings data) will map to the // same entry in the FieldCache. See LUCENE-1579. - // nocommit - what to return here? @Override public final Object getFieldCacheKey() { return core; } - // nocommit: missing? - //@Override - //public long getUniqueTermCount() { - // return core.getTermsReader().size(); - //} - - /** * Lotsa tests did hacks like:
* SegmentReader reader = (SegmentReader) IndexReader.open(dir);
@@ -1363,9 +1354,6 @@ } else if (t != null) { // Pre-seek to this term - // nocommit -- inefficient; do we need - // FieldsEnum.seek? (but this is slow only for - // legacy API, and, when field count is high) while(currentField.compareTo(t.field) < 0) { currentField = fields.next(); if (currentField == null) { @@ -1379,7 +1367,6 @@ // We found some field -- get its terms: terms = fields.terms(); - // nocommit: confirm inlining is working! if (currentField == t.field) { // We found exactly the requested field; now // seek the term text: @@ -1486,7 +1473,6 @@ public void close() {} public void seek(TermEnum termEnum) throws IOException { - // nocommit -- optimize for the special cases here seek(termEnum.term()); } @@ -1590,7 +1576,6 @@ public void close() {} public void seek(TermEnum termEnum) throws IOException { - // nocommit -- optimize for the special cases here seek(termEnum.term()); } Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/SegmentWriteState.java URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/SegmentWriteState.java?rev=920378&r1=920377&r2=920378&view=diff ============================================================================== --- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/SegmentWriteState.java (original) +++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/SegmentWriteState.java Mon Mar 8 16:10:31 2010 @@ -31,6 +31,9 @@ * @lucene.experimental */ public class SegmentWriteState { + // nocommit -- not clean that this is here; sometimes we + // write a newly flushed segment; other times a merged + // segment (and this is null): DocumentsWriter docWriter; public Directory directory; public String segmentName; Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java URL: 
http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java?rev=920378&r1=920377&r2=920378&view=diff ============================================================================== --- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java (original) +++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java Mon Mar 8 16:10:31 2010 @@ -95,9 +95,6 @@ public void abort() {} - // nocommit -- should be @ thread level not field - private final BytesRef flushTerm = new BytesRef(); - /** Called once per field per document if term vectors * are enabled, to write the vectors to * RAMOutputStream, which is then quickly flushed to @@ -109,6 +106,8 @@ final int numPostings = termsHashPerField.numPostings; + final BytesRef flushTerm = perThread.flushTerm; + assert numPostings >= 0; if (!doVectors || numPostings == 0) @@ -128,8 +127,8 @@ perThread.doc.addField(termsHashPerField.fieldInfo.number); - // nocommit -- should I sort by whatever terms dict is - // sorting by? + // TODO: we may want to make this sort in same order + // as Codec's terms dict? 
final RawPostingList[] postings = termsHashPerField.sortPostings(BytesRef.getUTF8SortedAsUTF16Comparator()); tvf.writeVInt(numPostings); Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerThread.java URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerThread.java?rev=920378&r1=920377&r2=920378&view=diff ============================================================================== --- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerThread.java (original) +++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerThread.java Mon Mar 8 16:10:31 2010 @@ -17,11 +17,14 @@ * limitations under the License. */ +import org.apache.lucene.util.BytesRef; + final class TermVectorsTermsWriterPerThread extends TermsHashConsumerPerThread { final TermVectorsTermsWriter termsWriter; final TermsHashPerThread termsHashPerThread; final DocumentsWriter.DocState docState; + final BytesRef flushTerm = new BytesRef(); TermVectorsTermsWriter.PerDoc doc; Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/Terms.java URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/Terms.java?rev=920378&r1=920377&r2=920378&view=diff ============================================================================== --- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/Terms.java (original) +++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/Terms.java Mon Mar 8 16:10:31 2010 @@ -18,6 +18,7 @@ */ import java.io.IOException; +import java.util.Comparator; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CloseableThreadLocal; @@ -29,21 +30,20 @@ public abstract class Terms { - public final static Terms[] EMPTY_ARRAY = new Terms[0]; - // Privately cache a TermsEnum 
per-thread for looking up // docFreq and getting a private DocsEnum private final CloseableThreadLocal threadEnums = new CloseableThreadLocal(); - /** Returns an iterator that will step through all terms */ + /** Returns an iterator that will step through all + * terms. This method will not return null.*/ public abstract TermsEnum iterator() throws IOException; /** Return the BytesRef Comparator used to sort terms - * provided by the iterator. NOTE: this may return null + * provided by the iterator. This method may return null * if there are no terms. This method may be invoked * many times; it's best to cache a single instance & * reuse it. */ - public abstract BytesRef.Comparator getComparator() throws IOException; + public abstract Comparator getComparator() throws IOException; /** Returns the number of documents containing the * specified term text. Returns 0 if the term does not @@ -57,9 +57,8 @@ } } - // nocommit -- or maybe make a separate positions(...) method? - /** Get DocsEnum for the specified term. Returns null if - * the term does not exist. */ + /** Get DocsEnum for the specified term. This method may + * return null if the term does not exist. */ public DocsEnum docs(Bits skipDocs, BytesRef text, DocsEnum reuse) throws IOException { final TermsEnum termsEnum = getThreadTermsEnum(); if (termsEnum.seek(text) == TermsEnum.SeekStatus.FOUND) { @@ -69,8 +68,9 @@ } } - /** Get DocsEnum for the specified term. Returns null if - * the term does not exist. */ + /** Get DocsAndPositionsEnum for the specified term. This method + * may return null if the term does not exist, or + * positions were not indexed. 
*/ public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, BytesRef text, DocsAndPositionsEnum reuse) throws IOException { final TermsEnum termsEnum = getThreadTermsEnum(); if (termsEnum.seek(text) == TermsEnum.SeekStatus.FOUND) { @@ -97,4 +97,5 @@ protected void close() { threadEnums.close(); } + public final static Terms[] EMPTY_ARRAY = new Terms[0]; } Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermsEnum.java URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermsEnum.java?rev=920378&r1=920377&r2=920378&view=diff ============================================================================== --- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermsEnum.java (original) +++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermsEnum.java Mon Mar 8 16:10:31 2010 @@ -18,6 +18,7 @@ */ import java.io.IOException; +import java.util.Comparator; import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.Bits; @@ -74,7 +75,7 @@ /** Returns current term. Do not call this before calling * next() for the first time, after next() returns null - * or seek returns {@link SeekStatus#END}.*/ + * or after seek returns {@link SeekStatus#END}.*/ public abstract BytesRef term() throws IOException; /** Returns ordinal position for current term. This is an @@ -83,6 +84,8 @@ * before calling next() for the first time, after next() * returns null or seek returns {@link * SeekStatus#END}. */ + // nocommit -- should we allow calling this after next + // returns null? and it returns 1+ max ord? public abstract long ord() throws IOException; /** Returns the number of documents containing the current @@ -91,27 +94,29 @@ * {@link SeekStatus#END}.*/ public abstract int docFreq(); - // nocommit -- clarify if this may return null - // nocommit -- maybe require up front boolean doPositions? - // nocommit -- or maybe make a separate positions(...) method? 
/** Get {@link DocsEnum} for the current term. Do not - * call this before calling next() for the first time, - * after next() returns null or seek returns {@link - * SeekStatus#END}. + * call this before calling {@link #next} or {@link + * #seek} for the first time. This method will not + * return null. * * @param skipDocs set bits are documents that should not * be returned * @param reuse pass a prior DocsEnum for possible reuse */ public abstract DocsEnum docs(Bits skipDocs, DocsEnum reuse) throws IOException; + /** Get {@link DocsAndPositionsEnum} for the current term. + * Do not call this before calling {@link #next} or + * {@link #seek} for the first time. This method will + * only return null if positions were not indexed into + * the postings by this codec. */ public abstract DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException; /** Return the {@link BytesRef} Comparator used to sort - * terms provided by the iterator. NOTE: this may return + * terms provided by the iterator. This may return * null if there are no terms. Callers may invoke this * method many times, so it's best to cache a single * instance & reuse it. */ - public abstract BytesRef.Comparator getComparator() throws IOException; + public abstract Comparator getComparator() throws IOException; /** An empty TermsEnum for quickly returning an empty instance e.g. 
* in {@link org.apache.lucene.search.MultiTermQuery} @@ -128,30 +133,40 @@ public SeekStatus seek(long ord) { return SeekStatus.END; } @Override - public BytesRef term() { return null; } + public BytesRef term() { + throw new IllegalStateException("this method should never be called"); + } @Override - public BytesRef.Comparator getComparator() { + public Comparator getComparator() { // return an unused dummy to prevent NPE return BytesRef.getUTF8SortedAsUTF16Comparator(); } @Override - public int docFreq() { return -1; } + public int docFreq() { + throw new IllegalStateException("this method should never be called"); + } @Override - public long ord() { return -1; } + public long ord() { + throw new IllegalStateException("this method should never be called"); + } @Override - public DocsEnum docs(Bits bits, DocsEnum reuse) { return null; } + public DocsEnum docs(Bits bits, DocsEnum reuse) { + throw new IllegalStateException("this method should never be called"); + } @Override public DocsAndPositionsEnum docsAndPositions(Bits bits, DocsAndPositionsEnum reuse) { - return null; + throw new IllegalStateException("this method should never be called"); } @Override - public BytesRef next() { return null; } + public BytesRef next() { + return null; + } @Override // make it synchronized here, to prevent double lazy init public synchronized AttributeSource attributes() {