Return-Path: Delivered-To: apmail-lucene-java-commits-archive@www.apache.org Received: (qmail 5161 invoked from network); 21 Nov 2009 18:36:50 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.3) by minotaur.apache.org with SMTP; 21 Nov 2009 18:36:50 -0000 Received: (qmail 21503 invoked by uid 500); 21 Nov 2009 18:36:50 -0000 Delivered-To: apmail-lucene-java-commits-archive@lucene.apache.org Received: (qmail 21427 invoked by uid 500); 21 Nov 2009 18:36:50 -0000 Mailing-List: contact java-commits-help@lucene.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: java-dev@lucene.apache.org Delivered-To: mailing list java-commits@lucene.apache.org Received: (qmail 21418 invoked by uid 99); 21 Nov 2009 18:36:50 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Sat, 21 Nov 2009 18:36:50 +0000 X-ASF-Spam-Status: No, hits=-2.5 required=5.0 tests=AWL,BAYES_00 X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Sat, 21 Nov 2009 18:36:44 +0000 Received: by eris.apache.org (Postfix, from userid 65534) id D8024238888E; Sat, 21 Nov 2009 18:36:24 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r882962 - in /lucene/java/branches/flex_1458: contrib/misc/src/java/org/apache/lucene/index/ src/java/org/apache/lucene/index/ src/java/org/apache/lucene/index/codecs/ src/java/org/apache/lucene/index/codecs/standard/ src/java/org/apache/lu... Date: Sat, 21 Nov 2009 18:36:22 -0000 To: java-commits@lucene.apache.org From: mikemccand@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20091121183624.D8024238888E@eris.apache.org> Author: mikemccand Date: Sat Nov 21 18:36:17 2009 New Revision: 882962 URL: http://svn.apache.org/viewvc?rev=882962&view=rev Log: LUCENE-1458 (on flex branch): allow codecs to override merging Added: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/MergeState.java (with props) Removed: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/SegmentFieldMergeQueue.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/SegmentMergeQueue.java Modified: lucene/java/branches/flex_1458/contrib/misc/src/java/org/apache/lucene/index/MultiPassIndexSplitter.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/DocumentsWriter.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/SegmentMerger.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/DocsConsumer.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/FieldsConsumer.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/PositionsConsumer.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/TermsConsumer.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java lucene/java/branches/flex_1458/src/java/org/apache/lucene/util/OpenBitSet.java Modified: lucene/java/branches/flex_1458/contrib/misc/src/java/org/apache/lucene/index/MultiPassIndexSplitter.java URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/misc/src/java/org/apache/lucene/index/MultiPassIndexSplitter.java?rev=882962&r1=882961&r2=882962&view=diff ============================================================================== --- lucene/java/branches/flex_1458/contrib/misc/src/java/org/apache/lucene/index/MultiPassIndexSplitter.java (original) +++ lucene/java/branches/flex_1458/contrib/misc/src/java/org/apache/lucene/index/MultiPassIndexSplitter.java Sat Nov 21 18:36:17 2009 @@ -26,6 +26,7 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.OpenBitSet; +import org.apache.lucene.util.Bits; /** * This tool splits input index into multiple equal parts. The method employed @@ -212,6 +213,11 @@ } @Override + public Bits getDeletedDocs() { + return dels; + } + + @Override public boolean isDeleted(int n) { return dels.get(n); } Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/DocumentsWriter.java URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/DocumentsWriter.java?rev=882962&r1=882961&r2=882962&view=diff ============================================================================== --- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/DocumentsWriter.java (original) +++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/DocumentsWriter.java Sat Nov 21 18:36:17 2009 @@ -989,6 +989,8 @@ // by re-using the same TermsEnum and seeking only // forwards if (term.field() != currentField) { + // nocommit -- once we sync up branch again, add + // assert that this field is always > last one currentField = term.field(); Terms terms = fields.terms(currentField); if (terms != null) { Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/SegmentMerger.java URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/SegmentMerger.java?rev=882962&r1=882961&r2=882962&view=diff ============================================================================== --- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/SegmentMerger.java (original) +++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/SegmentMerger.java Sat Nov 21 18:36:17 2009 @@ -28,13 +28,12 @@ import org.apache.lucene.index.MergePolicy.MergeAbortedException; import org.apache.lucene.index.codecs.Codecs; import org.apache.lucene.index.codecs.Codec; +import org.apache.lucene.index.codecs.MergeState; import org.apache.lucene.index.codecs.FieldsConsumer; -import org.apache.lucene.index.codecs.TermsConsumer; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; -import org.apache.lucene.index.codecs.DocsConsumer; -import org.apache.lucene.index.codecs.PositionsConsumer; +import org.apache.lucene.util.Bits; /** * The SegmentMerger class combines two or more Segments, represented by an IndexReader ({@link #add}, @@ -575,9 +574,6 @@ } } - private SegmentFieldMergeQueue fieldsQueue; - private SegmentMergeQueue termsQueue; - Codec getCodec() { return codec; } @@ -587,204 +583,68 @@ SegmentWriteState state = new SegmentWriteState(null, directory, segment, fieldInfos, null, mergedDocs, 0, termIndexInterval, codecs); // Let Codecs decide which codec will be used to write - // this segment: + // the new segment: codec = codecs.getWriter(state); + mergeState = new MergeState(); + mergeState.readers = readers; + mergeState.fieldInfos = fieldInfos; + mergeState.readerCount = readers.size(); + mergeState.mergedDocCount = mergedDocs; + + // Remap docIDs + mergeState.delCounts = new int[mergeState.readerCount]; + mergeState.docMaps = new int[mergeState.readerCount][]; + mergeState.docBase = new int[mergeState.readerCount]; + + int docBase = 0; + for(int i=0;i 0) { - if (payloadBuffer == null || payloadBuffer.length < payloadLength) - payloadBuffer = new byte[payloadLength]; - positions.getPayload(payloadBuffer, 0); - } - posConsumer.addPosition(position, payloadBuffer, 0, payloadLength); - } - posConsumer.finishDoc(); - } - } - } - termsConsumer.finishTerm(text, df); - - return df; - } - private void mergeNorms() throws IOException { byte[] normBuffer = null; IndexOutput output = null; Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/DocsConsumer.java URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/DocsConsumer.java?rev=882962&r1=882961&r2=882962&view=diff ============================================================================== --- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/DocsConsumer.java (original) +++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/DocsConsumer.java Sat Nov 21 18:36:17 2009 @@ -19,15 +19,12 @@ import java.io.IOException; -import org.apache.lucene.store.IndexOutput; -import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.DocsEnum; /** * NOTE: this API is experimental and will likely change */ -// nocommit -- name this "StandardDocsConsumer"? eg the -// RAMCodec doesn't need most of these methods... public abstract class DocsConsumer { // nocommit @@ -43,4 +40,55 @@ * consumer doesn't need to see the positions for this * doc. */ public abstract PositionsConsumer addDoc(int docID, int termDocFreq) throws IOException; + + public static class DocsMergeState { + DocsEnum docsEnum; + int[] docMap; + int docBase; + } + + /** Default merge impl: append documents, mapping around + * deletes */ + public int merge(MergeState mergeState, DocsMergeState[] toMerge, int count) throws IOException { + + int df = 0; + // Append docs in order: + for(int i=0;i { + public MergeQueue(int size) { + initialize(size); + } + + @Override + protected final boolean lessThan(FieldMergeState a, FieldMergeState b) { + final int cmp = a.current.compareTo(b.current); + if (cmp != 0) { + return cmp < 0; + } else { + // nocommit -- technically not required to break + // ties, since the terms merging will do so? + return a.readerIndex < b.readerIndex; + } + } + } + + public void merge(MergeState mergeState, Fields[] fields) throws IOException { + + MergeQueue queue = new MergeQueue(fields.length); + + for(int i=0;i 0) { + // Merge one field + final String field = pending[0].current; + mergeState.fieldInfo = mergeState.fieldInfos.fieldInfo(field); + mergeState.omitTermFreqAndPositions = mergeState.fieldInfo.omitTermFreqAndPositions; + final TermsConsumer termsConsumer = addField(mergeState.fieldInfo); + termsConsumer.merge(mergeState, match, matchCount); + } + + // Put fields back into queue + for(int i=0;iThis API is experimental and subject to suddenly + * change.

*/ +public class MergeState { + public FieldInfos fieldInfos; + public List readers; // Readers being merged + public int readerCount; // Number of readers being merged + public int[][] docMaps; // Maps docIDs around deletions + public int[] delCounts; // Deletion count per reader + public int[] docBase; // New docID base per reader + public int mergedDocCount; // Total # merged docs + + // Updated per field; + public FieldInfo fieldInfo; + public boolean omitTermFreqAndPositions; +} + Propchange: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/MergeState.java ------------------------------------------------------------------------------ svn:eol-style = native Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/PositionsConsumer.java URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/PositionsConsumer.java?rev=882962&r1=882961&r2=882962&view=diff ============================================================================== --- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/PositionsConsumer.java (original) +++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/PositionsConsumer.java Sat Nov 21 18:36:17 2009 @@ -19,7 +19,7 @@ import java.io.IOException; -import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.index.PositionsEnum; public abstract class PositionsConsumer { @@ -32,4 +32,23 @@ /** Called when we are done adding positions & payloads * for each doc */ public abstract void finishDoc() throws IOException; + + private byte[] payloadBuffer; + + /** Default merge impl, just copies positions & payloads + * from the input. */ + public void merge(MergeState mergeState, PositionsEnum positions, int freq) throws IOException { + for(int i=0;i 0) { + if (payloadBuffer == null || payloadBuffer.length < payloadLength) { + payloadBuffer = new byte[payloadLength]; + } + positions.getPayload(payloadBuffer, 0); + } + addPosition(position, payloadBuffer, 0, payloadLength); + } + finishDoc(); + } } Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/TermsConsumer.java URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/TermsConsumer.java?rev=882962&r1=882961&r2=882962&view=diff ============================================================================== --- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/TermsConsumer.java (original) +++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/TermsConsumer.java Sat Nov 21 18:36:17 2009 @@ -20,6 +20,9 @@ import java.io.IOException; import org.apache.lucene.index.TermRef; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.util.PriorityQueue; /** * NOTE: this API is experimental and will likely change @@ -35,4 +38,96 @@ /** Called when we are done adding terms to this field */ public abstract void finish() throws IOException; + + // For default merge impl + public static class TermMergeState { + TermRef current; + TermsEnum termsEnum; + int readerIndex; + } + + private final static class MergeQueue extends PriorityQueue { + public MergeQueue(int size) { + initialize(size); + } + + @Override + protected final boolean lessThan(TermMergeState a, TermMergeState b) { + final int cmp = a.current.compareTerm(b.current); + if (cmp != 0) { + return cmp < 0; + } else { + return a.readerIndex < b.readerIndex; + } + } + } + + private MergeQueue queue; + private DocsConsumer.DocsMergeState[] match; + private TermMergeState[] pending; + + /** Default merge impl */ + public void merge(MergeState mergeState, TermMergeState[] termsStates, int count) throws IOException { + if (queue == null) { + queue = new MergeQueue(mergeState.readerCount); + match = new DocsConsumer.DocsMergeState[mergeState.readerCount]; + for(int i=0;i 0) { + // Merge one term + final TermRef term = pending[0].current; + final DocsConsumer docsConsumer = startTerm(term); + final int numDocs = docsConsumer.merge(mergeState, match, matchCount); + finishTerm(term, numDocs); + } + + // Put terms back into queue + for(int i=0;i