Return-Path:
Delivered-To: apmail-lucene-java-commits-archive@www.apache.org
Received: (qmail 5161 invoked from network); 21 Nov 2009 18:36:50 -0000
Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.3)
by minotaur.apache.org with SMTP; 21 Nov 2009 18:36:50 -0000
Received: (qmail 21503 invoked by uid 500); 21 Nov 2009 18:36:50 -0000
Delivered-To: apmail-lucene-java-commits-archive@lucene.apache.org
Received: (qmail 21427 invoked by uid 500); 21 Nov 2009 18:36:50 -0000
Mailing-List: contact java-commits-help@lucene.apache.org; run by ezmlm
Precedence: bulk
List-Help:
List-Unsubscribe:
List-Post:
List-Id:
Reply-To: java-dev@lucene.apache.org
Delivered-To: mailing list java-commits@lucene.apache.org
Received: (qmail 21418 invoked by uid 99); 21 Nov 2009 18:36:50 -0000
Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136)
by apache.org (qpsmtpd/0.29) with ESMTP; Sat, 21 Nov 2009 18:36:50 +0000
X-ASF-Spam-Status: No, hits=-2.5 required=5.0
tests=AWL,BAYES_00
X-Spam-Check-By: apache.org
Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4)
by apache.org (qpsmtpd/0.29) with ESMTP; Sat, 21 Nov 2009 18:36:44 +0000
Received: by eris.apache.org (Postfix, from userid 65534)
id D8024238888E; Sat, 21 Nov 2009 18:36:24 +0000 (UTC)
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Subject: svn commit: r882962 - in /lucene/java/branches/flex_1458:
contrib/misc/src/java/org/apache/lucene/index/
src/java/org/apache/lucene/index/ src/java/org/apache/lucene/index/codecs/
src/java/org/apache/lucene/index/codecs/standard/ src/java/org/apache/lu...
Date: Sat, 21 Nov 2009 18:36:22 -0000
To: java-commits@lucene.apache.org
From: mikemccand@apache.org
X-Mailer: svnmailer-1.0.8
Message-Id: <20091121183624.D8024238888E@eris.apache.org>
Author: mikemccand
Date: Sat Nov 21 18:36:17 2009
New Revision: 882962
URL: http://svn.apache.org/viewvc?rev=882962&view=rev
Log:
LUCENE-1458 (on flex branch): allow codecs to override merging
Added:
lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/MergeState.java (with props)
Removed:
lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/SegmentFieldMergeQueue.java
lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/SegmentMergeQueue.java
Modified:
lucene/java/branches/flex_1458/contrib/misc/src/java/org/apache/lucene/index/MultiPassIndexSplitter.java
lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/DocumentsWriter.java
lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/SegmentMerger.java
lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/DocsConsumer.java
lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/FieldsConsumer.java
lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/PositionsConsumer.java
lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/TermsConsumer.java
lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java
lucene/java/branches/flex_1458/src/java/org/apache/lucene/util/OpenBitSet.java
Modified: lucene/java/branches/flex_1458/contrib/misc/src/java/org/apache/lucene/index/MultiPassIndexSplitter.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/misc/src/java/org/apache/lucene/index/MultiPassIndexSplitter.java?rev=882962&r1=882961&r2=882962&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/misc/src/java/org/apache/lucene/index/MultiPassIndexSplitter.java (original)
+++ lucene/java/branches/flex_1458/contrib/misc/src/java/org/apache/lucene/index/MultiPassIndexSplitter.java Sat Nov 21 18:36:17 2009
@@ -26,6 +26,7 @@
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.OpenBitSet;
+import org.apache.lucene.util.Bits;
/**
* This tool splits input index into multiple equal parts. The method employed
@@ -212,6 +213,11 @@
}
@Override
+ public Bits getDeletedDocs() {
+ return dels;
+ }
+
+ @Override
public boolean isDeleted(int n) {
return dels.get(n);
}
Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/DocumentsWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/DocumentsWriter.java?rev=882962&r1=882961&r2=882962&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/DocumentsWriter.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/DocumentsWriter.java Sat Nov 21 18:36:17 2009
@@ -989,6 +989,8 @@
// by re-using the same TermsEnum and seeking only
// forwards
if (term.field() != currentField) {
+ // nocommit -- once we sync up branch again, add
+ // assert that this field is always > last one
currentField = term.field();
Terms terms = fields.terms(currentField);
if (terms != null) {
Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/SegmentMerger.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/SegmentMerger.java?rev=882962&r1=882961&r2=882962&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/SegmentMerger.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/SegmentMerger.java Sat Nov 21 18:36:17 2009
@@ -28,13 +28,12 @@
import org.apache.lucene.index.MergePolicy.MergeAbortedException;
import org.apache.lucene.index.codecs.Codecs;
import org.apache.lucene.index.codecs.Codec;
+import org.apache.lucene.index.codecs.MergeState;
import org.apache.lucene.index.codecs.FieldsConsumer;
-import org.apache.lucene.index.codecs.TermsConsumer;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
-import org.apache.lucene.index.codecs.DocsConsumer;
-import org.apache.lucene.index.codecs.PositionsConsumer;
+import org.apache.lucene.util.Bits;
/**
* The SegmentMerger class combines two or more Segments, represented by an IndexReader ({@link #add},
@@ -575,9 +574,6 @@
}
}
- private SegmentFieldMergeQueue fieldsQueue;
- private SegmentMergeQueue termsQueue;
-
Codec getCodec() {
return codec;
}
@@ -587,204 +583,68 @@
SegmentWriteState state = new SegmentWriteState(null, directory, segment, fieldInfos, null, mergedDocs, 0, termIndexInterval, codecs);
// Let Codecs decide which codec will be used to write
- // this segment:
+ // the new segment:
codec = codecs.getWriter(state);
+ mergeState = new MergeState();
+ mergeState.readers = readers;
+ mergeState.fieldInfos = fieldInfos;
+ mergeState.readerCount = readers.size();
+ mergeState.mergedDocCount = mergedDocs;
+
+ // Remap docIDs
+ mergeState.delCounts = new int[mergeState.readerCount];
+ mergeState.docMaps = new int[mergeState.readerCount][];
+ mergeState.docBase = new int[mergeState.readerCount];
+
+ int docBase = 0;
+ for(int i=0;i 0) {
- if (payloadBuffer == null || payloadBuffer.length < payloadLength)
- payloadBuffer = new byte[payloadLength];
- positions.getPayload(payloadBuffer, 0);
- }
- posConsumer.addPosition(position, payloadBuffer, 0, payloadLength);
- }
- posConsumer.finishDoc();
- }
- }
- }
- termsConsumer.finishTerm(text, df);
-
- return df;
- }
-
private void mergeNorms() throws IOException {
byte[] normBuffer = null;
IndexOutput output = null;
Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/DocsConsumer.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/DocsConsumer.java?rev=882962&r1=882961&r2=882962&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/DocsConsumer.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/DocsConsumer.java Sat Nov 21 18:36:17 2009
@@ -19,15 +19,12 @@
import java.io.IOException;
-import org.apache.lucene.store.IndexOutput;
-import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.DocsEnum;
/**
* NOTE: this API is experimental and will likely change
*/
-// nocommit -- name this "StandardDocsConsumer"? eg the
-// RAMCodec doesn't need most of these methods...
public abstract class DocsConsumer {
// nocommit
@@ -43,4 +40,55 @@
* consumer doesn't need to see the positions for this
* doc. */
public abstract PositionsConsumer addDoc(int docID, int termDocFreq) throws IOException;
+
+ public static class DocsMergeState {
+ DocsEnum docsEnum;
+ int[] docMap;
+ int docBase;
+ }
+
+ /** Default merge impl: append documents, mapping around
+ * deletes */
+ public int merge(MergeState mergeState, DocsMergeState[] toMerge, int count) throws IOException {
+
+ int df = 0;
+ // Append docs in order:
+ for(int i=0;i {
+ public MergeQueue(int size) {
+ initialize(size);
+ }
+
+ @Override
+ protected final boolean lessThan(FieldMergeState a, FieldMergeState b) {
+ final int cmp = a.current.compareTo(b.current);
+ if (cmp != 0) {
+ return cmp < 0;
+ } else {
+ // nocommit -- technically not required to break
+ // ties, since the terms merging will do so?
+ return a.readerIndex < b.readerIndex;
+ }
+ }
+ }
+
+ public void merge(MergeState mergeState, Fields[] fields) throws IOException {
+
+ MergeQueue queue = new MergeQueue(fields.length);
+
+ for(int i=0;i 0) {
+ // Merge one field
+ final String field = pending[0].current;
+ mergeState.fieldInfo = mergeState.fieldInfos.fieldInfo(field);
+ mergeState.omitTermFreqAndPositions = mergeState.fieldInfo.omitTermFreqAndPositions;
+ final TermsConsumer termsConsumer = addField(mergeState.fieldInfo);
+ termsConsumer.merge(mergeState, match, matchCount);
+ }
+
+ // Put fields back into queue
+ for(int i=0;iThis API is experimental and subject to sudden
+ * change.
*/
+public class MergeState {
+ public FieldInfos fieldInfos;
+ public List readers; // Readers being merged
+ public int readerCount; // Number of readers being merged
+ public int[][] docMaps; // Maps docIDs around deletions
+ public int[] delCounts; // Deletion count per reader
+ public int[] docBase; // New docID base per reader
+ public int mergedDocCount; // Total # merged docs
+
+ // Updated per field:
+ public FieldInfo fieldInfo;
+ public boolean omitTermFreqAndPositions;
+}
+
Propchange: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/MergeState.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/PositionsConsumer.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/PositionsConsumer.java?rev=882962&r1=882961&r2=882962&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/PositionsConsumer.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/PositionsConsumer.java Sat Nov 21 18:36:17 2009
@@ -19,7 +19,7 @@
import java.io.IOException;
-import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.index.PositionsEnum;
public abstract class PositionsConsumer {
@@ -32,4 +32,23 @@
/** Called when we are done adding positions & payloads
* for each doc */
public abstract void finishDoc() throws IOException;
+
+ private byte[] payloadBuffer;
+
+ /** Default merge impl, just copies positions & payloads
+ * from the input. */
+ public void merge(MergeState mergeState, PositionsEnum positions, int freq) throws IOException {
+ for(int i=0;i 0) {
+ if (payloadBuffer == null || payloadBuffer.length < payloadLength) {
+ payloadBuffer = new byte[payloadLength];
+ }
+ positions.getPayload(payloadBuffer, 0);
+ }
+ addPosition(position, payloadBuffer, 0, payloadLength);
+ }
+ finishDoc();
+ }
}
Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/TermsConsumer.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/TermsConsumer.java?rev=882962&r1=882961&r2=882962&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/TermsConsumer.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/TermsConsumer.java Sat Nov 21 18:36:17 2009
@@ -20,6 +20,9 @@
import java.io.IOException;
import org.apache.lucene.index.TermRef;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.util.PriorityQueue;
/**
* NOTE: this API is experimental and will likely change
@@ -35,4 +38,96 @@
/** Called when we are done adding terms to this field */
public abstract void finish() throws IOException;
+
+ // For default merge impl
+ public static class TermMergeState {
+ TermRef current;
+ TermsEnum termsEnum;
+ int readerIndex;
+ }
+
+ private final static class MergeQueue extends PriorityQueue {
+ public MergeQueue(int size) {
+ initialize(size);
+ }
+
+ @Override
+ protected final boolean lessThan(TermMergeState a, TermMergeState b) {
+ final int cmp = a.current.compareTerm(b.current);
+ if (cmp != 0) {
+ return cmp < 0;
+ } else {
+ return a.readerIndex < b.readerIndex;
+ }
+ }
+ }
+
+ private MergeQueue queue;
+ private DocsConsumer.DocsMergeState[] match;
+ private TermMergeState[] pending;
+
+ /** Default merge impl */
+ public void merge(MergeState mergeState, TermMergeState[] termsStates, int count) throws IOException {
+ if (queue == null) {
+ queue = new MergeQueue(mergeState.readerCount);
+ match = new DocsConsumer.DocsMergeState[mergeState.readerCount];
+ for(int i=0;i 0) {
+ // Merge one term
+ final TermRef term = pending[0].current;
+ final DocsConsumer docsConsumer = startTerm(term);
+ final int numDocs = docsConsumer.merge(mergeState, match, matchCount);
+ finishTerm(term, numDocs);
+ }
+
+ // Put terms back into queue
+ for(int i=0;i