Return-Path: X-Original-To: apmail-lucene-commits-archive@www.apache.org Delivered-To: apmail-lucene-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id DC88CCE4C for ; Wed, 30 May 2012 20:16:24 +0000 (UTC) Received: (qmail 28075 invoked by uid 500); 30 May 2012 20:16:24 -0000 Mailing-List: contact commits-help@lucene.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@lucene.apache.org Delivered-To: mailing list commits@lucene.apache.org Received: (qmail 28067 invoked by uid 99); 30 May 2012 20:16:24 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 30 May 2012 20:16:24 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 30 May 2012 20:16:20 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id C956C2388847; Wed, 30 May 2012 20:15:58 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1344441 - in /lucene/dev/trunk/lucene: core/src/java/org/apache/lucene/codecs/perfield/ core/src/test/org/apache/lucene/codecs/perfield/ core/src/test/org/apache/lucene/index/ test-framework/src/java/org/apache/lucene/index/ Date: Wed, 30 May 2012 20:15:58 -0000 To: commits@lucene.apache.org From: rmuir@apache.org X-Mailer: svnmailer-1.0.8-patched Message-Id: <20120530201558.C956C2388847@eris.apache.org> Author: rmuir Date: Wed May 30 20:15:58 2012 New Revision: 1344441 URL: http://svn.apache.org/viewvc?rev=1344441&view=rev Log: LUCENE-4090: PerFieldPostingsFormat cannot use name as suffix Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat.java lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/index.40.cfs.zip lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/index.40.nocfs.zip lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/index.40.optimized.cfs.zip lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/index.40.optimized.nocfs.zip lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java?rev=1344441&r1=1344440&r2=1344441&view=diff ============================================================================== --- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java (original) +++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java Wed May 30 20:15:58 2012 @@ -17,8 +17,9 @@ package org.apache.lucene.codecs.perfiel * limitations under the License. */ +import java.io.Closeable; import java.io.IOException; -import java.util.IdentityHashMap; +import java.util.HashMap; import java.util.Iterator; import java.util.Map; import java.util.ServiceLoader; // javadocs @@ -46,7 +47,7 @@ import org.apache.lucene.util.IOUtils; *

* Files written by each posting format have an additional suffix containing the * format name. For example, in a per-field configuration instead of _1.prx - * filenames would look like _1_Lucene40.prx. + * filenames would look like _1_Lucene40_0.prx. * @see ServiceLoader * @lucene.experimental */ @@ -55,6 +56,7 @@ public abstract class PerFieldPostingsFo public static final String PER_FIELD_NAME = "PerField40"; public static final String PER_FIELD_FORMAT_KEY = PerFieldPostingsFormat.class.getSimpleName() + ".format"; + public static final String PER_FIELD_SUFFIX_KEY = PerFieldPostingsFormat.class.getSimpleName() + ".suffix"; public PerFieldPostingsFormat() { super(PER_FIELD_NAME); @@ -65,11 +67,22 @@ public abstract class PerFieldPostingsFo throws IOException { return new FieldsWriter(state); } + + static class FieldsConsumerAndSuffix implements Closeable { + FieldsConsumer consumer; + int suffix; + + @Override + public void close() throws IOException { + consumer.close(); + } + } private class FieldsWriter extends FieldsConsumer { - private final Map formats = new IdentityHashMap(); - + private final Map formats = new HashMap(); + private final Map suffixes = new HashMap(); + private final SegmentWriteState segmentWriteState; public FieldsWriter(SegmentWriteState state) throws IOException { @@ -82,26 +95,48 @@ public abstract class PerFieldPostingsFo if (format == null) { throw new IllegalStateException("invalid null PostingsFormat for field=\"" + field.name + "\""); } + final String formatName = format.getName(); - String previousValue = field.putAttribute(PER_FIELD_FORMAT_KEY, format.getName()); + String previousValue = field.putAttribute(PER_FIELD_FORMAT_KEY, formatName); assert previousValue == null; - - FieldsConsumer consumer = formats.get(format); + + Integer suffix; + + FieldsConsumerAndSuffix consumer = formats.get(format); if (consumer == null) { // First time we are seeing this format; create a new instance + + // bump the suffix + suffix = suffixes.get(formatName); + if (suffix == null) { + suffix = 0; + } else { + suffix = suffix + 1; + } + suffixes.put(formatName, suffix); + final String segmentSuffix = getFullSegmentSuffix(field.name, segmentWriteState.segmentSuffix, - format.getName()); - consumer = format.fieldsConsumer(new SegmentWriteState(segmentWriteState, segmentSuffix)); + getSuffix(formatName, Integer.toString(suffix))); + consumer = new FieldsConsumerAndSuffix(); + consumer.consumer = format.fieldsConsumer(new SegmentWriteState(segmentWriteState, segmentSuffix)); + consumer.suffix = suffix; formats.put(format, consumer); + } else { + // we've already seen this format, so just grab its suffix + assert suffixes.containsKey(formatName); + suffix = consumer.suffix; } + + previousValue = field.putAttribute(PER_FIELD_SUFFIX_KEY, Integer.toString(suffix)); + assert previousValue == null; // TODO: we should only provide the "slice" of FIS // that this PF actually sees ... then stuff like // .hasProx could work correctly? // NOTE: .hasProx is already broken in the same way for the non-perfield case, // if there is a fieldinfo with prox that has no postings, you get a 0 byte file. - return consumer.addField(field); + return consumer.consumer.addField(field); } @Override @@ -110,6 +145,10 @@ public abstract class PerFieldPostingsFo IOUtils.close(formats.values()); } } + + static String getSuffix(String formatName, String suffix) { + return formatName + "_" + suffix; + } static String getFullSegmentSuffix(String fieldName, String outerSegmentSuffix, String segmentSuffix) { if (outerSegmentSuffix.length() == 0) { @@ -125,7 +164,7 @@ public abstract class PerFieldPostingsFo private class FieldsReader extends FieldsProducer { private final Map fields = new TreeMap(); - private final Map formats = new IdentityHashMap(); + private final Map formats = new HashMap(); public FieldsReader(final SegmentReadState readState) throws IOException { @@ -139,11 +178,14 @@ public abstract class PerFieldPostingsFo final String formatName = fi.getAttribute(PER_FIELD_FORMAT_KEY); if (formatName != null) { // null formatName means the field is in fieldInfos, but has no postings! + final String suffix = fi.getAttribute(PER_FIELD_SUFFIX_KEY); + assert suffix != null; PostingsFormat format = PostingsFormat.forName(formatName); - if (!formats.containsKey(format)) { - formats.put(format, format.fieldsProducer(new SegmentReadState(readState, formatName))); + String segmentSuffix = getSuffix(formatName, suffix); + if (!formats.containsKey(segmentSuffix)) { + formats.put(segmentSuffix, format.fieldsProducer(new SegmentReadState(readState, segmentSuffix))); } - fields.put(fieldName, formats.get(format)); + fields.put(fieldName, formats.get(segmentSuffix)); } } } Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat.java URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat.java?rev=1344441&r1=1344440&r2=1344441&view=diff ============================================================================== --- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat.java (original) +++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat.java Wed May 30 20:15:58 2012 @@ -19,10 +19,12 @@ package org.apache.lucene.codecs.perfiel import java.io.IOException; import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.lucene40.Lucene40Codec; import org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat; import org.apache.lucene.codecs.mocksep.MockSepPostingsFormat; +import org.apache.lucene.codecs.pulsing.Pulsing40PostingsFormat; import org.apache.lucene.codecs.simpletext.SimpleTextPostingsFormat; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; @@ -34,6 +36,7 @@ import org.apache.lucene.index.IndexRead import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.LogDocMergePolicy; +import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.search.IndexSearcher; @@ -264,4 +267,60 @@ public class TestPerFieldPostingsFormat } dir.close(); } + + public void testSameCodecDifferentInstance() throws Exception { + Codec codec = new Lucene40Codec() { + @Override + public PostingsFormat getPostingsFormatForField(String field) { + if ("id".equals(field)) { + return new Pulsing40PostingsFormat(1); + } else if ("date".equals(field)) { + return new Pulsing40PostingsFormat(1); + } else { + return super.getPostingsFormatForField(field); + } + } + }; + doTestMixedPostings(codec); + } + + public void testSameCodecDifferentParams() throws Exception { + Codec codec = new Lucene40Codec() { + @Override + public PostingsFormat getPostingsFormatForField(String field) { + if ("id".equals(field)) { + return new Pulsing40PostingsFormat(1); + } else if ("date".equals(field)) { + return new Pulsing40PostingsFormat(2); + } else { + return super.getPostingsFormatForField(field); + } + } + }; + doTestMixedPostings(codec); + } + + private void doTestMixedPostings(Codec codec) throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); + iwc.setCodec(codec); + RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc); + Document doc = new Document(); + FieldType ft = new FieldType(TextField.TYPE_UNSTORED); + // turn on vectors for the checkindex cross-check + ft.setStoreTermVectors(true); + ft.setStoreTermVectorOffsets(true); + ft.setStoreTermVectorPositions(true); + Field idField = new Field("id", "", ft); + Field dateField = new Field("date", "", ft); + doc.add(idField); + doc.add(dateField); + for (int i = 0; i < 100; i++) { + idField.setStringValue(Integer.toString(random().nextInt(50))); + dateField.setStringValue(Integer.toString(random().nextInt(100))); + iw.addDocument(doc); + } + iw.close(); + dir.close(); // checkindex + } } Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/index.40.cfs.zip URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/index.40.cfs.zip?rev=1344441&r1=1344440&r2=1344441&view=diff ============================================================================== Binary files - no diff available. Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/index.40.nocfs.zip URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/index.40.nocfs.zip?rev=1344441&r1=1344440&r2=1344441&view=diff ============================================================================== Binary files - no diff available. Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/index.40.optimized.cfs.zip URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/index.40.optimized.cfs.zip?rev=1344441&r1=1344440&r2=1344441&view=diff ============================================================================== Binary files - no diff available. Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/index.40.optimized.nocfs.zip URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/index.40.optimized.nocfs.zip?rev=1344441&r1=1344440&r2=1344441&view=diff ============================================================================== Binary files - no diff available. Modified: lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java?rev=1344441&r1=1344440&r2=1344441&view=diff ============================================================================== --- lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java (original) +++ lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java Wed May 30 20:15:58 2012 @@ -87,12 +87,11 @@ public class RandomCodec extends Lucene4 int minItemsPerBlock = _TestUtil.nextInt(random, 2, 100); int maxItemsPerBlock = 2*(Math.max(2, minItemsPerBlock-1)) + random.nextInt(100); - // TODO: make it possible to specify min/max iterms per block via CL: - minItemsPerBlock = _TestUtil.nextInt(random, 2, 100); - maxItemsPerBlock = 2*(Math.max(1, minItemsPerBlock-1)) + random.nextInt(100); add(avoidCodecs, new Lucene40PostingsFormat(minItemsPerBlock, maxItemsPerBlock), new Pulsing40PostingsFormat(1 + random.nextInt(20), minItemsPerBlock, maxItemsPerBlock), + // add pulsing again with (usually) different parameters + new Pulsing40PostingsFormat(1 + random.nextInt(20), minItemsPerBlock, maxItemsPerBlock), new MockSepPostingsFormat(), new MockFixedIntBlockPostingsFormat(_TestUtil.nextInt(random, 1, 2000)), new MockVariableIntBlockPostingsFormat( _TestUtil.nextInt(random, 1, 127)), @@ -100,7 +99,8 @@ public class RandomCodec extends Lucene4 new NestedPulsingPostingsFormat(), new Lucene40WithOrds(), new SimpleTextPostingsFormat(), - new MemoryPostingsFormat(random.nextBoolean())); + new MemoryPostingsFormat(true), + new MemoryPostingsFormat(false)); Collections.shuffle(formats, random); }