lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From rm...@apache.org
Subject svn commit: r1344441 - in /lucene/dev/trunk/lucene: core/src/java/org/apache/lucene/codecs/perfield/ core/src/test/org/apache/lucene/codecs/perfield/ core/src/test/org/apache/lucene/index/ test-framework/src/java/org/apache/lucene/index/
Date Wed, 30 May 2012 20:15:58 GMT
Author: rmuir
Date: Wed May 30 20:15:58 2012
New Revision: 1344441

URL: http://svn.apache.org/viewvc?rev=1344441&view=rev
Log:
LUCENE-4090: PerFieldPostingsFormat cannot use name as suffix

Modified:
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat.java
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/index.40.cfs.zip
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/index.40.nocfs.zip
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/index.40.optimized.cfs.zip
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/index.40.optimized.nocfs.zip
    lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java?rev=1344441&r1=1344440&r2=1344441&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java Wed May 30 20:15:58 2012
@@ -17,8 +17,9 @@ package org.apache.lucene.codecs.perfiel
  * limitations under the License.
  */
 
+import java.io.Closeable;
 import java.io.IOException;
-import java.util.IdentityHashMap;
+import java.util.HashMap;
 import java.util.Iterator;
 import java.util.Map;
 import java.util.ServiceLoader; // javadocs
@@ -46,7 +47,7 @@ import org.apache.lucene.util.IOUtils;
  * <p>
  * Files written by each posting format have an additional suffix containing the 
  * format name. For example, in a per-field configuration instead of <tt>_1.prx</tt>
- * filenames would look like <tt>_1_Lucene40.prx</tt>.
+ * filenames would look like <tt>_1_Lucene40_0.prx</tt>.
  * @see ServiceLoader
  * @lucene.experimental
  */
@@ -55,6 +56,7 @@ public abstract class PerFieldPostingsFo
   public static final String PER_FIELD_NAME = "PerField40";
   
   public static final String PER_FIELD_FORMAT_KEY = PerFieldPostingsFormat.class.getSimpleName() + ".format";
+  public static final String PER_FIELD_SUFFIX_KEY = PerFieldPostingsFormat.class.getSimpleName() + ".suffix";
 
   public PerFieldPostingsFormat() {
     super(PER_FIELD_NAME);
@@ -65,11 +67,22 @@ public abstract class PerFieldPostingsFo
       throws IOException {
     return new FieldsWriter(state);
   }
+  
+  static class FieldsConsumerAndSuffix implements Closeable {
+    FieldsConsumer consumer;
+    int suffix;
+    
+    @Override
+    public void close() throws IOException {
+      consumer.close();
+    }
+  }
     
   private class FieldsWriter extends FieldsConsumer {
 
-    private final Map<PostingsFormat,FieldsConsumer> formats = new IdentityHashMap<PostingsFormat,FieldsConsumer>();
-
+    private final Map<PostingsFormat,FieldsConsumerAndSuffix> formats = new HashMap<PostingsFormat,FieldsConsumerAndSuffix>();
+    private final Map<String,Integer> suffixes = new HashMap<String,Integer>();
+    
     private final SegmentWriteState segmentWriteState;
 
     public FieldsWriter(SegmentWriteState state) throws IOException {
@@ -82,26 +95,48 @@ public abstract class PerFieldPostingsFo
       if (format == null) {
         throw new IllegalStateException("invalid null PostingsFormat for field=\"" + field.name + "\"");
       }
+      final String formatName = format.getName();
       
-      String previousValue = field.putAttribute(PER_FIELD_FORMAT_KEY, format.getName());
+      String previousValue = field.putAttribute(PER_FIELD_FORMAT_KEY, formatName);
       assert previousValue == null;
-
-      FieldsConsumer consumer = formats.get(format);
+      
+      Integer suffix;
+      
+      FieldsConsumerAndSuffix consumer = formats.get(format);
       if (consumer == null) {
         // First time we are seeing this format; create a new instance
+        
+        // bump the suffix
+        suffix = suffixes.get(formatName);
+        if (suffix == null) {
+          suffix = 0;
+        } else {
+          suffix = suffix + 1;
+        }
+        suffixes.put(formatName, suffix);
+        
         final String segmentSuffix = getFullSegmentSuffix(field.name,
                                                           segmentWriteState.segmentSuffix,
-                                                          format.getName());
-        consumer = format.fieldsConsumer(new SegmentWriteState(segmentWriteState, segmentSuffix));
+                                                          getSuffix(formatName, Integer.toString(suffix)));
+        consumer = new FieldsConsumerAndSuffix();
+        consumer.consumer = format.fieldsConsumer(new SegmentWriteState(segmentWriteState, segmentSuffix));
+        consumer.suffix = suffix;
         formats.put(format, consumer);
+      } else {
+        // we've already seen this format, so just grab its suffix
+        assert suffixes.containsKey(formatName);
+        suffix = consumer.suffix;
       }
+      
+      previousValue = field.putAttribute(PER_FIELD_SUFFIX_KEY, Integer.toString(suffix));
+      assert previousValue == null;
 
       // TODO: we should only provide the "slice" of FIS
       // that this PF actually sees ... then stuff like
       // .hasProx could work correctly?
       // NOTE: .hasProx is already broken in the same way for the non-perfield case,
       // if there is a fieldinfo with prox that has no postings, you get a 0 byte file.
-      return consumer.addField(field);
+      return consumer.consumer.addField(field);
     }
 
     @Override
@@ -110,6 +145,10 @@ public abstract class PerFieldPostingsFo
       IOUtils.close(formats.values());
     }
   }
+  
+  static String getSuffix(String formatName, String suffix) {
+    return formatName + "_" + suffix;
+  }
 
   static String getFullSegmentSuffix(String fieldName, String outerSegmentSuffix, String segmentSuffix) {
     if (outerSegmentSuffix.length() == 0) {
@@ -125,7 +164,7 @@ public abstract class PerFieldPostingsFo
   private class FieldsReader extends FieldsProducer {
 
     private final Map<String,FieldsProducer> fields = new TreeMap<String,FieldsProducer>();
-    private final Map<PostingsFormat,FieldsProducer> formats = new IdentityHashMap<PostingsFormat,FieldsProducer>();
+    private final Map<String,FieldsProducer> formats = new HashMap<String,FieldsProducer>();
 
     public FieldsReader(final SegmentReadState readState) throws IOException {
 
@@ -139,11 +178,14 @@ public abstract class PerFieldPostingsFo
             final String formatName = fi.getAttribute(PER_FIELD_FORMAT_KEY);
             if (formatName != null) {
               // null formatName means the field is in fieldInfos, but has no postings!
+              final String suffix = fi.getAttribute(PER_FIELD_SUFFIX_KEY);
+              assert suffix != null;
               PostingsFormat format = PostingsFormat.forName(formatName);
-              if (!formats.containsKey(format)) {
-                formats.put(format, format.fieldsProducer(new SegmentReadState(readState, formatName)));
+              String segmentSuffix = getSuffix(formatName, suffix);
+              if (!formats.containsKey(segmentSuffix)) {
+                formats.put(segmentSuffix, format.fieldsProducer(new SegmentReadState(readState, segmentSuffix)));
               }
-              fields.put(fieldName, formats.get(format));
+              fields.put(fieldName, formats.get(segmentSuffix));
             }
           }
         }

Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat.java?rev=1344441&r1=1344440&r2=1344441&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat.java Wed May 30 20:15:58 2012
@@ -19,10 +19,12 @@ package org.apache.lucene.codecs.perfiel
 import java.io.IOException;
 
 import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.codecs.Codec;
 import org.apache.lucene.codecs.PostingsFormat;
 import org.apache.lucene.codecs.lucene40.Lucene40Codec;
 import org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat;
 import org.apache.lucene.codecs.mocksep.MockSepPostingsFormat;
+import org.apache.lucene.codecs.pulsing.Pulsing40PostingsFormat;
 import org.apache.lucene.codecs.simpletext.SimpleTextPostingsFormat;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
@@ -34,6 +36,7 @@ import org.apache.lucene.index.IndexRead
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.LogDocMergePolicy;
+import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
 import org.apache.lucene.search.IndexSearcher;
@@ -264,4 +267,60 @@ public class TestPerFieldPostingsFormat 
     }
     dir.close();
   }
+  
+  public void testSameCodecDifferentInstance() throws Exception {
+    Codec codec = new Lucene40Codec() {
+      @Override
+      public PostingsFormat getPostingsFormatForField(String field) {
+        if ("id".equals(field)) {
+          return new Pulsing40PostingsFormat(1);
+        } else if ("date".equals(field)) {
+          return new Pulsing40PostingsFormat(1);
+        } else {
+          return super.getPostingsFormatForField(field);
+        }
+      }
+    };
+    doTestMixedPostings(codec);
+  }
+  
+  public void testSameCodecDifferentParams() throws Exception {
+    Codec codec = new Lucene40Codec() {
+      @Override
+      public PostingsFormat getPostingsFormatForField(String field) {
+        if ("id".equals(field)) {
+          return new Pulsing40PostingsFormat(1);
+        } else if ("date".equals(field)) {
+          return new Pulsing40PostingsFormat(2);
+        } else {
+          return super.getPostingsFormatForField(field);
+        }
+      }
+    };
+    doTestMixedPostings(codec);
+  }
+  
+  private void doTestMixedPostings(Codec codec) throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+    iwc.setCodec(codec);
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
+    Document doc = new Document();
+    FieldType ft = new FieldType(TextField.TYPE_UNSTORED);
+    // turn on vectors for the checkindex cross-check
+    ft.setStoreTermVectors(true);
+    ft.setStoreTermVectorOffsets(true);
+    ft.setStoreTermVectorPositions(true);
+    Field idField = new Field("id", "", ft);
+    Field dateField = new Field("date", "", ft);
+    doc.add(idField);
+    doc.add(dateField);
+    for (int i = 0; i < 100; i++) {
+      idField.setStringValue(Integer.toString(random().nextInt(50)));
+      dateField.setStringValue(Integer.toString(random().nextInt(100)));
+      iw.addDocument(doc);
+    }
+    iw.close();
+    dir.close(); // checkindex
+  }
 }

Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/index.40.cfs.zip
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/index.40.cfs.zip?rev=1344441&r1=1344440&r2=1344441&view=diff
==============================================================================
Binary files - no diff available.

Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/index.40.nocfs.zip
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/index.40.nocfs.zip?rev=1344441&r1=1344440&r2=1344441&view=diff
==============================================================================
Binary files - no diff available.

Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/index.40.optimized.cfs.zip
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/index.40.optimized.cfs.zip?rev=1344441&r1=1344440&r2=1344441&view=diff
==============================================================================
Binary files - no diff available.

Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/index.40.optimized.nocfs.zip
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/index.40.optimized.nocfs.zip?rev=1344441&r1=1344440&r2=1344441&view=diff
==============================================================================
Binary files - no diff available.

Modified: lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java?rev=1344441&r1=1344440&r2=1344441&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java (original)
+++ lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java Wed May 30 20:15:58 2012
@@ -87,12 +87,11 @@ public class RandomCodec extends Lucene4
     int minItemsPerBlock = _TestUtil.nextInt(random, 2, 100);
     int maxItemsPerBlock = 2*(Math.max(2, minItemsPerBlock-1)) + random.nextInt(100);
 
-    // TODO: make it possible to specify min/max iterms per block via CL:
-    minItemsPerBlock = _TestUtil.nextInt(random, 2, 100);
-    maxItemsPerBlock = 2*(Math.max(1, minItemsPerBlock-1)) + random.nextInt(100);
     add(avoidCodecs,
         new Lucene40PostingsFormat(minItemsPerBlock, maxItemsPerBlock),
         new Pulsing40PostingsFormat(1 + random.nextInt(20), minItemsPerBlock, maxItemsPerBlock),
+        // add pulsing again with (usually) different parameters
+        new Pulsing40PostingsFormat(1 + random.nextInt(20), minItemsPerBlock, maxItemsPerBlock),
         new MockSepPostingsFormat(),
         new MockFixedIntBlockPostingsFormat(_TestUtil.nextInt(random, 1, 2000)),
         new MockVariableIntBlockPostingsFormat( _TestUtil.nextInt(random, 1, 127)),
@@ -100,7 +99,8 @@ public class RandomCodec extends Lucene4
         new NestedPulsingPostingsFormat(),
         new Lucene40WithOrds(),
         new SimpleTextPostingsFormat(),
-        new MemoryPostingsFormat(random.nextBoolean()));
+        new MemoryPostingsFormat(true),
+        new MemoryPostingsFormat(false));
 
     Collections.shuffle(formats, random);
   }



Mime
View raw message