lucene-java-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Robert Muir <rcm...@gmail.com>
Subject Re: New codecs keep Freq skip/omit Pos
Date Fri, 22 Apr 2011 16:29:42 GMT
On Fri, Apr 22, 2011 at 12:03 PM, Alex vB <mail@avomberg.de> wrote:
> During indexing I use StandardAnalyzer (StandardFilter, LowerCaseFilter,
> StopFilter).
> Can I get somewhere more information for Codec creation or is there just
> "grubbing" through the code?

Try the following patch to switch PFOR1 and PFOR2 over to Sep, so that
they create separate .doc and .frq files.
Then you can compare the compression of the freqs against your
implementation (again, the .skp/.tib/.tiv files will be larger due to using
the Sep codec and due to having pos pointers, but try to ignore that).


Index: lucene/src/java/org/apache/lucene/index/codecs/pfordelta/PatchedFrameOfRefCodec.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/codecs/pfordelta/PatchedFrameOfRefCodec.java	(revision 1095422)
+++ lucene/src/java/org/apache/lucene/index/codecs/pfordelta/PatchedFrameOfRefCodec.java	(working copy)
@@ -30,6 +30,8 @@
 import org.apache.lucene.index.codecs.FieldsProducer;
 import org.apache.lucene.index.codecs.fixed.FixedPostingsReaderImpl;
 import org.apache.lucene.index.codecs.fixed.FixedPostingsWriterImpl;
+import org.apache.lucene.index.codecs.sep.SepPostingsReaderImpl;
+import org.apache.lucene.index.codecs.sep.SepPostingsWriterImpl;
 import org.apache.lucene.index.codecs.standard.StandardCodec;
 import org.apache.lucene.index.codecs.BlockTermsWriter;
 import org.apache.lucene.index.codecs.BlockTermsReader;
@@ -48,7 +50,7 @@

   @Override
   public FieldsConsumer fieldsConsumer(SegmentWriteState state)
throws IOException {
-    PostingsWriterBase postingsWriter = new
FixedPostingsWriterImpl(state, new PForDeltaFactory(128));
+    PostingsWriterBase postingsWriter = new
SepPostingsWriterImpl(state, new PForDeltaFactory(128));

     boolean success = false;
     TermsIndexWriterBase indexWriter;
@@ -79,7 +81,7 @@

   @Override
   public FieldsProducer fieldsProducer(SegmentReadState state) throws
IOException {
-    PostingsReaderBase postingsReader = new FixedPostingsReaderImpl(state.dir,
+    PostingsReaderBase postingsReader = new SepPostingsReaderImpl(state.dir,

state.segmentInfo,

state.readBufferSize,
                                                                   new
PForDeltaFactory(128),
@@ -125,14 +127,14 @@

   @Override
   public void files(Directory dir, SegmentInfo segmentInfo, String
id, Set<String> files) {
-    FixedPostingsReaderImpl.files(segmentInfo, id, files);
+    SepPostingsReaderImpl.files(segmentInfo, id, files);
     BlockTermsReader.files(dir, segmentInfo, id, files);
     VariableGapTermsIndexReader.files(dir, segmentInfo, id, files);
   }

   @Override
   public void getExtensions(Set<String> extensions) {
-    FixedPostingsWriterImpl.getExtensions(extensions);
+    SepPostingsWriterImpl.getExtensions(extensions);
     BlockTermsReader.getExtensions(extensions);
     VariableGapTermsIndexReader.getIndexExtensions(extensions);
   }
Index: lucene/src/java/org/apache/lucene/index/codecs/pfordelta2/PForDeltaFixedIntBlockCodec.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/codecs/pfordelta2/PForDeltaFixedIntBlockCodec.java	(revision 1095422)
+++ lucene/src/java/org/apache/lucene/index/codecs/pfordelta2/PForDeltaFixedIntBlockCodec.java	(working copy)
@@ -41,6 +41,8 @@
 import org.apache.lucene.index.codecs.VariableGapTermsIndexReader;
 import org.apache.lucene.index.codecs.VariableGapTermsIndexWriter;
 import org.apache.lucene.index.codecs.sep.IntStreamFactory;
+import org.apache.lucene.index.codecs.sep.SepPostingsReaderImpl;
+import org.apache.lucene.index.codecs.sep.SepPostingsWriterImpl;
 import org.apache.lucene.index.codecs.standard.StandardCodec;
 import org.apache.lucene.store.*;
 import org.apache.lucene.util.BytesRef;
@@ -168,7 +170,7 @@

   @Override
   public FieldsConsumer fieldsConsumer(SegmentWriteState state)
throws IOException {
-    PostingsWriterBase postingsWriter = new
FixedPostingsWriterImpl(state, new PForDeltaIntFactory());
+    PostingsWriterBase postingsWriter = new
SepPostingsWriterImpl(state, new PForDeltaIntFactory());

     boolean success = false;
     TermsIndexWriterBase indexWriter;
@@ -199,7 +201,7 @@

   @Override
   public FieldsProducer fieldsProducer(SegmentReadState state) throws
IOException {
-    PostingsReaderBase postingsReader = new FixedPostingsReaderImpl(state.dir,
+    PostingsReaderBase postingsReader = new SepPostingsReaderImpl(state.dir,

state.segmentInfo,

state.readBufferSize,

new PForDeltaIntFactory(), state.codecId);
@@ -244,14 +246,14 @@

   @Override
   public void files(Directory dir, SegmentInfo segmentInfo, String
codecId, Set<String> files) {
-    FixedPostingsReaderImpl.files(segmentInfo, codecId, files);
+    SepPostingsReaderImpl.files(segmentInfo, codecId, files);
     BlockTermsReader.files(dir, segmentInfo, codecId, files);
     VariableGapTermsIndexReader.files(dir, segmentInfo, codecId, files);
   }

   @Override
   public void getExtensions(Set<String> extensions) {
-    FixedPostingsWriterImpl.getExtensions(extensions);
+    SepPostingsWriterImpl.getExtensions(extensions);
     BlockTermsReader.getExtensions(extensions);
     VariableGapTermsIndexReader.getIndexExtensions(extensions);
   }

---------------------------------------------------------------------
To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org
For additional commands, e-mail: java-user-help@lucene.apache.org


Mime
View raw message