lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From busc...@apache.org
Subject svn commit: r704193 - in /lucene/java/trunk: CHANGES.txt src/java/org/apache/lucene/index/DocumentsWriter.java src/java/org/apache/lucene/index/IndexWriter.java
Date Mon, 13 Oct 2008 18:12:11 GMT
Author: buschmi
Date: Mon Oct 13 11:12:11 2008
New Revision: 704193

URL: http://svn.apache.org/viewvc?rev=704193&view=rev
Log:
LUCENE-1419: Add expert API to set custom indexing chain. 

Modified:
    lucene/java/trunk/CHANGES.txt
    lucene/java/trunk/src/java/org/apache/lucene/index/DocumentsWriter.java
    lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java

Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?rev=704193&r1=704192&r2=704193&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Mon Oct 13 11:12:11 2008
@@ -1,4 +1,4 @@
-Lucene Change Log
+Lucene Change Log
 $Id$
 
 ======================= Trunk (not yet released) =======================
@@ -7,6 +7,11 @@
 
 API Changes
 
+1. LUCENE-1419: Add expert API to set custom indexing chain. This API is 
+   package-protected for now, so we don't have to officially support it.
+   Yet, it will give us the possibility to try out different consumers in
+   in the chain. (Michael Busch)
+
 Bug fixes
 
 1. LUCENE-1415: MultiPhraseQuery has incorrect hashCode() and equals()

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/DocumentsWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/DocumentsWriter.java?rev=704193&r1=704192&r2=704193&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/DocumentsWriter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/DocumentsWriter.java Mon Oct 13 11:12:11
2008
@@ -184,6 +184,51 @@
       this.next = next;
     }
   };
+  
+  /**
+   * The IndexingChain must define the {@link #getChain(DocumentsWriter)} method
+   * which returns the DocConsumer that the DocumentsWriter calls to process the
+   * documents. 
+   */
+  abstract static class IndexingChain {
+    abstract DocConsumer getChain(DocumentsWriter documentsWriter);
+  }
+  
+  static final IndexingChain DefaultIndexingChain = new IndexingChain() {
+
+    DocConsumer getChain(DocumentsWriter documentsWriter) {
+      /*
+      This is the current indexing chain:
+
+      DocConsumer / DocConsumerPerThread
+        --> code: DocFieldProcessor / DocFieldProcessorPerThread
+          --> DocFieldConsumer / DocFieldConsumerPerThread / DocFieldConsumerPerField
+            --> code: DocFieldConsumers / DocFieldConsumersPerThread / DocFieldConsumersPerField
+              --> code: DocInverter / DocInverterPerThread / DocInverterPerField
+                --> InvertedDocConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField
+                  --> code: TermsHash / TermsHashPerThread / TermsHashPerField
+                    --> TermsHashConsumer / TermsHashConsumerPerThread / TermsHashConsumerPerField
+                      --> code: FreqProxTermsWriter / FreqProxTermsWriterPerThread / FreqProxTermsWriterPerField
+                      --> code: TermVectorsTermsWriter / TermVectorsTermsWriterPerThread
/ TermVectorsTermsWriterPerField
+                --> InvertedDocEndConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField
+                  --> code: NormsWriter / NormsWriterPerThread / NormsWriterPerField
+              --> code: StoredFieldsWriter / StoredFieldsWriterPerThread / StoredFieldsWriterPerField
+    */
+
+    // Build up indexing chain:
+
+      final TermsHashConsumer termVectorsWriter = new TermVectorsTermsWriter(documentsWriter);
+      final TermsHashConsumer freqProxWriter = new FreqProxTermsWriter();
+
+      final InvertedDocConsumer  termsHash = new TermsHash(documentsWriter, true, freqProxWriter,
+                                                           new TermsHash(documentsWriter,
false, termVectorsWriter, null));
+      final NormsWriter normsWriter = new NormsWriter();
+      final DocInverter docInverter = new DocInverter(termsHash, normsWriter);
+      final StoredFieldsWriter fieldsWriter = new StoredFieldsWriter(documentsWriter);
+      final DocFieldConsumers docFieldConsumers = new DocFieldConsumers(docInverter, fieldsWriter);
+      return new DocFieldProcessor(documentsWriter, docFieldConsumers);
+    }
+  };
 
   final DocConsumer consumer;
 
@@ -228,48 +273,23 @@
 
   private boolean closed;
 
-  DocumentsWriter(Directory directory, IndexWriter writer) throws IOException {
+  DocumentsWriter(Directory directory, IndexWriter writer, IndexingChain indexingChain) throws
IOException {
     this.directory = directory;
     this.writer = writer;
     this.similarity = writer.getSimilarity();
     flushedDocCount = writer.maxDoc();
 
-    /*
-      This is the current indexing chain:
-
-      DocConsumer / DocConsumerPerThread
-        --> code: DocFieldProcessor / DocFieldProcessorPerThread
-          --> DocFieldConsumer / DocFieldConsumerPerThread / DocFieldConsumerPerField
-            --> code: DocFieldConsumers / DocFieldConsumersPerThread / DocFieldConsumersPerField
-              --> code: DocInverter / DocInverterPerThread / DocInverterPerField
-                --> InvertedDocConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField
-                  --> code: TermsHash / TermsHashPerThread / TermsHashPerField
-                    --> TermsHashConsumer / TermsHashConsumerPerThread / TermsHashConsumerPerField
-                      --> code: FreqProxTermsWriter / FreqProxTermsWriterPerThread / FreqProxTermsWriterPerField
-                      --> code: TermVectorsTermsWriter / TermVectorsTermsWriterPerThread
/ TermVectorsTermsWriterPerField
-                --> InvertedDocEndConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField
-                  --> code: NormsWriter / NormsWriterPerThread / NormsWriterPerField
-              --> code: StoredFieldsWriter / StoredFieldsWriterPerThread / StoredFieldsWriterPerField
-    */
-
-    // TODO FI: this should be something the user can pass in
-    // Build up indexing chain:
-    final TermsHashConsumer termVectorsWriter = new TermVectorsTermsWriter(this);
-    final TermsHashConsumer freqProxWriter = new FreqProxTermsWriter();
-
-    final InvertedDocConsumer  termsHash = new TermsHash(this, true, freqProxWriter,
-                                                         new TermsHash(this, false, termVectorsWriter,
null));
-    final NormsWriter normsWriter = new NormsWriter();
-    final DocInverter docInverter = new DocInverter(termsHash, normsWriter);
-    final StoredFieldsWriter fieldsWriter = new StoredFieldsWriter(this);
-    final DocFieldConsumers docFieldConsumers = new DocFieldConsumers(docInverter, fieldsWriter);
-    consumer = docFieldProcessor = new DocFieldProcessor(this, docFieldConsumers);
+    consumer = indexingChain.getChain(this);
+    if (consumer instanceof DocFieldProcessor) {
+      docFieldProcessor = (DocFieldProcessor) consumer;
+    }
   }
 
   /** Returns true if any of the fields in the current
    *  buffered docs have omitTf==false */
   boolean hasProx() {
-    return docFieldProcessor.fieldInfos.hasProx();
+    return (docFieldProcessor != null) ? docFieldProcessor.fieldInfos.hasProx()
+                                       : true;
   }
 
   /** If non-null, various details of indexing are printed

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java?rev=704193&r1=704192&r2=704193&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java Mon Oct 13 11:12:11
2008
@@ -19,6 +19,7 @@
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.document.Document;
+import org.apache.lucene.index.DocumentsWriter.IndexingChain;
 import org.apache.lucene.search.Similarity;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.store.Directory;
@@ -994,7 +995,43 @@
        throws CorruptIndexException, LockObtainFailedException, IOException {
     init(d, a, create, false, deletionPolicy, false, mfl.getLimit());
   }
-
+  
+  /**
+   * Expert: constructs an IndexWriter with a custom {@link
+   * IndexDeletionPolicy} and {@link IndexingChain}, 
+   * for the index in <code>d</code>.
+   * Text will be analyzed with <code>a</code>.  If
+   * <code>create</code> is true, then a new, empty index
+   * will be created in <code>d</code>, replacing the index
+   * already there, if any.
+   *
+   * <p><b>NOTE</b>: autoCommit (see <a
+   * href="#autoCommit">above</a>) is set to false with this
+   * constructor.
+   *
+   * @param d the index directory
+   * @param a the analyzer to use
+   * @param create <code>true</code> to create the index or overwrite
+   *  the existing one; <code>false</code> to append to the existing
+   *  index
+   * @param deletionPolicy see <a href="#deletionPolicy">above</a>
+   * @param indexingChain the {@link DocConsumer} chain to be used to 
+   *  process documents
+   * @param mfl whether or not to limit field lengths
+   * @throws CorruptIndexException if the index is corrupt
+   * @throws LockObtainFailedException if another writer
+   *  has this index open (<code>write.lock</code> could not
+   *  be obtained)
+   * @throws IOException if the directory cannot be read/written to, or
+   *  if it does not exist and <code>create</code> is
+   *  <code>false</code> or if there is any other low-level
+   *  IO error
+   */
+  IndexWriter(Directory d, Analyzer a, boolean create, IndexDeletionPolicy deletionPolicy,
MaxFieldLength mfl, IndexingChain indexingChain)
+       throws CorruptIndexException, LockObtainFailedException, IOException {
+    init(d, a, create, false, deletionPolicy, false, mfl.getLimit(), indexingChain);
+  }
+  
   /**
    * Expert: constructs an IndexWriter with a custom {@link
    * IndexDeletionPolicy}, for the index in <code>d</code>.
@@ -1028,16 +1065,30 @@
     init(d, a, create, false, deletionPolicy, autoCommit, DEFAULT_MAX_FIELD_LENGTH);
   }
 
-  private void init(Directory d, Analyzer a, boolean closeDir, IndexDeletionPolicy deletionPolicy,
boolean autoCommit, int maxFieldLength)
+  private void init(Directory d, Analyzer a, boolean closeDir, IndexDeletionPolicy deletionPolicy,

+      boolean autoCommit, int maxFieldLength) 
+    throws CorruptIndexException, LockObtainFailedException, IOException {
+    init(d, a, closeDir, deletionPolicy, autoCommit, maxFieldLength, DocumentsWriter.DefaultIndexingChain);
+  }
+  
+  private void init(Directory d, Analyzer a, boolean closeDir, IndexDeletionPolicy deletionPolicy,

+      boolean autoCommit, int maxFieldLength, IndexingChain indexingChain)
     throws CorruptIndexException, LockObtainFailedException, IOException {
     if (IndexReader.indexExists(d)) {
-      init(d, a, false, closeDir, deletionPolicy, autoCommit, maxFieldLength);
+      init(d, a, false, closeDir, deletionPolicy, autoCommit, maxFieldLength, indexingChain);
     } else {
-      init(d, a, true, closeDir, deletionPolicy, autoCommit, maxFieldLength);
+      init(d, a, true, closeDir, deletionPolicy, autoCommit, maxFieldLength, indexingChain);
     }
   }
 
-  private void init(Directory d, Analyzer a, final boolean create, boolean closeDir, IndexDeletionPolicy
deletionPolicy, boolean autoCommit, int maxFieldLength)
+  private void init(Directory d, Analyzer a, final boolean create, boolean closeDir, 
+      IndexDeletionPolicy deletionPolicy, boolean autoCommit, int maxFieldLength)
+    throws CorruptIndexException, LockObtainFailedException, IOException {
+    init(d, a, create, closeDir, deletionPolicy, autoCommit, maxFieldLength, DocumentsWriter.DefaultIndexingChain);
+  }
+  private void init(Directory d, Analyzer a, final boolean create, boolean closeDir, 
+      IndexDeletionPolicy deletionPolicy, boolean autoCommit, int maxFieldLength,
+      IndexingChain indexingChain)
     throws CorruptIndexException, LockObtainFailedException, IOException {
     this.closeDir = closeDir;
     directory = d;
@@ -1084,7 +1135,7 @@
       this.autoCommit = autoCommit;
       setRollbackSegmentInfos(segmentInfos);
 
-      docWriter = new DocumentsWriter(directory, this);
+      docWriter = new DocumentsWriter(directory, this, indexingChain);
       docWriter.setInfoStream(infoStream);
       docWriter.setMaxFieldLength(maxFieldLength);
 



Mime
View raw message