lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mikemcc...@apache.org
Subject svn commit: r921485 - in /lucene/java/trunk/src: java/org/apache/lucene/index/IndexWriterConfig.java test/org/apache/lucene/index/TestIndexWriterConfig.java
Date Wed, 10 Mar 2010 17:55:53 GMT
Author: mikemccand
Date: Wed Mar 10 17:55:53 2010
New Revision: 921485

URL: http://svn.apache.org/viewvc?rev=921485&view=rev
Log:
LUCENE-2294: cutover to IndexWriterConfig object for settings to IW (add missing files)

Added:
    lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriterConfig.java   (with props)
    lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriterConfig.java   (with props)

Added: lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriterConfig.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriterConfig.java?rev=921485&view=auto
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriterConfig.java (added)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriterConfig.java Wed Mar 10 17:55:53 2010
@@ -0,0 +1,518 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.WhitespaceAnalyzer;
+import org.apache.lucene.index.DocumentsWriter.IndexingChain;
+import org.apache.lucene.search.Similarity;
+import org.apache.lucene.util.Version;
+
+/**
+ * Holds all the configuration of {@link IndexWriter}. This object is only used
+ * while constructing a new IndexWriter. Those settings cannot be changed
+ * afterwards, except by instantiating a new IndexWriter.
+ * <p>
+ * All setter methods return {@link IndexWriterConfig} to allow chaining
+ * settings conveniently. Thus someone can do:
+ * 
+ * <pre>
+ * IndexWriterConfig conf = new IndexWriterConfig(matchVersion);
+ * conf.setter1().setter2();
+ * </pre>
+ * 
+ * @since 3.1
+ */
+public final class IndexWriterConfig implements Cloneable {
+
+  public static final int UNLIMITED_FIELD_LENGTH = Integer.MAX_VALUE;
+
+  /**
+   * Specifies the open mode for {@link IndexWriter}:
+   * <ul>
+   * <li>{@link #CREATE} - creates a new index or overwrites an existing one.</li>
+   * <li>{@link #CREATE_OR_APPEND} - creates a new index if one does not exist,
+   * otherwise it opens the index and documents will be appended.</li>
+   * <li>{@link #APPEND} - opens an existing index.</li>
+   * </ul>
+   */
+  public static enum OpenMode { CREATE, APPEND, CREATE_OR_APPEND }
+  
+  /** Default value is 128. Change using {@link #setTermIndexInterval(int)}. */
+  public static final int DEFAULT_TERM_INDEX_INTERVAL = 128;
+
+  /** Denotes a flush trigger is disabled. */
+  public final static int DISABLE_AUTO_FLUSH = -1;
+
+  /** Disabled by default (because IndexWriter flushes by RAM usage by default). */
+  public final static int DEFAULT_MAX_BUFFERED_DELETE_TERMS = DISABLE_AUTO_FLUSH;
+
+  /** Disabled by default (because IndexWriter flushes by RAM usage by default). */
+  public final static int DEFAULT_MAX_BUFFERED_DOCS = DISABLE_AUTO_FLUSH;
+
+  /**
+   * Default value is 16 MB (which means flush when buffered docs consume
+   * approximately 16 MB RAM).
+   */
+  public final static double DEFAULT_RAM_BUFFER_SIZE_MB = 16.0;
+
+  /**
+   * Default value for the write lock timeout (1,000 ms).
+   * 
+   * @see #setDefaultWriteLockTimeout(long)
+   */
+  public static long WRITE_LOCK_TIMEOUT = 1000;
+
+  /**
+   * Sets the default (for any instance) maximum time to wait for a write lock
+   * (in milliseconds).
+   */
+  public static void setDefaultWriteLockTimeout(long writeLockTimeout) {
+    WRITE_LOCK_TIMEOUT = writeLockTimeout;
+  }
+
+  /**
+   * Returns the default write lock timeout for newly instantiated
+   * IndexWriterConfigs.
+   * 
+   * @see #setDefaultWriteLockTimeout(long)
+   */
+  public static long getDefaultWriteLockTimeout() {
+    return WRITE_LOCK_TIMEOUT;
+  }
+
+  private Analyzer analyzer;
+  private IndexDeletionPolicy delPolicy;
+  private IndexCommit commit;
+  private OpenMode openMode;
+  private int maxFieldLength;
+  private Similarity similarity;
+  private int termIndexInterval;
+  private MergeScheduler mergeScheduler;
+  private long writeLockTimeout;
+  private int maxBufferedDeleteTerms;
+  private double ramBufferSizeMB;
+  private int maxBufferedDocs;
+  private IndexingChain indexingChain;
+  
+  // required for clone
+  private Version matchVersion;
+
+  /**
+   * Creates a new config with defaults that match the specified
+   * {@link Version}. {@link Version} is a placeholder for future changes. The
+   * default settings are relevant to 3.1 and before. In the future, if
+   * different settings will apply to different versions, they will be
+   * documented here.
+   */
+  public IndexWriterConfig(Version matchVersion) {
+    this.matchVersion = matchVersion;
+    analyzer = new WhitespaceAnalyzer(matchVersion);
+    delPolicy = new KeepOnlyLastCommitDeletionPolicy();
+    commit = null;
+    openMode = OpenMode.CREATE_OR_APPEND;
+    maxFieldLength = UNLIMITED_FIELD_LENGTH;
+    similarity = Similarity.getDefault();
+    termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL;
+    mergeScheduler = new ConcurrentMergeScheduler();
+    writeLockTimeout = WRITE_LOCK_TIMEOUT;
+    maxBufferedDeleteTerms = DEFAULT_MAX_BUFFERED_DELETE_TERMS;
+    ramBufferSizeMB = DEFAULT_RAM_BUFFER_SIZE_MB;
+    maxBufferedDocs = DEFAULT_MAX_BUFFERED_DOCS;
+    indexingChain = DocumentsWriter.defaultIndexingChain;
+  }
+  
+  @Override
+  public Object clone() {
+    // Shallow clone is the only thing that's possible, since parameters like
+    // analyzer, index commit etc. do not implement Cloneable.
+    try {
+      return super.clone();
+    } catch (CloneNotSupportedException e) {
+      // should not happen
+      throw new RuntimeException(e);
+    }
+  }
+
+  /**
+   * Sets the default {@link Analyzer} to be used when indexing documents. The
+   * default {@link WhitespaceAnalyzer} is set for convenience (e.g. for test
+   * purposes or when the analyzer used does not make a difference) and it's
+   * recommended to override the default setting if you care about the tokens
+   * that end up in your index.
+   * <p>
+   * <b>NOTE:</b> the analyzer cannot be null. If <code>null</code> is passed,
+   * the analyzer will be set to the default.
+   */
+  public IndexWriterConfig setAnalyzer(Analyzer analyzer) {
+    this.analyzer = analyzer == null ? new WhitespaceAnalyzer(matchVersion) : analyzer;
+    return this;
+  }
+
+  /** Returns the default analyzer to use for indexing documents. */
+  public Analyzer getAnalyzer() {
+    return analyzer;
+  }
+
+  /** Specifies {@link OpenMode} of that index. */
+  public IndexWriterConfig setOpenMode(OpenMode openMode) {
+    this.openMode = openMode;
+    return this;
+  }
+  
+  /** Returns the {@link OpenMode} set by {@link #setOpenMode(OpenMode)}. */
+  public OpenMode getOpenMode() {
+    return openMode;
+  }
+
+  /**
+   * Expert: allows an optional {@link IndexDeletionPolicy} implementation to be
+   * specified. You can use this to control when prior commits are deleted from
+   * the index. The default policy is {@link KeepOnlyLastCommitDeletionPolicy}
+   * which removes all prior commits as soon as a new commit is done (this
+   * matches behavior before 2.2). Creating your own policy can allow you to
+   * explicitly keep previous "point in time" commits alive in the index for
+   * some time, to allow readers to refresh to the new commit without having the
+   * old commit deleted out from under them. This is necessary on filesystems
+   * like NFS that do not support "delete on last close" semantics, which
+   * Lucene's "point in time" search normally relies on.
+   * <p>
+   * <b>NOTE:</b> the deletion policy cannot be null. If <code>null</code> is
+   * passed, the deletion policy will be set to the default.
+   */
+  public IndexWriterConfig setIndexDeletionPolicy(IndexDeletionPolicy delPolicy) {
+    this.delPolicy = delPolicy == null ? new KeepOnlyLastCommitDeletionPolicy() : delPolicy;
+    return this;
+  }
+
+  /**
+   * Returns the {@link IndexDeletionPolicy} specified in
+   * {@link #setIndexDeletionPolicy(IndexDeletionPolicy)} or the default
+   * {@link KeepOnlyLastCommitDeletionPolicy}.
+   */
+  public IndexDeletionPolicy getIndexDeletionPolicy() {
+    return delPolicy;
+  }
+
+  /**
+   * The maximum number of terms that will be indexed for a single field in a
+   * document. This limits the amount of memory required for indexing, so that
+   * collections with very large files will not crash the indexing process by
+   * running out of memory. This setting refers to the number of running terms,
+   * not to the number of different terms.
+   * <p>
+   * <b>NOTE:</b> this silently truncates large documents, excluding from the
+   * index all terms that occur further in the document. If you know your source
+   * documents are large, be sure to set this value high enough to accommodate
+   * the expected size. If you set it to {@link #UNLIMITED_FIELD_LENGTH}, then
+   * the only limit is your memory, but you should anticipate an
+   * OutOfMemoryError.
+   * <p>
+   * By default it is set to {@link #UNLIMITED_FIELD_LENGTH}.
+   */
+  public IndexWriterConfig setMaxFieldLength(int maxFieldLength) {
+    this.maxFieldLength = maxFieldLength;
+    return this;
+  }
+
+  /**
+   * Returns the maximum number of terms that will be indexed for a single field
+   * in a document.
+   * 
+   * @see #setMaxFieldLength(int)
+   */
+  public int getMaxFieldLength() {
+    return maxFieldLength;
+  }
+
+  /**
+   * Expert: allows to open a certain commit point. The default is null which
+   * opens the latest commit point.
+   */
+  public IndexWriterConfig setIndexCommit(IndexCommit commit) {
+    this.commit = commit;
+    return this;
+  }
+
+  /**
+   * Returns the {@link IndexCommit} as specified in
+   * {@link #setIndexCommit(IndexCommit)} or the default, <code>null</code>
+   * which specifies to open the latest index commit point.
+   */
+  public IndexCommit getIndexCommit() {
+    return commit;
+  }
+
+  /**
+   * Expert: set the {@link Similarity} implementation used by this IndexWriter.
+   * <p>
+   * <b>NOTE:</b> the similarity cannot be null. If <code>null</code> is passed,
+   * the similarity will be set to the default.
+   * 
+   * @see Similarity#setDefault(Similarity)
+   */
+  public IndexWriterConfig setSimilarity(Similarity similarity) {
+    this.similarity = similarity == null ? Similarity.getDefault() : similarity;
+    return this;
+  }
+
+  /**
+   * Expert: returns the {@link Similarity} implementation used by this
+   * IndexWriter. This defaults to the current value of
+   * {@link Similarity#getDefault()}.
+   */
+  public Similarity getSimilarity() {
+    return similarity;
+  }
+  
+  /**
+   * Expert: set the interval between indexed terms. Large values cause less
+   * memory to be used by IndexReader, but slow random-access to terms. Small
+   * values cause more memory to be used by an IndexReader, and speed
+   * random-access to terms.
+   * <p>
+   * This parameter determines the amount of computation required per query
+   * term, regardless of the number of documents that contain that term. In
+   * particular, it is the maximum number of other terms that must be scanned
+   * before a term is located and its frequency and position information may be
+   * processed. In a large index with user-entered query terms, query processing
+   * time is likely to be dominated not by term lookup but rather by the
+   * processing of frequency and positional data. In a small index or when many
+   * uncommon query terms are generated (e.g., by wildcard queries) term lookup
+   * may become a dominant cost.
+   * <p>
+   * In particular, <code>numUniqueTerms/interval</code> terms are read into
+   * memory by an IndexReader, and, on average, <code>interval/2</code> terms
+   * must be scanned for each random term access.
+   * 
+   * @see #DEFAULT_TERM_INDEX_INTERVAL
+   */
+  public IndexWriterConfig setTermIndexInterval(int interval) {
+    this.termIndexInterval = interval;
+    return this;
+  }
+
+  /**
+   * Returns the interval between indexed terms.
+   * 
+   * @see #setTermIndexInterval(int)
+   */
+  public int getTermIndexInterval() {
+    return termIndexInterval;
+  }
+
+  /**
+   * Expert: sets the merge scheduler used by this writer. The default is
+   * {@link ConcurrentMergeScheduler}.
+   * <p>
+   * <b>NOTE:</b> the merge scheduler cannot be null. If <code>null</code> is
+   * passed, the merge scheduler will be set to the default.
+   */
+  public IndexWriterConfig setMergeScheduler(MergeScheduler mergeScheduler) {
+    this.mergeScheduler = mergeScheduler == null ? new ConcurrentMergeScheduler() : mergeScheduler;
+    return this;
+  }
+
+  /**
+   * Returns the {@link MergeScheduler} that was set by
+   * {@link #setMergeScheduler(MergeScheduler)}
+   */
+  public MergeScheduler getMergeScheduler() {
+    return mergeScheduler;
+  }
+
+  /**
+   * Sets the maximum time to wait for a write lock (in milliseconds) for this
+   * instance. You can change the default value for all instances by calling
+   * {@link #setDefaultWriteLockTimeout(long)}.
+   */
+  public IndexWriterConfig setWriteLockTimeout(long writeLockTimeout) {
+    this.writeLockTimeout = writeLockTimeout;
+    return this;
+  }
+  
+  /**
+   * Returns allowed timeout when acquiring the write lock.
+   * 
+   * @see #setWriteLockTimeout(long)
+   */
+  public long getWriteLockTimeout() {
+    return writeLockTimeout;
+  }
+
+  /**
+   * Determines the minimal number of delete terms required before the buffered
+   * in-memory delete terms are applied and flushed. If there are documents
+   * buffered in memory at the time, they are merged and a new segment is
+   * created.
+
+   * <p>Disabled by default (writer flushes by RAM usage).
+   * 
+   * @throws IllegalArgumentException if maxBufferedDeleteTerms
+   * is enabled but smaller than 1
+   * @see #setRAMBufferSizeMB
+   */
+  public IndexWriterConfig setMaxBufferedDeleteTerms(int maxBufferedDeleteTerms) {
+    if (maxBufferedDeleteTerms != DISABLE_AUTO_FLUSH
+        && maxBufferedDeleteTerms < 1)
+      throw new IllegalArgumentException(
+          "maxBufferedDeleteTerms must at least be 1 when enabled");
+    this.maxBufferedDeleteTerms = maxBufferedDeleteTerms;
+    return this;
+  }
+
+  /**
+   * Returns the number of buffered deleted terms that will trigger a flush if
+   * enabled.
+   * 
+   * @see #setMaxBufferedDeleteTerms(int)
+   */
+  public int getMaxBufferedDeleteTerms() {
+    return maxBufferedDeleteTerms;
+  }
+
+  /**
+   * Determines the amount of RAM that may be used for buffering added documents
+   * and deletions before they are flushed to the Directory. Generally for
+   * faster indexing performance it's best to flush by RAM usage instead of
+   * document count and use as large a RAM buffer as you can.
+   * 
+   * <p>
+   * When this is set, the writer will flush whenever buffered documents and
+   * deletions use this much RAM. Pass in {@link #DISABLE_AUTO_FLUSH} to prevent
+   * triggering a flush due to RAM usage. Note that if flushing by document
+   * count is also enabled, then the flush will be triggered by whichever comes
+   * first.
+   * 
+   * <p>
+   * <b>NOTE</b>: the account of RAM usage for pending deletions is only
+   * approximate. Specifically, if you delete by Query, Lucene currently has no
+   * way to measure the RAM usage of individual Queries so the accounting will
+   * under-estimate and you should compensate by either calling commit()
+   * periodically yourself, or by using {@link #setMaxBufferedDeleteTerms(int)}
+   * to flush by count instead of RAM usage (each buffered delete Query counts 
+   * as one).
+   * 
+   * <p>
+   * <b>NOTE</b>: because IndexWriter uses <code>int</code>s when managing its
+   * internal storage, the absolute maximum value for this setting is somewhat
+   * less than 2048 MB. The precise limit depends on various factors, such as
+   * how large your documents are, how many fields have norms, etc., so it's
+   * best to set this value comfortably under 2048.
+   * 
+   * <p>
+   * The default value is {@link #DEFAULT_RAM_BUFFER_SIZE_MB}.
+   * 
+   * @throws IllegalArgumentException
+   *           if ramBufferSize is enabled but non-positive, or it disables
+   *           ramBufferSize when maxBufferedDocs is already disabled
+   */
+  public IndexWriterConfig setRAMBufferSizeMB(double ramBufferSizeMB) {
+    if (ramBufferSizeMB > 2048.0) {
+      throw new IllegalArgumentException("ramBufferSize " + ramBufferSizeMB
+          + " is too large; should be comfortably less than 2048");
+    }
+    if (ramBufferSizeMB != DISABLE_AUTO_FLUSH && ramBufferSizeMB <= 0.0)
+      throw new IllegalArgumentException(
+          "ramBufferSize should be > 0.0 MB when enabled");
+    if (ramBufferSizeMB == DISABLE_AUTO_FLUSH && maxBufferedDocs == DISABLE_AUTO_FLUSH)
+      throw new IllegalArgumentException(
+          "at least one of ramBufferSize and maxBufferedDocs must be enabled");
+    this.ramBufferSizeMB = ramBufferSizeMB;
+    return this;
+  }
+
+  /** Returns the value set by {@link #setRAMBufferSizeMB(double)} if enabled. */
+  public double getRAMBufferSizeMB() {
+    return ramBufferSizeMB;
+  }
+
+  /**
+   * Determines the minimal number of documents required before the buffered
+   * in-memory documents are flushed as a new Segment. Large values generally
+   * give faster indexing.
+   * 
+   * <p>
+   * When this is set, the writer will flush every maxBufferedDocs added
+   * documents. Pass in {@link #DISABLE_AUTO_FLUSH} to prevent triggering a
+   * flush due to number of buffered documents. Note that if flushing by RAM
+   * usage is also enabled, then the flush will be triggered by whichever comes
+   * first.
+   * 
+   * <p>
+   * Disabled by default (writer flushes by RAM usage).
+   * 
+   * @see #setRAMBufferSizeMB(double)
+   * 
+   * @throws IllegalArgumentException
+   *           if maxBufferedDocs is enabled but smaller than 2, or it disables
+   *           maxBufferedDocs when ramBufferSize is already disabled
+   */
+  public IndexWriterConfig setMaxBufferedDocs(int maxBufferedDocs) {
+    if (maxBufferedDocs != DISABLE_AUTO_FLUSH && maxBufferedDocs < 2)
+      throw new IllegalArgumentException(
+          "maxBufferedDocs must at least be 2 when enabled");
+    if (maxBufferedDocs == DISABLE_AUTO_FLUSH
+        && ramBufferSizeMB == DISABLE_AUTO_FLUSH)
+      throw new IllegalArgumentException(
+          "at least one of ramBufferSize and maxBufferedDocs must be enabled");
+    this.maxBufferedDocs = maxBufferedDocs;
+    return this;
+  }
+
+  /**
+   * Returns the number of buffered added documents that will trigger a flush if
+   * enabled.
+   * 
+   * @see #setMaxBufferedDocs(int)
+   */
+  public int getMaxBufferedDocs() {
+    return maxBufferedDocs;
+  }
+
+  /** Expert: sets the {@link DocConsumer} chain to be used to process documents. */
+  IndexWriterConfig setIndexingChain(IndexingChain indexingChain) {
+    this.indexingChain = indexingChain == null ? DocumentsWriter.defaultIndexingChain : indexingChain;
+    return this;
+  }
+  
+  /** Returns the indexing chain set on {@link #setIndexingChain(IndexingChain)}. */
+  IndexingChain getIndexingChain() {
+    return indexingChain;
+  }
+  
+  @Override
+  public String toString() {
+    StringBuilder sb = new StringBuilder();
+    sb.append("matchVersion=").append(matchVersion).append("\n");
+    sb.append("analyzer=").append(analyzer.getClass().getName()).append("\n");
+    sb.append("delPolicy=").append(delPolicy.getClass().getName()).append("\n");
+    sb.append("commit=").append(commit == null ? "null" : commit.getClass().getName()).append("\n");
+    sb.append("openMode=").append(openMode).append("\n");
+    sb.append("maxFieldLength=").append(maxFieldLength).append("\n");
+    sb.append("similarity=").append(similarity.getClass().getName()).append("\n");
+    sb.append("termIndexInterval=").append(termIndexInterval).append("\n");
+    sb.append("mergeScheduler=").append(mergeScheduler.getClass().getName()).append("\n");
+    sb.append("default WRITE_LOCK_TIMEOUT=").append(WRITE_LOCK_TIMEOUT).append("\n");
+    sb.append("writeLockTimeout=").append(writeLockTimeout).append("\n");
+    sb.append("maxBufferedDeleteTerms=").append(maxBufferedDeleteTerms).append("\n");
+    sb.append("ramBufferSizeMB=").append(ramBufferSizeMB).append("\n");
+    sb.append("maxBufferedDocs=").append(maxBufferedDocs).append("\n");
+    return sb.toString();
+  }
+}

Propchange: lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriterConfig.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriterConfig.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriterConfig.java?rev=921485&view=auto
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriterConfig.java (added)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriterConfig.java Wed Mar 10 17:55:53 2010
@@ -0,0 +1,263 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import static org.junit.Assert.*;
+
+import java.lang.reflect.Field;
+import java.lang.reflect.Method;
+import java.lang.reflect.Modifier;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.lucene.analysis.SimpleAnalyzer;
+import org.apache.lucene.analysis.WhitespaceAnalyzer;
+import org.apache.lucene.index.DocumentsWriter.IndexingChain;
+import org.apache.lucene.index.IndexWriterConfig.OpenMode;
+import org.apache.lucene.search.DefaultSimilarity;
+import org.apache.lucene.search.Similarity;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.LuceneTestCaseJ4;
+import org.junit.Test;
+
+public class TestIndexWriterConfig extends LuceneTestCaseJ4 {
+
+  private static final class MySimilarity extends DefaultSimilarity {
+    // Does not implement anything - used only for type checking on IndexWriterConfig.
+  }
+  
+  private static final class MyIndexingChain extends IndexingChain {
+    // Does not implement anything - used only for type checking on IndexWriterConfig.
+
+    @Override
+    DocConsumer getChain(DocumentsWriter documentsWriter) {
+      return null;
+    }
+    
+  }
+  
+  @Test
+  public void testDefaults() throws Exception {
+    IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT);
+    assertEquals(WhitespaceAnalyzer.class, conf.getAnalyzer().getClass());
+    assertNull(conf.getIndexCommit());
+    assertEquals(KeepOnlyLastCommitDeletionPolicy.class, conf.getIndexDeletionPolicy().getClass());
+    assertEquals(IndexWriterConfig.UNLIMITED_FIELD_LENGTH, conf.getMaxFieldLength());
+    assertEquals(ConcurrentMergeScheduler.class, conf.getMergeScheduler().getClass());
+    assertEquals(OpenMode.CREATE_OR_APPEND, conf.getOpenMode());
+    assertTrue(Similarity.getDefault() == conf.getSimilarity());
+    assertEquals(IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL, conf.getTermIndexInterval());
+    assertEquals(IndexWriterConfig.getDefaultWriteLockTimeout(), conf.getWriteLockTimeout());
+    assertEquals(IndexWriterConfig.WRITE_LOCK_TIMEOUT, IndexWriterConfig.getDefaultWriteLockTimeout());
+    assertEquals(IndexWriterConfig.DEFAULT_MAX_BUFFERED_DELETE_TERMS, conf.getMaxBufferedDeleteTerms());
+    assertEquals(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB, conf.getRAMBufferSizeMB(), 0.0);
+    assertEquals(IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS, conf.getMaxBufferedDocs());
+    assertTrue(DocumentsWriter.defaultIndexingChain == conf.getIndexingChain());
+    
+    // Sanity check - validate that all getters are covered.
+    Set<String> getters = new HashSet<String>();
+    getters.add("getAnalyzer");
+    getters.add("getIndexCommit");
+    getters.add("getIndexDeletionPolicy");
+    getters.add("getMaxFieldLength");
+    getters.add("getMergeScheduler");
+    getters.add("getOpenMode");
+    getters.add("getSimilarity");
+    getters.add("getTermIndexInterval");
+    getters.add("getWriteLockTimeout");
+    getters.add("getDefaultWriteLockTimeout");
+    getters.add("getMaxBufferedDeleteTerms");
+    getters.add("getRAMBufferSizeMB");
+    getters.add("getMaxBufferedDocs");
+    getters.add("getIndexingChain");
+    for (Method m : IndexWriterConfig.class.getDeclaredMethods()) {
+      if (m.getDeclaringClass() == IndexWriterConfig.class && m.getName().startsWith("get")) {
+        assertTrue("method " + m.getName() + " is not tested for defaults", getters.contains(m.getName()));
+      }
+    }
+  }
+
+  @Test
+  public void testSettersChaining() throws Exception {
+    // Ensures that every setter returns IndexWriterConfig to enable easy
+    // chaining.
+    for (Method m : IndexWriterConfig.class.getDeclaredMethods()) {
+      if (m.getDeclaringClass() == IndexWriterConfig.class
+          && m.getName().startsWith("set")
+          && !Modifier.isStatic(m.getModifiers())) {
+        assertEquals("method " + m.getName() + " does not return IndexWriterConfig", 
+            IndexWriterConfig.class, m.getReturnType());
+      }
+    }
+  }
+  
+  @Test
+  public void testConstants() throws Exception {
+    // Tests that the values of the constants do not change
+    assertEquals(1000, IndexWriterConfig.WRITE_LOCK_TIMEOUT);
+    assertEquals(128, IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL);
+    assertEquals(Integer.MAX_VALUE, IndexWriterConfig.UNLIMITED_FIELD_LENGTH);
+    assertEquals(-1, IndexWriterConfig.DISABLE_AUTO_FLUSH);
+    assertEquals(IndexWriterConfig.DISABLE_AUTO_FLUSH, IndexWriterConfig.DEFAULT_MAX_BUFFERED_DELETE_TERMS);
+    assertEquals(IndexWriterConfig.DISABLE_AUTO_FLUSH, IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS);
+    assertEquals(16.0, IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB, 0.0);
+  }
+  
+  @Test
+  public void testToString() throws Exception {
+    String str = new IndexWriterConfig(TEST_VERSION_CURRENT).toString();
+    for (Field f : IndexWriterConfig.class.getDeclaredFields()) {
+      int modifiers = f.getModifiers();
+      if (Modifier.isStatic(modifiers) && Modifier.isFinal(modifiers)) {
+        // Skip static final fields, they are only constants
+        continue;
+      } else if ("indexingChain".equals(f.getName())) {
+        // indexingChain is a package-private setting and thus is not output by
+        // toString.
+        continue;
+      }
+      assertTrue(f.getName() + " not found in toString", str.indexOf(f.getName()) != -1);
+    }
+  }
+  
+  @Test
+  public void testClone() throws Exception {
+    IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT);
+    IndexWriterConfig clone = (IndexWriterConfig) conf.clone();
+    
+    // Clone is shallow since not all parameters are cloneable.
+    assertTrue(conf.getIndexDeletionPolicy() == clone.getIndexDeletionPolicy());
+    
+    conf.setMergeScheduler(new SerialMergeScheduler());
+    assertEquals(ConcurrentMergeScheduler.class, clone.getMergeScheduler().getClass());
+  }
+
+  @Test
+  public void testInvalidValues() throws Exception {
+    IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT);
+    
+    // Test Analyzer
+    assertEquals(WhitespaceAnalyzer.class, conf.getAnalyzer().getClass());
+    conf.setAnalyzer(new SimpleAnalyzer(TEST_VERSION_CURRENT));
+    assertEquals(SimpleAnalyzer.class, conf.getAnalyzer().getClass());
+    conf.setAnalyzer(null);
+    assertEquals(WhitespaceAnalyzer.class, conf.getAnalyzer().getClass());
+    
+    // Test IndexDeletionPolicy
+    assertEquals(KeepOnlyLastCommitDeletionPolicy.class, conf.getIndexDeletionPolicy().getClass());
+    conf.setIndexDeletionPolicy(new SnapshotDeletionPolicy(null));
+    assertEquals(SnapshotDeletionPolicy.class, conf.getIndexDeletionPolicy().getClass());
+    conf.setIndexDeletionPolicy(null);
+    assertEquals(KeepOnlyLastCommitDeletionPolicy.class, conf.getIndexDeletionPolicy().getClass());
+    
+    // Test MergeScheduler
+    assertEquals(ConcurrentMergeScheduler.class, conf.getMergeScheduler().getClass());
+    conf.setMergeScheduler(new SerialMergeScheduler());
+    assertEquals(SerialMergeScheduler.class, conf.getMergeScheduler().getClass());
+    conf.setMergeScheduler(null);
+    assertEquals(ConcurrentMergeScheduler.class, conf.getMergeScheduler().getClass());
+
+    // Test Similarity
+    assertTrue(Similarity.getDefault() == conf.getSimilarity());
+    conf.setSimilarity(new MySimilarity());
+    assertEquals(MySimilarity.class, conf.getSimilarity().getClass());
+    conf.setSimilarity(null);
+    assertTrue(Similarity.getDefault() == conf.getSimilarity());
+
+    // Test IndexingChain
+    assertTrue(DocumentsWriter.defaultIndexingChain == conf.getIndexingChain());
+    conf.setIndexingChain(new MyIndexingChain());
+    assertEquals(MyIndexingChain.class, conf.getIndexingChain().getClass());
+    conf.setIndexingChain(null);
+    assertTrue(DocumentsWriter.defaultIndexingChain == conf.getIndexingChain());
+    
+    try {
+      conf.setMaxBufferedDeleteTerms(0);
+      fail("should not have succeeded to set maxBufferedDeleteTerms to 0");
+    } catch (IllegalArgumentException e) {
+      // this is expected
+    }
+
+    try {
+      conf.setMaxBufferedDocs(1);
+      fail("should not have succeeded to set maxBufferedDocs to 1");
+    } catch (IllegalArgumentException e) {
+      // this is expected
+    }
+
+    try {
+      // Disable both MAX_BUF_DOCS and RAM_SIZE_MB
+      conf.setMaxBufferedDocs(4);
+      conf.setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
+      conf.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
+      fail("should not have succeeded to disable maxBufferedDocs when ramBufferSizeMB is disabled as well");
+    } catch (IllegalArgumentException e) {
+      // this is expected
+    }
+
+    conf.setRAMBufferSizeMB(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB);
+    conf.setMaxBufferedDocs(IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS);
+    try {
+      conf.setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
+      fail("should not have succeeded to disable ramBufferSizeMB when maxBufferedDocs is disabled as well");
+    } catch (IllegalArgumentException e) {
+      // this is expected
+    }
+    
+  }
+
+  /**
+   * @deprecated should be removed once all the deprecated setters are removed
+   *             from IndexWriter.
+   */
+  @Test
+  public void testIndexWriterSetters() throws Exception {
+    // This test intentionally tests deprecated methods. The purpose is to pass
+    // whatever the user set on IW to IWC, so that if the user calls
+    // iw.getConfig().getXYZ(), he'll get the same value he passed to
+    // iw.setXYZ().
+    IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT);
+    Directory dir = new RAMDirectory();
+    IndexWriter writer = new IndexWriter(dir, conf);
+
+    writer.setSimilarity(new MySimilarity());
+    assertEquals(MySimilarity.class, writer.getConfig().getSimilarity().getClass());
+
+    writer.setMaxBufferedDeleteTerms(4);
+    assertEquals(4, writer.getConfig().getMaxBufferedDeleteTerms());
+
+    writer.setMaxBufferedDocs(10);
+    assertEquals(10, writer.getConfig().getMaxBufferedDocs());
+
+    writer.setMaxFieldLength(10);
+    assertEquals(10, writer.getConfig().getMaxFieldLength());
+    
+    writer.setMergeScheduler(new SerialMergeScheduler());
+    assertEquals(SerialMergeScheduler.class, writer.getConfig().getMergeScheduler().getClass());
+    
+    writer.setRAMBufferSizeMB(1.5);
+    assertEquals(1.5, writer.getConfig().getRAMBufferSizeMB(), 0.0);
+    
+    writer.setTermIndexInterval(40);
+    assertEquals(40, writer.getConfig().getTermIndexInterval());
+    
+    writer.setWriteLockTimeout(100);
+    assertEquals(100, writer.getConfig().getWriteLockTimeout());
+  }
+}

Propchange: lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriterConfig.java
------------------------------------------------------------------------------
    svn:eol-style = native



Mime
View raw message