lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dwe...@apache.org
Subject svn commit: r1130976 - /lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/fst/Builder.java
Date Fri, 03 Jun 2011 10:39:34 GMT
Author: dweiss
Date: Fri Jun  3 10:39:33 2011
New Revision: 1130976

URL: http://svn.apache.org/viewvc?rev=1130976&view=rev
Log:
Adding more JavaDoc to FST Builder because the arguments are rather cryptic right now.

Modified:
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/fst/Builder.java

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/fst/Builder.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/fst/Builder.java?rev=1130976&r1=1130975&r2=1130976&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/fst/Builder.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/fst/Builder.java Fri Jun  3 10:39:33 2011
@@ -21,6 +21,7 @@ import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IntsRef;
+import org.apache.lucene.util.fst.FST.INPUT_TYPE;
 
 import java.io.IOException;
 
@@ -69,6 +70,42 @@ public class Builder<T> {
   // current "frontier"
   private UnCompiledNode<T>[] frontier;
 
+  /**
+   * Instantiates an FST/FSA builder without any pruning. A shortcut
+   * to {@link #Builder(INPUT_TYPE, int, int, boolean, Outputs)} with 
+   * pruning options turned off.
+   */
+  public Builder(FST.INPUT_TYPE inputType, Outputs<T> outputs)
+  {
+      this(inputType, 0, 0, true, outputs);
+  }
+
+  /**
+   * Instantiates an FST/FSA builder with all the possible tuning and construction
+   * tweaks. Read parameter documentation carefully.
+   * 
+   * @param inputType 
+   *    The input type (transition labels). Can be anything from {@link INPUT_TYPE}
+   *    enumeration. Shorter types will consume less memory. Strings (character sequences) are
+   *    represented as {@link INPUT_TYPE#BYTE4} (full unicode codepoints). 
+   *     
+   * @param minSuffixCount1
+   *    If pruning the input graph during construction, this threshold is used for telling
+   *    if a node is kept or pruned. If transition_count(node) &gt;= minSuffixCount1, the node
+   *    is kept. 
+   *    
+   * @param minSuffixCount2
+   *    (Note: only Mike McCandless knows what this one is really doing...) 
+   * 
+   * @param doMinSuffix 
+   *    If <code>true</code>, the shared suffixes will be compacted into unique paths.
+   *    This requires an additional hash map for lookups in memory. Setting this parameter to
+   *    <code>false</code> creates a single path for all input sequences. This will result in a larger
+   *    graph, but may require less memory and will speed up construction.  
+   * @param outputs The output type for each input sequence. Applies only if building an FST. For
+   *    FSA, use {@link NoOutputs#getSingleton()} and {@link NoOutputs#getNoOutput()} as the
+   *    singleton output object.
+   */
   public Builder(FST.INPUT_TYPE inputType, int minSuffixCount1, int minSuffixCount2, boolean
doMinSuffix, Outputs<T> outputs) {
     this.minSuffixCount1 = minSuffixCount1;
     this.minSuffixCount2 = minSuffixCount2;



Mime
View raw message