lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From markrmil...@apache.org
Subject svn commit: r886210 [2/4] - in /lucene/java/branches/flex_1458: ./ contrib/ contrib/analyzers/common/src/java/org/apache/lucene/analysis/ar/ contrib/analyzers/common/src/java/org/apache/lucene/analysis/bg/ contrib/analyzers/common/src/java/org/apache/l...
Date Wed, 02 Dec 2009 17:27:08 GMT
Modified: lucene/java/branches/flex_1458/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/ElisionFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/ElisionFilter.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/ElisionFilter.java (original)
+++ lucene/java/branches/flex_1458/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/ElisionFilter.java Wed Dec  2 17:26:34 2009
@@ -25,6 +25,7 @@
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.util.Version;
 
 /**
  * Removes elisions from a {@link TokenStream}. For example, "l'avion" (the plane) will be
@@ -35,44 +36,77 @@
  * @see <a href="http://fr.wikipedia.org/wiki/%C3%89lision">Elision in Wikipedia</a>
  */
 public final class ElisionFilter extends TokenFilter {
-  private CharArraySet articles = null;
-  private TermAttribute termAtt;
+  private CharArraySet articles = CharArraySet.EMPTY_SET;
+  private final TermAttribute termAtt;
+  private static final CharArraySet DEFAULT_ARTICLES = CharArraySet.unmodifiableSet(
+      new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList(
+          "l", "m", "t", "qu", "n", "s", "j"), true));
   
-  private static char[] apostrophes = {'\'', '’'};
+  private static char[] apostrophes = {'\'', '\u2019'};
+  
+  /**
+   * Set the stopword articles
+   * @param matchVersion the lucene backwards compatibility version
+   * @param articles a set of articles
+   * @deprecated use {@link #ElisionFilter(Version, TokenStream, Set)} instead
+   */
+  public void setArticles(Version matchVersion, Set<?> articles) {
+    this.articles = CharArraySet.unmodifiableSet(
+        CharArraySet.copy(matchVersion, articles));
+  }
 
+  /**
+   * Set the stopword articles
+   * @param articles a set of articles
+   * @deprecated use {@link #setArticles(Version, Set)} instead
+   */
   public void setArticles(Set<?> articles) {
-    if (articles instanceof CharArraySet)
-      this.articles = (CharArraySet) articles;
-    else
-      this.articles = new CharArraySet(articles, true);
+    setArticles(Version.LUCENE_CURRENT, articles);
+  }
+  /**
+   * Constructs an elision filter with standard stop words
+   */
+  protected ElisionFilter(Version matchVersion, TokenStream input) {
+    this(matchVersion, input, DEFAULT_ARTICLES);
   }
 
   /**
    * Constructs an elision filter with standard stop words
+   * @deprecated use {@link #ElisionFilter(Version, TokenStream)} instead
    */
   protected ElisionFilter(TokenStream input) {
-    super(input);
-    this.articles = new CharArraySet(Arrays.asList(
-        "l", "m", "t", "qu", "n", "s", "j"), true);
-    termAtt = addAttribute(TermAttribute.class);
+    this(Version.LUCENE_30, input);
   }
 
   /**
    * Constructs an elision filter with a Set of stop words
+   * @deprecated use {@link #ElisionFilter(Version, TokenStream, Set)} instead
    */
   public ElisionFilter(TokenStream input, Set<?> articles) {
+    this(Version.LUCENE_30, input, articles);
+  }
+  
+  /**
+   * Constructs an elision filter with a Set of stop words
+   * @param matchVersion the lucene backwards compatibility version
+   * @param input the source {@link TokenStream}
+   * @param articles a set of stopword articles
+   */
+  public ElisionFilter(Version matchVersion, TokenStream input, Set<?> articles) {
     super(input);
-    setArticles(articles);
+    this.articles = CharArraySet.unmodifiableSet(
+        new CharArraySet(matchVersion, articles, true));
     termAtt = addAttribute(TermAttribute.class);
   }
 
   /**
    * Constructs an elision filter with an array of stop words
+   * @deprecated use {@link #ElisionFilter(Version, TokenStream, Set)} instead
    */
   public ElisionFilter(TokenStream input, String[] articles) {
-    super(input);
-    this.articles = new CharArraySet(Arrays.asList(articles), true);
-    termAtt = addAttribute(TermAttribute.class);
+    this(Version.LUCENE_CURRENT, input,
+        new CharArraySet(Version.LUCENE_CURRENT,
+            Arrays.asList(articles), true));
   }
 
   /**

Modified: lucene/java/branches/flex_1458/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java (original)
+++ lucene/java/branches/flex_1458/contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java Wed Dec  2 17:26:34 2009
@@ -34,6 +34,7 @@
 import java.io.Reader;
 import java.util.Arrays;
 import java.util.HashSet;
+import java.util.Collections;
 import java.util.Map;
 import java.util.Set;
 
@@ -98,7 +99,7 @@
    * Contains words that should be indexed but not stemmed.
    */
   //TODO make this final in 3.0
-  private Set<?> excltable = new HashSet();
+  private Set<?> excltable = Collections.<Object>emptySet();
 
   private final Version matchVersion;
   
@@ -112,7 +113,7 @@
   
   private static class DefaultSetHolder {
     static final Set<?> DEFAULT_STOP_SET = CharArraySet
-        .unmodifiableSet(new CharArraySet(Arrays.asList(FRENCH_STOP_WORDS),
+        .unmodifiableSet(new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList(FRENCH_STOP_WORDS),
             false));
   }
 
@@ -148,9 +149,10 @@
   public FrenchAnalyzer(Version matchVersion, Set<?> stopwords,
       Set<?> stemExclutionSet) {
     this.matchVersion = matchVersion;
-    this.stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(stopwords));
+    this.stoptable = CharArraySet.unmodifiableSet(CharArraySet
+        .copy(matchVersion, stopwords));
     this.excltable = CharArraySet.unmodifiableSet(CharArraySet
-        .copy(stemExclutionSet));
+        .copy(matchVersion, stemExclutionSet));
   }
  
 
@@ -159,7 +161,7 @@
    * @deprecated use {@link #FrenchAnalyzer(Version, Set)} instead
    */
   public FrenchAnalyzer(Version matchVersion, String... stopwords) {
-    this(matchVersion, StopFilter.makeStopSet(stopwords));
+    this(matchVersion, StopFilter.makeStopSet(matchVersion, stopwords));
   }
 
   /**
@@ -176,7 +178,7 @@
    * @deprecated use {@link #FrenchAnalyzer(Version, Set, Set)} instead
    */
   public void setStemExclusionTable(String... exclusionlist) {
-    excltable = StopFilter.makeStopSet(exclusionlist);
+    excltable = StopFilter.makeStopSet(matchVersion, exclusionlist);
     setPreviousTokenStream(null); // force a new stemmer to be created
   }
 
@@ -184,8 +186,8 @@
    * Builds an exclusionlist from a Map.
    * @deprecated use {@link #FrenchAnalyzer(Version, Set, Set)} instead
    */
-  public void setStemExclusionTable(Map exclusionlist) {
-    excltable = new HashSet(exclusionlist.keySet());
+  public void setStemExclusionTable(Map<?,?> exclusionlist) {
+    excltable = new HashSet<Object>(exclusionlist.keySet());
     setPreviousTokenStream(null); // force a new stemmer to be created
   }
 
@@ -195,7 +197,7 @@
    * @deprecated use {@link #FrenchAnalyzer(Version, Set, Set)} instead
    */
   public void setStemExclusionTable(File exclusionlist) throws IOException {
-    excltable = new HashSet(WordlistLoader.getWordSet(exclusionlist));
+    excltable = new HashSet<Object>(WordlistLoader.getWordSet(exclusionlist));
     setPreviousTokenStream(null); // force a new stemmer to be created
   }
 
@@ -211,11 +213,10 @@
   public final TokenStream tokenStream(String fieldName, Reader reader) {
     TokenStream result = new StandardTokenizer(matchVersion, reader);
     result = new StandardFilter(result);
-    result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
-                            result, stoptable);
+    result = new StopFilter(matchVersion, result, stoptable);
     result = new FrenchStemFilter(result, excltable);
     // Convert to lowercase after stemming!
-    result = new LowerCaseFilter(result);
+    result = new LowerCaseFilter(matchVersion, result);
     return result;
   }
   
@@ -240,11 +241,10 @@
       streams = new SavedStreams();
       streams.source = new StandardTokenizer(matchVersion, reader);
       streams.result = new StandardFilter(streams.source);
-      streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
-                                      streams.result, stoptable);
+      streams.result = new StopFilter(matchVersion, streams.result, stoptable);
       streams.result = new FrenchStemFilter(streams.result, excltable);
       // Convert to lowercase after stemming!
-      streams.result = new LowerCaseFilter(streams.result);
+      streams.result = new LowerCaseFilter(matchVersion, streams.result);
       setPreviousTokenStream(streams);
     } else {
       streams.source.reset(reader);

Modified: lucene/java/branches/flex_1458/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternAnalyzer.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternAnalyzer.java (original)
+++ lucene/java/branches/flex_1458/contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternAnalyzer.java Wed Dec  2 17:26:34 2009
@@ -73,7 +73,8 @@
   public static final Pattern WHITESPACE_PATTERN = Pattern.compile("\\s+");
   
   private static final CharArraySet EXTENDED_ENGLISH_STOP_WORDS =
-    CharArraySet.unmodifiableSet(new CharArraySet(Arrays.asList(
+    CharArraySet.unmodifiableSet(new CharArraySet(Version.LUCENE_CURRENT, 
+        Arrays.asList(
       "a", "about", "above", "across", "adj", "after", "afterwards",
       "again", "against", "albeit", "all", "almost", "alone", "along",
       "already", "also", "although", "always", "among", "amongst", "an",
@@ -153,7 +154,7 @@
    *            if non-null, ignores all tokens that are contained in the
    *            given stop set (after previously having applied toLowerCase()
    *            if applicable). For example, created via
-   *            {@link StopFilter#makeStopSet(String[])}and/or
+   *            {@link StopFilter#makeStopSet(Version, String[])}and/or
    *            {@link org.apache.lucene.analysis.WordlistLoader}as in
    *            <code>WordlistLoader.getWordSet(new File("samples/fulltext/stopwords.txt")</code>
    *            or <a href="http://www.unine.ch/info/clef/">other stop words
@@ -199,7 +200,7 @@
     }
     else {
       stream = new PatternTokenizer(text, pattern, toLowerCase);
-      if (stopWords != null) stream = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), stream, stopWords);
+      if (stopWords != null) stream = new StopFilter(matchVersion, stream, stopWords);
     }
     
     return stream;
@@ -387,12 +388,12 @@
     private int pos;
     private final boolean isLetter;
     private final boolean toLowerCase;
-    private final Set stopWords;
+    private final Set<?> stopWords;
     private static final Locale locale = Locale.getDefault();
     private TermAttribute termAtt = addAttribute(TermAttribute.class);
     private OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
     
-    public FastStringTokenizer(String str, boolean isLetter, boolean toLowerCase, Set stopWords) {
+    public FastStringTokenizer(String str, boolean isLetter, boolean toLowerCase, Set<?> stopWords) {
       this.str = str;
       this.isLetter = isLetter;
       this.toLowerCase = toLowerCase;

Modified: lucene/java/branches/flex_1458/contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java (original)
+++ lucene/java/branches/flex_1458/contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java Wed Dec  2 17:26:34 2009
@@ -80,8 +80,8 @@
   
   private static class DefaultSetHolder {
     static final Set<?> DEFAULT_STOP_SET = CharArraySet
-        .unmodifiableSet(new CharArraySet(Arrays.asList(DUTCH_STOP_WORDS),
-            false));
+        .unmodifiableSet(new CharArraySet(Version.LUCENE_CURRENT, 
+            Arrays.asList(DUTCH_STOP_WORDS), false));
   }
 
 
@@ -116,8 +116,8 @@
   }
   
   public DutchAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionTable){
-    stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(stopwords));
-    excltable = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionTable));
+    stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stopwords));
+    excltable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionTable));
     this.matchVersion = matchVersion;
     setOverridesTokenStreamMethod(DutchAnalyzer.class);
   }
@@ -130,7 +130,7 @@
    * @deprecated use {@link #DutchAnalyzer(Version, Set)} instead
    */
   public DutchAnalyzer(Version matchVersion, String... stopwords) {
-    this(matchVersion, StopFilter.makeStopSet(stopwords));
+    this(matchVersion, StopFilter.makeStopSet(matchVersion, stopwords));
   }
 
   /**
@@ -168,7 +168,7 @@
    * @deprecated use {@link #DutchAnalyzer(Version, Set, Set)} instead
    */
   public void setStemExclusionTable(String... exclusionlist) {
-    excltable = StopFilter.makeStopSet(exclusionlist);
+    excltable = StopFilter.makeStopSet(matchVersion, exclusionlist);
     setPreviousTokenStream(null); // force a new stemmer to be created
   }
 
@@ -222,8 +222,7 @@
   public TokenStream tokenStream(String fieldName, Reader reader) {
     TokenStream result = new StandardTokenizer(matchVersion, reader);
     result = new StandardFilter(result);
-    result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
-                            result, stoptable);
+    result = new StopFilter(matchVersion, result, stoptable);
     result = new DutchStemFilter(result, excltable, stemdict);
     return result;
   }
@@ -256,8 +255,7 @@
       streams = new SavedStreams();
       streams.source = new StandardTokenizer(matchVersion, reader);
       streams.result = new StandardFilter(streams.source);
-      streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
-                                      streams.result, stoptable);
+      streams.result = new StopFilter(matchVersion, streams.result, stoptable);
       streams.result = new DutchStemFilter(streams.result, excltable, stemdict);
       setPreviousTokenStream(streams);
     } else {

Modified: lucene/java/branches/flex_1458/contrib/analyzers/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/analyzers/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/analyzers/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java (original)
+++ lucene/java/branches/flex_1458/contrib/analyzers/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java Wed Dec  2 17:26:34 2009
@@ -179,8 +179,7 @@
     }
     HashSet<String> stopWords = stopWordsPerField.get(fieldName);
     if (stopWords != null) {
-      result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
-                              result, stopWords);
+      result = new StopFilter(matchVersion, result, stopWords);
     }
     return result;
   }
@@ -223,8 +222,7 @@
       /* if there are any stopwords for the field, save the stopfilter */
       HashSet<String> stopWords = stopWordsPerField.get(fieldName);
       if (stopWords != null)
-        streams.withStopFilter = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
-                                                streams.wrapped, stopWords);
+        streams.withStopFilter = new StopFilter(matchVersion, streams.wrapped, stopWords);
       else
         streams.withStopFilter = streams.wrapped;
 
@@ -245,8 +243,7 @@
         streams.wrapped = result;
         HashSet<String> stopWords = stopWordsPerField.get(fieldName);
         if (stopWords != null)
-          streams.withStopFilter = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
-                                                  streams.wrapped, stopWords);
+          streams.withStopFilter = new StopFilter(matchVersion, streams.wrapped, stopWords);
         else
           streams.withStopFilter = streams.wrapped;
       }

Modified: lucene/java/branches/flex_1458/contrib/analyzers/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/analyzers/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilter.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/analyzers/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilter.java (original)
+++ lucene/java/branches/flex_1458/contrib/analyzers/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilter.java Wed Dec  2 17:26:34 2009
@@ -20,6 +20,7 @@
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.util.Version;
 
 import java.io.IOException;
 
@@ -31,11 +32,19 @@
  * "&#x5C;u0001yrtnuoc". This is useful when implementing efficient leading
  * wildcards search.
  * </p>
+ * <a name="version"/>
+ * <p>You must specify the required {@link Version}
+ * compatibility when creating ReverseStringFilter, or when using any of
+ * its static methods:
+ * <ul>
+ *   <li> As of 3.1, supplementary characters are handled correctly
+ * </ul>
  */
 public final class ReverseStringFilter extends TokenFilter {
 
   private TermAttribute termAtt;
   private final char marker;
+  private final Version matchVersion;
   private static final char NOMARKER = '\uFFFF';
   
   /**
@@ -66,11 +75,13 @@
    * </p>
    * 
    * @param in {@link TokenStream} to filter
+   * @deprecated use {@link #ReverseStringFilter(Version, TokenStream)} 
+   *    instead. This constructor will be removed in Lucene 4.0
    */
   public ReverseStringFilter(TokenStream in) {
     this(in, NOMARKER);
   }
-
+  
   /**
    * Create a new ReverseStringFilter that reverses and marks all tokens in the
    * supplied {@link TokenStream}.
@@ -81,9 +92,42 @@
    * 
    * @param in {@link TokenStream} to filter
    * @param marker A character used to mark reversed tokens
+   * @deprecated use {@link #ReverseStringFilter(Version, TokenStream, char)} 
+   *    instead. This constructor will be removed in Lucene 4.0 
    */
   public ReverseStringFilter(TokenStream in, char marker) {
+    this(Version.LUCENE_30, in, marker);
+  }
+  
+  /**
+   * Create a new ReverseStringFilter that reverses all tokens in the 
+   * supplied {@link TokenStream}.
+   * <p>
+   * The reversed tokens will not be marked. 
+   * </p>
+   * 
+   * @param matchVersion See <a href="#version">above</a>
+   * @param in {@link TokenStream} to filter
+   */
+  public ReverseStringFilter(Version matchVersion, TokenStream in) {
+    this(matchVersion, in, NOMARKER);
+  }
+
+  /**
+   * Create a new ReverseStringFilter that reverses and marks all tokens in the
+   * supplied {@link TokenStream}.
+   * <p>
+   * The reversed tokens will be prepended (marked) by the <code>marker</code>
+   * character.
+   * </p>
+   * 
+   * @param matchVersion See <a href="#version">above</a>
+   * @param in {@link TokenStream} to filter
+   * @param marker A character used to mark reversed tokens
+   */
+  public ReverseStringFilter(Version matchVersion, TokenStream in, char marker) {
     super(in);
+    this.matchVersion = matchVersion;
     this.marker = marker;
     termAtt = addAttribute(TermAttribute.class);
   }
@@ -97,7 +141,7 @@
         termAtt.resizeTermBuffer(len);
         termAtt.termBuffer()[len - 1] = marker;
       }
-      reverse( termAtt.termBuffer(), len );
+      reverse( matchVersion, termAtt.termBuffer(), 0, len );
       termAtt.setTermLength(len);
       return true;
     } else {
@@ -105,21 +149,94 @@
     }
   }
 
+  /**
+   * Reverses the given input string
+   * 
+   * @param input the string to reverse
+   * @return the given input string in reversed order
+   * @deprecated use {@link #reverse(Version, String)} instead. This method 
+   *    will be removed in Lucene 4.0
+   */
   public static String reverse( final String input ){
-    char[] charInput = input.toCharArray();
-    reverse( charInput );
+    return reverse(Version.LUCENE_30, input);
+  }
+  
+  /**
+   * Reverses the given input string
+   * 
+   * @param matchVersion See <a href="#version">above</a>
+   * @param input the string to reverse
+   * @return the given input string in reversed order
+   */
+  public static String reverse( Version matchVersion, final String input ){
+    final char[] charInput = input.toCharArray();
+    reverse( matchVersion, charInput, 0, charInput.length );
     return new String( charInput );
   }
   
-  public static void reverse( char[] buffer ){
-    reverse( buffer, buffer.length );
+  /**
+   * Reverses the given input buffer in-place
+   * @param buffer the input char array to reverse
+   * @deprecated use {@link #reverse(Version, char[])} instead. This 
+   *    method will be removed in Lucene 4.0
+   */
+  public static void reverse( final char[] buffer ){
+    reverse( buffer, 0, buffer.length );
   }
   
-  public static void reverse( char[] buffer, int len ){
+  /**
+   * Reverses the given input buffer in-place
+   * @param matchVersion See <a href="#version">above</a>
+   * @param buffer the input char array to reverse
+   */
+  public static void reverse(Version matchVersion, final char[] buffer) {
+    reverse(matchVersion, buffer, 0, buffer.length);
+  }
+  
+  /**
+   * Partially reverses the given input buffer in-place from offset 0
+   * up to the given length.
+   * @param buffer the input char array to reverse
+   * @param len the length in the buffer up to where the
+   *        buffer should be reversed
+   * @deprecated use {@link #reverse(Version, char[], int)} instead. This 
+   *    method will be removed in Lucene 4.0
+   */
+  public static void reverse( final char[] buffer, final int len ){
     reverse( buffer, 0, len );
   }
   
-  public static void reverse( char[] buffer, int start, int len ){
+  /**
+   * Partially reverses the given input buffer in-place from offset 0
+   * up to the given length.
+   * @param matchVersion See <a href="#version">above</a>
+   * @param buffer the input char array to reverse
+   * @param len the length in the buffer up to where the
+   *        buffer should be reversed
+   */
+  public static void reverse(Version matchVersion, final char[] buffer,
+      final int len) {
+    reverse( matchVersion, buffer, 0, len );
+  }
+  
+  /**
+   * Partially reverses the given input buffer in-place from the given offset
+   * up to the given length.
+   * @param buffer the input char array to reverse
+   * @param start the offset from where to reverse the buffer
+   * @param len the length in the buffer up to where the
+   *        buffer should be reversed
+   * @deprecated use {@link #reverse(Version, char[], int, int)} instead. This 
+   *    method will be removed in Lucene 4.0
+   */
+  public static void reverse(char[] buffer, int start, int len ) {
+    reverseUnicode3(buffer, start, len);
+  }
+  
+  /**
+   * @deprecated Remove this when support for 3.0 indexes is no longer needed.
+   */
+  private static void reverseUnicode3( char[] buffer, int start, int len ){
     if( len <= 1 ) return;
     int num = len>>1;
     for( int i = start; i < ( start + num ); i++ ){
@@ -128,4 +245,77 @@
       buffer[start * 2 + len - i - 1] = c;
     }
   }
+  
+  /**
+   * Partially reverses the given input buffer in-place from the given offset
+   * up to the given length.
+   * @param matchVersion See <a href="#version">above</a>
+   * @param buffer the input char array to reverse
+   * @param start the offset from where to reverse the buffer
+   * @param len the length in the buffer up to where the
+   *        buffer should be reversed
+   */
+  public static void reverse(Version matchVersion, final char[] buffer,
+      final int start, final int len) {
+    if (!matchVersion.onOrAfter(Version.LUCENE_31)) {
+      reverseUnicode3(buffer, start, len);
+      return;
+    }
+    /* modified version of Apache Harmony AbstractStringBuilder reverse0() */
+    if (len < 2)
+      return;
+    int end = (start + len) - 1;
+    char frontHigh = buffer[start];
+    char endLow = buffer[end];
+    boolean allowFrontSur = true, allowEndSur = true;
+    final int mid = start + (len >> 1);
+    for (int i = start; i < mid; ++i, --end) {
+      final char frontLow = buffer[i + 1];
+      final char endHigh = buffer[end - 1];
+      final boolean surAtFront = allowFrontSur
+          && Character.isSurrogatePair(frontHigh, frontLow);
+      if (surAtFront && (len < 3)) {
+        // nothing to do since surAtFront is allowed and 1 char left
+        return;
+      }
+      final boolean surAtEnd = allowEndSur
+          && Character.isSurrogatePair(endHigh, endLow);
+      allowFrontSur = allowEndSur = true;
+      if (surAtFront == surAtEnd) {
+        if (surAtFront) {
+          // both surrogates
+          buffer[end] = frontLow;
+          buffer[--end] = frontHigh;
+          buffer[i] = endHigh;
+          buffer[++i] = endLow;
+          frontHigh = buffer[i + 1];
+          endLow = buffer[end - 1];
+        } else {
+          // neither surrogates
+          buffer[end] = frontHigh;
+          buffer[i] = endLow;
+          frontHigh = frontLow;
+          endLow = endHigh;
+        }
+      } else {
+        if (surAtFront) {
+          // surrogate only at the front
+          buffer[end] = frontLow;
+          buffer[i] = endLow;
+          endLow = endHigh;
+          allowFrontSur = false;
+        } else {
+          // surrogate only at the end
+          buffer[end] = frontHigh;
+          buffer[i] = endHigh;
+          frontHigh = frontLow;
+          allowEndSur = false;
+        }
+      }
+    }
+    if ((len & 0x01) == 1 && !(allowFrontSur && allowEndSur)) {
+      // only if odd length
+      buffer[end] = allowFrontSur ? endLow : frontHigh;
+    }
+  }
 }

Modified: lucene/java/branches/flex_1458/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java (original)
+++ lucene/java/branches/flex_1458/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java Wed Dec  2 17:26:34 2009
@@ -59,8 +59,8 @@
     
     private static class DefaultSetHolder {
       static final Set<?> DEFAULT_STOP_SET = CharArraySet
-          .unmodifiableSet(new CharArraySet(Arrays.asList(RUSSIAN_STOP_WORDS),
-              false));
+          .unmodifiableSet(new CharArraySet(Version.LUCENE_CURRENT, 
+              Arrays.asList(RUSSIAN_STOP_WORDS), false));
     }
 
     /**
@@ -79,7 +79,7 @@
      * @deprecated use {@link #RussianAnalyzer(Version, Set)} instead
      */
     public RussianAnalyzer(Version matchVersion, String... stopwords) {
-      this(matchVersion, StopFilter.makeStopSet(stopwords));
+      this(matchVersion, StopFilter.makeStopSet(matchVersion, stopwords));
     }
     
     /**
@@ -91,7 +91,7 @@
      *          a stopword set
      */
     public RussianAnalyzer(Version matchVersion, Set<?> stopwords){
-      stopSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stopwords));
+      stopSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stopwords));
       this.matchVersion = matchVersion;
     }
    
@@ -111,16 +111,15 @@
      *
      * @return  A {@link TokenStream} built from a 
      *   {@link RussianLetterTokenizer} filtered with 
-     *   {@link RussianLowerCaseFilter}, {@link StopFilter}, 
+     *   {@link LowerCaseFilter}, {@link StopFilter}, 
      *   and {@link RussianStemFilter}
      */
     @Override
     public TokenStream tokenStream(String fieldName, Reader reader)
     {
         TokenStream result = new RussianLetterTokenizer(reader);
-        result = new LowerCaseFilter(result);
-        result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
-                                result, stopSet);
+        result = new LowerCaseFilter(matchVersion, result);
+        result = new StopFilter(matchVersion, result, stopSet);
         result = new RussianStemFilter(result);
         return result;
     }
@@ -136,7 +135,7 @@
      *
      * @return  A {@link TokenStream} built from a 
      *   {@link RussianLetterTokenizer} filtered with 
-     *   {@link RussianLowerCaseFilter}, {@link StopFilter}, 
+     *   {@link LowerCaseFilter}, {@link StopFilter}, 
      *   and {@link RussianStemFilter}
      */
     @Override
@@ -146,9 +145,8 @@
     if (streams == null) {
       streams = new SavedStreams();
       streams.source = new RussianLetterTokenizer(reader);
-      streams.result = new LowerCaseFilter(streams.source);
-      streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
-                                      streams.result, stopSet);
+      streams.result = new LowerCaseFilter(matchVersion, streams.source);
+      streams.result = new StopFilter(matchVersion, streams.result, stopSet);
       streams.result = new RussianStemFilter(streams.result);
       setPreviousTokenStream(streams);
     } else {

Modified: lucene/java/branches/flex_1458/contrib/analyzers/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/analyzers/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/analyzers/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java (original)
+++ lucene/java/branches/flex_1458/contrib/analyzers/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java Wed Dec  2 17:26:34 2009
@@ -48,8 +48,7 @@
     TokenStream ts = new StandardTokenizer(matchVersion, reader);
     ts = new StandardFilter(ts);
     ts = new ThaiWordFilter(ts);
-    ts = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
-                        ts, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
+    ts = new StopFilter(matchVersion, ts, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
     return ts;
   }
   
@@ -73,8 +72,7 @@
       streams.source = new StandardTokenizer(matchVersion, reader);
       streams.result = new StandardFilter(streams.source);
       streams.result = new ThaiWordFilter(streams.result);
-      streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
-                                      streams.result, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
+      streams.result = new StopFilter(matchVersion, streams.result, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
       setPreviousTokenStream(streams);
     } else {
       streams.source.reset(reader);

Modified: lucene/java/branches/flex_1458/contrib/analyzers/common/src/test/org/apache/lucene/analysis/cz/TestCzechAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/analyzers/common/src/test/org/apache/lucene/analysis/cz/TestCzechAnalyzer.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/analyzers/common/src/test/org/apache/lucene/analysis/cz/TestCzechAnalyzer.java (original)
+++ lucene/java/branches/flex_1458/contrib/analyzers/common/src/test/org/apache/lucene/analysis/cz/TestCzechAnalyzer.java Wed Dec  2 17:26:34 2009
@@ -24,31 +24,50 @@
 
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.util.Version;
 
 /**
  * Test the CzechAnalyzer
  * 
- * CzechAnalyzer is like a StandardAnalyzer with a custom stopword list.
+ * Before Lucene 3.1, CzechAnalyzer was a StandardAnalyzer with a custom 
+ * stopword list. As of 3.1 it also includes a stemmer.
  *
  */
 public class TestCzechAnalyzer extends BaseTokenStreamTestCase {
   File dataDir = new File(System.getProperty("dataDir", "./bin"));
   File customStopFile = new File(dataDir, "org/apache/lucene/analysis/cz/customStopWordFile.txt");
   
+  /**
+   * @deprecated Remove this test when support for 3.0 indexes is no longer needed.
+   */
+  public void testStopWordLegacy() throws Exception {
+    assertAnalyzesTo(new CzechAnalyzer(Version.LUCENE_30), "Pokud mluvime o volnem", 
+        new String[] { "mluvime", "volnem" });
+  }
+  
   public void testStopWord() throws Exception {
-    assertAnalyzesTo(new CzechAnalyzer(Version.LUCENE_CURRENT), "Pokud mluvime o volnem", new String[] { "mluvime", "volnem" });
+    assertAnalyzesTo(new CzechAnalyzer(Version.LUCENE_CURRENT), "Pokud mluvime o volnem", 
+        new String[] { "mluvim", "voln" });
   }
-    
-  public void testReusableTokenStream() throws Exception {
-    Analyzer analyzer = new CzechAnalyzer(Version.LUCENE_CURRENT);
+  
+  /**
+   * @deprecated Remove this test when support for 3.0 indexes is no longer needed.
+   */
+  public void testReusableTokenStreamLegacy() throws Exception {
+    Analyzer analyzer = new CzechAnalyzer(Version.LUCENE_30);
     assertAnalyzesToReuse(analyzer, "Pokud mluvime o volnem", new String[] { "mluvime", "volnem" });
     assertAnalyzesToReuse(analyzer, "Česká Republika", new String[] { "česká", "republika" });
   }
+  
+  public void testReusableTokenStream() throws Exception {
+    Analyzer analyzer = new CzechAnalyzer(Version.LUCENE_CURRENT);
+    assertAnalyzesToReuse(analyzer, "Pokud mluvime o volnem", new String[] { "mluvim", "voln" });
+    assertAnalyzesToReuse(analyzer, "Česká Republika", new String[] { "česk", "republik" });
+  }
 
-  /*
+  /**
    * An input stream that always throws IOException for testing.
+   * @deprecated Remove this class when the loadStopWords method is removed.
    */
   private class UnreliableInputStream extends InputStream {
     @Override
@@ -57,24 +76,26 @@
     }
   }
   
-  /*
+  /**
    * The loadStopWords method does not throw IOException on error,
    * instead previously it set the stoptable to null (versus empty)
    * this would cause a NPE when it is time to create the StopFilter.
+   * @deprecated Remove this test when the loadStopWords method is removed.
    */
   public void testInvalidStopWordFile() throws Exception {
-    CzechAnalyzer cz = new CzechAnalyzer(Version.LUCENE_CURRENT);
+    CzechAnalyzer cz = new CzechAnalyzer(Version.LUCENE_30);
     cz.loadStopWords(new UnreliableInputStream(), "UTF-8");
     assertAnalyzesTo(cz, "Pokud mluvime o volnem",
         new String[] { "pokud", "mluvime", "o", "volnem" });
   }
   
-  /* 
+  /** 
    * Test that changes to the stop table via loadStopWords are applied immediately
    * when using reusable token streams.
+   * @deprecated Remove this test when the loadStopWords method is removed.
    */
   public void testStopWordFileReuse() throws Exception {
-    CzechAnalyzer cz = new CzechAnalyzer(Version.LUCENE_CURRENT);
+    CzechAnalyzer cz = new CzechAnalyzer(Version.LUCENE_30);
     assertAnalyzesToReuse(cz, "Česká Republika", 
       new String[] { "česká", "republika" });
     

Modified: lucene/java/branches/flex_1458/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestElision.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestElision.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestElision.java (original)
+++ lucene/java/branches/flex_1458/contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestElision.java Wed Dec  2 17:26:34 2009
@@ -42,7 +42,7 @@
     Set articles = new HashSet();
     articles.add("l");
     articles.add("M");
-    TokenFilter filter = new ElisionFilter(tokenizer, articles);
+    TokenFilter filter = new ElisionFilter(Version.LUCENE_CURRENT, tokenizer, articles);
     List tas = filtre(filter);
     assertEquals("embrouille", tas.get(4));
     assertEquals("O'brian", tas.get(6));

Modified: lucene/java/branches/flex_1458/contrib/analyzers/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/analyzers/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/analyzers/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java (original)
+++ lucene/java/branches/flex_1458/contrib/analyzers/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java Wed Dec  2 17:26:34 2009
@@ -23,6 +23,7 @@
 import org.apache.lucene.analysis.WhitespaceTokenizer;
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.util.Version;
 
 public class TestReverseStringFilter extends BaseTokenStreamTestCase {
   public void testFilter() throws Exception {
@@ -73,4 +74,47 @@
     ReverseStringFilter.reverse( buffer, 2, 3 );
     assertEquals( "ABEDCF", new String( buffer ) );
   }
+  
+  /**
+   * Test the broken 3.0 behavior, for back compat
+   */
+  public void testBackCompat() throws Exception {
+    assertEquals("\uDF05\uD866\uDF05\uD866", ReverseStringFilter.reverse("𩬅𩬅"));
+  }
+  
+  public void testReverseSupplementary() throws Exception {
+    // supplementary at end
+    assertEquals("𩬅艱鍟䇹愯瀛", ReverseStringFilter.reverse(Version.LUCENE_CURRENT, "瀛愯䇹鍟艱𩬅"));
+    // supplementary at end - 1
+    assertEquals("a𩬅艱鍟䇹愯瀛", ReverseStringFilter.reverse(Version.LUCENE_CURRENT, "瀛愯䇹鍟艱𩬅a"));
+    // supplementary at start
+    assertEquals("fedcba𩬅", ReverseStringFilter.reverse(Version.LUCENE_CURRENT, "𩬅abcdef"));
+    // supplementary at start + 1
+    assertEquals("fedcba𩬅z", ReverseStringFilter.reverse(Version.LUCENE_CURRENT, "z𩬅abcdef"));
+    // supplementary medial
+    assertEquals("gfe𩬅dcba", ReverseStringFilter.reverse(Version.LUCENE_CURRENT, "abcd𩬅efg"));
+  }
+
+  public void testReverseSupplementaryChar() throws Exception {
+    // supplementary at end
+    char[] buffer = "abc瀛愯䇹鍟艱𩬅".toCharArray();
+    ReverseStringFilter.reverse(Version.LUCENE_CURRENT, buffer, 3, 7);
+    assertEquals("abc𩬅艱鍟䇹愯瀛", new String(buffer));
+    // supplementary at end - 1
+    buffer = "abc瀛愯䇹鍟艱𩬅d".toCharArray();
+    ReverseStringFilter.reverse(Version.LUCENE_CURRENT, buffer, 3, 8);
+    assertEquals("abcd𩬅艱鍟䇹愯瀛", new String(buffer));
+    // supplementary at start
+    buffer = "abc𩬅瀛愯䇹鍟艱".toCharArray();
+    ReverseStringFilter.reverse(Version.LUCENE_CURRENT, buffer, 3, 7);
+    assertEquals("abc艱鍟䇹愯瀛𩬅", new String(buffer));
+    // supplementary at start + 1
+    buffer = "abcd𩬅瀛愯䇹鍟艱".toCharArray();
+    ReverseStringFilter.reverse(Version.LUCENE_CURRENT, buffer, 3, 8);
+    assertEquals("abc艱鍟䇹愯瀛𩬅d", new String(buffer));
+    // supplementary medial
+    buffer = "abc瀛愯𩬅def".toCharArray();
+    ReverseStringFilter.reverse(Version.LUCENE_CURRENT, buffer, 3, 7);
+    assertEquals("abcfed𩬅愯瀛", new String(buffer));
+  }
 }

Modified: lucene/java/branches/flex_1458/contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java (original)
+++ lucene/java/branches/flex_1458/contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java Wed Dec  2 17:26:34 2009
@@ -153,8 +153,7 @@
     // The porter stemming is too strict, this is not a bug, this is a feature:)
     result = new PorterStemFilter(result);
     if (!stopWords.isEmpty()) {
-      result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
-                              result, stopWords, false);
+      result = new StopFilter(matchVersion, result, stopWords, false);
     }
     return result;
   }
@@ -175,8 +174,7 @@
       streams.filteredTokenStream = new WordTokenFilter(streams.tokenStream);
       streams.filteredTokenStream = new PorterStemFilter(streams.filteredTokenStream);
       if (!stopWords.isEmpty()) {
-        streams.filteredTokenStream = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
-                                                     streams.filteredTokenStream, stopWords, false);
+        streams.filteredTokenStream = new StopFilter(matchVersion, streams.filteredTokenStream, stopWords, false);
       }
     } else {
       streams.tokenStream.reset(reader);

Modified: lucene/java/branches/flex_1458/contrib/benchmark/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/benchmark/CHANGES.txt?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/benchmark/CHANGES.txt (original)
+++ lucene/java/branches/flex_1458/contrib/benchmark/CHANGES.txt Wed Dec  2 17:26:34 2009
@@ -3,6 +3,11 @@
 The Benchmark contrib package contains code for benchmarking Lucene in a variety of ways.
 
 $Id:$
+11/17/2009
+  LUCENE-2079: Allow specifying delta thread priority after the "&";
+  added log.time.step.msec to print per-time-period counts; fixed
+  NearRealTimeTask to print reopen times (in msec) of each reopen, at
+  the end.  (Mike McCandless)
 
 11/08/2009
   LUCENE-2044: Added delete.percent.rand.seed to seed the Random instance

Modified: lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java (original)
+++ lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java Wed Dec  2 17:26:34 2009
@@ -96,7 +96,6 @@
       System.out.println("------------> queries:");
       System.out.println(getQueryMaker(new SearchTask(this)).printQueries());
     }
-
   }
 
   // clean old stuff, reopen 

Modified: lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/stats/Points.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/stats/Points.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/stats/Points.java (original)
+++ lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/stats/Points.java Wed Dec  2 17:26:34 2009
@@ -38,6 +38,8 @@
 
   private int nextTaskRunNum = 0;
 
+  private TaskStats currentStats;
+
   /**
    * Create a Points statistics object. 
    */
@@ -62,9 +64,14 @@
    */
   public synchronized TaskStats markTaskStart (PerfTask task, int round) {
     TaskStats stats = new TaskStats(task, nextTaskRunNum(), round);
+    this.currentStats = stats;
     points.add(stats);
     return stats;
   }
+
+  public TaskStats getCurrentStats() {
+    return currentStats;
+  }
   
   // return next task num
   private synchronized int nextTaskRunNum() {

Modified: lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/stats/TaskStats.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/stats/TaskStats.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/stats/TaskStats.java (original)
+++ lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/stats/TaskStats.java Wed Dec  2 17:26:34 2009
@@ -91,6 +91,22 @@
     this.numParallelTasks = numParallelTasks;
     this.count = count;
   }
+  
+  private int[] countsByTime;
+  private long countsByTimeStepMSec;
+
+  public void setCountsByTime(int[] counts, long msecStep) {
+    countsByTime = counts;
+    countsByTimeStepMSec = msecStep;
+  }
+
+  public int[] getCountsByTime() {
+    return countsByTime;
+  }
+
+  public long getCountsByTimeStepMSec() {
+    return countsByTimeStepMSec;
+  }
 
   /**
    * @return the taskRunNum.
@@ -174,6 +190,18 @@
     if (round != stat2.round) {
       round = -1; // no meaning if aggregating tasks of different round. 
     }
+
+    if (countsByTime != null && stat2.countsByTime != null) {
+      if (countsByTimeStepMSec != stat2.countsByTimeStepMSec) {
+        throw new IllegalStateException("different by-time msec step");
+      }
+      if (countsByTime.length != stat2.countsByTime.length) {
+        throw new IllegalStateException("different by-time msec count");
+      }
+      for(int i=0;i<stat2.countsByTime.length;i++) {
+        countsByTime[i] += stat2.countsByTime[i];
+      }
+    }
   }
 
   /* (non-Javadoc)
@@ -181,7 +209,11 @@
    */
   @Override
   public Object clone() throws CloneNotSupportedException {
-    return super.clone();
+    TaskStats c = (TaskStats) super.clone();
+    if (c.countsByTime != null) {
+      c.countsByTime = (int[]) c.countsByTime.clone();
+    }
+    return c;
   }
 
   /**

Modified: lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NearRealtimeReaderTask.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NearRealtimeReaderTask.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NearRealtimeReaderTask.java (original)
+++ lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NearRealtimeReaderTask.java Wed Dec  2 17:26:34 2009
@@ -20,6 +20,7 @@
 import org.apache.lucene.benchmark.byTask.PerfRunData;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.util.ArrayUtil;
 
 /**
  * Spawns a BG thread that periodically (defaults to 3.0
@@ -36,6 +37,9 @@
 
   long pauseMSec = 3000L;
 
+  int reopenCount;
+  int[] reopenTimes = new int[1];
+
   public NearRealtimeReaderTask(PerfRunData runData) {
     super(runData);
   }
@@ -65,22 +69,27 @@
     // stddev, min/max reopen latencies
 
     // Parent sequence sets stopNow
-    int reopenCount = 0;
+    reopenCount = 0;
     while(!stopNow) {
       long waitForMsec = (long) (pauseMSec - (System.currentTimeMillis() - t));
       if (waitForMsec > 0) {
         Thread.sleep(waitForMsec);
+        //System.out.println("NRT wait: " + waitForMsec + " msec");
       }
 
       t = System.currentTimeMillis();
       final IndexReader newReader = r.reopen();
       if (r != newReader) {
+        final int delay = (int) (System.currentTimeMillis()-t);
+        if (reopenTimes.length == reopenCount) {
+          reopenTimes = ArrayUtil.grow(reopenTimes, 1+reopenCount);
+        }
+        reopenTimes[reopenCount++] = delay;
         // TODO: somehow we need to enable warming, here
         runData.setIndexReader(newReader);
         // Transfer our reference to runData
         newReader.decRef();
         r = newReader;
-        reopenCount++;
       }
     }
 
@@ -94,6 +103,15 @@
   }
 
   @Override
+  public void close() {
+    System.out.println("NRT reopen times:");
+    for(int i=0;i<reopenCount;i++) {
+      System.out.print(" " + reopenTimes[i]);
+    }
+    System.out.println();
+  }
+
+  @Override
   public boolean supportsParams() {
     return true;
   }

Modified: lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/PerfTask.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/PerfTask.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/PerfTask.java (original)
+++ lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/PerfTask.java Wed Dec  2 17:26:34 2009
@@ -61,6 +61,7 @@
   protected String params = null;
 
   private boolean runInBackground;
+  private int deltaPri;
 
   protected static final String NEW_LINE = System.getProperty("line.separator");
 
@@ -72,14 +73,19 @@
     }
   }
 
-  public void setRunInBackground() {
+  public void setRunInBackground(int deltaPri) {
     runInBackground = true;
+    this.deltaPri = deltaPri;
   }
 
   public boolean getRunInBackground() {
     return runInBackground;
   }
 
+  public int getBackgroundDeltaPriority() {
+    return deltaPri;
+  }
+
   protected volatile boolean stopNow;
 
   public void stopNow() {
@@ -216,6 +222,10 @@
     sb.append(getName());
     if (getRunInBackground()) {
       sb.append(" &");
+      int x = getBackgroundDeltaPriority();
+      if (x != 0) {
+        sb.append(x);
+      }
     }
     return sb.toString();
   }

Modified: lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReportTask.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReportTask.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReportTask.java (original)
+++ lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReportTask.java Wed Dec  2 17:26:34 2009
@@ -151,11 +151,26 @@
         line = line.replaceAll("   "," - ");
       }
       sb.append(line);
+      int[] byTime = stat.getCountsByTime();
+      if (byTime != null) {
+        sb.append(newline);
+        int end = -1;
+        for(int i=byTime.length-1;i>=0;i--) {
+          if (byTime[i] != 0) {
+            end = i;
+            break;
+          }
+        }
+        if (end != -1) {
+          sb.append("  by time:");
+          for(int i=0;i<end;i++) {
+            sb.append(' ').append(byTime[i]);
+          }
+        }
+      }
     }
+    
     String reptxt = (reported==0 ? "No Matching Entries Were Found!" : sb.toString());
     return new Report(reptxt,partOfTasks.size(),reported,totalSize);
   }
-
-
-
 }

Modified: lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/TaskSequence.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/TaskSequence.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/TaskSequence.java (original)
+++ lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/TaskSequence.java Wed Dec  2 17:26:34 2009
@@ -23,6 +23,8 @@
 
 import org.apache.lucene.benchmark.byTask.PerfRunData;
 import org.apache.lucene.benchmark.byTask.feeds.NoMoreDataException;
+import org.apache.lucene.benchmark.byTask.stats.TaskStats;
+import org.apache.lucene.util.ArrayUtil;
 
 /**
  * Sequence of parallel or sequential tasks.
@@ -45,6 +47,7 @@
   
   private boolean fixedTime;                      // true if we run for fixed time
   private double runTimeSec;                      // how long to run for
+  private final long logByTimeMsec;
 
   public TaskSequence (PerfRunData runData, String name, TaskSequence parent, boolean parallel) {
     super(runData);
@@ -55,6 +58,7 @@
     this.parent = parent;
     this.parallel = parallel;
     tasks = new ArrayList<PerfTask>();
+    logByTimeMsec = runData.getConfig().get("report.time.step.msec", 0);
   }
 
   @Override
@@ -76,6 +80,9 @@
         anyExhaustibleTasks |= tasksArray[k] instanceof TaskSequence;
       }
     }
+    if (!parallel && logByTimeMsec != 0 && !letChildReport) {
+      countsByTime = new int[1];
+    }
   }
 
   /**
@@ -92,6 +99,8 @@
     return repetitions;
   }
 
+  private int[] countsByTime;
+
   public void setRunTime(double sec) throws Exception {
     runTimeSec = sec;
     fixedTime = true;
@@ -108,9 +117,6 @@
       if (isParallel()) {
         throw new Exception("REPEAT_EXHAUST is not allowed for parallel tasks");
       }
-      if (getRunData().getConfig().get("content.source.forever",true)) {
-        throw new Exception("REPEAT_EXHAUST requires setting content.source.forever=false");
-      }
     }
     setSequenceName();
   }
@@ -167,11 +173,10 @@
     initTasksArray();
     int count = 0;
 
-    final long t0 = System.currentTimeMillis();
-
     final long runTime = (long) (runTimeSec*1000);
     List<RunBackgroundTask> bgTasks = null;
 
+    final long t0 = System.currentTimeMillis();
     for (int k=0; fixedTime || (repetitions==REPEAT_EXHAUST && !exhausted) || k<repetitions; k++) {
       if (stopNow) {
         break;
@@ -183,11 +188,20 @@
             bgTasks = new ArrayList<RunBackgroundTask>();
           }
           RunBackgroundTask bgTask = new RunBackgroundTask(task, letChildReport);
+          bgTask.setPriority(getBackgroundDeltaPriority() + Thread.currentThread().getPriority());
           bgTask.start();
           bgTasks.add(bgTask);
         } else {
           try {
-            count += task.runAndMaybeStats(letChildReport);
+            final int inc = task.runAndMaybeStats(letChildReport);
+            count += inc;
+            if (countsByTime != null) {
+              final int slot = (int) ((System.currentTimeMillis()-t0)/logByTimeMsec);
+              if (slot >= countsByTime.length) {
+                countsByTime = ArrayUtil.grow(countsByTime, 1+slot);
+              }
+              countsByTime[slot] += inc;
+            }
             if (anyExhaustibleTasks)
               updateExhausted(task);
           } catch (NoMoreDataException e) {
@@ -210,6 +224,11 @@
         count += bgTask.getCount();
       }
     }
+
+    if (countsByTime != null) {
+      getRunData().getPoints().getCurrentStats().setCountsByTime(countsByTime, logByTimeMsec);
+    }
+
     return count;
   }
 
@@ -218,6 +237,7 @@
     long delayStep = (perMin ? 60000 : 1000) /rate;
     long nextStartTime = System.currentTimeMillis();
     int count = 0;
+    final long t0 = System.currentTimeMillis();
     for (int k=0; (repetitions==REPEAT_EXHAUST && !exhausted) || k<repetitions; k++) {
       if (stopNow) {
         break;
@@ -238,7 +258,16 @@
         }
          nextStartTime += delayStep; // this aims at average rate. 
         try {
-          count += task.runAndMaybeStats(letChildReport);
+          final int inc = task.runAndMaybeStats(letChildReport);
+          count += inc;
+          if (countsByTime != null) {
+            final int slot = (int) ((System.currentTimeMillis()-t0)/logByTimeMsec);
+            if (slot >= countsByTime.length) {
+              countsByTime = ArrayUtil.grow(countsByTime, 1+slot);
+            }
+            countsByTime[slot] += inc;
+          }
+
           if (anyExhaustibleTasks)
             updateExhausted(task);
         } catch (NoMoreDataException e) {
@@ -305,6 +334,9 @@
   ParallelTask[] runningParallelTasks;
 
   private int doParallelTasks() throws Exception {
+
+    final TaskStats stats = getRunData().getPoints().getCurrentStats();
+
     initTasksArray();
     ParallelTask t[] = runningParallelTasks = new ParallelTask[repetitions * tasks.size()];
     // prepare threads
@@ -323,6 +355,23 @@
     for (int i = 0; i < t.length; i++) {
       t[i].join();
       count += t[i].count;
+      if (t[i].task instanceof TaskSequence) {
+        TaskSequence sub = (TaskSequence) t[i].task;
+        if (sub.countsByTime != null) {
+          if (countsByTime == null) {
+            countsByTime = new int[sub.countsByTime.length];
+          } else if (countsByTime.length < sub.countsByTime.length) {
+            countsByTime = ArrayUtil.grow(countsByTime, sub.countsByTime.length);
+          }
+          for(int j=0;j<sub.countsByTime.length;j++) {
+            countsByTime[j] += sub.countsByTime[j];
+          }
+        }
+      }
+    }
+
+    if (countsByTime != null) {
+      stats.setCountsByTime(countsByTime, logByTimeMsec);
     }
 
     // return total count
@@ -386,6 +435,10 @@
     }
     if (getRunInBackground()) {
       sb.append(" &");
+      int x = getBackgroundDeltaPriority();
+      if (x != 0) {
+        sb.append(x);
+      }
     }
     return sb.toString();
   }

Modified: lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Algorithm.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Algorithm.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Algorithm.java (original)
+++ lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Algorithm.java Wed Dec  2 17:26:34 2009
@@ -191,12 +191,22 @@
               if (currSequence.isParallel()) {
                 throw new Exception("Can only create background tasks within a serial task");
               }
+              stok.nextToken();
+              final int deltaPri;
+              if (stok.ttype != StreamTokenizer.TT_NUMBER) {
+                stok.pushBack();
+                deltaPri = 0;
+              } else {
+                // priority
+                deltaPri = (int) stok.nval;
+              }
+
               if (prevTask == null) {
                 throw new Exception("& was unexpected");
               } else if (prevTask.getRunInBackground()) {
                 throw new Exception("double & was unexpected");
               } else {
-                prevTask.setRunInBackground();
+                prevTask.setRunInBackground(deltaPri);
               }
               break;
     

Modified: lucene/java/branches/flex_1458/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java (original)
+++ lucene/java/branches/flex_1458/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java Wed Dec  2 17:26:34 2009
@@ -17,7 +17,6 @@
 
 package org.apache.lucene.benchmark.byTask;
 
-import java.io.IOException;
 import java.io.StringReader;
 import java.io.File;
 import java.io.FileReader;
@@ -25,9 +24,6 @@
 import java.util.List;
 import java.util.Iterator;
 
-import org.apache.lucene.benchmark.byTask.feeds.DocData;
-import org.apache.lucene.benchmark.byTask.feeds.NoMoreDataException;
-import org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource;
 import org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker;
 import org.apache.lucene.benchmark.byTask.tasks.CountingSearchTestTask;
 import org.apache.lucene.benchmark.byTask.tasks.CountingHighlighterTestTask;
@@ -43,13 +39,12 @@
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.search.FieldCache.StringIndex;
 import org.apache.lucene.search.FieldCache;
-
-import junit.framework.TestCase;
+import org.apache.lucene.util.LuceneTestCase;
 
 /**
  * Test very simply that perf tasks - simple algorithms - are doing what they should.
  */
-public class TestPerfTasksLogic extends TestCase {
+public class TestPerfTasksLogic extends LuceneTestCase {
 
   private static final boolean DEBUG = false;
   static final String NEW_LINE = System.getProperty("line.separator");
@@ -107,13 +102,14 @@
    */
   public void xxxtestTimedSearchTask() throws Exception {
     String algLines[] = {
+        "log.step=100000",
         "ResetSystemErase",
         "CreateIndex",
-        "{ AddDoc } : 1000",
+        "{ AddDoc } : 100",
         "Optimize",
         "CloseIndex",
         "OpenReader",
-        "{ CountingSearchTest } : 1.5s",
+        "{ CountingSearchTest } : .5s",
         "CloseReader",
     };
 
@@ -124,15 +120,39 @@
     assertTrue("elapsed time was " + elapsed + " msec", elapsed <= 1500);
   }
 
+  public void testBGSearchTaskThreads() throws Exception {
+    String algLines[] = {
+        "log.time.step.msec = 100",
+        "log.step=100000",
+        "ResetSystemErase",
+        "CreateIndex",
+        "{ AddDoc } : 1000",
+        "Optimize",
+        "CloseIndex",
+        "OpenReader",
+        "{",
+        "  [ \"XSearch\" { CountingSearchTest > : * ] : 2 &-1",
+        "  Wait(0.5)",
+        "}",
+        "CloseReader",
+        "RepSumByPref X"
+    };
+
+    CountingSearchTestTask.numSearches = 0;
+    execBenchmark(algLines);
+    assertTrue(CountingSearchTestTask.numSearches > 0);
+  }
+
   public void testHighlighting() throws Exception {
     // 1. alg definition (required in every "logic" test)
     String algLines[] = {
         "doc.stored=true",
-        "content.source="+Reuters20ContentSource.class.getName(),
+        "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
+        "docs.file=" + getReuters20LinesFile(),
         "query.maker=" + ReutersQueryMaker.class.getName(),
         "ResetSystemErase",
         "CreateIndex",
-        "{ AddDoc } : 1000",
+        "{ AddDoc } : 100",
         "Optimize",
         "CloseIndex",
         "OpenReader(true)",
@@ -147,7 +167,7 @@
     Benchmark benchmark = execBenchmark(algLines);
 
     // 4. test specific checks after the benchmark run completed.
-    assertEquals("TestSearchTask was supposed to be called!",147,CountingHighlighterTestTask.numDocsRetrieved);
+    assertEquals("TestSearchTask was supposed to be called!",92,CountingHighlighterTestTask.numDocsRetrieved);
     //pretty hard to figure out a priori how many docs are going to have highlighted fragments returned, but we can never have more than the number of docs
     //we probably should use a different doc/query maker, but...
     assertTrue("TestSearchTask was supposed to be called!", CountingHighlighterTestTask.numDocsRetrieved >= CountingHighlighterTestTask.numHighlightedResults && CountingHighlighterTestTask.numHighlightedResults > 0);
@@ -157,7 +177,7 @@
     IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(),null,false, IndexWriter.MaxFieldLength.LIMITED);
     iw.close();
     IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
-    assertEquals("1000 docs were added to the index, this is what we expect to find!",1000,ir.numDocs());
+    assertEquals("100 docs were added to the index, this is what we expect to find!",100,ir.numDocs());
     ir.close();
   }
 
@@ -166,7 +186,8 @@
     String algLines[] = {
         "doc.stored=true",//doc storage is required in order to have text to highlight
         "doc.term.vector.offsets=true",
-        "content.source="+Reuters20ContentSource.class.getName(),
+        "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
+        "docs.file=" + getReuters20LinesFile(),
         "query.maker=" + ReutersQueryMaker.class.getName(),
         "ResetSystemErase",
         "CreateIndex",
@@ -185,7 +206,7 @@
     Benchmark benchmark = execBenchmark(algLines);
 
     // 4. test specific checks after the benchmark run completed.
-    assertEquals("TestSearchTask was supposed to be called!",147,CountingHighlighterTestTask.numDocsRetrieved);
+    assertEquals("TestSearchTask was supposed to be called!",92,CountingHighlighterTestTask.numDocsRetrieved);
     //pretty hard to figure out a priori how many docs are going to have highlighted fragments returned, but we can never have more than the number of docs
     //we probably should use a different doc/query maker, but...
     assertTrue("TestSearchTask was supposed to be called!", CountingHighlighterTestTask.numDocsRetrieved >= CountingHighlighterTestTask.numHighlightedResults && CountingHighlighterTestTask.numHighlightedResults > 0);
@@ -203,7 +224,8 @@
     // 1. alg definition (required in every "logic" test)
     String algLines[] = {
         "doc.stored=false",
-        "content.source="+Reuters20ContentSource.class.getName(),
+        "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
+        "docs.file=" + getReuters20LinesFile(),
         "query.maker=" + ReutersQueryMaker.class.getName(),
         "ResetSystemErase",
         "CreateIndex",
@@ -287,7 +309,7 @@
         "doc.index.props=true",
         "# ----- alg ",
         "CreateIndex",
-        "[ { AddDoc > : 2500 ] : 4",
+        "[ { AddDoc > : 250 ] : 4",
         "CloseIndex",
     };
     
@@ -300,8 +322,8 @@
     IndexReader r = IndexReader.open(benchmark.getRunData().getDirectory(), true);
     StringIndex idx = FieldCache.DEFAULT.getStringIndex(r, "country");
     final int maxDoc = r.maxDoc();
-    assertEquals(10000, maxDoc);
-    for(int i=0;i<10000;i++) {
+    assertEquals(1000, maxDoc);
+    for(int i=0;i<1000;i++) {
       assertNotNull("doc " + i + " has null country", idx.lookup[idx.order[i]]);
     }
     r.close();
@@ -314,7 +336,8 @@
     // 1. alg definition (required in every "logic" test)
     String algLines[] = {
         "# ----- properties ",
-        "content.source="+Reuters20ContentSource.class.getName(),
+        "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
+        "docs.file=" + getReuters20LinesFile(),
         "content.source.log.step=3",
         "doc.term.vector=false",
         "content.source.forever=false",
@@ -332,7 +355,7 @@
 
     // 3. test number of docs in the index
     IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
-    int ndocsExpected = 20; // Reuters20ContentSource exhausts after 20 docs.
+    int ndocsExpected = 20; // first 20 reuters docs.
     assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());
     ir.close();
   }
@@ -344,13 +367,13 @@
     File lineFile = new File(System.getProperty("tempDir"), "test.reuters.lines.txt");
 
     // We will call WriteLineDocs this many times
-    final int NUM_TRY_DOCS = 500;
+    final int NUM_TRY_DOCS = 50;
 
-    // Creates a line file with first 500 docs from reuters
+    // Creates a line file with first 50 docs from SingleDocSource
     String algLines1[] = {
       "# ----- properties ",
-      "content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource",
-      "content.source.forever=false",
+      "content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource",
+      "content.source.forever=true",
       "line.file.out=" + lineFile.getAbsolutePath().replace('\\', '/'),
       "# ----- alg ",
       "{WriteLineDoc()}:" + NUM_TRY_DOCS,
@@ -359,15 +382,12 @@
     // Run algo
     Benchmark benchmark = execBenchmark(algLines1);
 
-    // Verify we got somewhere between 1-500 lines (some
-    // Reuters docs have no body, which WriteLineDoc task
-    // skips).
     BufferedReader r = new BufferedReader(new FileReader(lineFile));
     int numLines = 0;
     while(r.readLine() != null)
       numLines++;
     r.close();
-    assertTrue("did not see the right number of docs; should be > 0 and <= " + NUM_TRY_DOCS + " but was " + numLines, numLines > 0 && numLines <= NUM_TRY_DOCS);
+    assertEquals("did not see the right number of docs; should be " + NUM_TRY_DOCS + " but was " + numLines, NUM_TRY_DOCS, numLines);
     
     // Index the line docs
     String algLines2[] = {
@@ -405,14 +425,15 @@
   public void xxxtestReadTokens() throws Exception {
 
     // We will call ReadTokens on this many docs
-    final int NUM_DOCS = 100;
+    final int NUM_DOCS = 20;
 
     // Read tokens from first NUM_DOCS docs from Reuters and
     // then build index from the same docs
     String algLines1[] = {
       "# ----- properties ",
       "analyzer=org.apache.lucene.analysis.WhitespaceAnalyzer",
-      "content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource",
+      "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
+      "docs.file=" + getReuters20LinesFile(),
       "# ----- alg ",
       "{ReadTokens}: " + NUM_DOCS,
       "ResetSystemErase",
@@ -464,7 +485,8 @@
     // 1. alg definition (required in every "logic" test)
     String algLines[] = {
         "# ----- properties ",
-        "content.source="+Reuters20ContentSource.class.getName(),
+        "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
+        "docs.file=" + getReuters20LinesFile(),
         "content.source.log.step=3",
         "doc.term.vector=false",
         "content.source.forever=false",
@@ -485,7 +507,7 @@
 
     // 3. test number of docs in the index
     IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
-    int ndocsExpected = 2 * 20; // Reuters20ContentSource exhausts after 20 docs.
+    int ndocsExpected = 2 * 20; // first 20 reuters docs.
     assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());
     ir.close();
   }
@@ -519,27 +541,6 @@
     System.out.println(txt);
   }
 
-  /** use reuters and the exhaust mechanism, but to be faster, add 20 docs only... */
-  public static class Reuters20ContentSource extends ReutersContentSource {
-    private int nDocs = 0;
-
-    @Override
-    public synchronized DocData getNextDocData(DocData docData)
-        throws NoMoreDataException, IOException {
-      if (nDocs >= 20 && !forever) {
-        throw new NoMoreDataException();
-      }
-      nDocs++;
-      return super.getNextDocData(docData);
-    }
-
-    @Override
-    public synchronized void resetInputs() throws IOException {
-      super.resetInputs();
-      nDocs = 0;
-    }
-  }
-  
   /**
    * Test that exhaust in loop works as expected (LUCENE-1115).
    */
@@ -547,7 +548,8 @@
     // 1. alg definition (required in every "logic" test)
     String algLines[] = {
         "# ----- properties ",
-        "content.source="+Reuters20ContentSource.class.getName(),
+        "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
+        "docs.file=" + getReuters20LinesFile(),
         "content.source.log.step=3",
         "doc.term.vector=false",
         "content.source.forever=false",
@@ -569,7 +571,7 @@
 
     // 3. test number of docs in the index
     IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
-    int ndocsExpected = 20; // Reuters20ContentSource exhausts after 20 docs.
+    int ndocsExpected = 20;  // first 20 reuters docs.
     assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());
     ir.close();
   }
@@ -581,7 +583,8 @@
     // 1. alg definition (required in every "logic" test)
     String algLines[] = {
         "# ----- properties ",
-        "content.source="+Reuters20ContentSource.class.getName(),
+        "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
+        "docs.file=" + getReuters20LinesFile(),
         "ram.flush.mb=-1",
         "max.buffered=2",
         "content.source.log.step=3",
@@ -605,7 +608,7 @@
 
     // 3. test number of docs in the index
     IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
-    int ndocsExpected = 20; // Reuters20ContentSource exhausts after 20 docs.
+    int ndocsExpected = 20; // first 20 reuters docs.
     assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());
     ir.close();
   }
@@ -625,7 +628,8 @@
     // 1. alg definition (required in every "logic" test)
     String algLines[] = {
         "# ----- properties ",
-        "content.source="+Reuters20ContentSource.class.getName(),
+        "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
+        "docs.file=" + getReuters20LinesFile(),
         "content.source.log.step=3",
         "doc.term.vector=false",
         "content.source.forever=false",
@@ -649,7 +653,7 @@
 
     // 3. test number of docs in the index
     IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
-    int ndocsExpected = 20; // Reuters20ContentSource exhausts after 20 docs.
+    int ndocsExpected = 20; // first 20 reuters docs.
     assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());
     ir.close();
   }
@@ -668,7 +672,8 @@
     // 1. alg definition (required in every "logic" test)
     String algLines[] = {
         "# ----- properties ",
-        "content.source="+Reuters20ContentSource.class.getName(),
+        "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
+        "docs.file=" + getReuters20LinesFile(),
         "content.source.log.step=3",
         "ram.flush.mb=-1",
         "max.buffered=2",
@@ -694,7 +699,7 @@
     
     // 3. test number of docs in the index
     IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
-    int ndocsExpected = 20; // Reuters20ContentSource exhausts after 20 docs.
+    int ndocsExpected = 20; // first 20 reuters docs.
     assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());
     ir.close();
   }
@@ -706,7 +711,8 @@
     // 1. alg definition (required in every "logic" test)
     String algLines[] = {
         "# ----- properties ",
-        "content.source="+Reuters20ContentSource.class.getName(),
+        "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
+        "docs.file=" + getReuters20LinesFile(),
         "content.source.log.step=3",
         "ram.flush.mb=-1",
         "max.buffered=2",
@@ -750,7 +756,8 @@
     // 1. alg definition (required in every "logic" test)
     String algLines[] = {
         "# ----- properties ",
-        "content.source="+Reuters20ContentSource.class.getName(),
+        "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
+        "docs.file=" + getReuters20LinesFile(),
         "content.source.log.step=3",
         "ram.flush.mb=-1",
         "max.buffered=3",
@@ -776,7 +783,7 @@
 
     // 3. test number of docs in the index
     IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
-    int ndocsExpected = 20; // Reuters20ContentSource exhausts after 20 docs.
+    int ndocsExpected = 20; // first 20 reuters docs.
     assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());
     ir.close();
 
@@ -828,7 +835,8 @@
     String dis = disable ? "-" : "";
     return new String[] {
         "# ----- properties ",
-        "content.source="+Reuters20ContentSource.class.getName(),
+        "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
+        "docs.file=" + getReuters20LinesFile(),
         "content.source.log.step=30",
         "doc.term.vector=false",
         "content.source.forever=false",
@@ -846,5 +854,9 @@
         "RepSumByName",
     };
   }
-  
+
+  private static String getReuters20LinesFile() {
+    return System.getProperty("lucene.common.dir").replace('\\','/') +
+      "/contrib/benchmark/src/test/org/apache/lucene/benchmark/reuters.first20.lines.txt";
+  }  
 }

Propchange: lucene/java/branches/flex_1458/contrib/highlighter/src/test/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Wed Dec  2 17:26:34 2009
@@ -2,4 +2,4 @@
 /lucene/java/branches/lucene_2_9/contrib/highlighter/src/test:817269-818600,825998,826775,829134,829816,829881,831036
 /lucene/java/branches/lucene_2_9_back_compat_tests/contrib/highlighter/src/test:818601-821336
 /lucene/java/branches/lucene_3_0/contrib/highlighter/src/test:880793
-/lucene/java/trunk/contrib/highlighter/src/test:829439-833960,880727-882265,883074
+/lucene/java/trunk/contrib/highlighter/src/test:829439-833960,880727-886190

Modified: lucene/java/branches/flex_1458/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java (original)
+++ lucene/java/branches/flex_1458/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java Wed Dec  2 17:26:34 2009
@@ -43,7 +43,6 @@
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermVectorOffsetInfo;
-import org.apache.lucene.search.DefaultSimilarity;
 import org.apache.lucene.search.Similarity;
 import org.apache.lucene.util.StringHelper;
 import org.apache.lucene.util.AttributeImpl;
@@ -202,9 +201,9 @@
       byte[] oldNorms = index.getNormsByFieldNameAndDocumentNumber().get(field);
       if (oldNorms != null) {
         System.arraycopy(oldNorms, 0, norms, 0, oldNorms.length);
-        Arrays.fill(norms, oldNorms.length, norms.length, DefaultSimilarity.encodeNorm(1.0f));
+        Arrays.fill(norms, oldNorms.length, norms.length, similarity.encodeNormValue(1.0f));
       } else {
-        Arrays.fill(norms, 0, norms.length, DefaultSimilarity.encodeNorm(1.0f));
+        Arrays.fill(norms, 0, norms.length, similarity.encodeNormValue(1.0f));
       }
       normsByFieldNameAndDocumentNumber.put(field, norms);
       fieldNames.remove(field);
@@ -212,7 +211,7 @@
     for (String field : fieldNames) {
       //System.out.println(field);
       byte[] norms = new byte[index.getDocumentsByNumber().length + termDocumentInformationFactoryByDocument.size()];
-      Arrays.fill(norms, 0, norms.length, DefaultSimilarity.encodeNorm(1.0f));
+      Arrays.fill(norms, 0, norms.length, similarity.encodeNormValue(1.0f));
       normsByFieldNameAndDocumentNumber.put(field, norms);
     }
     fieldNames.clear();
@@ -240,7 +239,7 @@
           float norm = eFieldTermDocInfoFactoriesByTermText.getKey().boost;
           norm *= document.getDocument().getBoost();
           norm *= similarity.lengthNorm(eFieldTermDocInfoFactoriesByTermText.getKey().fieldName, eFieldTermDocInfoFactoriesByTermText.getKey().fieldLength);
-          normsByFieldNameAndDocumentNumber.get(eFieldTermDocInfoFactoriesByTermText.getKey().fieldName)[document.getDocumentNumber()] = Similarity.encodeNorm(norm);
+          normsByFieldNameAndDocumentNumber.get(eFieldTermDocInfoFactoriesByTermText.getKey().fieldName)[document.getDocumentNumber()] = similarity.encodeNormValue(norm);
         } else {
           System.currentTimeMillis();
         }

Modified: lucene/java/branches/flex_1458/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java?rev=886210&r1=886209&r2=886210&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (original)
+++ lucene/java/branches/flex_1458/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java Wed Dec  2 17:26:34 2009
@@ -50,7 +50,7 @@
 import org.apache.lucene.search.Searcher;
 import org.apache.lucene.search.Scorer;
 import org.apache.lucene.search.Similarity;
-import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.store.RAMDirectory; // for javadocs
 
 /**
  * High-performance single-document main memory Apache Lucene fulltext search index. 
@@ -1102,7 +1102,7 @@
         float boost = info != null ? info.getBoost() : 1.0f; 
         FieldInvertState invertState = new FieldInvertState(0, numTokens, numOverlapTokens, 0, boost);
         float n = sim.computeNorm(fieldName, invertState);
-        byte norm = Similarity.encodeNorm(n);
+        byte norm = sim.encodeNormValue(n);
         norms = new byte[] {norm};
         
         // cache it for future reuse



Mime
View raw message