lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From rm...@apache.org
Subject svn commit: r888891 [3/3] - in /lucene/java/branches/flex_1458: ./ src/java/org/apache/lucene/search/ src/java/org/apache/lucene/util/ src/java/org/apache/lucene/util/automaton/ src/test/org/apache/lucene/search/
Date Wed, 09 Dec 2009 17:45:59 GMT
Added: lucene/java/branches/flex_1458/src/java/org/apache/lucene/util/automaton/Transition.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/util/automaton/Transition.java?rev=888891&view=auto
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/util/automaton/Transition.java (added)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/util/automaton/Transition.java Wed Dec  9 17:45:58 2009
@@ -0,0 +1,179 @@
+/*
+ * dk.brics.automaton
+ * 
+ * Copyright (c) 2001-2009 Anders Moeller
+ * All rights reserved.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+package org.apache.lucene.util.automaton;
+
+import java.io.Serializable;
+
+/**
+ * <tt>Automaton</tt> transition.
+ * <p>
+ * A transition, which belongs to a source state, consists of a Unicode
+ * character interval and a destination state.
+ * 
+ * <p><font color="#FF0000">
+ * WARNING: The status of the <b>Automaton</b> feature is experimental.
+ * The APIs introduced here might change in the future and will not be
+ * supported anymore in such a case.</font>
+ */
+public class Transition implements Serializable, Cloneable {
+  
+  static final long serialVersionUID = 40001;
+  
+  /*
+   * CLASS INVARIANT: min<=max
+   */
+
+  char min;
+  char max;
+  
+  State to;
+  
+  /**
+   * Constructs a new singleton interval transition.
+   * 
+   * @param c transition character
+   * @param to destination state
+   */
+  public Transition(char c, State to) {
+    min = max = c;
+    this.to = to;
+  }
+  
+  /**
+   * Constructs a new transition. Both end points are included in the interval.
+   * 
+   * @param min transition interval minimum
+   * @param max transition interval maximum
+   * @param to destination state
+   */
+  public Transition(char min, char max, State to) {
+    if (max < min) {
+      char t = max;
+      max = min;
+      min = t;
+    }
+    this.min = min;
+    this.max = max;
+    this.to = to;
+  }
+  
+  /** Returns minimum of this transition interval. */
+  public char getMin() {
+    return min;
+  }
+  
+  /** Returns maximum of this transition interval. */
+  public char getMax() {
+    return max;
+  }
+  
+  /** Returns destination of this transition. */
+  public State getDest() {
+    return to;
+  }
+  
+  /**
+   * Checks for equality.
+   * 
+   * @param obj object to compare with
+   * @return true if <tt>obj</tt> is a transition with same character interval
+   *         and destination state as this transition.
+   */
+  @Override
+  public boolean equals(Object obj) {
+    if (obj instanceof Transition) {
+      Transition t = (Transition) obj;
+      return t.min == min && t.max == max && t.to == to;
+    } else return false;
+  }
+  
+  /**
+   * Returns hash code. The hash code is based on the character interval (not
+   * the destination state).
+   * 
+   * @return hash code
+   */
+  @Override
+  public int hashCode() {
+    return min * 2 + max * 3;
+  }
+  
+  /**
+   * Clones this transition.
+   * 
+   * @return clone with same character interval and destination state
+   */
+  @Override
+  public Transition clone() {
+    try {
+      return (Transition) super.clone();
+    } catch (CloneNotSupportedException e) {
+      throw new RuntimeException(e);
+    }
+  }
+  
+  static void appendCharString(char c, StringBuilder b) {
+    if (c >= 0x21 && c <= 0x7e && c != '\\' && c != '"') b.append(c);
+    else {
+      b.append("\\u");
+      String s = Integer.toHexString(c);
+      if (c < 0x10) b.append("000").append(s);
+      else if (c < 0x100) b.append("00").append(s);
+      else if (c < 0x1000) b.append("0").append(s);
+      else b.append(s);
+    }
+  }
+  
+  /**
+   * Returns a string describing this transition. Normally invoked via
+   * {@link Automaton#toString()}.
+   */
+  @Override
+  public String toString() {
+    StringBuilder b = new StringBuilder();
+    appendCharString(min, b);
+    if (min != max) {
+      b.append("-");
+      appendCharString(max, b);
+    }
+    b.append(" -> ").append(to.number);
+    return b.toString();
+  }
+  
+  void appendDot(StringBuilder b) {
+    b.append(" -> ").append(to.number).append(" [label=\"");
+    appendCharString(min, b);
+    if (min != max) {
+      b.append("-");
+      appendCharString(max, b);
+    }
+    b.append("\"]\n");
+  }
+}

Propchange: lucene/java/branches/flex_1458/src/java/org/apache/lucene/util/automaton/Transition.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/branches/flex_1458/src/java/org/apache/lucene/util/automaton/TransitionComparator.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/util/automaton/TransitionComparator.java?rev=888891&view=auto
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/util/automaton/TransitionComparator.java (added)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/util/automaton/TransitionComparator.java Wed Dec  9 17:45:58 2009
@@ -0,0 +1,80 @@
+/*
+ * dk.brics.automaton
+ * 
+ * Copyright (c) 2001-2009 Anders Moeller
+ * All rights reserved.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+package org.apache.lucene.util.automaton;
+
+import java.io.Serializable;
+import java.util.Comparator;
+
+/**
+ * Comparator for state {@link Transition}s that orders unicode char range
+ * transitions in lexicographic order.
+ * 
+ * <p><font color="#FF0000">
+ * WARNING: The status of the <b>Automaton</b> feature is experimental.
+ * The APIs introduced here might change in the future and will not be
+ * supported anymore in such a case.</font>
+ */
+class TransitionComparator implements Comparator<Transition>, Serializable {
+  
+  static final long serialVersionUID = 10001;
+  
+  boolean to_first;
+  
+  TransitionComparator(boolean to_first) {
+    this.to_first = to_first;
+  }
+  
+  /**
+   * Compares by (min, reverse max, to) or (to, min, reverse max).
+   */
+  public int compare(Transition t1, Transition t2) {
+    if (to_first) {
+      if (t1.to != t2.to) {
+        if (t1.to == null) return -1;
+        else if (t2.to == null) return 1;
+        else if (t1.to.number < t2.to.number) return -1;
+        else if (t1.to.number > t2.to.number) return 1;
+      }
+    }
+    if (t1.min < t2.min) return -1;
+    if (t1.min > t2.min) return 1;
+    if (t1.max > t2.max) return -1;
+    if (t1.max < t2.max) return 1;
+    if (!to_first) {
+      if (t1.to != t2.to) {
+        if (t1.to == null) return -1;
+        else if (t2.to == null) return 1;
+        else if (t1.to.number < t2.to.number) return -1;
+        else if (t1.to.number > t2.to.number) return 1;
+      }
+    }
+    return 0;
+  }
+}

Propchange: lucene/java/branches/flex_1458/src/java/org/apache/lucene/util/automaton/TransitionComparator.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/branches/flex_1458/src/java/org/apache/lucene/util/automaton/package.html
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/util/automaton/package.html?rev=888891&view=auto
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/util/automaton/package.html (added)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/util/automaton/package.html Wed Dec  9 17:45:58 2009
@@ -0,0 +1,50 @@
+<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
+<!--
+ dk.brics.automaton
+ 
+ Copyright (c) 2001-2009 Anders Moeller
+ All rights reserved.
+ 
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ 1. Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+ 2. Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions and the following disclaimer in the
+    documentation and/or other materials provided with the distribution.
+ 3. The name of the author may not be used to endorse or promote products
+    derived from this software without specific prior written permission.
+ 
+ THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+-->
+<html>
+<body>
+Finite-state automaton for regular expressions.
+<p>
+This package contains a full DFA/NFA implementation with Unicode
+alphabet and support for all standard (and a number of non-standard)
+regular expression operations.
+<p>
+The most commonly used functionality is located in the classes
+<tt>{@link org.apache.lucene.util.automaton.Automaton}</tt> and
+<tt>{@link org.apache.lucene.util.automaton.RegExp}</tt>.
+<p>
+For more information, go to the package home page at 
+<tt><a href="http://www.brics.dk/automaton/" 
+target="_top">http://www.brics.dk/automaton/</a></tt>.
+<p><font color="#FF0000">
+WARNING: The status of the <b>Automaton</b> feature is experimental.
+The APIs introduced here might change in the future and will not be
+supported anymore in such a case.</font>
+</body>
+</html>

Propchange: lucene/java/branches/flex_1458/src/java/org/apache/lucene/util/automaton/package.html
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/branches/flex_1458/src/test/org/apache/lucene/search/TestAutomatonQuery.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/test/org/apache/lucene/search/TestAutomatonQuery.java?rev=888891&view=auto
==============================================================================
--- lucene/java/branches/flex_1458/src/test/org/apache/lucene/search/TestAutomatonQuery.java (added)
+++ lucene/java/branches/flex_1458/src/test/org/apache/lucene/search/TestAutomatonQuery.java Wed Dec  9 17:45:58 2009
@@ -0,0 +1,224 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Collections;
+
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.Version;
+import org.apache.lucene.util.automaton.Automaton;
+import org.apache.lucene.util.automaton.BasicAutomata;
+import org.apache.lucene.util.automaton.BasicOperations;
+
+public class TestAutomatonQuery extends LuceneTestCase {
+  private IndexSearcher searcher;
+  
+  private final String FN = "field";
+  
+  public void setUp() throws Exception {
+    super.setUp();
+    RAMDirectory directory = new RAMDirectory();
+    IndexWriter writer = new IndexWriter(directory, new StandardAnalyzer(
+        Version.LUCENE_CURRENT, Collections.emptySet()), true,
+        IndexWriter.MaxFieldLength.LIMITED);
+    Document doc = new Document();
+    Field titleField = new Field("title", "some title", Field.Store.NO,
+        Field.Index.ANALYZED);
+    Field field = new Field(FN, "this is document one 2345", Field.Store.NO,
+        Field.Index.ANALYZED);
+    Field footerField = new Field("footer", "a footer", Field.Store.NO,
+        Field.Index.ANALYZED);
+    doc.add(titleField);
+    doc.add(field);
+    doc.add(footerField);
+    writer.addDocument(doc);
+    field.setValue("some text from doc two, a short piece. 5678.91");
+    writer.addDocument(doc);
+    field.setValue("doc three has some different stuff"
+        + ": with numbers 1234 5678.9 and letter b");
+    writer.addDocument(doc);
+    writer.optimize();
+    writer.close();
+    searcher = new IndexSearcher(directory, true);
+  }
+  
+  public void tearDown() throws Exception {
+    searcher.close();
+    super.tearDown();
+  }
+  
+  private Term newTerm(String value) {
+    return new Term(FN, value);
+  }
+  
+  private int automatonQueryNrHits(AutomatonQuery query) throws IOException {
+    return searcher.search(query, 5).totalHits;
+  }
+  
+  private void assertAutomatonHits(int expected, Automaton automaton)
+      throws IOException {
+    AutomatonQuery query = new AutomatonQuery(newTerm("bogus"), automaton);
+    
+    query.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
+    assertEquals(expected, automatonQueryNrHits(query));
+    
+    query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
+    assertEquals(expected, automatonQueryNrHits(query));
+    
+    query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE);
+    assertEquals(expected, automatonQueryNrHits(query));
+    
+    query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT);
+    assertEquals(expected, automatonQueryNrHits(query));
+  }
+  
+  /**
+   * Test some very simple automata.
+   */
+  public void testBasicAutomata() throws IOException {
+    assertAutomatonHits(0, BasicAutomata.makeEmpty());
+    assertAutomatonHits(0, BasicAutomata.makeEmptyString());
+    assertAutomatonHits(2, BasicAutomata.makeAnyChar());
+    assertAutomatonHits(3, BasicAutomata.makeAnyString());
+    assertAutomatonHits(2, BasicAutomata.makeString("doc"));
+    assertAutomatonHits(1, BasicAutomata.makeChar('a'));
+    assertAutomatonHits(2, BasicAutomata.makeCharRange('a', 'b'));
+    assertAutomatonHits(2, BasicAutomata.makeCharSet("ab"));
+    assertAutomatonHits(1, BasicAutomata.makeDecimalValue("5678.9"));
+    assertAutomatonHits(1, BasicAutomata.makeDecimalValue("2345"));
+    assertAutomatonHits(3, BasicAutomata.makeFractionDigits(3));
+    assertAutomatonHits(1, BasicAutomata.makeIntegerValue("1234"));
+    assertAutomatonHits(2, BasicAutomata.makeInterval(1233, 2346, 0));
+    assertAutomatonHits(1, BasicAutomata.makeInterval(0, 2000, 0));
+    assertAutomatonHits(2, BasicAutomata.makeMaxInteger("003000"));
+    assertAutomatonHits(1, BasicAutomata.makeMinInteger("002000"));
+    assertAutomatonHits(2, BasicAutomata.makeStringMatcher("ome"));
+    assertAutomatonHits(2, BasicAutomata.makeTotalDigits(5));
+    assertAutomatonHits(2, BasicOperations.union(BasicAutomata.makeChar('a'),
+        BasicAutomata.makeChar('b')));
+    assertAutomatonHits(0, BasicOperations.intersection(BasicAutomata
+        .makeChar('a'), BasicAutomata.makeChar('b')));
+    assertAutomatonHits(1, BasicOperations.minus(BasicAutomata
+        .makeMaxInteger("3000"), BasicAutomata.makeIntegerValue("1234")));
+  }
+  
+  /**
+   * Test that a nondeterministic automaton works correctly. (It will be
+   * determinized.)
+   */
+  public void testNFA() throws IOException {
+    // accept this or three, the union is an NFA (two transitions for 't' from
+    // initial state)
+    Automaton nfa = BasicOperations.union(BasicAutomata.makeString("this"),
+        BasicAutomata.makeString("three"));
+    assertAutomatonHits(2, nfa);
+  }
+  
+  public void testEquals() {
+    AutomatonQuery a1 = new AutomatonQuery(newTerm("foobar"), BasicAutomata
+        .makeString("foobar"));
+    // reference to a1
+    AutomatonQuery a2 = a1;
+    // same as a1 (accepts the same language, same term)
+    AutomatonQuery a3 = new AutomatonQuery(newTerm("foobar"), BasicOperations
+        .concatenate(BasicAutomata.makeString("foo"), BasicAutomata
+            .makeString("bar")));
+    // different than a1 (same term, but different language)
+    AutomatonQuery a4 = new AutomatonQuery(newTerm("foobar"), BasicAutomata
+        .makeString("different"));
+    // different than a1 (different term, same language)
+    AutomatonQuery a5 = new AutomatonQuery(newTerm("blah"), BasicAutomata
+        .makeString("foobar"));
+    
+    assertEquals(a1, a2);
+    assertEquals(a1.hashCode(), a2.hashCode());
+    
+    assertEquals(a1, a3);
+    assertEquals(a1.hashCode(), a3.hashCode());
+    
+    assertEquals(a1.toString(), a3.toString());
+    
+    // different class
+    AutomatonQuery w1 = new WildcardQuery(newTerm("foobar"));
+    // different class
+    AutomatonQuery w2 = new RegexpQuery(newTerm("foobar"));
+    
+    assertFalse(a1.equals(w1));
+    assertFalse(a1.equals(w2));
+    assertFalse(w1.equals(w2));
+    assertFalse(a1.equals(a4));
+    assertFalse(a1.equals(a5));
+    assertFalse(a1.equals(null));
+  }
+  
+  /**
+   * Test that rewriting to a single term works as expected, preserves
+   * MultiTermQuery semantics.
+   */
+  public void testRewriteSingleTerm() throws IOException {
+    AutomatonQuery aq = new AutomatonQuery(newTerm("bogus"), BasicAutomata
+        .makeString("piece"));
+    assertTrue(aq.getTermsEnum(searcher.getIndexReader()) instanceof SingleTermsEnum);
+    assertEquals(1, automatonQueryNrHits(aq));
+  }
+  
+  /**
+   * Test that rewriting to a prefix query works as expected, preserves
+   * MultiTermQuery semantics.
+   */
+  public void testRewritePrefix() throws IOException {
+    Automaton pfx = BasicAutomata.makeString("do");
+    pfx.expandSingleton(); // expand singleton representation for testing
+    Automaton prefixAutomaton = BasicOperations.concatenate(pfx, BasicAutomata
+        .makeAnyString());
+    AutomatonQuery aq = new AutomatonQuery(newTerm("bogus"), prefixAutomaton);
+    assertTrue(aq.getTermsEnum(searcher.getIndexReader()) instanceof PrefixTermsEnum);
+    assertEquals(3, automatonQueryNrHits(aq));
+  }
+  
+  /**
+   * Test that a badly-performing automaton that must visit all the terms does
+   * not use the smart enumeration, which would just waste CPU.
+   */
+  public void testLinearOptimization() throws IOException {
+    AutomatonQuery aq = new RegexpQuery(newTerm(".*ument"));
+    assertTrue(((AutomatonTermsEnum) aq.getTermsEnum(searcher.getIndexReader()))
+        .usesLinearMode());
+    assertEquals(1, automatonQueryNrHits(aq));
+  }
+  
+  /**
+   * Test that an automaton accepting no strings is handled with an empty
+   * terms enumeration, so that the query matches no documents.
+   */
+  public void testEmptyOptimization() throws IOException {
+    AutomatonQuery aq = new AutomatonQuery(newTerm("bogus"), BasicAutomata
+        .makeEmpty());
+    // not yet available: assertTrue(aq.getEnum(searcher.getIndexReader())
+    // instanceof EmptyTermEnum);
+    assertTrue(aq.getTermsEnum(searcher.getIndexReader()) instanceof EmptyTermsEnum);
+    assertEquals(0, automatonQueryNrHits(aq));
+  }
+}

Propchange: lucene/java/branches/flex_1458/src/test/org/apache/lucene/search/TestAutomatonQuery.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/branches/flex_1458/src/test/org/apache/lucene/search/TestAutomatonQueryUnicode.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/test/org/apache/lucene/search/TestAutomatonQueryUnicode.java?rev=888891&view=auto
==============================================================================
--- lucene/java/branches/flex_1458/src/test/org/apache/lucene/search/TestAutomatonQueryUnicode.java (added)
+++ lucene/java/branches/flex_1458/src/test/org/apache/lucene/search/TestAutomatonQueryUnicode.java Wed Dec  9 17:45:58 2009
@@ -0,0 +1,178 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.KeywordAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.automaton.Automaton;
+import org.apache.lucene.util.automaton.RegExp;
+
+/**
+ * Test the automaton query for several unicode corner cases,
+ * specifically enumerating strings/indexes containing supplementary characters,
+ * and the differences between UTF-8/UTF-32 and UTF-16 binary sort order.
+ */
+public class TestAutomatonQueryUnicode extends LuceneTestCase {
+  private IndexSearcher searcher;
+
+  private final String FN = "field";
+
+  public void setUp() throws Exception {
+    super.setUp();
+    RAMDirectory directory = new RAMDirectory();
+    IndexWriter writer = new IndexWriter(directory, new KeywordAnalyzer(), true,
+        IndexWriter.MaxFieldLength.LIMITED);
+    Document doc = new Document();
+    Field titleField = new Field("title", "some title", Field.Store.NO,
+        Field.Index.ANALYZED);
+    Field field = new Field(FN, "", Field.Store.NO,
+        Field.Index.ANALYZED);
+    Field footerField = new Field("footer", "a footer", Field.Store.NO,
+        Field.Index.ANALYZED);
+    doc.add(titleField);
+    doc.add(field);
+    doc.add(footerField);
+    field.setValue("\uD866\uDF05abcdef");
+    writer.addDocument(doc);
+    field.setValue("\uD866\uDF06ghijkl");
+    writer.addDocument(doc);
+    // this sorts before the previous two in UTF-8/UTF-32, but after in UTF-16!!!
+    field.setValue("\uFB94mnopqr"); 
+    writer.addDocument(doc);
+    field.setValue("\uFB95stuvwx"); // this one too.
+    writer.addDocument(doc);
+    field.setValue("a\uFFFCbc");
+    writer.addDocument(doc);
+    field.setValue("a\uFFFDbc");
+    writer.addDocument(doc);
+    field.setValue("a\uFFFEbc");
+    writer.addDocument(doc);
+    field.setValue("a\uFB94bc");
+    writer.addDocument(doc);
+    field.setValue("bacadaba");
+    writer.addDocument(doc);
+    field.setValue("\uFFFD");
+    writer.addDocument(doc);
+    field.setValue("\uFFFD\uD866\uDF05");
+    writer.addDocument(doc);
+    field.setValue("\uFFFD\uFFFD");
+    writer.addDocument(doc);
+    writer.optimize();
+    writer.close();
+    searcher = new IndexSearcher(directory, true);
+  }
+
+  public void tearDown() throws Exception {
+    searcher.close();
+    super.tearDown();
+  }
+
+  private Term newTerm(String value) {
+    return new Term(FN, value);
+  }
+
+  private int automatonQueryNrHits(AutomatonQuery query) throws IOException {
+    return searcher.search(query, 5).totalHits;
+  }
+
+  private void assertAutomatonHits(int expected, Automaton automaton)
+      throws IOException {
+    AutomatonQuery query = new AutomatonQuery(newTerm("bogus"), automaton);
+
+    query.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
+    assertEquals(expected, automatonQueryNrHits(query));
+
+    query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
+    assertEquals(expected, automatonQueryNrHits(query));
+
+    query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE);
+    assertEquals(expected, automatonQueryNrHits(query));
+
+    query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT);
+    assertEquals(expected, automatonQueryNrHits(query));
+  }
+
+  /**
+   * Test that AutomatonQuery interacts with lucene's sort order correctly.
+   * 
+   * This expression matches something either starting with the arabic
+   * presentation forms block, or a supplementary character.
+   */
+  public void testSortOrder() throws IOException {
+    Automaton a = new RegExp("((\uD866\uDF05)|\uFB94).*").toAutomaton();
+    assertAutomatonHits(2, a);
+  }
+  
+  /**
+   * Test that AutomatonQuery properly seeks to supplementary characters.
+   * Transitions are modeled as UTF-16 code units, so without special handling
+   * by default it will try to seek to a lead surrogate with some DFAs
+   */
+  public void testSeekSurrogate() throws IOException {
+    Automaton a = new RegExp("\uD866[a\uDF05\uFB93][a-z]{0,5}[fl]").toAutomaton();
+    assertAutomatonHits(1, a);
+  }
+  
+  /**
+   * Try seeking to an ending lead surrogate.
+   */
+  public void testSeekSurrogate2() throws IOException {
+    Automaton a = new RegExp("\uD866(\uDF06ghijkl)?").toAutomaton();
+    assertAutomatonHits(1, a);
+  }
+  
+  /**
+   * Try seeking to a starting trail surrogate.
+   */
+  public void testSeekSurrogate3() throws IOException {
+    Automaton a = new RegExp("[\uDF06\uFB94]mnopqr").toAutomaton();
+    assertAutomatonHits(1, a);
+  }
+  
+  /**
+   * Try seeking to a medial/final trail surrogate.
+   */
+  public void testSeekSurrogate4() throws IOException {
+    Automaton a = new RegExp("a[\uDF06\uFB94]bc").toAutomaton();
+    assertAutomatonHits(1, a);
+  }
+  
+  /**
+   * Ensure the 'constant suffix' does not contain a leading trail surrogate.
+   */
+  public void testSurrogateSuffix() throws IOException {
+    Automaton a = new RegExp(".*[\uD865\uD866]\uDF06ghijkl").toAutomaton();
+    assertAutomatonHits(1, a);
+  }
+  
+  /**
+   * Try when the constant suffix is only a leading trail surrogate;
+   * in that case an empty suffix must be used instead.
+   */
+  public void testSurrogateSuffix2() throws IOException {
+    Automaton a = new RegExp(".*\uDF05").toAutomaton();
+    assertAutomatonHits(1, a);
+  }
+}

Propchange: lucene/java/branches/flex_1458/src/test/org/apache/lucene/search/TestAutomatonQueryUnicode.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/branches/flex_1458/src/test/org/apache/lucene/search/TestRegexpQuery.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/test/org/apache/lucene/search/TestRegexpQuery.java?rev=888891&view=auto
==============================================================================
--- lucene/java/branches/flex_1458/src/test/org/apache/lucene/search/TestRegexpQuery.java (added)
+++ lucene/java/branches/flex_1458/src/test/org/apache/lucene/search/TestRegexpQuery.java Wed Dec  9 17:45:58 2009
@@ -0,0 +1,123 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.lucene.analysis.WhitespaceAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.automaton.Automaton;
+import org.apache.lucene.util.automaton.AutomatonProvider;
+import org.apache.lucene.util.automaton.BasicAutomata;
+import org.apache.lucene.util.automaton.BasicOperations;
+import org.apache.lucene.util.automaton.RegExp;
+
+/**
+ * Some simple regex tests, mostly converted from contrib's TestRegexQuery.
+ */
+public class TestRegexpQuery extends LuceneTestCase {
+  private IndexSearcher searcher;
+  private final String FN = "field";
+  
+  public void setUp() throws Exception {
+    super.setUp();
+    RAMDirectory directory = new RAMDirectory();
+    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(),
+        true, IndexWriter.MaxFieldLength.LIMITED);
+    Document doc = new Document();
+    doc.add(new Field(FN,
+        "the quick brown fox jumps over the lazy ??? dog 493432 49344",
+        Field.Store.NO, Field.Index.ANALYZED));
+    writer.addDocument(doc);
+    writer.optimize();
+    writer.close();
+    searcher = new IndexSearcher(directory, true);
+  }
+  
+  public void tearDown() throws Exception {
+    searcher.close();
+    super.tearDown();
+  }
+  
+  private Term newTerm(String value) {
+    return new Term(FN, value);
+  }
+  
+  private int regexQueryNrHits(String regex) throws IOException {
+    RegexpQuery query = new RegexpQuery(newTerm(regex));
+    return searcher.search(query, 5).totalHits;
+  }
+  
+  public void testRegex1() throws IOException {
+    assertEquals(1, regexQueryNrHits("q.[aeiou]c.*"));
+  }
+  
+  public void testRegex2() throws IOException {
+    assertEquals(0, regexQueryNrHits(".[aeiou]c.*"));
+  }
+  
+  public void testRegex3() throws IOException {
+    assertEquals(0, regexQueryNrHits("q.[aeiou]c"));
+  }
+  
+  public void testNumericRange() throws IOException {
+    assertEquals(1, regexQueryNrHits("<420000-600000>"));
+    assertEquals(0, regexQueryNrHits("<493433-600000>"));
+  }
+  
+  public void testRegexComplement() throws IOException {
+    assertEquals(1, regexQueryNrHits("4934~[3]"));
+    // not the empty lang, i.e. match all docs
+    assertEquals(1, regexQueryNrHits("~#"));
+  }
+  
+  public void testCustomProvider() throws IOException {
+    AutomatonProvider myProvider = new AutomatonProvider() {
+      // automaton that matches quick, brown, or bob
+      private Automaton quickBrownAutomaton = BasicOperations.union(Arrays
+          .asList(new Automaton[] {BasicAutomata.makeString("quick"),
+              BasicAutomata.makeString("brown"),
+              BasicAutomata.makeString("bob")}));
+      
+      public Automaton getAutomaton(String name) throws IOException {
+        if (name.equals("quickBrown")) return quickBrownAutomaton;
+        else return null;
+      }
+    };
+    RegexpQuery query = new RegexpQuery(newTerm("<quickBrown>"), RegExp.ALL,
+        myProvider);
+    assertEquals(1, searcher.search(query, 5).totalHits);
+  }
+  
+  /**
+   * Test a corner case for backtracking: In this case the term dictionary has
+   * 493432 followed by 49344. When backtracking from 49343... to 4934, it's
+   * necessary to test that 4934 itself is ok before trying to append more
+   * characters.
+   */
+  public void testBacktracking() throws IOException {
+    assertEquals(1, regexQueryNrHits("4934[314]"));
+  }
+}

Propchange: lucene/java/branches/flex_1458/src/test/org/apache/lucene/search/TestRegexpQuery.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/branches/flex_1458/src/test/org/apache/lucene/search/TestRegexpRandom.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/test/org/apache/lucene/search/TestRegexpRandom.java?rev=888891&view=auto
==============================================================================
--- lucene/java/branches/flex_1458/src/test/org/apache/lucene/search/TestRegexpRandom.java (added)
+++ lucene/java/branches/flex_1458/src/test/org/apache/lucene/search/TestRegexpRandom.java Wed Dec  9 17:45:58 2009
@@ -0,0 +1,144 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.text.DecimalFormat;
+import java.text.NumberFormat;
+import java.util.Random;
+
+import org.apache.lucene.analysis.KeywordAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.LuceneTestCase;
+
+/**
+ * Create an index with terms from 0000-9999.
+ * Generates random regexps according to simple patterns,
+ * and validates the correct number of hits are returned.
+ */
+public class TestRegexpRandom extends LuceneTestCase {
+  private Searcher searcher;
+  private Random random;
+  
+  @Override
+  protected void setUp() throws Exception {
+    super.setUp();
+    RAMDirectory dir = new RAMDirectory();
+    IndexWriter writer = new IndexWriter(dir, new KeywordAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
+    
+    Document doc = new Document();
+    Field field = new Field("field", "", Field.Store.NO, Field.Index.ANALYZED);
+    doc.add(field);
+    
+    NumberFormat df = new DecimalFormat("0000");
+    for (int i = 0; i < 10000; i++) {
+      field.setValue(df.format(i));
+      writer.addDocument(doc);
+    }
+    
+    writer.optimize();
+    writer.close();
+    searcher = new IndexSearcher(dir);
+  }
+  
+  private char N() {
+    return (char) (0x30 + random.nextInt(10));
+  }
+  
+  private String fillPattern(String wildcardPattern) {
+    StringBuilder sb = new StringBuilder();
+    for (int i = 0; i < wildcardPattern.length(); i++) {
+      switch(wildcardPattern.charAt(i)) {
+        case 'N':
+          sb.append(N());
+          break;
+        default:
+          sb.append(wildcardPattern.charAt(i));
+      }
+    }
+    return sb.toString();
+  }
+  
+  private void assertPatternHits(String pattern, int numHits) throws Exception {
+    Query wq = new RegexpQuery(new Term("field", fillPattern(pattern)));
+    TopDocs docs = searcher.search(wq, 25);
+    assertEquals("Incorrect hits for pattern: " + pattern, numHits, docs.totalHits);
+  }
+
+  @Override
+  protected void tearDown() throws Exception {
+    searcher.close();
+    super.tearDown();
+  }
+  
+  public void testRegexps() throws Exception {
+    random = newRandom(System.nanoTime());
+    for (int i = 0; i < 100; i++) {
+      assertPatternHits("NNNN", 1);
+      assertPatternHits(".NNN", 10);
+      assertPatternHits("N.NN", 10);
+      assertPatternHits("NN.N", 10);
+      assertPatternHits("NNN.", 10);
+    }
+    
+    for (int i = 0; i < 10; i++) {
+      assertPatternHits(".{1,2}NN", 100);
+      assertPatternHits("N.{1,2}N", 100);
+      assertPatternHits("NN.{1,2}", 100);
+      assertPatternHits(".{1,3}N", 1000);
+      assertPatternHits("N.{1,3}", 1000);
+      assertPatternHits(".{1,4}", 10000);
+      
+      assertPatternHits("NNN[3-7]", 5);
+      assertPatternHits("NN[2-6][3-7]", 25);
+      assertPatternHits("N[1-5][2-6][3-7]", 125);
+      assertPatternHits("[0-4][3-7][4-8][5-9]", 625);
+      assertPatternHits("[3-7][2-6][0-4]N", 125);
+      assertPatternHits("[2-6][3-7]NN", 25);
+      assertPatternHits("[3-7]NNN", 5);
+      
+      assertPatternHits("NNN.*", 10);
+      assertPatternHits("NN.*", 100);
+      assertPatternHits("N.*", 1000);
+      assertPatternHits(".*", 10000);
+      
+      assertPatternHits(".*NNN", 10);
+      assertPatternHits(".*NN", 100);
+      assertPatternHits(".*N", 1000);
+      
+      assertPatternHits("N.*NN", 10);
+      assertPatternHits("NN.*N", 10);
+      
+      // combo of . and .* operators (the regexp analogues of wildcard ? and *)
+      assertPatternHits(".NN.*", 100);
+      assertPatternHits("N.N.*", 100);
+      assertPatternHits("NN..*", 100);
+      assertPatternHits(".N..*", 1000);
+      assertPatternHits("N...*", 1000);
+      
+      assertPatternHits(".*NN.", 100);
+      assertPatternHits(".*N..", 1000);
+      assertPatternHits(".*...", 10000);
+      assertPatternHits(".*.N.", 1000);
+      assertPatternHits(".*..N", 1000);
+    }
+  }
+}

Propchange: lucene/java/branches/flex_1458/src/test/org/apache/lucene/search/TestRegexpRandom.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: lucene/java/branches/flex_1458/src/test/org/apache/lucene/search/TestWildcard.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/test/org/apache/lucene/search/TestWildcard.java?rev=888891&r1=888890&r2=888891&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/test/org/apache/lucene/search/TestWildcard.java (original)
+++ lucene/java/branches/flex_1458/src/test/org/apache/lucene/search/TestWildcard.java Wed Dec  9 17:45:58 2009
@@ -24,6 +24,7 @@
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.Field.Store;
 import org.apache.lucene.document.Field.Index;
+import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.queryParser.QueryParser;
@@ -120,14 +121,12 @@
     MultiTermQuery wq = new WildcardQuery(new Term("field", "prefix*"));
     assertMatches(searcher, wq, 2);
     
-    assertTrue(wq.getEnum(searcher.getIndexReader()) instanceof PrefixTermEnum);
     assertTrue(wq.getTermsEnum(searcher.getIndexReader()) instanceof PrefixTermsEnum);
     
     wq = new WildcardQuery(new Term("field", "*"));
     assertMatches(searcher, wq, 2);
-    assertTrue(wq.getEnum(searcher.getIndexReader()) instanceof PrefixTermEnum);
     assertFalse(wq.getTermsEnum(searcher.getIndexReader()) instanceof PrefixTermsEnum);
-    assertFalse(wq.getTermsEnum(searcher.getIndexReader()) instanceof WildcardTermsEnum);
+    assertFalse(wq.getTermsEnum(searcher.getIndexReader()) instanceof AutomatonTermsEnum);
   }
 
   /**
@@ -309,5 +308,56 @@
 
     searcher.close();
   }
+  @Deprecated
+  private static final class OldWildcardQuery extends MultiTermQuery {
+    final Term term;
   
+    OldWildcardQuery(Term term) {
+      this.term = term;
+    }
+      
+    @Override
+    protected FilteredTermEnum getEnum(IndexReader reader) throws IOException {
+      return new WildcardTermEnum(reader, term);
+    }
+    
+    @Override
+    public String toString(String field) {
+      return "OldWildcard(" + term.toString()+ ")";
+    }
+  }
+  
+  @Deprecated
+  public void testDeprecatedTermEnum() throws Exception {
+    RAMDirectory indexStore = getIndexStore("body", new String[]
+    {"metal", "metals"});
+    IndexSearcher searcher = new IndexSearcher(indexStore, true);
+    Query query1 = new TermQuery(new Term("body", "metal"));
+    Query query2 = new OldWildcardQuery(new Term("body", "metal*"));
+    Query query3 = new OldWildcardQuery(new Term("body", "m*tal"));
+    Query query4 = new OldWildcardQuery(new Term("body", "m*tal*"));
+    Query query5 = new OldWildcardQuery(new Term("body", "m*tals"));
+
+    BooleanQuery query6 = new BooleanQuery();
+    query6.add(query5, BooleanClause.Occur.SHOULD);
+
+    BooleanQuery query7 = new BooleanQuery();
+    query7.add(query3, BooleanClause.Occur.SHOULD);
+    query7.add(query5, BooleanClause.Occur.SHOULD);
+
+    // Queries do not automatically lower-case search terms:
+    Query query8 = new OldWildcardQuery(new Term("body", "M*tal*"));
+
+    assertMatches(searcher, query1, 1);
+    assertMatches(searcher, query2, 2);
+    assertMatches(searcher, query3, 1);
+    assertMatches(searcher, query4, 2);
+    assertMatches(searcher, query5, 1);
+    assertMatches(searcher, query6, 1);
+    assertMatches(searcher, query7, 2);
+    assertMatches(searcher, query8, 0);
+    assertMatches(searcher, new OldWildcardQuery(new Term("body", "*tall")), 0);
+    assertMatches(searcher, new OldWildcardQuery(new Term("body", "*tal")), 1);
+    assertMatches(searcher, new OldWildcardQuery(new Term("body", "*tal*")), 2);
+  }
 }

Added: lucene/java/branches/flex_1458/src/test/org/apache/lucene/search/TestWildcardRandom.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/test/org/apache/lucene/search/TestWildcardRandom.java?rev=888891&view=auto
==============================================================================
--- lucene/java/branches/flex_1458/src/test/org/apache/lucene/search/TestWildcardRandom.java (added)
+++ lucene/java/branches/flex_1458/src/test/org/apache/lucene/search/TestWildcardRandom.java Wed Dec  9 17:45:58 2009
@@ -0,0 +1,137 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.text.DecimalFormat;
+import java.text.NumberFormat;
+import java.util.Random;
+
+import org.apache.lucene.analysis.KeywordAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.LuceneTestCase;
+
+/**
+ * Create an index with terms from 0000-9999.
+ * Generates random wildcards according to patterns,
+ * and validates the correct number of hits are returned.
+ */
+public class TestWildcardRandom extends LuceneTestCase {
+  private Searcher searcher;
+  private Random random;
+  
+  @Override
+  protected void setUp() throws Exception {
+    super.setUp();
+    RAMDirectory dir = new RAMDirectory();
+    IndexWriter writer = new IndexWriter(dir, new KeywordAnalyzer(),
+        IndexWriter.MaxFieldLength.UNLIMITED);
+    
+    Document doc = new Document();
+    Field field = new Field("field", "", Field.Store.NO, Field.Index.ANALYZED);
+    doc.add(field);
+    
+    NumberFormat df = new DecimalFormat("0000");
+    for (int i = 0; i < 10000; i++) {
+      field.setValue(df.format(i));
+      writer.addDocument(doc);
+    }
+    
+    writer.optimize();
+    writer.close();
+    searcher = new IndexSearcher(dir);
+  }
+  
+  private char N() {
+    return (char) (0x30 + random.nextInt(10));
+  }
+  
+  private String fillPattern(String wildcardPattern) {
+    StringBuilder sb = new StringBuilder();
+    for (int i = 0; i < wildcardPattern.length(); i++) {
+      switch(wildcardPattern.charAt(i)) {
+        case 'N':
+          sb.append(N());
+          break;
+        default:
+          sb.append(wildcardPattern.charAt(i));
+      }
+    }
+    return sb.toString();
+  }
+  
+  private void assertPatternHits(String pattern, int numHits) throws Exception {
+    Query wq = new WildcardQuery(new Term("field", fillPattern(pattern)));
+    TopDocs docs = searcher.search(wq, 25);
+    assertEquals("Incorrect hits for pattern: " + pattern, numHits, docs.totalHits);
+  }
+
+  @Override
+  protected void tearDown() throws Exception {
+    searcher.close();
+    super.tearDown();
+  }
+  
+  public void testWildcards() throws Exception {
+    random = newRandom(System.nanoTime());
+    for (int i = 0; i < 100; i++) {
+      assertPatternHits("NNNN", 1);
+      assertPatternHits("?NNN", 10);
+      assertPatternHits("N?NN", 10);
+      assertPatternHits("NN?N", 10);
+      assertPatternHits("NNN?", 10);
+    }
+    
+    for (int i = 0; i < 10; i++) {
+      assertPatternHits("??NN", 100);
+      assertPatternHits("N??N", 100);
+      assertPatternHits("NN??", 100);
+      assertPatternHits("???N", 1000);
+      assertPatternHits("N???", 1000);
+      assertPatternHits("????", 10000);
+      
+      assertPatternHits("NNN*", 10);
+      assertPatternHits("NN*", 100);
+      assertPatternHits("N*", 1000);
+      assertPatternHits("*", 10000);
+      
+      assertPatternHits("*NNN", 10);
+      assertPatternHits("*NN", 100);
+      assertPatternHits("*N", 1000);
+      
+      assertPatternHits("N*NN", 10);
+      assertPatternHits("NN*N", 10);
+      
+      // combo of ? and * operators
+      assertPatternHits("?NN*", 100);
+      assertPatternHits("N?N*", 100);
+      assertPatternHits("NN?*", 100);
+      assertPatternHits("?N?*", 1000);
+      assertPatternHits("N??*", 1000);
+      
+      assertPatternHits("*NN?", 100);
+      assertPatternHits("*N??", 1000);
+      assertPatternHits("*???", 10000);
+      assertPatternHits("*?N?", 1000);
+      assertPatternHits("*??N", 1000);
+    }
+  }
+}

Propchange: lucene/java/branches/flex_1458/src/test/org/apache/lucene/search/TestWildcardRandom.java
------------------------------------------------------------------------------
    svn:eol-style = native



Mime
View raw message