lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From rm...@apache.org
Subject svn commit: r1529482 - in /lucene/dev/trunk: lucene/ lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/ lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/ lucene/analysis/kuromoji/src/test/org/apache/lucene/analys...
Date Sat, 05 Oct 2013 16:41:28 GMT
Author: rmuir
Date: Sat Oct  5 16:41:28 2013
New Revision: 1529482

URL: http://svn.apache.org/r1529482
Log:
LUCENE-5240: additional safety in Tokenizer state machine

Removed:
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/TrieTokenizerFactory.java
Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java
    lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java
    lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestExtendedMode.java
    lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java
    lucene/dev/trunk/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTokensTask.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/analysis/Tokenizer.java
    lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
    lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/CollationTestBase.java
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/TrieField.java
    lucene/dev/trunk/solr/core/src/test/org/apache/solr/TestTrie.java
    lucene/dev/trunk/solr/core/src/test/org/apache/solr/rest/schema/TestFieldTypeResource.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1529482&r1=1529481&r2=1529482&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Sat Oct  5 16:41:28 2013
@@ -80,6 +80,10 @@ New Features
   on best effort which was not user-friendly.
   (Uwe Schindler, Robert Muir)
 
+* LUCENE-5240: Tokenizers now throw an IllegalStateException if the
+  consumer neglects to call close() on the previous stream before consuming
+  the next one. (Uwe Schindler, Robert Muir)
+
 * LUCENE-5214: Add new FreeTextSuggester, to predict the next word
   using a simple ngram language model.  This is useful for the "long
   tail" suggestions, when a primary suggester fails to find a

Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java?rev=1529482&r1=1529481&r2=1529482&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java Sat Oct  5 16:41:28 2013
@@ -48,6 +48,7 @@ public class CommonGramsFilterTest exten
     assertEquals("the", term.toString());
     assertTrue(cgf.incrementToken());
     assertEquals("the_s", term.toString());
+    cgf.close();
     
     wt.setReader(new StringReader(input));
     cgf.reset();
@@ -67,6 +68,7 @@ public class CommonGramsFilterTest exten
     assertEquals("How_the", term.toString());
     assertTrue(nsf.incrementToken());
     assertEquals("the_s", term.toString());
+    nsf.close();
     
     wt.setReader(new StringReader(input));
     nsf.reset();

Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java?rev=1529482&r1=1529481&r2=1529482&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java Sat Oct  5 16:41:28 2013
@@ -240,6 +240,8 @@ public class TestCompoundWordTokenFilter
     assertEquals("Rindfleischüberwachungsgesetz", termAtt.toString());
     assertTrue(tf.incrementToken());
     assertEquals("Rind", termAtt.toString());
+    tf.end();
+    tf.close();
     wsTokenizer.setReader(new StringReader("Rindfleischüberwachungsgesetz"));
     tf.reset();
     assertTrue(tf.incrementToken());

Modified: lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestExtendedMode.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestExtendedMode.java?rev=1529482&r1=1529481&r2=1529482&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestExtendedMode.java (original)
+++ lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestExtendedMode.java Sat Oct  5 16:41:28 2013
@@ -59,6 +59,8 @@ public class TestExtendedMode extends Ba
       while (ts.incrementToken()) {
         assertTrue(UnicodeUtil.validUTF16String(termAtt));
       }
+      ts.end();
+      ts.close();
     }
   }
   

Modified: lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java?rev=1529482&r1=1529481&r2=1529482&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java (original)
+++ lucene/dev/trunk/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java Sat Oct  5 16:41:28 2013
@@ -217,6 +217,8 @@ public class TestJapaneseTokenizer exten
       ts.reset();
       while (ts.incrementToken()) {
       }
+      ts.end();
+      ts.close();
     }
   }
   
@@ -240,6 +242,8 @@ public class TestJapaneseTokenizer exten
       while (ts.incrementToken()) {
         assertTrue(UnicodeUtil.validUTF16String(termAtt));
       }
+      ts.end();
+      ts.close();
     }
   }
 
@@ -630,6 +634,8 @@ public class TestJapaneseTokenizer exten
       final TokenStream ts = analyzer.tokenStream("ignored", line);
       ts.reset();
       while(ts.incrementToken());
+      ts.end();
+      ts.close();
     }
     String[] sentences = line.split("、|。");
     if (VERBOSE) {
@@ -642,6 +648,8 @@ public class TestJapaneseTokenizer exten
         final TokenStream ts = analyzer.tokenStream("ignored", sentence);
         ts.reset();
         while(ts.incrementToken());
+        ts.end();
+        ts.close();
       }
     }
     if (VERBOSE) {

Modified: lucene/dev/trunk/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTokensTask.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTokensTask.java?rev=1529482&r1=1529481&r2=1529482&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTokensTask.java (original)
+++ lucene/dev/trunk/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTokensTask.java Sat Oct  5 16:41:28 2013
@@ -90,6 +90,8 @@ public class ReadTokensTask extends Perf
         termAtt.fillBytesRef();
         tokenCount++;
       }
+      stream.end();
+      stream.close();
     }
     totalTokenCount += tokenCount;
     return tokenCount;

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/analysis/Tokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/analysis/Tokenizer.java?rev=1529482&r1=1529481&r2=1529482&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/analysis/Tokenizer.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/analysis/Tokenizer.java Sat Oct  5 16:41:28 2013
@@ -85,8 +85,9 @@ public abstract class Tokenizer extends 
   public final void setReader(Reader input) throws IOException {
     if (input == null) {
       throw new NullPointerException("input must not be null");
+    } else if (this.input != ILLEGAL_STATE_READER) {
+      throw new IllegalStateException("TokenStream contract violation: close() call missing");
     }
-    this.input = ILLEGAL_STATE_READER;
     this.inputPending = input;
     assert setReaderTestPoint();
   }

Modified: lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java?rev=1529482&r1=1529481&r2=1529482&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java (original)
+++ lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java Sat Oct  5 16:41:28 2013
@@ -401,6 +401,20 @@ public abstract class BaseTokenStreamTes
       ts.end();
       ts.close();
     }
+    
+    // check for a missing close()
+    ts = a.tokenStream("bogus", input);
+    ts.reset();
+    while (ts.incrementToken()) {}
+    ts.end();
+    try {
+      ts = a.tokenStream("bogus", input);
+      fail("didn't get expected exception when close() not called");
+    } catch (IllegalStateException expected) {
+      // ok
+    } finally {
+      ts.close();
+    }
   }
 
   // simple utility method for testing stemmers

Modified: lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/CollationTestBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/CollationTestBase.java?rev=1529482&r1=1529481&r2=1529482&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/CollationTestBase.java (original)
+++ lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/CollationTestBase.java Sat Oct  5 16:41:28 2013
@@ -266,6 +266,9 @@ public abstract class CollationTestBase 
       termAtt.fillBytesRef();
       // ensure we make a copy of the actual bytes too
       map.put(term, BytesRef.deepCopyOf(bytes));
+      assertFalse(ts.incrementToken());
+      ts.end();
+      ts.close();
     }
     
     Thread threads[] = new Thread[numThreads];
@@ -284,6 +287,9 @@ public abstract class CollationTestBase 
               assertTrue(ts.incrementToken());
               termAtt.fillBytesRef();
               assertEquals(expected, bytes);
+              assertFalse(ts.incrementToken());
+              ts.end();
+              ts.close();
             }
           } catch (IOException e) {
             throw new RuntimeException(e);

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java?rev=1529482&r1=1529481&r2=1529482&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java Sat Oct  5 16:41:28 2013
@@ -30,6 +30,7 @@ import org.apache.lucene.util.AttributeS
 import org.apache.lucene.util.AttributeReflector;
 import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.IOUtils;
 import org.apache.solr.analysis.TokenizerChain;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.common.util.SimpleOrderedMap;
@@ -138,9 +139,10 @@ public abstract class AnalysisRequestHan
    * @param analyzer The analyzer to use.
    */
   protected Set<BytesRef> getQueryTokenSet(String query, Analyzer analyzer) {
+    TokenStream tokenStream = null;
     try {
       final Set<BytesRef> tokens = new HashSet<BytesRef>();
-      final TokenStream tokenStream = analyzer.tokenStream("", query);
+      tokenStream = analyzer.tokenStream("", query);
       final TermToBytesRefAttribute bytesAtt = tokenStream.getAttribute(TermToBytesRefAttribute.class);
       final BytesRef bytes = bytesAtt.getBytesRef();
 
@@ -152,10 +154,11 @@ public abstract class AnalysisRequestHan
       }
 
       tokenStream.end();
-      tokenStream.close();
       return tokens;
     } catch (IOException ioe) {
       throw new RuntimeException("Error occured while iterating over tokenstream", ioe);
+    } finally {
+      IOUtils.closeWhileHandlingException(tokenStream);
     }
   }
 
@@ -181,8 +184,11 @@ public abstract class AnalysisRequestHan
         trackerAtt.setActPosition(position);
         tokens.add(tokenStream.cloneAttributes());
       }
+      tokenStream.end();
     } catch (IOException ioe) {
       throw new RuntimeException("Error occured while iterating over tokenstream", ioe);
+    } finally {
+      IOUtils.closeWhileHandlingException(tokenStream);
     }
 
     return tokens;

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/TrieField.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/TrieField.java?rev=1529482&r1=1529481&r2=1529482&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/TrieField.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/TrieField.java Sat Oct  5 16:41:28 2013
@@ -24,8 +24,6 @@ import java.util.List;
 import java.util.Locale;
 import java.util.Map;
 
-import org.apache.lucene.analysis.util.CharFilterFactory;
-import org.apache.lucene.analysis.util.TokenFilterFactory;
 import org.apache.lucene.document.DoubleField;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.FieldType.NumericType;
@@ -51,8 +49,6 @@ import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util.NumericUtils;
 import org.apache.lucene.util.mutable.MutableValueDate;
 import org.apache.lucene.util.mutable.MutableValueLong;
-import org.apache.solr.analysis.TokenizerChain;
-import org.apache.solr.analysis.TrieTokenizerFactory;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.response.TextResponseWriter;
 import org.apache.solr.search.QParser;
@@ -111,12 +107,6 @@ public class TrieField extends Primitive
                 "Invalid type specified in schema.xml for field: " + args.get("name"), e);
       }
     }
-
-    CharFilterFactory[] filterFactories = new CharFilterFactory[0];
-    TokenFilterFactory[] tokenFilterFactories = new TokenFilterFactory[0];
-    analyzer = new TokenizerChain(filterFactories, new TrieTokenizerFactory(type, precisionStep), tokenFilterFactories);
-    // for query time we only need one token, so we use the biggest possible precisionStep:
-    queryAnalyzer = new TokenizerChain(filterFactories, new TrieTokenizerFactory(type, Integer.MAX_VALUE), tokenFilterFactories);
   }
 
   @Override
@@ -223,7 +213,7 @@ public class TrieField extends Primitive
 
   @Override
   public boolean isTokenized() {
-    return true;
+    return false;
   }
 
   @Override
@@ -382,24 +372,29 @@ public class TrieField extends Primitive
   @Override
   public void readableToIndexed(CharSequence val, BytesRef result) {
     String s = val.toString();
-    switch (type) {
-      case INTEGER:
-        NumericUtils.intToPrefixCodedBytes(Integer.parseInt(s), 0, result);
-        break;
-      case FLOAT:
-        NumericUtils.intToPrefixCodedBytes(NumericUtils.floatToSortableInt(Float.parseFloat(s)), 0, result);
-        break;
-      case LONG:
-        NumericUtils.longToPrefixCodedBytes(Long.parseLong(s), 0, result);
-        break;
-      case DOUBLE:
-        NumericUtils.longToPrefixCodedBytes(NumericUtils.doubleToSortableLong(Double.parseDouble(s)), 0, result);
-        break;
-      case DATE:
-        NumericUtils.longToPrefixCodedBytes(dateField.parseMath(null, s).getTime(), 0, result);
-        break;
-      default:
-        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + type);
+    try {
+      switch (type) {
+        case INTEGER:
+          NumericUtils.intToPrefixCodedBytes(Integer.parseInt(s), 0, result);
+          break;
+        case FLOAT:
+          NumericUtils.intToPrefixCodedBytes(NumericUtils.floatToSortableInt(Float.parseFloat(s)), 0, result);
+          break;
+        case LONG:
+          NumericUtils.longToPrefixCodedBytes(Long.parseLong(s), 0, result);
+          break;
+        case DOUBLE:
+          NumericUtils.longToPrefixCodedBytes(NumericUtils.doubleToSortableLong(Double.parseDouble(s)), 0, result);
+          break;
+        case DATE:
+          NumericUtils.longToPrefixCodedBytes(dateField.parseMath(null, s).getTime(), 0, result);
+          break;
+        default:
+          throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + type);
+      }
+    } catch (NumberFormatException nfe) {
+      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, 
+                              "Invalid Number: " + val);
     }
   }
 

Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/TestTrie.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/TestTrie.java?rev=1529482&r1=1529481&r2=1529482&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/TestTrie.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/TestTrie.java Sat Oct  5 16:41:28 2013
@@ -16,8 +16,6 @@
  */
 package org.apache.solr;
 
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.solr.request.SolrQueryRequest;
 import org.apache.solr.schema.DateField;
 import org.apache.solr.schema.FieldType;
@@ -49,38 +47,6 @@ public class TestTrie extends SolrTestCa
     clearIndex();
     super.tearDown();
   }
-  
-  @Test
-  public void testTokenizer() throws Exception {
-    FieldType type = h.getCore().getLatestSchema().getFieldType("tint");
-    assertTrue(type instanceof TrieField);
-    
-    String value = String.valueOf(random().nextInt());
-    TokenStream ts = type.getAnalyzer().tokenStream("dummy", value);
-    OffsetAttribute ofsAtt = ts.addAttribute(OffsetAttribute.class);
-    ts.reset();
-    int count = 0;
-    while (ts.incrementToken()) {
-      count++;
-      assertEquals(0, ofsAtt.startOffset());
-      assertEquals(value.length(), ofsAtt.endOffset());
-    }
-    final int precStep = ((TrieField) type).getPrecisionStep();
-    assertEquals( (32 + precStep - 1) / precStep, count);
-    ts.end();
-    assertEquals(value.length(), ofsAtt.startOffset());
-    assertEquals(value.length(), ofsAtt.endOffset());
-    ts.close();
-    
-    // Test empty one:
-    ts = type.getAnalyzer().tokenStream("dummy", "");
-    ts.reset();
-    assertFalse(ts.incrementToken());
-    ts.end();
-    assertEquals(0, ofsAtt.startOffset());
-    assertEquals(0, ofsAtt.endOffset());
-    ts.close();    
-  }
 
   @Test
   public void testTrieIntRangeSearch() throws Exception {

Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/rest/schema/TestFieldTypeResource.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/rest/schema/TestFieldTypeResource.java?rev=1529482&r1=1529481&r2=1529482&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/rest/schema/TestFieldTypeResource.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/rest/schema/TestFieldTypeResource.java Sat Oct  5 16:41:28 2013
@@ -39,7 +39,7 @@ public class TestFieldTypeResource exten
             "/response/lst[@name='fieldType']/bool[@name='omitPositions'] = 'false'",
             "/response/lst[@name='fieldType']/bool[@name='storeOffsetsWithPositions'] = 'false'",
             "/response/lst[@name='fieldType']/bool[@name='multiValued'] = 'false'",
-            "/response/lst[@name='fieldType']/bool[@name='tokenized'] = 'true'",
+            "/response/lst[@name='fieldType']/bool[@name='tokenized'] = 'false'",
             "/response/lst[@name='fieldType']/arr[@name='fields']/str = 'weight'",
             "/response/lst[@name='fieldType']/arr[@name='dynamicFields']/str = '*_f'");
   }
@@ -69,7 +69,7 @@ public class TestFieldTypeResource exten
              "/fieldType/omitPositions==false",
              "/fieldType/storeOffsetsWithPositions==false",
              "/fieldType/multiValued==false",
-             "/fieldType/tokenized==true",
+             "/fieldType/tokenized==false",
              "/fieldType/fields==['weight']",
              "/fieldType/dynamicFields==['*_f']");
   }



Mime
View raw message