lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From rm...@apache.org
Subject svn commit: r1583530 - in /lucene/dev/trunk/lucene: ./ analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/ analysis/common/src/java/org/apache/lucene/analysis/sinks/ analysis/common/src/java/org/apache/lucene/analysis/standard/ analysis/...
Date Tue, 01 Apr 2014 04:43:02 GMT
Author: rmuir
Date: Tue Apr  1 04:43:01 2014
New Revision: 1583530

URL: http://svn.apache.org/r1583530
Log:
LUCENE-5559: Add missing checks to TokenFilters with numeric arguments

Added:
    lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountFilter.java
  (with props)
Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java
    lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenCountFilter.java
    lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenPositionFilter.java
    lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/TokenRangeSinkFilter.java
    lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java
    lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
    lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java
    lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilter.java
    lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilterFactory.java
    lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountFilterFactory.java
    lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenPositionFilter.java
    lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenPositionFilterFactory.java
    lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizerTest.java
    lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailTokenizerFactory.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1583530&r1=1583529&r2=1583530&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Tue Apr  1 04:43:01 2014
@@ -215,6 +215,9 @@ Bug fixes
 
 * LUCENE-5555: Fix SortedInputIterator to correctly encode/decode contexts in presence of payload (Areek Zillur)
 
+* LUCENE-5559: Add missing argument checks to tokenfilters taking
+  numeric arguments.  (Ahmet Arslan via Robert Muir)
+
 Test Framework
 
 * LUCENE-5449: Rename _TestUtil and _TestHelper to remove the leading _.

Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java?rev=1583530&r1=1583529&r2=1583530&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java
(original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java
Tue Apr  1 04:43:01 2014
@@ -32,7 +32,7 @@ public final class LengthFilter extends 
 
   private final int min;
   private final int max;
-  
+
   private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
 
   /**
@@ -46,6 +46,12 @@ public final class LengthFilter extends 
    */
   public LengthFilter(Version version, TokenStream in, int min, int max) {
     super(version, in);
+    if (min < 0) {
+      throw new IllegalArgumentException("minimum length must be greater than or equal to zero");
+    }
+    if (min > max) {
+      throw new IllegalArgumentException("maximum length must not be greater than minimum length");
+    }
     this.min = min;
     this.max = max;
   }

Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenCountFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenCountFilter.java?rev=1583530&r1=1583529&r2=1583530&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenCountFilter.java
(original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenCountFilter.java
Tue Apr  1 04:43:01 2014
@@ -61,6 +61,9 @@ public final class LimitTokenCountFilter
    */
   public LimitTokenCountFilter(TokenStream in, int maxTokenCount, boolean consumeAllTokens) {
     super(in);
+    if (maxTokenCount < 1) {
+      throw new IllegalArgumentException("maxTokenCount must be greater than zero");
+    }
     this.maxTokenCount = maxTokenCount;
     this.consumeAllTokens = consumeAllTokens;
   }

Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenPositionFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenPositionFilter.java?rev=1583530&r1=1583529&r2=1583530&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenPositionFilter.java
(original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenPositionFilter.java
Tue Apr  1 04:43:01 2014
@@ -67,6 +67,9 @@ public final class LimitTokenPositionFil
    */
   public LimitTokenPositionFilter(TokenStream in, int maxTokenPosition, boolean consumeAllTokens) {
     super(in);
+    if (maxTokenPosition < 1) {
+      throw new IllegalArgumentException("maxTokenPosition must be greater than zero");
+    }
     this.maxTokenPosition = maxTokenPosition;
     this.consumeAllTokens = consumeAllTokens;
   }

Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/TokenRangeSinkFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/TokenRangeSinkFilter.java?rev=1583530&r1=1583529&r2=1583530&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/TokenRangeSinkFilter.java
(original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/TokenRangeSinkFilter.java
Tue Apr  1 04:43:01 2014
@@ -31,6 +31,12 @@ public class TokenRangeSinkFilter extend
   private int count;
 
   public TokenRangeSinkFilter(int lower, int upper) {
+    if (lower < 1) {
+      throw new IllegalArgumentException("lower must be greater than zero");
+    }
+    if (lower > upper) {
+      throw new IllegalArgumentException("lower must not be greater than upper");
+    }
     this.lower = lower;
     this.upper = upper;
   }

Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java?rev=1583530&r1=1583529&r2=1583530&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java
(original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java
Tue Apr  1 04:43:01 2014
@@ -84,6 +84,9 @@ public final class ClassicTokenizer exte
   /** Set the max allowed token length.  Any token longer
    *  than this is skipped. */
   public void setMaxTokenLength(int length) {
+    if (length < 1) {
+      throw new IllegalArgumentException("maxTokenLength must be greater than zero");
+    }
     this.maxTokenLength = length;
   }
 

Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java?rev=1583530&r1=1583529&r2=1583530&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
(original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
Tue Apr  1 04:43:01 2014
@@ -98,6 +98,9 @@ public final class StandardTokenizer ext
   /** Set the max allowed token length.  Any token longer
    *  than this is skipped. */
   public void setMaxTokenLength(int length) {
+    if (length < 1) {
+      throw new IllegalArgumentException("maxTokenLength must be greater than zero");
+    }
     this.maxTokenLength = length;
   }
 

Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java?rev=1583530&r1=1583529&r2=1583530&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java
(original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java
Tue Apr  1 04:43:01 2014
@@ -84,6 +84,9 @@ public final class UAX29URLEmailTokenize
   /** Set the max allowed token length.  Any token longer
    *  than this is skipped. */
   public void setMaxTokenLength(int length) {
+    if (length < 1) {
+      throw new IllegalArgumentException("maxTokenLength must be greater than zero");
+    }
     this.maxTokenLength = length;
   }
 

Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilter.java?rev=1583530&r1=1583529&r2=1583530&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilter.java
(original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilter.java
Tue Apr  1 04:43:01 2014
@@ -27,6 +27,7 @@ import org.apache.lucene.analysis.MockTo
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
+import org.junit.Test;
 
 public class TestLengthFilter extends BaseTokenStreamTestCase {
 
@@ -50,4 +51,11 @@ public class TestLengthFilter extends Ba
     checkOneTerm(a, "", "");
   }
 
+  /**
+   * checking the validity of constructor arguments
+   */
+  @Test(expected = IllegalArgumentException.class)
+  public void testIllegalArguments() throws Exception {
+    new LengthFilter(TEST_VERSION_CURRENT, whitespaceMockTokenizer("accept only valid arguments"), -4, -1);
+  }
 }

Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilterFactory.java?rev=1583530&r1=1583529&r2=1583530&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilterFactory.java
(original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilterFactory.java
Tue Apr  1 04:43:01 2014
@@ -1,11 +1,12 @@
 package org.apache.lucene.analysis.miscellaneous;
 
-/**
- * Copyright 2004 The Apache Software Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
@@ -31,21 +32,36 @@ public class TestLengthFilterFactory ext
     TokenStream stream = new MockTokenizer(MockTokenizer.WHITESPACE, false);
     ((Tokenizer)stream).setReader(reader);
     stream = tokenFilterFactory("Length",
-        "min", "4",
-        "max", "10").create(stream);
+        LengthFilterFactory.MIN_KEY, "4",
+        LengthFilterFactory.MAX_KEY, "10").create(stream);
     assertTokenStreamContents(stream, new String[] { "foobar" }, new int[] { 2 });
   }
-  
+
   /** Test that bogus arguments result in exception */
   public void testBogusArguments() throws Exception {
     try {
-      tokenFilterFactory("Length", 
-          "min", "4", 
-          "max", "5", 
+      tokenFilterFactory("Length",
+          LengthFilterFactory.MIN_KEY, "4",
+          LengthFilterFactory.MAX_KEY, "5",
           "bogusArg", "bogusValue");
       fail();
     } catch (IllegalArgumentException expected) {
       assertTrue(expected.getMessage().contains("Unknown parameters"));
     }
   }
+
+  /** Test that invalid arguments result in exception */
+  public void testInvalidArguments() throws Exception {
+    try {
+      Reader reader = new StringReader("foo foobar super-duper-trooper");
+      TokenStream stream = new MockTokenizer(MockTokenizer.WHITESPACE, false);
+      ((Tokenizer)stream).setReader(reader);
+      tokenFilterFactory("Length",
+          LengthFilterFactory.MIN_KEY, "5",
+          LengthFilterFactory.MAX_KEY, "4").create(stream);
+      fail();
+    } catch (IllegalArgumentException expected) {
+      assertTrue(expected.getMessage().contains("maximum length must not be greater than minimum length"));
+    }
+  }
 }
\ No newline at end of file

Added: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountFilter.java?rev=1583530&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountFilter.java
(added)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountFilter.java
Tue Apr  1 04:43:01 2014
@@ -0,0 +1,40 @@
+package org.apache.lucene.analysis.miscellaneous;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+import org.junit.Test;
+
+public class TestLimitTokenCountFilter extends BaseTokenStreamTestCase {
+
+  public void test() throws Exception {
+    for (final boolean consumeAll : new boolean[]{true, false}) {
+      MockTokenizer tokenizer = whitespaceMockTokenizer("A1 B2 C3 D4 E5 F6");
+      tokenizer.setEnableChecks(consumeAll);
+      TokenStream stream = new LimitTokenCountFilter(tokenizer, 3, consumeAll);
+      assertTokenStreamContents(stream, new String[]{"A1", "B2", "C3"});
+    }
+  }
+
+  @Test(expected = IllegalArgumentException.class)
+  public void testIllegalArguments() throws Exception {
+    new LimitTokenCountFilter(whitespaceMockTokenizer("A1 B2 C3 D4 E5 F6"), -1);
+  }
+}

Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountFilterFactory.java?rev=1583530&r1=1583529&r2=1583530&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountFilterFactory.java
(original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountFilterFactory.java
Tue Apr  1 04:43:01 2014
@@ -1,11 +1,12 @@
 package org.apache.lucene.analysis.miscellaneous;
 
-/**
- * Copyright 2004 The Apache Software Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
@@ -16,25 +17,28 @@ package org.apache.lucene.analysis.misce
  * limitations under the License.
  */
 
-import java.io.Reader;
-import java.io.StringReader;
-
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
 
+import java.io.Reader;
+import java.io.StringReader;
+
 public class TestLimitTokenCountFilterFactory extends BaseTokenStreamFactoryTestCase {
 
   public void test() throws Exception {
-    Reader reader = new StringReader("A1 B2 C3 D4 E5 F6");
-    MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
-    tokenizer.setReader(reader);
-    // LimitTokenCountFilter doesn't consume the entire stream that it wraps
-    tokenizer.setEnableChecks(false);
-    TokenStream stream = tokenizer;
-    stream = tokenFilterFactory("LimitTokenCount",
-        "maxTokenCount", "3").create(stream);
-    assertTokenStreamContents(stream, new String[] { "A1", "B2", "C3" });
+    for (final boolean consumeAll : new boolean[]{true, false}) {
+      Reader reader = new StringReader("A1 B2 C3 D4 E5 F6");
+      MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
+      tokenizer.setReader(reader);
+      tokenizer.setEnableChecks(consumeAll);
+      TokenStream stream = tokenizer;
+      stream = tokenFilterFactory("LimitTokenCount",
+          LimitTokenCountFilterFactory.MAX_TOKEN_COUNT_KEY, "3",
+          LimitTokenCountFilterFactory.CONSUME_ALL_TOKENS_KEY, Boolean.toString(consumeAll)
+      ).create(stream);
+      assertTokenStreamContents(stream, new String[]{"A1", "B2", "C3"});
+    }
   }
 
   public void testRequired() throws Exception {
@@ -44,15 +48,17 @@ public class TestLimitTokenCountFilterFa
       fail();
     } catch (IllegalArgumentException e) {
       assertTrue("exception doesn't mention param: " + e.getMessage(),
-                 0 < e.getMessage().indexOf(LimitTokenCountFilterFactory.MAX_TOKEN_COUNT_KEY));
+          0 < e.getMessage().indexOf(LimitTokenCountFilterFactory.MAX_TOKEN_COUNT_KEY));
     }
   }
-  
-  /** Test that bogus arguments result in exception */
+
+  /**
+   * Test that bogus arguments result in exception
+   */
   public void testBogusArguments() throws Exception {
     try {
-      tokenFilterFactory("LimitTokenCount", 
-          "maxTokenCount", "3", 
+      tokenFilterFactory("LimitTokenCount",
+          LimitTokenCountFilterFactory.MAX_TOKEN_COUNT_KEY, "3",
           "bogusArg", "bogusValue");
       fail();
     } catch (IllegalArgumentException expected) {

Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenPositionFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenPositionFilter.java?rev=1583530&r1=1583529&r2=1583530&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenPositionFilter.java
(original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenPositionFilter.java
Tue Apr  1 04:43:01 2014
@@ -16,10 +16,6 @@ package org.apache.lucene.analysis.misce
  * limitations under the License.
  */
 
-import java.io.IOException;
-import java.io.Reader;
-import java.io.StringReader;
-
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.MockTokenizer;
@@ -27,11 +23,15 @@ import org.apache.lucene.analysis.TokenS
 import org.apache.lucene.analysis.synonym.SynonymFilter;
 import org.apache.lucene.analysis.synonym.SynonymMap;
 import org.apache.lucene.util.CharsRef;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.io.StringReader;
 
 public class TestLimitTokenPositionFilter extends BaseTokenStreamTestCase {
 
   public void testMaxPosition2() throws IOException {
-    for (final boolean consumeAll : new boolean[] { true, false }) {
+    for (final boolean consumeAll : new boolean[]{true, false}) {
       Analyzer a = new Analyzer() {
         @Override
         protected TokenStreamComponents createComponents(String fieldName) {
@@ -42,43 +42,50 @@ public class TestLimitTokenPositionFilte
         }
       };
 
-      // dont use assertAnalyzesTo here, as the end offset is not the end of the string (unless
consumeAll is true, in which case its correct)!
-      assertTokenStreamContents(a.tokenStream("dummy", "1  2     3  4  5"), 
-                                new String[] { "1", "2" }, new int[] { 0, 3 }, new int[]
{ 1, 4 }, consumeAll ? 16 : null);
-      assertTokenStreamContents(a.tokenStream("dummy", new StringReader("1 2 3 4 5")), 
-                                new String[] { "1", "2" }, new int[] { 0, 2 }, new int[]
{ 1, 3 }, consumeAll ? 9 : null);
+      // don't use assertAnalyzesTo here, as the end offset is not the end of the string (unless consumeAll is true, in which case its correct)!
+      assertTokenStreamContents(a.tokenStream("dummy", "1  2     3  4  5"),
+          new String[]{"1", "2"}, new int[]{0, 3}, new int[]{1, 4}, consumeAll ? 16 : null);
+      assertTokenStreamContents(a.tokenStream("dummy", new StringReader("1 2 3 4 5")),
+          new String[]{"1", "2"}, new int[]{0, 2}, new int[]{1, 3}, consumeAll ? 9 : null);
 
       // less than the limit, ensure we behave correctly
       assertTokenStreamContents(a.tokenStream("dummy", "1  "),
-                                new String[] { "1" }, new int[] { 0 }, new int[] { 1 }, consumeAll
? 3 : null);
-                                                                                   
+          new String[]{"1"}, new int[]{0}, new int[]{1}, consumeAll ? 3 : null);
+
       // equal to limit
-      assertTokenStreamContents(a.tokenStream("dummy", "1  2  "), 
-                                new String[] { "1", "2" }, new int[] { 0, 3 }, new int[]
{ 1, 4 }, consumeAll ? 6 : null);
+      assertTokenStreamContents(a.tokenStream("dummy", "1  2  "),
+          new String[]{"1", "2"}, new int[]{0, 3}, new int[]{1, 4}, consumeAll ? 6 : null);
     }
   }
-  
+
   public void testMaxPosition3WithSynomyms() throws IOException {
-    MockTokenizer tokenizer = whitespaceMockTokenizer("one two three four five");
-    tokenizer.setEnableChecks(false); // LimitTokenPositionFilter doesn't consume the entire
stream that it wraps
-    
-    SynonymMap.Builder builder = new SynonymMap.Builder(true);
-    builder.add(new CharsRef("one"), new CharsRef("first"), true);
-    builder.add(new CharsRef("one"), new CharsRef("alpha"), true);
-    builder.add(new CharsRef("one"), new CharsRef("beguine"), true);
-    CharsRef multiWordCharsRef = new CharsRef();
-    SynonymMap.Builder.join(new String[] { "and", "indubitably", "single", "only" }, multiWordCharsRef);
-    builder.add(new CharsRef("one"), multiWordCharsRef, true);
-    SynonymMap.Builder.join(new String[]{"dopple", "ganger"}, multiWordCharsRef);
-    builder.add(new CharsRef("two"), multiWordCharsRef, true);
-    SynonymMap synonymMap = builder.build();
-    TokenStream stream = new SynonymFilter(tokenizer, synonymMap, true);
-    stream = new LimitTokenPositionFilter(stream, 3); // consumeAllTokens defaults to false
-    
-    // "only", the 4th word of multi-word synonym "and indubitably single only" is not emitted,
since its position is greater than 3.
-    assertTokenStreamContents(stream, 
-        new String[] { "one", "first", "alpha", "beguine", "and", "two", "indubitably", "dopple",
"three", "single", "ganger" },
-        new int[]    {     1,       0,       0,         0,    0,     1,              0, 
      0,       1,       0,         0 });
-    
+    for (final boolean consumeAll : new boolean[]{true, false}) {
+      MockTokenizer tokenizer = whitespaceMockTokenizer("one two three four five");
+      // if we are consuming all tokens, we can use the checks, otherwise we can't
+      tokenizer.setEnableChecks(consumeAll);
+
+      SynonymMap.Builder builder = new SynonymMap.Builder(true);
+      builder.add(new CharsRef("one"), new CharsRef("first"), true);
+      builder.add(new CharsRef("one"), new CharsRef("alpha"), true);
+      builder.add(new CharsRef("one"), new CharsRef("beguine"), true);
+      CharsRef multiWordCharsRef = new CharsRef();
+      SynonymMap.Builder.join(new String[]{"and", "indubitably", "single", "only"}, multiWordCharsRef);
+      builder.add(new CharsRef("one"), multiWordCharsRef, true);
+      SynonymMap.Builder.join(new String[]{"dopple", "ganger"}, multiWordCharsRef);
+      builder.add(new CharsRef("two"), multiWordCharsRef, true);
+      SynonymMap synonymMap = builder.build();
+      TokenStream stream = new SynonymFilter(tokenizer, synonymMap, true);
+      stream = new LimitTokenPositionFilter(stream, 3, consumeAll);
+
+      // "only", the 4th word of multi-word synonym "and indubitably single only" is not emitted, since its position is greater than 3.
+      assertTokenStreamContents(stream,
+          new String[]{"one", "first", "alpha", "beguine", "and", "two", "indubitably", "dopple", "three", "single", "ganger"},
+          new int[]{1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0});
+    }
+  }
+
+  @Test(expected = IllegalArgumentException.class)
+  public void testIllegalArguments() throws Exception {
+    new LimitTokenPositionFilter(whitespaceMockTokenizer("one two three four five"), 0);
   }
 }

Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenPositionFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenPositionFilterFactory.java?rev=1583530&r1=1583529&r2=1583530&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenPositionFilterFactory.java
(original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenPositionFilterFactory.java
Tue Apr  1 04:43:01 2014
@@ -16,26 +16,30 @@ package org.apache.lucene.analysis.misce
  * limitations under the License.
  */
 
-import java.io.Reader;
-import java.io.StringReader;
-
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
 
+import java.io.Reader;
+import java.io.StringReader;
+
 public class TestLimitTokenPositionFilterFactory extends BaseTokenStreamFactoryTestCase {
 
   public void testMaxPosition1() throws Exception {
-    Reader reader = new StringReader("A1 B2 C3 D4 E5 F6");
-    MockTokenizer tokenizer = whitespaceMockTokenizer(reader);
-    // LimitTokenPositionFilter doesn't consume the entire stream that it wraps
-    tokenizer.setEnableChecks(false);
-    TokenStream stream = tokenizer;
-    stream = tokenFilterFactory("LimitTokenPosition",
-        "maxTokenPosition", "1").create(stream);
-    assertTokenStreamContents(stream, new String[] { "A1" });
+    for (final boolean consumeAll : new boolean[]{true, false}) {
+      Reader reader = new StringReader("A1 B2 C3 D4 E5 F6");
+      MockTokenizer tokenizer = whitespaceMockTokenizer(reader);
+      // if we are consuming all tokens, we can use the checks, otherwise we can't
+      tokenizer.setEnableChecks(consumeAll);
+      TokenStream stream = tokenizer;
+      stream = tokenFilterFactory("LimitTokenPosition",
+          LimitTokenPositionFilterFactory.MAX_TOKEN_POSITION_KEY, "1",
+          LimitTokenPositionFilterFactory.CONSUME_ALL_TOKENS_KEY, Boolean.toString(consumeAll)
+      ).create(stream);
+      assertTokenStreamContents(stream, new String[]{"A1"});
+    }
   }
-  
+
   public void testMissingParam() throws Exception {
     try {
       tokenFilterFactory("LimitTokenPosition");
@@ -47,34 +51,31 @@ public class TestLimitTokenPositionFilte
   }
 
   public void testMaxPosition1WithShingles() throws Exception {
-    Reader reader = new StringReader("one two three four five");
-    MockTokenizer tokenizer = whitespaceMockTokenizer(reader);
-    // LimitTokenPositionFilter doesn't consume the entire stream that it wraps
-    tokenizer.setEnableChecks(false);
-    TokenStream stream = tokenizer;
-    stream = tokenFilterFactory("Shingle",
-        "minShingleSize", "2",
-        "maxShingleSize", "3",
-        "outputUnigrams", "true").create(stream);
-    stream = tokenFilterFactory("LimitTokenPosition",
-        "maxTokenPosition", "1").create(stream);
-    assertTokenStreamContents(stream, new String[] { "one", "one two", "one two three" });
-  }
-  
-  public void testConsumeAllTokens() throws Exception {
-    Reader reader = new StringReader("A1 B2 C3 D4 E5 F6");
-    TokenStream stream = whitespaceMockTokenizer(reader);
-    stream = tokenFilterFactory("LimitTokenPosition",
-        "maxTokenPosition", "3",
-        "consumeAllTokens", "true").create(stream);
-    assertTokenStreamContents(stream, new String[] { "A1", "B2", "C3" });
+    for (final boolean consumeAll : new boolean[]{true, false}) {
+      Reader reader = new StringReader("one two three four five");
+      MockTokenizer tokenizer = whitespaceMockTokenizer(reader);
+      // if we are consuming all tokens, we can use the checks, otherwise we can't
+      tokenizer.setEnableChecks(consumeAll);
+      TokenStream stream = tokenizer;
+      stream = tokenFilterFactory("Shingle",
+          "minShingleSize", "2",
+          "maxShingleSize", "3",
+          "outputUnigrams", "true").create(stream);
+      stream = tokenFilterFactory("LimitTokenPosition",
+          LimitTokenPositionFilterFactory.MAX_TOKEN_POSITION_KEY, "1",
+          LimitTokenPositionFilterFactory.CONSUME_ALL_TOKENS_KEY, Boolean.toString(consumeAll)
+      ).create(stream);
+      assertTokenStreamContents(stream, new String[]{"one", "one two", "one two three"});
+    }
   }
-  
-  /** Test that bogus arguments result in exception */
+
+  /**
+   * Test that bogus arguments result in exception
+   */
   public void testBogusArguments() throws Exception {
     try {
-      tokenFilterFactory("LimitTokenPosition", 
-          "maxTokenPosition", "3", 
+      tokenFilterFactory("LimitTokenPosition",
+          "maxTokenPosition", "3",
           "bogusArg", "bogusValue");
       fail();
     } catch (IllegalArgumentException expected) {

Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizerTest.java?rev=1583530&r1=1583529&r2=1583530&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizerTest.java
(original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizerTest.java
Tue Apr  1 04:43:01 2014
@@ -1,11 +1,12 @@
 package org.apache.lucene.analysis.sinks;
 
-/**
- * Copyright 2004 The Apache Software Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
@@ -21,6 +22,7 @@ import java.io.StringReader;
 
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.MockTokenizer;
+import org.junit.Test;
 
 public class TokenRangeSinkTokenizerTest extends BaseTokenStreamTestCase {
 
@@ -29,20 +31,25 @@ public class TokenRangeSinkTokenizerTest
     String test = "The quick red fox jumped over the lazy brown dogs";
     TeeSinkTokenFilter tee = new TeeSinkTokenFilter(whitespaceMockTokenizer(test));
     TeeSinkTokenFilter.SinkTokenStream rangeToks = tee.newSinkTokenStream(sinkFilter);
-    
+
     int count = 0;
     tee.reset();
     while(tee.incrementToken()) {
       count++;
     }
-    
+
     int sinkCount = 0;
     rangeToks.reset();
     while (rangeToks.incrementToken()) {
       sinkCount++;
     }
-    
+
     assertTrue(count + " does not equal: " + 10, count == 10);
     assertTrue("rangeToks Size: " + sinkCount + " is not: " + 2, sinkCount == 2);
   }
+
+  @Test(expected = IllegalArgumentException.class)
+  public void testIllegalArguments() throws Exception {
+    new TokenRangeSinkFilter(4, 2);
+  }
 }
\ No newline at end of file

Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailTokenizerFactory.java?rev=1583530&r1=1583529&r2=1583530&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailTokenizerFactory.java
(original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailTokenizerFactory.java
Tue Apr  1 04:43:01 2014
@@ -172,4 +172,13 @@ public class TestUAX29URLEmailTokenizerF
       assertTrue(expected.getMessage().contains("Unknown parameters"));
     }
   }
+
+ public void testIllegalArguments() throws Exception {
+    try {
+      tokenizerFactory("UAX29URLEmail", "maxTokenLength", "-1").create();
+      fail();
+    } catch (IllegalArgumentException expected) {
+      assertTrue(expected.getMessage().contains("maxTokenLength must be greater than zero"));
+    }
+  }
 }



Mime
View raw message