lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From rm...@apache.org
Subject svn commit: r897692 - in /lucene/java/branches/lucene_2_9: ./ contrib/ contrib/analyzers/common/src/java/org/apache/lucene/analysis/shingle/ contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/ src/java/org/apache/lucene/search/ src/ja...
Date Sun, 10 Jan 2010 19:47:28 GMT
Author: rmuir
Date: Sun Jan 10 19:47:27 2010
New Revision: 897692

URL: http://svn.apache.org/viewvc?rev=897692&view=rev
Log:
LUCENE-2199: ShingleFilter skipped over tri-gram shingles if outputUnigram was set to false

Modified:
    lucene/java/branches/lucene_2_9/   (props changed)
    lucene/java/branches/lucene_2_9/CHANGES.txt   (props changed)
    lucene/java/branches/lucene_2_9/contrib/   (props changed)
    lucene/java/branches/lucene_2_9/contrib/CHANGES.txt
    lucene/java/branches/lucene_2_9/contrib/analyzers/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilter.java
    lucene/java/branches/lucene_2_9/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java
    lucene/java/branches/lucene_2_9/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java   (props changed)
    lucene/java/branches/lucene_2_9/src/java/org/apache/lucene/util/AttributeSource.java   (props changed)
    lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java   (props changed)
    lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/analysis/TestISOLatin1AccentFilter.java   (props changed)
    lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/document/TestDateTools.java   (props changed)
    lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/document/TestNumberTools.java   (props changed)
    lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java   (props changed)
    lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/util/TestAttributeSource.java   (props changed)

Propchange: lucene/java/branches/lucene_2_9/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Jan 10 19:47:27 2010
@@ -1,3 +1,3 @@
 /lucene/java/branches/lucene_2_4:748824
 /lucene/java/branches/lucene_3_0:886275
-/lucene/java/trunk:824125,826029,826385,830871,833095,833297,833886,881819,882672,883554,884870,886257,887532,891189,891363
+/lucene/java/trunk:824125,826029,826385,830871,833095,833297,833886,881819,882672,883554,884870,886257,887532,891189,891363,897672

Propchange: lucene/java/branches/lucene_2_9/CHANGES.txt
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Jan 10 19:47:27 2010
@@ -1 +1 @@
-/lucene/java/trunk/CHANGES.txt:881819,886257,887532,891189,891363
+/lucene/java/trunk/CHANGES.txt:881819,886257,887532,891189,891363,897672

Propchange: lucene/java/branches/lucene_2_9/contrib/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Jan 10 19:47:27 2010
@@ -1 +1 @@
-/lucene/java/trunk/contrib:881819,886257,887532,891189,891363
+/lucene/java/trunk/contrib:881819,886257,887532,891189,891363,897672

Modified: lucene/java/branches/lucene_2_9/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_9/contrib/CHANGES.txt?rev=897692&r1=897691&r2=897692&view=diff
==============================================================================
--- lucene/java/branches/lucene_2_9/contrib/CHANGES.txt (original)
+++ lucene/java/branches/lucene_2_9/contrib/CHANGES.txt Sun Jan 10 19:47:27 2010
@@ -15,6 +15,11 @@
    open until the last thread accessing them releases the reference.
    (Simon Willnauer)
 
+Bug Fixes
+
+ * LUCENE-2199: ShingleFilter skipped over tri-gram shingles if outputUnigram
+   was set to false. (Simon Willnauer)
+
 ======================= Release 2.9.1 2009-11-06 =======================
 
 Changes in backwards compatibility policy

Modified: lucene/java/branches/lucene_2_9/contrib/analyzers/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_9/contrib/analyzers/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilter.java?rev=897692&r1=897691&r2=897692&view=diff
==============================================================================
--- lucene/java/branches/lucene_2_9/contrib/analyzers/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilter.java (original)
+++ lucene/java/branches/lucene_2_9/contrib/analyzers/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilter.java Sun Jan 10 19:47:27 2010
@@ -180,7 +180,7 @@
           shingleBufferPosition++;
           return true;
         }
-      } else {
+      } else if (shingleBufferPosition % this.maxShingleSize == 0){
         shingleBufferPosition++;
       }
   
@@ -195,7 +195,7 @@
           termBuffer = termAtt.resizeTermBuffer(termLength);
         buf.getChars(0, termLength, termBuffer, 0);
         termAtt.setTermLength(termLength);
-        if ((! outputUnigrams) && shingleBufferPosition == 1) {
+        if ((! outputUnigrams) && shingleBufferPosition % this.maxShingleSize == 1) {
           posIncrAtt.setPositionIncrement(1);
         } else {
           posIncrAtt.setPositionIncrement(0);

Modified: lucene/java/branches/lucene_2_9/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_9/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java?rev=897692&r1=897691&r2=897692&view=diff
==============================================================================
--- lucene/java/branches/lucene_2_9/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java (original)
+++ lucene/java/branches/lucene_2_9/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/ShingleFilterTest.java Sun Jan 10 19:47:27 2010
@@ -199,7 +199,92 @@
     "word", "shingle",
     "word"
   };
+  
+  public static final Token[] TRI_GRAM_TOKENS_WITHOUT_UNIGRAMS = new Token[] {
+    createToken("please divide", 0, 13),
+    createToken("please divide this", 0, 18),
+    createToken("divide this", 7, 18),
+    createToken("divide this sentence", 7, 27),
+    createToken("this sentence", 14, 27),
+    createToken("this sentence into", 14, 32),
+    createToken("sentence into", 19, 32),
+    createToken("sentence into shingles", 19, 39),
+    createToken("into shingles", 28, 39),
+  };
+
+  public static final int[] TRI_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS = new int[] {
+    1, 0, 1, 0, 1, 0, 1, 0, 1
+  };
+  
+  public static final String[] TRI_GRAM_TYPES_WITHOUT_UNIGRAMS = new String[] {
+    "shingle", "shingle",
+    "shingle", "shingle",
+    "shingle", "shingle",
+    "shingle", "shingle",
+    "shingle",
+  };
+  
+  public static final Token[] FOUR_GRAM_TOKENS = new Token[] {
+    createToken("please", 0, 6),
+    createToken("please divide", 0, 13),
+    createToken("please divide this", 0, 18),
+    createToken("please divide this sentence", 0, 27),
+    createToken("divide", 7, 13),
+    createToken("divide this", 7, 18),
+    createToken("divide this sentence", 7, 27),
+    createToken("divide this sentence into", 7, 32),
+    createToken("this", 14, 18),
+    createToken("this sentence", 14, 27),
+    createToken("this sentence into", 14, 32),
+    createToken("this sentence into shingles", 14, 39),
+    createToken("sentence", 19, 27),
+    createToken("sentence into", 19, 32),
+    createToken("sentence into shingles", 19, 39),
+    createToken("into", 28, 32),
+    createToken("into shingles", 28, 39),
+    createToken("shingles", 33, 39)
+  };
+
+  public static final int[] FOUR_GRAM_POSITION_INCREMENTS = new int[] {
+    1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1
+  };
+
+  public static final String[] FOUR_GRAM_TYPES = new String[] {
+    "word", "shingle", "shingle", "shingle",
+    "word", "shingle", "shingle", "shingle",
+    "word", "shingle", "shingle", "shingle",
+    "word", "shingle", "shingle",
+    "word", "shingle",
+    "word"
+  };
+
+  public static final Token[] FOUR_GRAM_TOKENS_WITHOUT_UNIGRAMS = new Token[] {
+    createToken("please divide", 0, 13),
+    createToken("please divide this", 0, 18),
+    createToken("please divide this sentence", 0, 27),
+    createToken("divide this", 7, 18),
+    createToken("divide this sentence", 7, 27),
+    createToken("divide this sentence into", 7, 32),
+    createToken("this sentence", 14, 27),
+    createToken("this sentence into", 14, 32),
+    createToken("this sentence into shingles", 14, 39),
+    createToken("sentence into", 19, 32),
+    createToken("sentence into shingles", 19, 39),
+    createToken("into shingles", 28, 39),
+  };
+
+  public static final int[] FOUR_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS = new int[] {
+    1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1
+  };
 
+  public static final String[] FOUR_GRAM_TYPES_WITHOUT_UNIGRAMS = new String[] {
+    "shingle", "shingle",
+    "shingle", "shingle",
+    "shingle", "shingle",
+    "shingle", "shingle",
+    "shingle", "shingle",
+    "shingle", "shingle",
+  };
 
   protected void setUp() throws Exception {
     super.setUp();
@@ -270,8 +355,25 @@
                            TRI_GRAM_POSITION_INCREMENTS, TRI_GRAM_TYPES,
                            true);
   }
-
-
+  
+  public void testTriGramFilterWithoutUnigrams() throws IOException {
+    this.shingleFilterTest(3, TEST_TOKEN, TRI_GRAM_TOKENS_WITHOUT_UNIGRAMS,
+                           TRI_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS, TRI_GRAM_TYPES_WITHOUT_UNIGRAMS,
+                           false);
+  }
+  
+  public void testFourGramFilter() throws IOException {
+    this.shingleFilterTest(4, TEST_TOKEN, FOUR_GRAM_TOKENS,
+        FOUR_GRAM_POSITION_INCREMENTS, FOUR_GRAM_TYPES,
+                           true);
+  }
+  
+  public void testFourGramFilterWithoutUnigrams() throws IOException {
+    this.shingleFilterTest(4, TEST_TOKEN, FOUR_GRAM_TOKENS_WITHOUT_UNIGRAMS,
+        FOUR_GRAM_POSITION_INCREMENTS_WITHOUT_UNIGRAMS,
+        FOUR_GRAM_TYPES_WITHOUT_UNIGRAMS, false);
+  }
+  
   
   public void testReset() throws Exception {
    Tokenizer wsTokenizer = new WhitespaceTokenizer(new StringReader("please divide this sentence"));

Propchange: lucene/java/branches/lucene_2_9/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Jan 10 19:47:27 2010
@@ -1 +1 @@
-/lucene/java/trunk/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java:881819,886257,887532,891189,891363
+/lucene/java/trunk/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java:881819,886257,887532,891189,891363,897672

Propchange: lucene/java/branches/lucene_2_9/src/java/org/apache/lucene/util/AttributeSource.java
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Jan 10 19:47:27 2010
@@ -1,3 +1,3 @@
 /lucene/java/branches/lucene_2_4/src/java/org/apache/lucene/util/AttributeSource.java:748824
 /lucene/java/branches/lucene_3_0/src/java/org/apache/lucene/util/AttributeSource.java:886275
-/lucene/java/trunk/src/java/org/apache/lucene/util/AttributeSource.java:824125,826029,826385,830871,833095,833297,833886,881819,882672,883554,884870,886257,887532,891189,891363,894348
+/lucene/java/trunk/src/java/org/apache/lucene/util/AttributeSource.java:824125,826029,826385,830871,833095,833297,833886,881819,882672,883554,884870,886257,887532,891189,891363,894348,897672

Propchange: lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Jan 10 19:47:27 2010
@@ -1,3 +1,3 @@
 /lucene/java/branches/lucene_2_4/src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java:748824
 /lucene/java/branches/lucene_3_0/src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java:886275
-/lucene/java/trunk/src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java:818920,824125,826029,826385,830871,833095,833297,833886,881819,882672,883554,884870,887532,891189,891363
+/lucene/java/trunk/src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java:818920,824125,826029,826385,830871,833095,833297,833886,881819,882672,883554,884870,887532,891189,891363,897672

Propchange: lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/analysis/TestISOLatin1AccentFilter.java
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Jan 10 19:47:27 2010
@@ -1 +1 @@
-/lucene/java/trunk/src/test/org/apache/lucene/analysis/TestISOLatin1AccentFilter.java:881819,886257,887532,891189,891363
+/lucene/java/trunk/src/test/org/apache/lucene/analysis/TestISOLatin1AccentFilter.java:881819,886257,887532,891189,891363,897672

Propchange: lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/document/TestDateTools.java
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Jan 10 19:47:27 2010
@@ -1 +1 @@
-/lucene/java/trunk/src/test/org/apache/lucene/document/TestDateTools.java:881819,886257,887532,891189,891363
+/lucene/java/trunk/src/test/org/apache/lucene/document/TestDateTools.java:881819,886257,887532,891189,891363,897672

Propchange: lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/document/TestNumberTools.java
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Jan 10 19:47:27 2010
@@ -1 +1 @@
-/lucene/java/trunk/src/test/org/apache/lucene/document/TestNumberTools.java:881819,886257,887532,891189,891363
+/lucene/java/trunk/src/test/org/apache/lucene/document/TestNumberTools.java:881819,886257,887532,891189,891363,897672

Propchange: lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Jan 10 19:47:27 2010
@@ -1 +1 @@
-/lucene/java/trunk/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java:881819,886257,887532,891189,891363
+/lucene/java/trunk/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java:881819,886257,887532,891189,891363,897672

Propchange: lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/util/TestAttributeSource.java
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Jan 10 19:47:27 2010
@@ -1 +1 @@
-/lucene/java/trunk/src/test/org/apache/lucene/util/TestAttributeSource.java:881819,886257,887532,891189,891363
+/lucene/java/trunk/src/test/org/apache/lucene/util/TestAttributeSource.java:881819,886257,887532,891189,891363,897672



Mime
View raw message