Return-Path: X-Original-To: apmail-lucene-commits-archive@www.apache.org Delivered-To: apmail-lucene-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 25B18F1AC for ; Fri, 10 May 2013 08:01:52 +0000 (UTC) Received: (qmail 91936 invoked by uid 500); 10 May 2013 08:01:52 -0000 Mailing-List: contact commits-help@lucene.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@lucene.apache.org Delivered-To: mailing list commits@lucene.apache.org Received: (qmail 91928 invoked by uid 99); 10 May 2013 08:01:52 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 10 May 2013 08:01:51 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED,T_FILL_THIS_FORM_SHORT X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 10 May 2013 08:01:48 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id 9859B238889B; Fri, 10 May 2013 08:01:27 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1480912 - in /lucene/dev/branches/branch_4x: ./ lucene/ lucene/analysis/ lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/ lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/ Date: Fri, 10 May 2013 08:01:27 -0000 To: commits@lucene.apache.org From: uschindler@apache.org X-Mailer: svnmailer-1.0.8-patched Message-Id: <20130510080127.9859B238889B@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: uschindler Date: Fri May 10 08:01:26 2013 New Revision: 1480912 URL: http://svn.apache.org/r1480912 Log: Merged revision(s) 1480911 from lucene/dev/trunk: LUCENE-4993: Fix BeiderMorseFilter to preserve custom attributes when inserting tokens with position increment 0. Modified: lucene/dev/branches/branch_4x/ (props changed) lucene/dev/branches/branch_4x/lucene/ (props changed) lucene/dev/branches/branch_4x/lucene/CHANGES.txt (contents, props changed) lucene/dev/branches/branch_4x/lucene/analysis/ (props changed) lucene/dev/branches/branch_4x/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/BeiderMorseFilter.java lucene/dev/branches/branch_4x/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilter.java Modified: lucene/dev/branches/branch_4x/lucene/CHANGES.txt URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/CHANGES.txt?rev=1480912&r1=1480911&r2=1480912&view=diff ============================================================================== --- lucene/dev/branches/branch_4x/lucene/CHANGES.txt (original) +++ lucene/dev/branches/branch_4x/lucene/CHANGES.txt Fri May 10 08:01:26 2013 @@ -92,6 +92,9 @@ Bug Fixes * LUCENE-4994: Fix PatternKeywordMarkerFilter to have public constructor. (Uwe Schindler) +* LUCENE-4993: Fix BeiderMorseFilter to preserve custom attributes when + inserting tokens with position increment 0. (Uwe Schindler) + Optimizations * LUCENE-4938: Don't use an unnecessarily large priority queue in IndexSearcher Modified: lucene/dev/branches/branch_4x/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/BeiderMorseFilter.java URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/BeiderMorseFilter.java?rev=1480912&r1=1480911&r2=1480912&view=diff ============================================================================== --- lucene/dev/branches/branch_4x/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/BeiderMorseFilter.java (original) +++ lucene/dev/branches/branch_4x/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/BeiderMorseFilter.java Fri May 10 08:01:26 2013 @@ -27,7 +27,6 @@ import org.apache.commons.codec.language import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; /** @@ -48,13 +47,11 @@ public final class BeiderMorseFilter ext private final Matcher matcher = pattern.matcher(""); // encoded representation private String encoded; - // offsets for any buffered outputs - private int startOffset; - private int endOffset; + // preserves all attributes for any buffered outputs + private State state; private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class); - private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); /** @@ -83,10 +80,10 @@ public final class BeiderMorseFilter ext @Override public boolean incrementToken() throws IOException { if (matcher.find()) { - clearAttributes(); + assert state != null && encoded != null; + restoreState(state); termAtt.setEmpty().append(encoded, matcher.start(1), matcher.end(1)); posIncAtt.setPositionIncrement(0); - offsetAtt.setOffset(startOffset, endOffset); return true; } @@ -94,8 +91,7 @@ public final class BeiderMorseFilter ext encoded = (languages == null) ? engine.encode(termAtt.toString()) : engine.encode(termAtt.toString(), languages); - startOffset = offsetAtt.startOffset(); - endOffset = offsetAtt.endOffset(); + state = captureState(); matcher.reset(encoded); if (matcher.find()) { termAtt.setEmpty().append(encoded, matcher.start(1), matcher.end(1)); Modified: lucene/dev/branches/branch_4x/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilter.java URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilter.java?rev=1480912&r1=1480911&r2=1480912&view=diff ============================================================================== --- lucene/dev/branches/branch_4x/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilter.java (original) +++ lucene/dev/branches/branch_4x/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilter.java Fri May 10 08:01:26 2013 @@ -19,7 +19,9 @@ package org.apache.lucene.analysis.phone import java.io.IOException; import java.io.Reader; +import java.io.StringReader; import java.util.HashSet; +import java.util.regex.Pattern; import org.apache.commons.codec.language.bm.NameType; import org.apache.commons.codec.language.bm.PhoneticEngine; @@ -29,7 +31,10 @@ import org.apache.lucene.analysis.Analyz import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.core.KeywordTokenizer; +import org.apache.lucene.analysis.miscellaneous.PatternKeywordMarkerFilter; +import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; import org.junit.Ignore; /** Tests {@link BeiderMorseFilter} */ @@ -103,4 +108,20 @@ public class TestBeiderMorseFilter exten }; checkOneTermReuse(a, "", ""); } + + public void testCustomAttribute() throws IOException { + TokenStream stream = new KeywordTokenizer(new StringReader("D'Angelo")); + stream = new PatternKeywordMarkerFilter(stream, Pattern.compile(".*")); + stream = new BeiderMorseFilter(stream, new PhoneticEngine(NameType.GENERIC, RuleType.EXACT, true)); + KeywordAttribute keyAtt = stream.addAttribute(KeywordAttribute.class); + stream.reset(); + int i = 0; + while(stream.incrementToken()) { + assertTrue(keyAtt.isKeyword()); + i++; + } + assertEquals(12, i); + stream.end(); + stream.close(); + } }