lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sha...@apache.org
Subject [16/47] lucene-solr:feature/autoscaling: LUCENE-7877: Add ConcatenatingTokenStream, remove PrefixAwareTokenFilter
Date Sun, 25 Jun 2017 02:06:38 GMT
LUCENE-7877: Add ConcatenatingTokenStream, remove PrefixAwareTokenFilter


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/a948e171
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/a948e171
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/a948e171

Branch: refs/heads/feature/autoscaling
Commit: a948e1714609ef662184c71eedb219caf44fc037
Parents: ad2cb77
Author: Alan Woodward <romseygeek@apache.org>
Authored: Thu Jun 22 09:13:13 2017 +0100
Committer: Alan Woodward <romseygeek@apache.org>
Committed: Thu Jun 22 09:13:13 2017 +0100

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |   3 +
 lucene/MIGRATE.txt                              |   6 +
 .../miscellaneous/ConcatenatingTokenStream.java | 121 +++++++++++
 .../PrefixAndSuffixAwareTokenFilter.java        |  84 --------
 .../miscellaneous/PrefixAwareTokenFilter.java   | 202 -------------------
 .../TestConcatenatingTokenStream.java           |  82 ++++++++
 .../TestPrefixAndSuffixAwareTokenFilter.java    |  49 -----
 .../TestPrefixAwareTokenFilter.java             |  60 ------
 8 files changed, 212 insertions(+), 395 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a948e171/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 56da726..0142f46 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -83,6 +83,9 @@ API Changes
 * LUCENE-7868: IndexWriterConfig.setMaxBufferedDeleteTerms is
   removed. (Simon Willnauer, Mike McCandless)
 
+* LUCENE-7877: PrefixAwareTokenFilter is replaced with ConcatenatingTokenStream
+  (Alan Woodward, Uwe Schindler, Adrien Grand)
+
 Bug Fixes
 
 * LUCENE-7626: IndexWriter will no longer accept broken token offsets

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a948e171/lucene/MIGRATE.txt
----------------------------------------------------------------------
diff --git a/lucene/MIGRATE.txt b/lucene/MIGRATE.txt
index 10ce4bc..089d196 100644
--- a/lucene/MIGRATE.txt
+++ b/lucene/MIGRATE.txt
@@ -131,3 +131,9 @@ they might have more than 2B matches in total. However TopDocs instances
 returned by IndexSearcher will still have a total number of hits which is less
 than 2B since Lucene indexes are still bound to at most 2B documents, so it
 can safely be casted to an int in that case.
+
+## PrefixAwareTokenFilter and PrefixAndSuffixAwareTokenFilter removed
+(LUCENE-7877)
+
+Instead use ConcatenatingTokenStream, which will allow for the use of custom
+attributes.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a948e171/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/ConcatenatingTokenStream.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/ConcatenatingTokenStream.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/ConcatenatingTokenStream.java
new file mode 100644
index 0000000..960cae1
--- /dev/null
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/ConcatenatingTokenStream.java
@@ -0,0 +1,121 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.miscellaneous;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.util.Attribute;
+import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.IOUtils;
+
+/**
+ * A TokenStream that takes an array of input TokenStreams as sources, and
+ * concatenates them together.
+ *
+ * Offsets from the second and subsequent sources are incremented to behave
+ * as if all the inputs were from a single source.
+ *
+ * All of the input TokenStreams must have the same attribute implementations
+ */
+public final class ConcatenatingTokenStream extends TokenStream {
+
+  private final TokenStream[] sources;
+  private final OffsetAttribute[] sourceOffsets;
+  private final OffsetAttribute offsetAtt;
+
+  private int currentSource;
+  private int offsetIncrement;
+
+  /**
+   * Create a new ConcatenatingTokenStream from a set of inputs
+   * @param sources an array of TokenStream inputs to concatenate
+   */
+  public ConcatenatingTokenStream(TokenStream... sources) {
+    super(combineSources(sources));
+    this.sources = sources;
+    this.offsetAtt = addAttribute(OffsetAttribute.class);
+    this.sourceOffsets = new OffsetAttribute[sources.length];
+    for (int i = 0; i < sources.length; i++) {
+      this.sourceOffsets[i] = sources[i].addAttribute(OffsetAttribute.class);
+    }
+  }
+
+  private static AttributeSource combineSources(TokenStream... sources) {
+    AttributeSource base = sources[0].cloneAttributes();
+    try {
+      for (int i = 1; i < sources.length; i++) {
+        Iterator<Class<? extends Attribute>> it = sources[i].getAttributeClassesIterator();
+        while (it.hasNext()) {
+          base.addAttribute(it.next());
+        }
+        // check attributes can be captured
+        sources[i].copyTo(base);
+      }
+      return base;
+    }
+    catch (IllegalArgumentException e) {
+      throw new IllegalArgumentException("Attempted to concatenate TokenStreams with different attribute types", e);
+    }
+  }
+
+  @Override
+  public boolean incrementToken() throws IOException {
+    while (sources[currentSource].incrementToken() == false) {
+      if (currentSource >= sources.length - 1)
+        return false;
+      sources[currentSource].end();
+      OffsetAttribute att = sourceOffsets[currentSource];
+      if (att != null)
+        offsetIncrement += att.endOffset();
+      currentSource++;
+    }
+
+    clearAttributes();
+    sources[currentSource].copyTo(this);
+    offsetAtt.setOffset(offsetAtt.startOffset() + offsetIncrement, offsetAtt.endOffset() + offsetIncrement);
+
+    return true;
+  }
+
+  @Override
+  public void end() throws IOException {
+    sources[currentSource].end();
+    super.end();
+  }
+
+  @Override
+  public void reset() throws IOException {
+    for (TokenStream source : sources) {
+      source.reset();
+    }
+    super.reset();
+  }
+
+  @Override
+  public void close() throws IOException {
+    try {
+      IOUtils.close(sources);
+    }
+    finally {
+      super.close();
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a948e171/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAndSuffixAwareTokenFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAndSuffixAwareTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAndSuffixAwareTokenFilter.java
deleted file mode 100644
index ee669e0..0000000
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAndSuffixAwareTokenFilter.java
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.analysis.miscellaneous;
-
-
-import org.apache.lucene.analysis.Token;
-import org.apache.lucene.analysis.TokenStream;
-
-import java.io.IOException;
-
-/**
- * Links two {@link PrefixAwareTokenFilter}.
- * <p>
- * <b>NOTE:</b> This filter might not behave correctly if used with custom Attributes, i.e. Attributes other than
- * the ones located in org.apache.lucene.analysis.tokenattributes. 
- */
-public class PrefixAndSuffixAwareTokenFilter extends TokenStream {
-
-  private PrefixAwareTokenFilter suffix;
-
-  public PrefixAndSuffixAwareTokenFilter(TokenStream prefix, TokenStream input, TokenStream suffix) {
-    super(suffix);
-    prefix = new PrefixAwareTokenFilter(prefix, input) {
-      @Override
-      public Token updateSuffixToken(Token suffixToken, Token lastInputToken) {
-        return PrefixAndSuffixAwareTokenFilter.this.updateInputToken(suffixToken, lastInputToken);
-      }
-    };
-    this.suffix = new PrefixAwareTokenFilter(prefix, suffix) {
-      @Override
-      public Token updateSuffixToken(Token suffixToken, Token lastInputToken) {
-        return PrefixAndSuffixAwareTokenFilter.this.updateSuffixToken(suffixToken, lastInputToken);
-      }
-    };
-  }
-
-  public Token updateInputToken(Token inputToken, Token lastPrefixToken) {
-    inputToken.setOffset(lastPrefixToken.endOffset() + inputToken.startOffset(), 
-                         lastPrefixToken.endOffset() + inputToken.endOffset());
-    return inputToken;
-  }
-
-  public Token updateSuffixToken(Token suffixToken, Token lastInputToken) {
-    suffixToken.setOffset(lastInputToken.endOffset() + suffixToken.startOffset(),
-                          lastInputToken.endOffset() + suffixToken.endOffset());
-    return suffixToken;
-  }
-
-
-  @Override
-  public final boolean incrementToken() throws IOException {
-    return suffix.incrementToken();
-  }
-
-  @Override
-  public void reset() throws IOException {
-    suffix.reset();
-  }
-
-
-  @Override
-  public void close() throws IOException {
-    suffix.close();
-  }
-
-  @Override
-  public void end() throws IOException {
-    suffix.end();
-  }
-}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a948e171/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAwareTokenFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAwareTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAwareTokenFilter.java
deleted file mode 100644
index cb866bd..0000000
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAwareTokenFilter.java
+++ /dev/null
@@ -1,202 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.analysis.miscellaneous;
-
-
-import org.apache.lucene.analysis.Token;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
-import org.apache.lucene.util.BytesRef;
-
-import java.io.IOException;
-
-
-/**
- * Joins two token streams and leaves the last token of the first stream available
- * to be used when updating the token values in the second stream based on that token.
- *
- * The default implementation adds last prefix token end offset to the suffix token start and end offsets.
- * <p>
- * <b>NOTE:</b> This filter might not behave correctly if used with custom Attributes, i.e. Attributes other than
- * the ones located in org.apache.lucene.analysis.tokenattributes. 
- */
-public class PrefixAwareTokenFilter extends TokenStream {
-
-  private TokenStream prefix;
-  private TokenStream suffix;
-  
-  private CharTermAttribute termAtt;
-  private PositionIncrementAttribute posIncrAtt;
-  private PayloadAttribute payloadAtt;
-  private OffsetAttribute offsetAtt;
-  private TypeAttribute typeAtt;
-  private FlagsAttribute flagsAtt;
-
-  private CharTermAttribute p_termAtt;
-  private PositionIncrementAttribute p_posIncrAtt;
-  private PayloadAttribute p_payloadAtt;
-  private OffsetAttribute p_offsetAtt;
-  private TypeAttribute p_typeAtt;
-  private FlagsAttribute p_flagsAtt;
-
-  public PrefixAwareTokenFilter(TokenStream prefix, TokenStream suffix) {
-    super(suffix);
-    this.suffix = suffix;
-    this.prefix = prefix;
-    prefixExhausted = false;
-    
-    termAtt = addAttribute(CharTermAttribute.class);
-    posIncrAtt = addAttribute(PositionIncrementAttribute.class);
-    payloadAtt = addAttribute(PayloadAttribute.class);
-    offsetAtt = addAttribute(OffsetAttribute.class);
-    typeAtt = addAttribute(TypeAttribute.class);
-    flagsAtt = addAttribute(FlagsAttribute.class);
-
-    p_termAtt = prefix.addAttribute(CharTermAttribute.class);
-    p_posIncrAtt = prefix.addAttribute(PositionIncrementAttribute.class);
-    p_payloadAtt = prefix.addAttribute(PayloadAttribute.class);
-    p_offsetAtt = prefix.addAttribute(OffsetAttribute.class);
-    p_typeAtt = prefix.addAttribute(TypeAttribute.class);
-    p_flagsAtt = prefix.addAttribute(FlagsAttribute.class);
-  }
-
-  private Token previousPrefixToken = new Token();
-  private Token reusableToken = new Token();
-
-  private boolean prefixExhausted;
-
-  @Override
-  public final boolean incrementToken() throws IOException {
-    if (!prefixExhausted) {
-      Token nextToken = getNextPrefixInputToken(reusableToken);
-      if (nextToken == null) {
-        prefixExhausted = true;
-      } else {
-        previousPrefixToken.reinit(nextToken);
-        // Make it a deep copy
-        BytesRef p = previousPrefixToken.getPayload();
-        if (p != null) {
-          previousPrefixToken.setPayload(p.clone());
-        }
-        setCurrentToken(nextToken);
-        return true;
-      }
-    }
-
-    Token nextToken = getNextSuffixInputToken(reusableToken);
-    if (nextToken == null) {
-      return false;
-    }
-
-    nextToken = updateSuffixToken(nextToken, previousPrefixToken);
-    setCurrentToken(nextToken);
-    return true;
-  }
-  
-  private void setCurrentToken(Token token) {
-    if (token == null) return;
-    clearAttributes();
-    termAtt.copyBuffer(token.buffer(), 0, token.length());
-    posIncrAtt.setPositionIncrement(token.getPositionIncrement());
-    flagsAtt.setFlags(token.getFlags());
-    offsetAtt.setOffset(token.startOffset(), token.endOffset());
-    typeAtt.setType(token.type());
-    payloadAtt.setPayload(token.getPayload());
-  }
-  
-  private Token getNextPrefixInputToken(Token token) throws IOException {
-    if (!prefix.incrementToken()) return null;
-    token.copyBuffer(p_termAtt.buffer(), 0, p_termAtt.length());
-    token.setPositionIncrement(p_posIncrAtt.getPositionIncrement());
-    token.setFlags(p_flagsAtt.getFlags());
-    token.setOffset(p_offsetAtt.startOffset(), p_offsetAtt.endOffset());
-    token.setType(p_typeAtt.type());
-    token.setPayload(p_payloadAtt.getPayload());
-    return token;
-  }
-
-  private Token getNextSuffixInputToken(Token token) throws IOException {
-    if (!suffix.incrementToken()) return null;
-    token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
-    token.setPositionIncrement(posIncrAtt.getPositionIncrement());
-    token.setFlags(flagsAtt.getFlags());
-    token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
-    token.setType(typeAtt.type());
-    token.setPayload(payloadAtt.getPayload());
-    return token;
-  }
-
-  /**
-   * The default implementation adds last prefix token end offset to the suffix token start and end offsets.
-   *
-   * @param suffixToken a token from the suffix stream
-   * @param lastPrefixToken the last token from the prefix stream
-   * @return consumer token
-   */
-  public Token updateSuffixToken(Token suffixToken, Token lastPrefixToken) {
-    suffixToken.setOffset(lastPrefixToken.endOffset() + suffixToken.startOffset(),
-                          lastPrefixToken.endOffset() + suffixToken.endOffset());
-    return suffixToken;
-  }
-
-  @Override
-  public void end() throws IOException {
-    prefix.end();
-    suffix.end();
-  }
-
-  @Override
-  public void close() throws IOException {
-    prefix.close();
-    suffix.close();
-  }
-
-  @Override
-  public void reset() throws IOException {
-    super.reset();
-    if (prefix != null) {
-      prefixExhausted = false;
-      prefix.reset();
-    }
-    if (suffix != null) {
-      suffix.reset();
-    }
-
-
-  }
-
-  public TokenStream getPrefix() {
-    return prefix;
-  }
-
-  public void setPrefix(TokenStream prefix) {
-    this.prefix = prefix;
-  }
-
-  public TokenStream getSuffix() {
-    return suffix;
-  }
-
-  public void setSuffix(TokenStream suffix) {
-    this.suffix = suffix;
-  }
-}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a948e171/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestConcatenatingTokenStream.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestConcatenatingTokenStream.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestConcatenatingTokenStream.java
new file mode 100644
index 0000000..258f9b8
--- /dev/null
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestConcatenatingTokenStream.java
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.miscellaneous;
+
+import java.io.IOException;
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
+import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
+import org.apache.lucene.util.AttributeFactory;
+
+public class TestConcatenatingTokenStream extends BaseTokenStreamTestCase {
+
+  public void testBasic() throws IOException {
+
+    AttributeFactory factory = newAttributeFactory();
+
+    final MockTokenizer first = new MockTokenizer(factory, MockTokenizer.WHITESPACE, false);
+    first.setReader(new StringReader("first words "));
+    final MockTokenizer second = new MockTokenizer(factory, MockTokenizer.WHITESPACE, false);
+    second.setReader(new StringReader("second words"));
+    final MockTokenizer third = new MockTokenizer(factory, MockTokenizer.WHITESPACE, false);
+    third.setReader(new StringReader(" third words"));
+
+    TokenStream ts = new ConcatenatingTokenStream(first, second, new EmptyTokenStream(), third);
+    assertTokenStreamContents(ts,
+        new String[] { "first", "words", "second", "words", "third", "words" },
+        new int[]{ 0, 6, 12, 19, 25, 31 },
+        new int[]{ 5, 11, 18, 24, 30, 36 });
+
+  }
+
+  public void testInconsistentAttributes() throws IOException {
+
+    AttributeFactory factory = newAttributeFactory();
+
+    final MockTokenizer first = new MockTokenizer(factory, MockTokenizer.WHITESPACE, false);
+    first.setReader(new StringReader("first words "));
+    first.addAttribute(PayloadAttribute.class);
+    final MockTokenizer second = new MockTokenizer(factory, MockTokenizer.WHITESPACE, false);
+    second.setReader(new StringReader("second words"));
+    second.addAttribute(FlagsAttribute.class);
+
+    TokenStream ts = new ConcatenatingTokenStream(first, second);
+    assertTrue(ts.hasAttribute(FlagsAttribute.class));
+    assertTrue(ts.hasAttribute(PayloadAttribute.class));
+
+    assertTokenStreamContents(ts,
+        new String[] { "first", "words", "second", "words" },
+        new int[]{ 0, 6, 12, 19, },
+        new int[]{ 5, 11, 18, 24, });
+
+  }
+
+  public void testInconsistentAttributeFactories() throws IOException {
+
+    final MockTokenizer first = new MockTokenizer(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, MockTokenizer.WHITESPACE, true);
+    final MockTokenizer second = new MockTokenizer(TokenStream.DEFAULT_TOKEN_ATTRIBUTE_FACTORY, MockTokenizer.WHITESPACE, true);
+
+    expectThrows(IllegalArgumentException.class, () -> new ConcatenatingTokenStream(first, second));
+
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a948e171/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAndSuffixAwareTokenFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAndSuffixAwareTokenFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAndSuffixAwareTokenFilter.java
deleted file mode 100644
index 0e6c61a..0000000
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAndSuffixAwareTokenFilter.java
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.analysis.miscellaneous;
-
-
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.CannedTokenStream;
-import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.Token;
-
-import java.io.IOException;
-import java.io.StringReader;
-
-public class TestPrefixAndSuffixAwareTokenFilter extends BaseTokenStreamTestCase {
-
-  public void test() throws IOException {
-
-    final MockTokenizer input = new MockTokenizer(MockTokenizer.WHITESPACE, false);
-    input.setReader(new StringReader("hello world"));
-    PrefixAndSuffixAwareTokenFilter ts = new PrefixAndSuffixAwareTokenFilter(
-        new CannedTokenStream(createToken("^", 0, 0)),
-        input,
-        new CannedTokenStream(createToken("$", 0, 0)));
-
-    assertTokenStreamContents(ts,
-        new String[] { "^", "hello", "world", "$" },
-        new int[] { 0, 0, 6, 11 },
-        new int[] { 0, 5, 11, 11 });
-  }
-
-  private static Token createToken(String term, int start, int offset)
-  {
-    return new Token(term, start, offset);
-  }
-}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a948e171/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAwareTokenFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAwareTokenFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAwareTokenFilter.java
deleted file mode 100644
index c407c79..0000000
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAwareTokenFilter.java
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.analysis.miscellaneous;
-
-
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.CannedTokenStream;
-import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.Token;
-
-import java.io.IOException;
-import java.io.StringReader;
-
-public class TestPrefixAwareTokenFilter extends BaseTokenStreamTestCase {
-
-  public void test() throws IOException {
-
-    PrefixAwareTokenFilter ts;
-
-    ts = new PrefixAwareTokenFilter(
-        new CannedTokenStream(createToken("a", 0, 1)),
-        new CannedTokenStream(createToken("b", 0, 1)));
-    assertTokenStreamContents(ts, 
-        new String[] { "a", "b" },
-        new int[] { 0, 1 },
-        new int[] { 1, 2 });
-
-    // prefix and suffix using 2x prefix
-
-    final MockTokenizer suffix = new MockTokenizer(MockTokenizer.WHITESPACE, false);
-    suffix.setReader(new StringReader("hello world"));
-    ts = new PrefixAwareTokenFilter(new CannedTokenStream(createToken("^", 0, 0)),
-        suffix);
-    ts = new PrefixAwareTokenFilter(ts, new CannedTokenStream(createToken("$", 0, 0)));
-
-    assertTokenStreamContents(ts,
-        new String[] { "^", "hello", "world", "$" },
-        new int[] { 0, 0, 6, 11 },
-        new int[] { 0, 5, 11, 11 });
-  }
-
-  private static Token createToken(String term, int start, int offset)
-  {
-    return new Token(term, start, offset);
-  }
-}


Mime
View raw message