lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mikemcc...@apache.org
Subject svn commit: r787875 - in /lucene/java/trunk/src/test/org/apache/lucene/analysis: BaseTokenTestCase.java TestMappingCharFilter.java
Date Wed, 24 Jun 2009 00:11:50 GMT
Author: mikemccand
Date: Wed Jun 24 00:11:50 2009
New Revision: 787875

URL: http://svn.apache.org/viewvc?rev=787875&view=rev
Log:
LUCENE-1466: add test case for MappingCharFilter

Added:
    lucene/java/trunk/src/test/org/apache/lucene/analysis/BaseTokenTestCase.java   (with props)
    lucene/java/trunk/src/test/org/apache/lucene/analysis/TestMappingCharFilter.java   (with props)

Added: lucene/java/trunk/src/test/org/apache/lucene/analysis/BaseTokenTestCase.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/analysis/BaseTokenTestCase.java?rev=787875&view=auto
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/analysis/BaseTokenTestCase.java (added)
+++ lucene/java/trunk/src/test/org/apache/lucene/analysis/BaseTokenTestCase.java Wed Jun 24 00:11:50 2009
@@ -0,0 +1,185 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.lucene.util.LuceneTestCase;
+
+public abstract class BaseTokenTestCase extends LuceneTestCase {
+  public static String tsToString(TokenStream in) throws IOException { // Drains the stream, returns its terms joined by single spaces, and closes it.
+    StringBuilder out = new StringBuilder();
+    Token t = in.next(); // first token; null is treated as end of stream
+    if (null != t)
+      out.append(new String(t.termBuffer(), 0, t.termLength())); // term text = first termLength() chars of termBuffer()
+    
+    for (t = in.next(); null != t; t = in.next()) { // NOTE(review): next() is called again here even if the first call already returned null
+      out.append(" ").append(new String(t.termBuffer(), 0, t.termLength()));
+    }
+    in.close(); // NOTE(review): not reached if next() throws; a try/finally would guarantee the close
+    return out.toString();
+  }
+/*
+  public List<String> tok2str(Iterable<Token> tokLst) {
+    ArrayList<String> lst = new ArrayList<String>();
+    for ( Token t : tokLst ) {
+      lst.add( new String(t.termBuffer(), 0, t.termLength()));
+    }
+    return lst;
+  }
+*/
+  /**
+   * Asserts that two token lists agree on term text and position, ignoring
+   * offsets. The containment check is run in both directions, so a token that
+   * is present in only one of the lists fails the test.
+   *
+   * @param a first list of Token
+   * @param b second list of Token
+   */
+  public void assertTokEqual(List/*<Token>*/ a, List/*<Token>*/ b) {
+    assertTokEq(a,b,false);
+    assertTokEq(b,a,false);
+  }
+  /**
+   * Asserts that two token lists agree on term text, position, start offset
+   * and end offset. The containment check is run in both directions, so a
+   * token that is present in only one of the lists fails the test.
+   *
+   * @param a first list of Token
+   * @param b second list of Token
+   */
+  public void assertTokEqualOff(List/*<Token>*/ a, List/*<Token>*/ b) {
+    assertTokEq(a,b,true);
+    assertTokEq(b,a,true);
+  }
+  /**
+   * Fails the test unless every token of a has a counterpart in b with the
+   * same term text at the same position. This is a one-way containment check:
+   * tokens that exist only in b are not detected by a single call.
+   *
+   * @param a list of Token whose entries must all be found
+   * @param b list of Token that is searched
+   * @param checkOff when true, start and end offsets must match as well
+   */
+  private void assertTokEq(List/*<Token>*/ a, List/*<Token>*/ b, boolean checkOff)
{
+    int pos=0; // position of the current token = running sum of position increments
+    for (Iterator iter = a.iterator(); iter.hasNext();) {
+      Token tok = (Token)iter.next();
+      pos += tok.getPositionIncrement();
+      if (!tokAt(b, new String(tok.termBuffer(), 0, tok.termLength()), pos
+              , checkOff ? tok.startOffset() : -1 // -1 tells tokAt not to compare this offset
+              , checkOff ? tok.endOffset() : -1
+              )) 
+      {
+        fail(a + "!=" + b); // message shows both lists via their toString()
+      }
+    }
+  }
+  /**
+   * Tells whether the list contains a token with the given term text at the
+   * given position, where a token's position is the sum of the position
+   * increments of all tokens up to and including it.
+   *
+   * @param lst list of Token to search
+   * @param val term text to look for
+   * @param tokPos position the token must be at
+   * @param startOff required start offset, or -1 to accept any
+   * @param endOff required end offset, or -1 to accept any
+   * @return true if at least one token satisfies every condition
+   */
+  public boolean tokAt(List/*<Token>*/ lst, String val, int tokPos, int startOff, int
endOff) {
+    int pos=0;
+    for (Iterator iter = lst.iterator(); iter.hasNext();) {
+      Token tok = (Token)iter.next();
+      pos += tok.getPositionIncrement();
+      if (pos==tokPos && new String(tok.termBuffer(), 0, tok.termLength()).equals(val)
+          && (startOff==-1 || tok.startOffset()==startOff)
+          && (endOff  ==-1 || tok.endOffset()  ==endOff  )
+           )
+      {
+        return true;
+      }
+    }
+    return false; // whole list scanned without a match
+  }
+
+
+  /**
+   * Builds a list of tokens from a compact test-string format. Groups are
+   * separated by a single space; inside a group, '/' stacks further tokens on
+   * the same position, and the first token of the group may carry
+   * comma-separated parameters: term[,positionIncrement[,startOffset[,endOffset]]].
+   * Omitted parameters default to positionIncrement=1, startOffset=0 and
+   * endOffset=startOffset+term.length().
+   * <pre>
+   * a b c      => [a,b,c]
+   * a/b        => tokens a and b share the same spot (b.positionIncrement=0)
+   * a,3/b/c    => a,b,c all share same position (a.positionIncrement=3, b.positionIncrement=0, c.positionIncrement=0)
+   * a,1,10,11  => "a" with positionIncrement=1, startOffset=10, endOffset=11
+   * </pre>
+   * Only the first token of a group is parsed for parameters. Stacked tokens
+   * use the text after '/' verbatim as their term and are created with 0,0 in
+   * the offset arguments.
+   *
+   * @param str the test string
+   * @return list of Token in order of appearance, each created with "TEST" as the last constructor argument
+   * @throws NumberFormatException if a numeric parameter is not an integer
+   */
+  public List/*<Token>*/ tokens(String str) {
+    String[] arr = str.split(" "); // one entry per position group
+    List/*<Token>*/ result = new ArrayList/*<Token>*/();
+    for (int i=0; i<arr.length; i++) {
+      String[] toks = arr[i].split("/"); // toks[0] = leading token, the rest are stacked on it
+      String[] params = toks[0].split(","); // term, then optional posInc, start, end
+
+      int posInc;
+      int start;
+      int end;
+
+      if (params.length > 1) {
+        posInc = Integer.parseInt(params[1]);
+      } else {
+        posInc = 1;
+      }
+
+      if (params.length > 2) {
+        start = Integer.parseInt(params[2]);
+      } else {
+        start = 0;
+      }
+
+      if (params.length > 3) {
+        end = Integer.parseInt(params[3]);
+      } else {
+        end = start + params[0].length(); // default: token spans exactly its own term text
+      }
+
+      Token t = new Token(params[0],start,end,"TEST");
+      t.setPositionIncrement(posInc);
+      
+      result.add(t);
+      for (int j=1; j<toks.length; j++) {
+        t = new Token(toks[j],0,0,"TEST"); // NOTE(review): ",n" parameters on stacked tokens are not parsed and would end up in the term
+        t.setPositionIncrement(0); // same position as the leading token
+        result.add(t);
+      }
+    }
+    return result;
+  }
+
+  //------------------------------------------------------------------------
+  // These may be useful beyond test cases...
+  //------------------------------------------------------------------------
+  /**
+   * Collects every token produced by the stream into a list, calling next()
+   * until it returns null. The stream is not closed by this method.
+   *
+   * @param tstream stream to drain
+   * @return list of Token in the order they were produced
+   * @throws IOException propagated from tstream.next()
+   */
+  static List/*<Token>*/ getTokens(TokenStream tstream) throws IOException {
+    List/*<Token>*/ tokens = new ArrayList/*<Token>*/();
+    while (true) {
+      Token t = tstream.next();
+      if (t==null) break; // null marks the end of the stream
+      tokens.add(t);
+    }
+    return tokens;
+  }
+/*
+  public static class IterTokenStream extends TokenStream {
+    Iterator<Token> toks;
+    public IterTokenStream(Token... toks) {
+      this.toks = Arrays.asList(toks).iterator();
+    }
+    public IterTokenStream(Iterable<Token> toks) {
+      this.toks = toks.iterator();
+    }
+    public IterTokenStream(Iterator<Token> toks) {
+      this.toks = toks;
+    }
+    public IterTokenStream(String ... text) {
+      int off = 0;
+      ArrayList<Token> t = new ArrayList<Token>( text.length );
+      for( String txt : text ) {
+        t.add( new Token( txt, off, off+txt.length() ) );
+        off += txt.length() + 2;
+      }
+      this.toks = t.iterator();
+    }
+    @Override
+    public Token next() {
+      if (toks.hasNext()) {
+        return toks.next();
+      }
+      return null;
+    }
+  }
+*/
+}

Propchange: lucene/java/trunk/src/test/org/apache/lucene/analysis/BaseTokenTestCase.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/src/test/org/apache/lucene/analysis/TestMappingCharFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/analysis/TestMappingCharFilter.java?rev=787875&view=auto
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/analysis/TestMappingCharFilter.java (added)
+++ lucene/java/trunk/src/test/org/apache/lucene/analysis/TestMappingCharFilter.java Wed Jun 24 00:11:50 2009
@@ -0,0 +1,158 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis;
+
+import java.io.StringReader;
+import java.util.List;
+
+public class TestMappingCharFilter extends BaseTokenTestCase {
+  /** Mapping table used by every test in this class; rebuilt in setUp(). */
+  NormalizeCharMap normMap;
+  /**
+   * Creates a fresh mapping table whose rules cover replacements that are
+   * shorter than, as long as, and longer than the matched text, plus one rule
+   * that replaces its match with the empty string.
+   */
+  public void setUp() throws Exception {
+    super.setUp();
+    normMap = new NormalizeCharMap();
+
+    normMap.add( "aa", "a" ); // this group: replacement shorter than the match
+    normMap.add( "bbb", "b" );
+    normMap.add( "cccc", "cc" );
+
+    normMap.add( "h", "i" ); // same length
+    normMap.add( "j", "jj" ); // from here on: replacement longer than the match
+    normMap.add( "k", "kkk" );
+    normMap.add( "ll", "llll" );
+
+    normMap.add( "empty", "" ); // match replaced by nothing
+  }
+  /** Input "x" has no matching rule; expects the single token "x" with the default offsets 0-1. */
+  public void testNothingChange() throws Exception {
+    CharStream cs = new MappingCharFilter( normMap, CharReader.get( new StringReader( "x"
) ) );
+    TokenStream ts = new WhitespaceTokenizer( cs );
+    List real = getTokens( ts );
+    List expect = tokens( "x" );
+    assertTokEqualOff( expect, real );
+  }
+  /** One char mapped to one char: input "h" is expected to yield token "i" with offsets 0-1. */
+  public void test1to1() throws Exception {
+    CharStream cs = new MappingCharFilter( normMap, CharReader.get( new StringReader( "h"
) ) );
+    TokenStream ts = new WhitespaceTokenizer( cs );
+    List real = getTokens( ts );
+    List expect = tokens( "i" );
+    assertTokEqualOff( expect, real );
+  }
+  /** One char mapped to two: input "j" is expected to yield "jj" with offsets 0-1, the span of the input text. */
+  public void test1to2() throws Exception {
+    CharStream cs = new MappingCharFilter( normMap, CharReader.get( new StringReader( "j"
) ) );
+    TokenStream ts = new WhitespaceTokenizer( cs );
+    List real = getTokens( ts );
+    List expect = tokens( "jj,1,0,1" );
+    assertTokEqualOff( expect, real );
+  }
+  /** One char mapped to three: input "k" is expected to yield "kkk" with offsets 0-1, the span of the input text. */
+  public void test1to3() throws Exception {
+    CharStream cs = new MappingCharFilter( normMap, CharReader.get( new StringReader( "k"
) ) );
+    TokenStream ts = new WhitespaceTokenizer( cs );
+    List real = getTokens( ts );
+    List expect = tokens( "kkk,1,0,1" );
+    assertTokEqualOff( expect, real );
+  }
+  /** Two chars mapped to four: input "ll" is expected to yield "llll" with offsets 0-2, the span of the input text. */
+  public void test2to4() throws Exception {
+    CharStream cs = new MappingCharFilter( normMap, CharReader.get( new StringReader( "ll"
) ) );
+    TokenStream ts = new WhitespaceTokenizer( cs );
+    List real = getTokens( ts );
+    List expect = tokens( "llll,1,0,2" );
+    assertTokEqualOff( expect, real );
+  }
+  /** Two chars mapped to one: input "aa" is expected to yield "a" with offsets 0-2, the span of the input text. */
+  public void test2to1() throws Exception {
+    CharStream cs = new MappingCharFilter( normMap, CharReader.get( new StringReader( "aa"
) ) );
+    TokenStream ts = new WhitespaceTokenizer( cs );
+    List real = getTokens( ts );
+    List expect = tokens( "a,1,0,2" );
+    assertTokEqualOff( expect, real );
+  }
+  /** Three chars mapped to one: input "bbb" is expected to yield "b" with offsets 0-3, the span of the input text. */
+  public void test3to1() throws Exception {
+    CharStream cs = new MappingCharFilter( normMap, CharReader.get( new StringReader( "bbb"
) ) );
+    TokenStream ts = new WhitespaceTokenizer( cs );
+    List real = getTokens( ts );
+    List expect = tokens( "b,1,0,3" );
+    assertTokEqualOff( expect, real );
+  }
+  /** Four chars mapped to two: input "cccc" is expected to yield "cc" with offsets 0-4, the span of the input text. */
+  public void test4to2() throws Exception {
+    CharStream cs = new MappingCharFilter( normMap, CharReader.get( new StringReader( "cccc"
) ) );
+    TokenStream ts = new WhitespaceTokenizer( cs );
+    List real = getTokens( ts );
+    List expect = tokens( "cc,1,0,4" );
+    assertTokEqualOff( expect, real );
+  }
+  /** Five chars mapped to the empty string: input "empty" is expected to produce no tokens at all. */
+  public void test5to0() throws Exception {
+    CharStream cs = new MappingCharFilter( normMap, CharReader.get( new StringReader( "empty"
) ) );
+    TokenStream ts = new WhitespaceTokenizer( cs );
+    List real = getTokens( ts );
+    assertEquals( 0, real.size() );
+  }
+
+  // Runs a multi-word input through the filter and checks term, position and offsets of every token.
+  //                1111111111222
+  //      01234567890123456789012
+  //(in)  h i j k ll cccc bbb aa
+  //
+  //                1111111111222
+  //      01234567890123456789012
+  //(out) i i jj kkk llll cc b a
+  // Expected result (input term,start,end => output term,start,end); offsets stay those of the input text:
+  //    h, 0, 1 =>    i, 0, 1
+  //    i, 2, 3 =>    i, 2, 3
+  //    j, 4, 5 =>   jj, 4, 5
+  //    k, 6, 7 =>  kkk, 6, 7
+  //   ll, 8,10 => llll, 8,10
+  // cccc,11,15 =>   cc,11,15
+  //  bbb,16,19 =>    b,16,19
+  //   aa,20,22 =>    a,20,22
+  //
+  public void testTokenStream() throws Exception {
+    CharStream cs = new MappingCharFilter( normMap, CharReader.get( new StringReader( "h
i j k ll cccc bbb aa" ) ) );
+    TokenStream ts = new WhitespaceTokenizer( cs );
+    List real = getTokens( ts );
+    List expect = tokens( "i,1,0,1 i,1,2,3 jj,1,4,5 kkk,1,6,7 llll,1,8,10 cc,1,11,15 b,1,16,19
a,1,20,22" );
+    assertTokEqualOff( expect, real );
+  }
+
+  // Stacks two MappingCharFilters that share the same map, so the rules are applied twice.
+  // (out-1) is the text after the inner filter, (out-2) after the outer one.
+  //        0123456789
+  //(in)    aaaa ll h
+  //(out-1) aa llll i
+  //(out-2) a llllllll i
+  // Expected result (input term,start,end => output term,start,end); offsets stay those of the input text:
+  // aaaa,0,4 => a,0,4
+  //   ll,5,7 => llllllll,5,7
+  //    h,8,9 => i,8,9
+  public void testChained() throws Exception {
+    CharStream cs = new MappingCharFilter( normMap,
+        new MappingCharFilter( normMap, CharReader.get( new StringReader( "aaaa ll h" ) )
) );
+    TokenStream ts = new WhitespaceTokenizer( cs );
+    List real = getTokens( ts );
+    List expect = tokens( "a,1,0,4 llllllll,1,5,7 i,1,8,9" );
+    assertTokEqualOff( expect, real );
+  }
+}

Propchange: lucene/java/trunk/src/test/org/apache/lucene/analysis/TestMappingCharFilter.java
------------------------------------------------------------------------------
    svn:eol-style = native



Mime
View raw message