lucene-commits mailing list archives

From: rm...@apache.org
Subject: svn commit: r1175532 [2/2] - in /lucene/dev/branches/branch_3x: ./ lucene/ lucene/backwards/src/test/ solr/ solr/contrib/clustering/src/test-files/clustering/solr/conf/ solr/contrib/dataimporthandler-extras/src/test-files/dihextras/solr/conf/ solr/cont...
Date: Sun, 25 Sep 2011 19:30:35 GMT
Modified: lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/handler/FieldAnalysisRequestHandlerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/handler/FieldAnalysisRequestHandlerTest.java?rev=1175532&r1=1175531&r2=1175532&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/handler/FieldAnalysisRequestHandlerTest.java (original)
+++ lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/handler/FieldAnalysisRequestHandlerTest.java Sun Sep 25 19:30:34 2011
@@ -17,6 +17,7 @@
 
 package org.apache.solr.handler;
 
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.KeywordTokenizer;
 import org.apache.lucene.analysis.WhitespaceTokenizer;
 import org.apache.solr.common.params.AnalysisParams;
@@ -261,10 +262,10 @@ public class FieldAnalysisRequestHandler
 
     indexPart = whitetok.get("index");
     assertNotNull("expecting an index token analysis for field 'whitetok'", indexPart);
-    assertEquals("expecting only WhitespaceTokenizer to be applied", 1, indexPart.size());
-    tokenList = indexPart.get(WhitespaceTokenizer.class.getName());
-    assertNotNull("expecting only WhitespaceTokenizer to be applied", tokenList);
-    assertEquals("expecting WhitespaceTokenizer to produce 10 tokens", 10, tokenList.size());
+    assertEquals("expecting only MockTokenizer to be applied", 1, indexPart.size());
+    tokenList = indexPart.get(MockTokenizer.class.getName());
+    assertNotNull("expecting only MockTokenizer to be applied", tokenList);
+    assertEquals("expecting MockTokenizer to produce 10 tokens", 10, tokenList.size());
     assertToken(tokenList.get(0), new TokenInfo("the", null, "word", 0, 3, 1, new int[]{1}, null, false));
     assertToken(tokenList.get(1), new TokenInfo("quick", null, "word", 4, 9, 2, new int[]{2}, null, false));
     assertToken(tokenList.get(2), new TokenInfo("red", null, "word", 10, 13, 3, new int[]{3}, null, false));
@@ -278,10 +279,10 @@ public class FieldAnalysisRequestHandler
 
     queryPart = whitetok.get("query");
     assertNotNull("expecting a query token analysis for field 'whitetok'", queryPart);
-    assertEquals("expecting only WhitespaceTokenizer to be applied", 1, queryPart.size());
-    tokenList = queryPart.get(WhitespaceTokenizer.class.getName());
-    assertNotNull("expecting only WhitespaceTokenizer to be applied", tokenList);
-    assertEquals("expecting WhitespaceTokenizer to produce 2 tokens", 2, tokenList.size());
+    assertEquals("expecting only MockTokenizer to be applied", 1, queryPart.size());
+    tokenList = queryPart.get(MockTokenizer.class.getName());
+    assertNotNull("expecting only MockTokenizer to be applied", tokenList);
+    assertEquals("expecting MockTokenizer to produce 2 tokens", 2, tokenList.size());
     assertToken(tokenList.get(0), new TokenInfo("fox", null, "word", 0, 3, 1, new int[]{1}, null, false));
     assertToken(tokenList.get(1), new TokenInfo("brown", null, "word", 4, 9, 2, new int[]{2}, null, false));
 
@@ -328,8 +329,8 @@ public class FieldAnalysisRequestHandler
     assertEquals("  whátëvêr  ", indexPart.get("org.apache.solr.analysis.HTMLStripCharFilter"));
     assertEquals("  whatever  ", indexPart.get("org.apache.lucene.analysis.MappingCharFilter"));
 
-    List<NamedList> tokenList = (List<NamedList>)indexPart.get("org.apache.lucene.analysis.WhitespaceTokenizer");
-    assertNotNull("Expecting WhitespaceTokenizer analysis breakdown", tokenList);
+    List<NamedList> tokenList = (List<NamedList>)indexPart.get(MockTokenizer.class.getName());
+    assertNotNull("Expecting MockTokenizer analysis breakdown", tokenList);
     assertEquals(tokenList.size(), 1);
     assertToken(tokenList.get(0), new TokenInfo("whatever", null, "word", 12, 20, 1, new int[]{1}, null, false));
   }
@@ -353,8 +354,8 @@ public class FieldAnalysisRequestHandler
     NamedList<List<NamedList>> indexPart = textType.get("index");
     assertNotNull("expecting an index token analysis for field type 'skutype1'", indexPart);
 
-    List<NamedList> tokenList = indexPart.get("org.apache.lucene.analysis.WhitespaceTokenizer");
-    assertNotNull("Expcting WhitespaceTokenizer analysis breakdown", tokenList);
+    List<NamedList> tokenList = indexPart.get(MockTokenizer.class.getName());
+    assertNotNull("Expcting MockTokenizer analysis breakdown", tokenList);
     assertEquals(4, tokenList.size());
     assertToken(tokenList.get(0), new TokenInfo("hi,", null, "word", 0, 3, 1, new int[]{1}, null, false));
     assertToken(tokenList.get(1), new TokenInfo("3456-12", null, "word", 4, 11, 2, new int[]{2}, null, false));
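
The handler's analysis breakdown is keyed by the fully qualified class name of each analysis component, which is why the assertions above switch from a hard-coded WhitespaceTokenizer key to MockTokenizer.class.getName(). Below is a minimal sketch of that lookup pattern, using a plain java.util.Map in place of Solr's NamedList types; the class name and token values are illustrative, not the handler's actual return type or output.

    import java.util.Arrays;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    import org.apache.lucene.analysis.MockTokenizer;

    public class TokenizerKeySketch {
      public static void main(String[] args) {
        // The analysis response maps each stage to the tokens it produced,
        // keyed by the component's class name.
        Map<String, List<String>> indexPart = new HashMap<String, List<String>>();
        indexPart.put(MockTokenizer.class.getName(),
            Arrays.asList("the", "quick", "red", "fox"));

        // Looking the token list up via MockTokenizer.class.getName() keeps the
        // test in sync with whichever tokenizer the test schema wires in.
        List<String> tokenList = indexPart.get(MockTokenizer.class.getName());
        if (tokenList == null || tokenList.size() != 4) {
          throw new AssertionError("expecting only MockTokenizer to be applied");
        }
        System.out.println("tokens: " + tokenList);
      }
    }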

Modified: lucene/dev/branches/branch_3x/solr/solrj/src/test-files/solrj/solr/conf/schema.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/solrj/src/test-files/solrj/solr/conf/schema.xml?rev=1175532&r1=1175531&r2=1175532&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/solrj/src/test-files/solrj/solr/conf/schema.xml (original)
+++ lucene/dev/branches/branch_3x/solr/solrj/src/test-files/solrj/solr/conf/schema.xml Sun Sep 25 19:30:34 2011
@@ -86,7 +86,7 @@
     <!-- Field type demonstrating an Analyzer failure -->
     <fieldtype name="failtype1" class="solr.TextField">
       <analyzer type="index">
-          <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+          <tokenizer class="solr.MockTokenizerFactory"/>
           <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
           <filter class="solr.LowerCaseFilterFactory"/>
       </analyzer>
@@ -95,7 +95,7 @@
     <!-- Demonstrating ignoreCaseChange -->
     <fieldtype name="wdf_nocase" class="solr.TextField">
       <analyzer>
-          <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+          <tokenizer class="solr.MockTokenizerFactory"/>
           <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="0"/>
           <filter class="solr.LowerCaseFilterFactory"/>
       </analyzer>
@@ -103,7 +103,7 @@
 
      <fieldtype name="wdf_preserve" class="solr.TextField">
       <analyzer>
-          <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+          <tokenizer class="solr.MockTokenizerFactory"/>
           <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="1"/>
           <filter class="solr.LowerCaseFilterFactory"/>
       </analyzer>
@@ -165,7 +165,7 @@
       <analyzer><tokenizer class="solr.LetterTokenizerFactory"/></analyzer>
     </fieldtype>
     <fieldtype name="whitetok" class="solr.TextField">
-      <analyzer><tokenizer class="solr.WhitespaceTokenizerFactory"/></analyzer>
+      <analyzer><tokenizer class="solr.MockTokenizerFactory"/></analyzer>
     </fieldtype>
     <fieldtype name="HTMLstandardtok" class="solr.TextField">
       <analyzer>
@@ -176,7 +176,7 @@
     <fieldtype name="HTMLwhitetok" class="solr.TextField">
       <analyzer>
       <charFilter class="solr.HTMLStripCharFilterFactory"/>
-      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+      <tokenizer class="solr.MockTokenizerFactory"/>
       </analyzer>
     </fieldtype>
     <fieldtype name="standardtokfilt" class="solr.TextField">
@@ -187,19 +187,19 @@
     </fieldtype>
     <fieldtype name="standardfilt" class="solr.TextField">
       <analyzer>
-        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <tokenizer class="solr.MockTokenizerFactory"/>
         <filter class="solr.StandardFilterFactory"/>
       </analyzer>
     </fieldtype>
     <fieldtype name="lowerfilt" class="solr.TextField">
       <analyzer>
-        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <tokenizer class="solr.MockTokenizerFactory"/>
         <filter class="solr.LowerCaseFilterFactory"/>
       </analyzer>
     </fieldtype>
     <fieldtype name="lowerpunctfilt" class="solr.TextField">
       <analyzer>
-        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <tokenizer class="solr.MockTokenizerFactory"/>
         <filter name="syn" class="solr.SynonymFilterFactory" synonyms="synonyms.txt" expand="true"/>
         <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="1" splitOnCaseChange="1"/>
         <filter class="solr.LowerCaseFilterFactory"/>
@@ -223,43 +223,43 @@
     </fieldtype>
     <fieldtype name="porterfilt" class="solr.TextField">
       <analyzer>
-        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <tokenizer class="solr.MockTokenizerFactory"/>
         <filter class="solr.PorterStemFilterFactory"/>
       </analyzer>
     </fieldtype>
     <!-- fieldtype name="snowballfilt" class="solr.TextField">
       <analyzer>
-        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <tokenizer class="solr.MockTokenizerFactory"/>
         <filter class="solr.SnowballPorterFilterFactory"/>
       </analyzer>
     </fieldtype -->
     <fieldtype name="engporterfilt" class="solr.TextField">
       <analyzer>
-        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <tokenizer class="solr.MockTokenizerFactory"/>
         <filter class="solr.EnglishPorterFilterFactory"/>
       </analyzer>
     </fieldtype>
     <fieldtype name="custengporterfilt" class="solr.TextField">
       <analyzer>
-        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <tokenizer class="solr.MockTokenizerFactory"/>
         <filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/>
       </analyzer>
     </fieldtype>
     <fieldtype name="stopfilt" class="solr.TextField">
       <analyzer>
-        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <tokenizer class="solr.MockTokenizerFactory"/>
         <filter class="solr.StopFilterFactory" ignoreCase="true"/>
       </analyzer>
     </fieldtype>
     <fieldtype name="custstopfilt" class="solr.TextField">
       <analyzer>
-        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <tokenizer class="solr.MockTokenizerFactory"/>
         <filter class="solr.StopFilterFactory" words="stopwords.txt"/>
       </analyzer>
     </fieldtype>
     <fieldtype name="lengthfilt" class="solr.TextField">
       <analyzer>
-        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <tokenizer class="solr.MockTokenizerFactory"/>
         <filter class="solr.LengthFilterFactory" min="2" max="5"/>
       </analyzer>
     </fieldtype>
@@ -267,20 +267,20 @@
       <analyzer>
         <charFilter class="solr.HTMLStripCharFilterFactory"/>
         <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
-        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <tokenizer class="solr.MockTokenizerFactory"/>
       </analyzer>
     </fieldType>
 
     <fieldtype name="subword" class="solr.TextField" multiValued="true" positionIncrementGap="100">
       <analyzer type="index">
-          <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+          <tokenizer class="solr.MockTokenizerFactory"/>
           <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
           <filter class="solr.LowerCaseFilterFactory"/>
           <filter class="solr.StopFilterFactory"/>
           <filter class="solr.PorterStemFilterFactory"/>
       </analyzer>
       <analyzer type="query">
-          <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+          <tokenizer class="solr.MockTokenizerFactory"/>
           <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
           <filter class="solr.LowerCaseFilterFactory"/>
           <filter class="solr.StopFilterFactory"/>
@@ -290,14 +290,14 @@
 
     <fieldtype name="numericsubword" class="solr.TextField" multiValued="true" positionIncrementGap="100">
       <analyzer type="index">
-          <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+          <tokenizer class="solr.MockTokenizerFactory"/>
           <filter class="solr.LowerCaseFilterFactory"/>
           <filter class="solr.WordDelimiterFilterFactory" protected="protwords.txt" splitOnNumerics="0" splitOnCaseChange="0" generateWordParts="1" generateNumberParts="0" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
           <filter class="solr.StopFilterFactory"/>
           <filter class="solr.PorterStemFilterFactory"/>
       </analyzer>
       <analyzer type="query">
-          <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+          <tokenizer class="solr.MockTokenizerFactory"/>
           <filter class="solr.LowerCaseFilterFactory"/>
           <filter class="solr.WordDelimiterFilterFactory" protected="protwords.txt" splitOnNumerics="0" splitOnCaseChange="0" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
           <filter class="solr.StopFilterFactory"/>
@@ -307,12 +307,12 @@
 
     <fieldtype name="protectedsubword" class="solr.TextField" multiValued="true" positionIncrementGap="100">
       <analyzer type="index">
-          <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+          <tokenizer class="solr.MockTokenizerFactory"/>
           <filter class="solr.LowerCaseFilterFactory"/>
           <filter class="solr.WordDelimiterFilterFactory" protected="protwords.txt" splitOnNumerics="0" splitOnCaseChange="0" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
       </analyzer>
       <analyzer type="query">
-          <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+          <tokenizer class="solr.MockTokenizerFactory"/>
           <filter class="solr.LowerCaseFilterFactory"/>
       </analyzer>
     </fieldtype>
@@ -321,12 +321,12 @@
     <!-- more flexible in matching skus, but more chance of a false match -->
     <fieldtype name="skutype1" class="solr.TextField">
       <analyzer type="index">
-          <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+          <tokenizer class="solr.MockTokenizerFactory"/>
           <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
           <filter class="solr.LowerCaseFilterFactory"/>
       </analyzer>
       <analyzer type="query">
-          <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+          <tokenizer class="solr.MockTokenizerFactory"/>
           <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
           <filter class="solr.LowerCaseFilterFactory"/>
       </analyzer>
@@ -335,12 +335,12 @@
     <!-- less flexible in matching skus, but less chance of a false match -->
     <fieldtype name="skutype2" class="solr.TextField">
       <analyzer type="index">
-          <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+          <tokenizer class="solr.MockTokenizerFactory"/>
           <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
           <filter class="solr.LowerCaseFilterFactory"/>
       </analyzer>
       <analyzer type="query">
-          <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+          <tokenizer class="solr.MockTokenizerFactory"/>
           <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
           <filter class="solr.LowerCaseFilterFactory"/>
       </analyzer>
@@ -349,7 +349,7 @@
     <!-- less flexible in matching skus, but less chance of a false match -->
     <fieldtype name="syn" class="solr.TextField">
       <analyzer>
-          <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+          <tokenizer class="solr.MockTokenizerFactory"/>
           <filter name="syn" class="solr.SynonymFilterFactory" synonyms="old_synonyms.txt"/>
       </analyzer>
     </fieldtype>
@@ -359,7 +359,7 @@
       -->
     <fieldtype name="dedup" class="solr.TextField">
       <analyzer>
-          <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+          <tokenizer class="solr.MockTokenizerFactory"/>
           <filter class="solr.SynonymFilterFactory"
                   synonyms="old_synonyms.txt" expand="true" />
           <filter class="solr.PorterStemFilterFactory"/>
@@ -372,7 +372,7 @@
 
   <fieldtype name="textgap" class="solr.TextField" multiValued="true" positionIncrementGap="100">
       <analyzer>
-          <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+          <tokenizer class="solr.MockTokenizerFactory"/>
           <filter class="solr.LowerCaseFilterFactory"/>
       </analyzer>
   </fieldtype>
@@ -394,7 +394,7 @@
   <!-- omitPositions example -->
   <fieldType name="nopositions" class="solr.TextField" omitPositions="true">
     <analyzer>
-      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+      <tokenizer class="solr.MockTokenizerFactory"/>
     </analyzer>
   </fieldType>
  </types>
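
Throughout the schema above, every solr.WhitespaceTokenizerFactory reference becomes solr.MockTokenizerFactory, the test-only factory copied to branch_3x below. A rough sketch of how such a <tokenizer class="solr.MockTokenizerFactory"/> entry is exercised at analysis time, following the standard TokenizerFactory init(Map)/create(Reader) contract; it assumes the factory's defaults yield whitespace-style tokenization, and the class name in the sketch is illustrative.

    import java.io.StringReader;
    import java.util.Collections;

    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.solr.analysis.MockTokenizerFactory;

    public class SchemaTokenizerSketch {
      public static void main(String[] args) throws Exception {
        // Roughly what Solr does for <tokenizer class="solr.MockTokenizerFactory"/>:
        // instantiate the factory, init() it with the attribute map from the schema
        // (empty here), then ask it for a Tokenizer over the field value.
        MockTokenizerFactory factory = new MockTokenizerFactory();
        factory.init(Collections.<String, String>emptyMap());

        Tokenizer tokenizer = factory.create(new StringReader("the quick red fox"));
        CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);
        tokenizer.reset();
        while (tokenizer.incrementToken()) {
          System.out.println(term.toString()); // whitespace-style tokens, assuming defaults
        }
        tokenizer.end();
        tokenizer.close();
      }
    }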

Copied: lucene/dev/branches/branch_3x/solr/test-framework/src/java/org/apache/solr/analysis/MockTokenizerFactory.java (from r1175529, lucene/dev/trunk/solr/test-framework/src/java/org/apache/solr/analysis/MockTokenizerFactory.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/test-framework/src/java/org/apache/solr/analysis/MockTokenizerFactory.java?p2=lucene/dev/branches/branch_3x/solr/test-framework/src/java/org/apache/solr/analysis/MockTokenizerFactory.java&p1=lucene/dev/trunk/solr/test-framework/src/java/org/apache/solr/analysis/MockTokenizerFactory.java&r1=1175529&r2=1175532&rev=1175532&view=diff
==============================================================================
--- lucene/dev/trunk/solr/test-framework/src/java/org/apache/solr/analysis/MockTokenizerFactory.java (original)
+++ lucene/dev/branches/branch_3x/solr/test-framework/src/java/org/apache/solr/analysis/MockTokenizerFactory.java Sun Sep 25 19:30:34 2011
@@ -22,13 +22,12 @@ import java.util.Map;
 
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.util.automaton.CharacterRunAutomaton;
 
 /**
  * Factory for {@link MockTokenizer} for testing purposes.
  */
 public class MockTokenizerFactory extends BaseTokenizerFactory {
-  CharacterRunAutomaton pattern;
+  int pattern;
   boolean enableChecks;
   
   @Override
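
The only substantive change in the backported factory is the type of the pattern field: trunk's MockTokenizer selects its pattern via a CharacterRunAutomaton, whereas the branch_3x MockTokenizer takes a plain int constant, so the import goes away and the field becomes an int. A hedged sketch of the kind of create() method this implies; the WHITESPACE constant, the (Reader, int, boolean) constructor, and setEnableChecks reflect the usual MockTokenizer usage pattern and are assumptions here, not shown in this diff.

    import java.io.Reader;

    import org.apache.lucene.analysis.MockTokenizer;
    import org.apache.lucene.analysis.Tokenizer;

    public class MockTokenizerCreateSketch {
      // Assumed to mirror the backported factory: an int pattern constant
      // (e.g. MockTokenizer.WHITESPACE) instead of trunk's CharacterRunAutomaton.
      private final int pattern = MockTokenizer.WHITESPACE;
      private final boolean enableChecks = true;

      public Tokenizer create(Reader input) {
        // Whitespace-style tokenization, no lowercasing -- the drop-in behaviour
        // the schema and test changes above rely on.
        MockTokenizer tokenizer = new MockTokenizer(input, pattern, false);
        tokenizer.setEnableChecks(enableChecks); // assumed available on the branch_3x MockTokenizer
        return tokenizer;
      }
    }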


