lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mikemcc...@apache.org
Subject svn commit: r1559538 - in /lucene/dev/branches/lucene5376/lucene: analysis/common/src/java/org/apache/lucene/analysis/util/ server/src/java/org/apache/lucene/server/handlers/ server/src/java/org/apache/lucene/server/params/ server/src/test/org/apache/l...
Date Sun, 19 Jan 2014 17:47:31 GMT
Author: mikemccand
Date: Sun Jan 19 17:47:30 2014
New Revision: 1559538

URL: http://svn.apache.org/r1559538
Log:
LUCENE-5376: add charFilters via analysis factories too; don't need while loop to get to the
root cause of the InvocationTargetException (thanks Uwe)

Modified:
    lucene/dev/branches/lucene5376/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AnalysisSPILoader.java
    lucene/dev/branches/lucene5376/lucene/server/src/java/org/apache/lucene/server/handlers/RAMResourceLoaderWrapper.java
    lucene/dev/branches/lucene5376/lucene/server/src/java/org/apache/lucene/server/handlers/RegisterFieldHandler.java
    lucene/dev/branches/lucene5376/lucene/server/src/java/org/apache/lucene/server/params/Request.java
    lucene/dev/branches/lucene5376/lucene/server/src/test/org/apache/lucene/server/TestAnalysis.java
    lucene/dev/branches/lucene5376/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java

Modified: lucene/dev/branches/lucene5376/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AnalysisSPILoader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5376/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AnalysisSPILoader.java?rev=1559538&r1=1559537&r2=1559538&view=diff
==============================================================================
--- lucene/dev/branches/lucene5376/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AnalysisSPILoader.java
(original)
+++ lucene/dev/branches/lucene5376/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AnalysisSPILoader.java
Sun Jan 19 17:47:30 2014
@@ -112,15 +112,12 @@ final class AnalysisSPILoader<S extends 
     } catch (InvocationTargetException ite) {
       // nocommit ... trying to throw the "original" IAE,
       // but is this correct/safe?
-      Throwable t = ite;
-      while (t.getCause() != null) {
-        if (t.getCause() instanceof IllegalArgumentException) {
-          throw (IllegalArgumentException) t.getCause();
-        }
-        t = t.getCause();
+      if (ite.getCause() instanceof IllegalArgumentException) {
+        throw (IllegalArgumentException) ite.getCause();
+      } else {
+        throw new IllegalArgumentException("SPI class of type "+clazz.getName()+" with name
'"+name+"' cannot be instantiated. " +
+                                           "This is likely due to a misconfiguration of the
java class '" + service.getName() + "': ", ite);
       }
-      throw new IllegalArgumentException("SPI class of type "+clazz.getName()+" with name
'"+name+"' cannot be instantiated. " +
-            "This is likely due to a misconfiguration of the java class '" + service.getName()
+ "': ", ite);
     } catch (Exception e) {
       throw new IllegalArgumentException("SPI class of type "+clazz.getName()+" with name
'"+name+"' cannot be instantiated. " +
             "This is likely due to a misconfiguration of the java class '" + service.getName()
+ "': ", e);

Modified: lucene/dev/branches/lucene5376/lucene/server/src/java/org/apache/lucene/server/handlers/RAMResourceLoaderWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5376/lucene/server/src/java/org/apache/lucene/server/handlers/RAMResourceLoaderWrapper.java?rev=1559538&r1=1559537&r2=1559538&view=diff
==============================================================================
--- lucene/dev/branches/lucene5376/lucene/server/src/java/org/apache/lucene/server/handlers/RAMResourceLoaderWrapper.java
(original)
+++ lucene/dev/branches/lucene5376/lucene/server/src/java/org/apache/lucene/server/handlers/RAMResourceLoaderWrapper.java
Sun Jan 19 17:47:30 2014
@@ -28,8 +28,10 @@ import org.apache.lucene.analysis.util.R
 
 /** Holds "files" in RAM, and falls back to delegate if the
  *  resource isn't in RAM.  This is used for analysis
- *  components that specify their "files" as strings in
- *  RAM. */
+ *  components that specify their "files" as strings inlined
+ *  in the JSON (e.g. list of stopwords for a stop filter). */
+
+// nocommit hacky ... e.g. how are these "files" gc'd after loading/parsing
 
 class RAMResourceLoaderWrapper implements ResourceLoader {
 

Modified: lucene/dev/branches/lucene5376/lucene/server/src/java/org/apache/lucene/server/handlers/RegisterFieldHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5376/lucene/server/src/java/org/apache/lucene/server/handlers/RegisterFieldHandler.java?rev=1559538&r1=1559537&r2=1559538&view=diff
==============================================================================
--- lucene/dev/branches/lucene5376/lucene/server/src/java/org/apache/lucene/server/handlers/RegisterFieldHandler.java
(original)
+++ lucene/dev/branches/lucene5376/lucene/server/src/java/org/apache/lucene/server/handlers/RegisterFieldHandler.java
Sun Jan 19 17:47:30 2014
@@ -36,6 +36,7 @@ import java.util.regex.PatternSyntaxExce
 
 import org.apache.lucene.analysis.Analyzer.TokenStreamComponents;
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.CharFilter;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
@@ -68,6 +69,7 @@ import org.apache.lucene.analysis.synony
 import org.apache.lucene.analysis.synonym.SynonymFilterFactory;
 import org.apache.lucene.analysis.synonym.SynonymMap;
 import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.util.CharFilterFactory;
 import org.apache.lucene.analysis.util.ResourceLoader;
 import org.apache.lucene.analysis.util.ResourceLoaderAware;
 import org.apache.lucene.analysis.util.TokenFilterFactory;
@@ -245,17 +247,21 @@ public class RegisterFieldHandler extend
                    new Param("positionIncrementGap", "How many positions to insert between
separate values in a multi-valued field", new IntType(), 0),
                    new Param("offsetGap", "How many offsets to insert between separate values
in a multi-valued field", new IntType(), 1),
                    new Param("tokenizer", "Tokenizer class (for a custom analysis chain).",
-                             new OrType(new StringType(), new StructType())),
+                             new OrType(new StringType(), new StructType(new Param("class",
"Tokenizer short name (e.g, 'Whitespace')", new StringType())))),
                              // nocommit somehow tap into TokenizerFactory.availableTokenizers
-                   new Param("tokenFilters", "Optional list of TokenFilters to apply after
the Tokenizer",
+                   new Param("tokenFilters", "Optional chain of TokenFilters to apply after
the Tokenizer",
                              new ListType(
-                                 new OrType(new StringType(), new StructType(new Param("class",
"TokenFilter class name", new StringType()))))),
+                                 new OrType(new StringType(), new StructType(new Param("class",
"TokenFilter short name (e.g. 'Stop')", new StringType()))))),
+                   new Param("charFilters", "Optional chain of CharFilters to apply beforethe
Tokenizer",
+                             new ListType(
+                                 new OrType(new StringType(), new StructType(new Param("class",
"CharFilter short name", new StringType()))))),
                    MATCH_VERSION_PARAM);
 
   static StructType SYNONYM_FILTER_TYPE = new StructType(
                                               new Param("ignoreCase", "True if matching should
be case insensitive", new BooleanType(), true),
                                               new Param("analyzer", "Analyzer to use to tokenize
synonym inputs", ANALYZER_TYPE_WRAP),
                                               new Param("synonyms", "Synonyms",
+                                                  /** nocommit syn filter: maybe the simpler
groups / aliases format? */
                                                   new ListType(
                                                       new StructType(
                                                           new Param("input", "String or list
of strings with input token(s) to match", new OrType(new ListType(new StringType()), new StringType())),
@@ -784,6 +790,8 @@ public class RegisterFieldHandler extend
     };
   }
 
+  // nocommit can we use SynonymFilterFactory???
+
   static TokenFilterFactory buildSynonymFilterFactory(IndexState state, Request r) throws
IOException {
 
     Analyzer a = getAnalyzer(state, r, "analyzer");
@@ -813,6 +821,57 @@ public class RegisterFieldHandler extend
 
     // nocommit charFilters
 
+    List<CharFilterFactory> charFilters;
+
+    if (chain.hasParam("charFilters")) {
+      charFilters = new ArrayList<CharFilterFactory>();
+      for(Object o : chain.getList("charFilters")) {
+        Request sub;
+        String className;
+        if (o instanceof String) {
+          className = (String) o;
+          sub = null;
+        } else {
+          if ((o instanceof Request) == false) {
+            // nocommit make sure test hits this
+            chain.failWrongClass("charFilters", "each char filter must be string or struct",
o);
+          }
+          sub = (Request) o;
+          className = sub.getString("class");
+        }
+
+        Map<String,String> factoryArgs = new HashMap<String,String>();
+        // nocommit how to allow the SPI name and separately
+        // also a fully qualified class name ...
+        factoryArgs.put("class", className);
+        factoryArgs.put("luceneMatchVersion", matchVersion.toString());
+        if (sub != null) {
+          for(Map.Entry<String,Object> ent : sub.getRawParams().entrySet()) {
+            factoryArgs.put(ent.getKey(), ent.getValue().toString());
+          }
+          sub.clearParams();
+        }
+
+        CharFilterFactory factory;
+        try {
+          factory = CharFilterFactory.forName(className, factoryArgs);
+        } catch (IllegalArgumentException iae) {
+          chain.fail("charFilters[" + charFilters.size() + "]", "failed to create CharFilterFactory
for class \"" + className + "\": " + iae, iae);
+          // Dead code but compiler disagrees:
+          factory = null;
+        }
+
+        if (factory instanceof ResourceLoaderAware) {
+          // nocommit also do RAM wrapping resource loader here:
+          ((ResourceLoaderAware) factory).inform(state.resourceLoader);
+        }
+
+        charFilters.add(factory);
+      }
+    } else {
+      charFilters = null;
+    }
+
     // Build TokenizerFactory:
     String className;
     JSONObject t;
@@ -859,6 +918,7 @@ public class RegisterFieldHandler extend
       }
 
       if (tokenizerFactory instanceof ResourceLoaderAware) {
+        // nocommit also do RAM wrapping resource loader here:
         ((ResourceLoaderAware) tokenizerFactory).inform(state.resourceLoader);
       }
     }
@@ -871,18 +931,15 @@ public class RegisterFieldHandler extend
         String paramName = "tokenFilters[" + tokenFilterFactories.size() + "]";
 
         Request sub;
-        JSONObject subParams;
         if (o instanceof String) {
           className = (String) o;
           sub = null;
-          subParams = null;
         } else {
           if ((o instanceof Request) == false) {
             // nocommit make sure test hits this
             chain.fail(paramName, "each filter must be string or struct; got: " + o.getClass());
           }
           sub = (Request) o;
-          subParams = sub.getRawParams();
 
           className = sub.getString("class");
         }
@@ -906,10 +963,8 @@ public class RegisterFieldHandler extend
           ResourceLoader resources = state.resourceLoader;
           RAMResourceLoaderWrapper ramResources = null;
 
-
           if (sub != null) {
-
-            for(Map.Entry<String,Object> ent : subParams.entrySet()) {
+            for(Map.Entry<String,Object> ent : sub.getRawParams().entrySet()) {
               String argName = ent.getKey();
               Object argValue = ent.getValue();
 
@@ -961,7 +1016,7 @@ public class RegisterFieldHandler extend
             // server.  If any params are really unused, the
             // analysis factory should throw its own
             // IllegalArgumentException:
-            subParams.clear();
+            sub.clearParams();
           }
 
           try {
@@ -984,7 +1039,8 @@ public class RegisterFieldHandler extend
       tokenFilterFactories = null;
     }
 
-    return new CustomAnalyzer(tokenizerFactory, tokenFilterFactories,
+    return new CustomAnalyzer(charFilters,
+                              tokenizerFactory, tokenFilterFactories,
                               chain.getInt("positionIncrementGap"),
                               chain.getInt("offsetGap"));
   }
@@ -1024,13 +1080,22 @@ public class RegisterFieldHandler extend
     }
   }
 
+  /** An analyzer based on the custom charFilter, tokenizer,
+   *  tokenFilters chains specified when the field was
+   *  registered. */
+
   private static class CustomAnalyzer extends Analyzer {
     private final int posIncGap;
     private final int offsetGap;
     private final TokenizerFactory tokenizerFactory;
     private final List<TokenFilterFactory> tokenFilterFactories;
+    private final List<CharFilterFactory> charFilterFactories;
 
-    public CustomAnalyzer(TokenizerFactory tokenizerFactory, List<TokenFilterFactory>
tokenFilterFactories, int posIncGap, int offsetGap) {
+    public CustomAnalyzer(List<CharFilterFactory> charFilterFactories,
+                          TokenizerFactory tokenizerFactory,
+                          List<TokenFilterFactory> tokenFilterFactories,
+                          int posIncGap, int offsetGap) {
+      this.charFilterFactories = charFilterFactories;
       this.tokenizerFactory = tokenizerFactory;
       this.tokenFilterFactories = tokenFilterFactories;
       this.posIncGap = posIncGap;
@@ -1050,6 +1115,17 @@ public class RegisterFieldHandler extend
     }
 
     @Override
+    protected Reader initReader(String fieldName, Reader reader) {
+      Reader result = reader;
+      if (charFilterFactories != null) {
+        for(CharFilterFactory factory : charFilterFactories) {
+          result = factory.create(result);
+        }
+      }
+      return result;
+    }
+
+    @Override
     public int getPositionIncrementGap(String fieldName) {
       return posIncGap;
     }

Modified: lucene/dev/branches/lucene5376/lucene/server/src/java/org/apache/lucene/server/params/Request.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5376/lucene/server/src/java/org/apache/lucene/server/params/Request.java?rev=1559538&r1=1559537&r2=1559538&view=diff
==============================================================================
--- lucene/dev/branches/lucene5376/lucene/server/src/java/org/apache/lucene/server/params/Request.java
(original)
+++ lucene/dev/branches/lucene5376/lucene/server/src/java/org/apache/lucene/server/params/Request.java
Sun Jan 19 17:47:30 2014
@@ -624,6 +624,12 @@ public class Request {
     fail(null, message, cause);
   }
 
+  /** Throws {@link RequestFailedException} when the wrong
+   *  class was encountered. */
+  public void failWrongClass(String param, String reason, Object thingy) {
+    fail(param, reason + "; got: " + thingy.getClass());
+  }
+
   /** Throws a {@link RequestFailedException} with the
    *  provided parameter and message and original cause. */
   public void fail(String param, String reason, Throwable cause) {

Modified: lucene/dev/branches/lucene5376/lucene/server/src/test/org/apache/lucene/server/TestAnalysis.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5376/lucene/server/src/test/org/apache/lucene/server/TestAnalysis.java?rev=1559538&r1=1559537&r2=1559538&view=diff
==============================================================================
--- lucene/dev/branches/lucene5376/lucene/server/src/test/org/apache/lucene/server/TestAnalysis.java
(original)
+++ lucene/dev/branches/lucene5376/lucene/server/src/test/org/apache/lucene/server/TestAnalysis.java
Sun Jan 19 17:47:30 2014
@@ -189,5 +189,14 @@ public class TestAnalysis extends Server
     return sb.toString();
   }
 
+  public void testCharFilter() throws Exception {
+    send("analyze", "{text: '<pre>here is some text</pre>', analyzer: {tokenizer:
Whitespace}}");
+    assertEquals("<pre>here is some text</pre>", justTokens());
+
+    // With HTMLStripCharFilter the <pre> and </pre> are removed:
+    send("analyze", "{text: '<pre>here is some text</pre>', analyzer: {charFilters:
[HTMLStrip], tokenizer: Whitespace}}");
+    assertEquals("here is some text", justTokens());
+  }
+
   // nocommit need testOffsetGap ... how...
 }

Modified: lucene/dev/branches/lucene5376/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5376/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java?rev=1559538&r1=1559537&r2=1559538&view=diff
==============================================================================
--- lucene/dev/branches/lucene5376/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java
(original)
+++ lucene/dev/branches/lucene5376/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java
Sun Jan 19 17:47:30 2014
@@ -382,6 +382,11 @@ public class AnalyzingInfixSuggester ext
       }
       ts.end();
 
+      // Must explicitly close now because we pull another
+      // TokenStream in highlight, within this same try
+      // block:
+      ts.close();
+
       String prefixToken = null;
       if (lastToken != null) {
         Query lastQuery;
@@ -406,10 +411,6 @@ public class AnalyzingInfixSuggester ext
         }
       }
 
-      // Must explicitly close now because we pull another
-      // TokenStream in highlight:
-      ts.close();
-
       // TODO: we could allow blended sort here, combining
       // weight w/ score.  Now we ignore score and sort only
       // by weight:



Mime
View raw message