lucene-solr-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Jonathan Lee <jonathan_...@comcast.com>
Subject Re: spellchecker problems (bugs)
Date Wed, 23 Jul 2008 14:13:56 GMT
I don't see the patch attached to my original email either -- does solr-user
not allow attachments?

This is ugly, but here's the patch inline:

Index: src/test/org/apache/solr/spelling/FileBasedSpellCheckerTest.java
===================================================================
--- src/test/org/apache/solr/spelling/FileBasedSpellCheckerTest.java
(revision 679057)
+++ src/test/org/apache/solr/spelling/FileBasedSpellCheckerTest.java
(working copy)
@@ -70,7 +70,7 @@
     indexDir.mkdirs();
     spellchecker.add(FileBasedSpellChecker.INDEX_DIR,
indexDir.getAbsolutePath());
     SolrCore core = h.getCore();
-    String dictName = checker.init(spellchecker, core.getResourceLoader());
+    String dictName = checker.init(spellchecker, core);
     assertTrue(dictName + " is not equal to " + "external",
dictName.equals("external") == true);
     checker.build(core, null);
 
@@ -108,7 +108,7 @@
     spellchecker.add(FileBasedSpellChecker.FIELD_TYPE, "teststop");
     spellchecker.add(AbstractLuceneSpellChecker.SPELLCHECKER_ARG_NAME,
spellchecker);
     SolrCore core = h.getCore();
-    String dictName = checker.init(spellchecker, core.getResourceLoader());
+    String dictName = checker.init(spellchecker, core);
     assertTrue(dictName + " is not equal to " + "external",
dictName.equals("external") == true);
     checker.build(core, null);
 
@@ -149,7 +149,7 @@
     spellchecker.add(AbstractLuceneSpellChecker.SPELLCHECKER_ARG_NAME,
spellchecker);
 
     SolrCore core = h.getCore();
-    String dictName = checker.init(spellchecker, core.getResourceLoader());
+    String dictName = checker.init(spellchecker, core);
     assertTrue(dictName + " is not equal to " + "external",
dictName.equals("external") == true);
     checker.build(core, null);
 
Index: src/test/org/apache/solr/spelling/IndexBasedSpellCheckerTest.java
===================================================================
--- src/test/org/apache/solr/spelling/IndexBasedSpellCheckerTest.java
(revision 679057)
+++ src/test/org/apache/solr/spelling/IndexBasedSpellCheckerTest.java
(working copy)
@@ -104,7 +104,7 @@
     spellchecker.add(AbstractLuceneSpellChecker.SPELLCHECKER_ARG_NAME,
spellchecker);
     SolrCore core = h.getCore();
 
-    String dictName = checker.init(spellchecker, core.getResourceLoader());
+    String dictName = checker.init(spellchecker, core);
     assertTrue(dictName + " is not equal to " +
SolrSpellChecker.DEFAULT_DICTIONARY_NAME,
             dictName.equals(SolrSpellChecker.DEFAULT_DICTIONARY_NAME) ==
true);
     RefCounted<SolrIndexSearcher> holder = core.getSearcher();
@@ -177,7 +177,7 @@
     spellchecker.add(IndexBasedSpellChecker.FIELD, "title");
     spellchecker.add(AbstractLuceneSpellChecker.SPELLCHECKER_ARG_NAME,
spellchecker);
     SolrCore core = h.getCore();
-    String dictName = checker.init(spellchecker, core.getResourceLoader());
+    String dictName = checker.init(spellchecker, core);
     assertTrue(dictName + " is not equal to " +
SolrSpellChecker.DEFAULT_DICTIONARY_NAME,
             dictName.equals(SolrSpellChecker.DEFAULT_DICTIONARY_NAME) ==
true);
     RefCounted<SolrIndexSearcher> holder = core.getSearcher();
@@ -233,7 +233,7 @@
     spellchecker.add(AbstractLuceneSpellChecker.SPELLCHECKER_ARG_NAME,
spellchecker);
     spellchecker.add(AbstractLuceneSpellChecker.STRING_DISTANCE,
JaroWinklerDistance.class.getName());
     SolrCore core = h.getCore();
-    String dictName = checker.init(spellchecker, core.getResourceLoader());
+    String dictName = checker.init(spellchecker, core);
     assertTrue(dictName + " is not equal to " +
SolrSpellChecker.DEFAULT_DICTIONARY_NAME,
             dictName.equals(SolrSpellChecker.DEFAULT_DICTIONARY_NAME) ==
true);
     RefCounted<SolrIndexSearcher> holder = core.getSearcher();
@@ -283,7 +283,7 @@
     spellchecker.add(IndexBasedSpellChecker.FIELD, "title");
     spellchecker.add(AbstractLuceneSpellChecker.SPELLCHECKER_ARG_NAME,
spellchecker);
     SolrCore core = h.getCore();
-    String dictName = checker.init(spellchecker, core.getResourceLoader());
+    String dictName = checker.init(spellchecker, core);
     assertTrue(dictName + " is not equal to " +
SolrSpellChecker.DEFAULT_DICTIONARY_NAME,
             dictName.equals(SolrSpellChecker.DEFAULT_DICTIONARY_NAME) ==
true);
     RefCounted<SolrIndexSearcher> holder = core.getSearcher();
Index: src/java/org/apache/solr/handler/component/SpellCheckComponent.java
===================================================================
--- src/java/org/apache/solr/handler/component/SpellCheckComponent.java
(revision 679057)
+++ src/java/org/apache/solr/handler/component/SpellCheckComponent.java
(working copy)
@@ -243,10 +243,9 @@
           String className = (String) spellchecker.get("classname");
           if (className == null)
             className = IndexBasedSpellChecker.class.getName();
-          SolrResourceLoader loader = core.getResourceLoader();
-          SolrSpellChecker checker = (SolrSpellChecker)
loader.newInstance(className);
+          SolrSpellChecker checker = (SolrSpellChecker)
core.getResourceLoader().newInstance(className);
           if (checker != null) {
-            String dictionary = checker.init(spellchecker, loader);
+            String dictionary = checker.init(spellchecker, core);
             if (dictionary != null) {
               boolean isDefault =
dictionary.equals(SolrSpellChecker.DEFAULT_DICTIONARY_NAME);
               if (isDefault == true && hasDefault == false){
Index: src/java/org/apache/solr/spelling/FileBasedSpellChecker.java
===================================================================
--- src/java/org/apache/solr/spelling/FileBasedSpellChecker.java
(revision 679057)
+++ src/java/org/apache/solr/spelling/FileBasedSpellChecker.java    (working
copy)
@@ -55,17 +55,28 @@
 
   public static final String SOURCE_FILE_CHAR_ENCODING =
"characterEncoding";
 
-  private String fieldTypeName;
+  private FieldType fieldType = null;
   private String characterEncoding;
   public static final String WORD_FIELD_NAME = "word";
 
-  public String init(NamedList config, SolrResourceLoader loader) {
-    super.init(config, loader);
-    fieldTypeName = (String) config.get(FIELD_TYPE);
+  public String init(NamedList config, SolrCore core) {
+    super.init(config, core);
     characterEncoding = (String) config.get(SOURCE_FILE_CHAR_ENCODING);
+    initAnalyzer((String) config.get(FIELD_TYPE), core);
     return name;
   }
 
+  private void initAnalyzer(String fieldTypeName, SolrCore core) {
+    if (fieldTypeName != null && core.getSchema().getFieldTypes().get(fieldTypeName) != null) {
+      fieldType = core.getSchema().getFieldTypes().get(fieldTypeName);
+      analyzer = fieldType.getQueryAnalyzer();
+    } else {
+      log.warning("No fieldType: " + fieldTypeName
+              + " found for dictionary: " + name + ".  Using WhitespaceAnalzyer.");
+      analyzer = new WhitespaceAnalyzer();
+    }
+  }
+
   public void build(SolrCore core, SolrIndexSearcher searcher) {
     try {
       loadExternalFileDictionary(core.getSchema(),
core.getResourceLoader());
@@ -90,13 +101,10 @@
     try {
 
       // Get the field's analyzer
-      if (fieldTypeName != null
-              && schema.getFieldTypeNoEx(fieldTypeName) != null) {
-        FieldType fieldType = schema.getFieldTypes()
-                .get(fieldTypeName);
+      if (fieldType != null) {
         // Do index-time analysis using the given fieldType's analyzer
         RAMDirectory ramDir = new RAMDirectory();
-        IndexWriter writer = new IndexWriter(ramDir,
fieldType.getAnalyzer(),
+        IndexWriter writer = new IndexWriter(ramDir, getQueryAnalyzer(),
                 true, IndexWriter.MaxFieldLength.UNLIMITED);
         writer.setMergeFactor(300);
         writer.setMaxBufferedDocs(150);
@@ -113,12 +121,7 @@
 
         dictionary = new HighFrequencyDictionary(IndexReader.open(ramDir),
                 WORD_FIELD_NAME, 0.0f);
-        analyzer = fieldType.getQueryAnalyzer();
       } else {
-        log.warning("No fieldType: " + fieldTypeName
-                + " found for dictionary: " + name + ".  Using WhitespaceAnalzyer.");
-        analyzer = new WhitespaceAnalyzer();
-
         // check if character encoding is defined
         if (characterEncoding == null) {
           dictionary = new
PlainTextDictionary(loader.openResource(sourceLocation));
Index: src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java
===================================================================
--- src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java
(revision 679057)
+++ src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java
(working copy)
@@ -18,6 +18,7 @@
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.core.SolrResourceLoader;
+import org.apache.solr.core.SolrCore;
 
 
 /**
@@ -50,8 +51,9 @@
   protected float accuracy = 0.5f;
   public static final String FIELD = "field";
 
-  public String init(NamedList config, SolrResourceLoader loader) {
-    super.init(config, loader);
+  public String init(NamedList config, SolrCore core) {
+    super.init(config, core);
+    SolrResourceLoader loader = core.getResourceLoader();
     indexDir = (String) config.get(INDEX_DIR);
     String accuracy = (String) config.get(ACCURACY);
     //If indexDir is relative then create index inside core.getDataDir()
Index: src/java/org/apache/solr/spelling/SolrSpellChecker.java
===================================================================
--- src/java/org/apache/solr/spelling/SolrSpellChecker.java    (revision
679057)
+++ src/java/org/apache/solr/spelling/SolrSpellChecker.java    (working
copy)
@@ -41,7 +41,7 @@
   protected String name;
   protected Analyzer analyzer;
 
-  public String init(NamedList config, SolrResourceLoader loader){
+  public String init(NamedList config, SolrCore core) {
     name = (String) config.get(DICTIONARY_NAME);
     if (name == null) {
       name = DEFAULT_DICTIONARY_NAME;
Index: src/java/org/apache/solr/spelling/IndexBasedSpellChecker.java
===================================================================
--- src/java/org/apache/solr/spelling/IndexBasedSpellChecker.java
(revision 679057)
+++ src/java/org/apache/solr/spelling/IndexBasedSpellChecker.java
(working copy)
@@ -50,11 +50,12 @@
   protected float threshold;
   protected IndexReader reader;
 
-  public String init(NamedList config, SolrResourceLoader loader) {
-    super.init(config, loader);
+  public String init(NamedList config, SolrCore core) {
+    super.init(config, core);
     threshold = config.get(THRESHOLD_TOKEN_FREQUENCY) == null ? 0.0f
             : (Float) config.get(THRESHOLD_TOKEN_FREQUENCY);
     initSourceReader();
+    initAnalyzer(core);
     return name;
   }
 
@@ -69,6 +70,13 @@
     }
   }
 
+  private void initAnalyzer(SolrCore core) {
+    // Get the field's analyzer
+    FieldType fieldType = core.getSchema().getFieldTypeNoEx(field);
+    analyzer = fieldType == null ? new WhitespaceAnalyzer()
+            : fieldType.getQueryAnalyzer();
+  }
+
   public void build(SolrCore core, SolrIndexSearcher searcher) {
     IndexReader reader = null;
     try {
@@ -106,10 +114,6 @@
     // Create the dictionary
     dictionary = new HighFrequencyDictionary(reader, field,
             threshold);
-    // Get the field's analyzer
-    FieldType fieldType = schema.getFieldTypeNoEx(field);
-    analyzer = fieldType == null ? new WhitespaceAnalyzer()
-            : fieldType.getQueryAnalyzer();
   }
 
   @Override



> From: Geoffrey Young <geoff@modperlcookbook.org>
> Reply-To: <solr-user@lucene.apache.org>
> Date: Wed, 23 Jul 2008 08:53:37 -0400
> To: <solr-user@lucene.apache.org>
> Subject: Re: spellchecker problems (bugs)
> 
> 
>> 2. I believe there is a bug in IndexBased- and FileBasedSpellChecker.java
>> where the analyzer variable is only set on the build command. Therefore,
>> when the index is reloaded, but not built after starting solr, issuing a
>> query with the spellcheck.q parameter will cause a NullPointerException to
>> be thrown (SpellCheckComponent.java:158). Moving the analyzer logic from
>> build() into the init() method seems to fix the problem.
>> 
>> I did not see a jira ticket for this (nor am I sure it's a real bug :), so I
>> have attached a patch with these changes. Please let me know if I have
>> overlooked something here and if I should attach this to an actual ticket.
> 
> I don't see a patch, but I was just about to reply to the previous post
> in this thread that I thought a new jira issue was warranted - I see the
> exact same exception in line 158 under the exact circumstances, using
> current trunk as of this minute.
> 
> as far as I can tell, SOLR-622 would help... if I wanted to build on each
> start, which I may not.  the exception seems like a different issue that
> should be separately tracked.
> 
> --Geoff


Mime
View raw message