lucene-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "Gaurav Gupta (JIRA)" <j...@apache.org>
Subject [jira] Created: (LUCENE-827) Lucene Spell Index Not giving Proper Result
Date Thu, 08 Mar 2007 02:29:24 GMT
Lucene Spell Index Not giving Proper Result
-------------------------------------------

                 Key: LUCENE-827
                 URL: https://issues.apache.org/jira/browse/LUCENE-827
             Project: Lucene - Java
          Issue Type: Bug
         Environment: Windows XP, Linux
            Reporter: Gaurav Gupta


I am passing the list of words 'Mayur Vihar Center Circle Udyog Vihar Noida Gurgaon' to create
a spell index from a Lucene index. When I search for the correct word for 'Centrer', i.e. 'Center',
it doesn't find it. I checked whether the word is in the spell index, and I didn't find it there.

Building the spell index directly from a plain-text dictionary gives me the correct word for
'centre', i.e. 'center'. I can't understand why it behaves like this.



I am also attaching the source:

createDataStructure creates the Lucene index, and initializeSpellChecker initializes the spell
checker.



import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.sql.SQLException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.search.spell.LuceneDictionary;
import org.apache.lucene.search.spell.SpellChecker;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Spell checker backed by two Lucene indexes kept under one base directory:
 * a plain word index ("test") and the spell-checker index derived from it ("sp").
 */
public class SpellCheckImpl implements SpellCheck{

	/** Name of the single field that holds the dictionary words in the word index. */
	private static final String DEFAULT_FIELD = "field";

	/** Location of the plain word index; stays null if the constructor was given a non-directory. */
	private String simpleDirectory;
	/** Location of the spell-checker index; stays null if the constructor was given a non-directory. */
	private String spellDirectory;
	/** Path of the word file handed to {@link #initialize(String)}. */
	private String dataTextFile;

	private SpellCheckImpl(){

	}

	/**
	 * Configures the index locations below the given base directory and creates the
	 * "test" and "sp" sub-directories when they do not exist yet.
	 * If <code>directoryPath</code> is not an existing directory nothing is configured.
	 *
	 * @param directoryPath base directory that will contain both indexes
	 */
	public SpellCheckImpl(String directoryPath){

		File base = new File(directoryPath);

		if(base.isDirectory()){

			this.simpleDirectory = directoryPath+"/test";
			this.spellDirectory = directoryPath+"/sp";

			File simple = new File(this.simpleDirectory);
			File spell = new File(this.spellDirectory);

			if(!simple.isDirectory()){
				simple.mkdir();
			}

			if(!spell.isDirectory()){
				spell.mkdir();
			}
		}
	}

	/**
	 * Builds the word index and then the spell-checker index from it.
	 * Any failure is reported on standard output and otherwise suppressed.
	 *
	 * @param filePath path of the word file; remembered for the index build
	 */
	public void initialize(String filePath){

		this.dataTextFile = filePath;

		try{
			createDataStructure(simpleDirectory);
			initializeSpellChecker(simpleDirectory, spellDirectory);
		}catch(Exception e){
			System.out.println("Initialization failed "+e.getMessage());
		}
	}

	/**
	 * Creates the index for the list of good words at the given location.
	 *
	 * @param origDirLocation directory in which the word index is created
	 * @throws IOException if the word file cannot be opened or the index cannot be written
	 */
	private void createDataStructure(String origDirLocation) throws IOException{

		Directory directory = FSDirectory.getDirectory(origDirLocation, true);
		Analyzer analyzer = new StandardAnalyzer();
		IndexWriter iwriter = new IndexWriter(directory, analyzer, true);

		try{
			long time = System.currentTimeMillis();

			// The word file's content is not indexed yet (the text below is hard-coded).
			// It is still opened so that a missing file keeps failing the build as before,
			// but the handle is released immediately instead of being leaked.
			InputStream is = new FileInputStream(new File(dataTextFile));
			is.close();

			Document doc = new Document();
			doc.add(new Field(DEFAULT_FIELD, "Mayur Vihar Center Circle Udyog Vihar Noida Gurgaon",
					Field.Store.YES, Field.Index.TOKENIZED));

			iwriter.addDocument(doc);
			iwriter.optimize();
			time = System.currentTimeMillis()-time;
			System.out.println("time to Create Lucene Index "+time);
		}finally{
			// Always close the writer, even when indexing fails part-way.
			iwriter.close();
		}
	}

	/**
	 * Creates the spell-checker dictionary at <code>spellDirLocation</code> from the
	 * words stored in the index at <code>origDirLocation</code>.
	 *
	 * @param origDirLocation location of the existing word index
	 * @param spellDirLocation location of the spell-checker index to build
	 * @throws IOException if either index cannot be read or written
	 */
	private void initializeSpellChecker(String origDirLocation, String spellDirLocation) throws IOException{

		FSDirectory origDir = FSDirectory.getDirectory(origDirLocation, false);
		FSDirectory spellDir = FSDirectory.getDirectory(spellDirLocation, true);

		long time = System.currentTimeMillis();

		IndexReader indexReader = IndexReader.open(origDir);
		try{
			SpellChecker spellChecker = new SpellChecker(spellDir);
			spellChecker.indexDictionnary(new LuceneDictionary(indexReader, DEFAULT_FIELD));
		}finally{
			// The reader is only needed while the dictionary is being indexed.
			indexReader.close();
		}

		time = System.currentTimeMillis()-time;

		System.out.println("time to build Spell Checker Dictionary "+time);
	}

	/**
	 * Asks the spell checker for up to 25 suggestions for the given word.
	 *
	 * @param badWord the word to find suggestions for
	 * @return the suggestions returned by the spell checker
	 * @throws ParseException if the spell index cannot be accessed; the underlying
	 *         {@link IOException} is attached as the cause
	 */
	public String[] GetMatches(String badWord) throws ParseException
	{
		try
		{
			SpellChecker spellChecker = new SpellChecker(FSDirectory.getDirectory(spellDirectory, false));

			spellChecker.setAccuraty(0);

			if(spellChecker.exist(badWord)){
				System.out.println("here");
			}

			return spellChecker.suggestSimilar(badWord, 25);
		}
		catch (IOException e)
		{
			// ParseException offers no cause-taking constructor here, so attach it afterwards.
			ParseException wrapped = new ParseException(e.getMessage());
			wrapped.initCause(e);
			throw wrapped;
		}
	}

	/**
	 * Returns the first suggestion for the given word.
	 *
	 * @param badWord the word to find a suggestion for
	 * @return the first suggestion, or the text "No Correct Spelling Found" when there is none
	 * @throws ParseException if the spell index cannot be accessed
	 */
	public String GetBestMatch(String badWord) throws ParseException
	{
		String[] correctWords = GetMatches(badWord);

		if(correctWords != null && correctWords.length > 0){
			return correctWords[0];
		}

		return "No Correct Spelling Found";
	}

	/**
	 * Adds one entry to the word index and rebuilds the spell-checker index.
	 * A null or empty entry is not added, but the rebuild still happens.
	 *
	 * @param word text to add as a new document
	 * @throws IOException if either index cannot be written
	 */
	public void addWords(String word) throws IOException{
		appendWords(new String[]{ word });
	}

	/**
	 * Adds each non-null, non-empty entry to the word index as its own document and
	 * rebuilds the spell-checker index. A null array adds nothing, but the rebuild
	 * still happens.
	 *
	 * @param word entries to add
	 * @throws IOException if either index cannot be written
	 */
	public void addWords(String[] word) throws IOException{
		appendWords(word);
	}

	/** Shared implementation of both addWords overloads. */
	private void appendWords(String[] words) throws IOException{

		long time = System.currentTimeMillis();
		Analyzer analyzer = new StandardAnalyzer();
		IndexWriter writer = new IndexWriter(simpleDirectory, analyzer, false);

		try{
			if(words != null){
				for(int i = 0; i < words.length; i++){
					if(words[i] != null && !"".equals(words[i])){
						Document doc = new Document();
						doc.add(new Field(DEFAULT_FIELD, words[i], Field.Store.YES,
								Field.Index.TOKENIZED));
						writer.addDocument(doc);
					}
				}
			}

			writer.optimize();
		}finally{
			// Always close the writer, even when adding or optimizing fails.
			writer.close();
		}

		time = System.currentTimeMillis()-time;
		initializeSpellChecker(simpleDirectory, spellDirectory);
		System.out.println("time to add words "+time);
	}

}


-- 
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.


---------------------------------------------------------------------
To unsubscribe, e-mail: java-dev-unsubscribe@lucene.apache.org
For additional commands, e-mail: java-dev-help@lucene.apache.org


Mime
View raw message