lucene-java-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Galactikuh <ksha...@gmail.com>
Subject Help with Snowball Analyzer
Date Fri, 21 Sep 2007 19:02:45 GMT

I am using SnowballAnalyzer with Hibernate search. I am using it to search
phrases in a database. It only seems to return the stemmed version of any
query. So for example, I have two phrases in the DB:

"Cheney is a nut"
and
"I hate nuts"

When I do a search for either "nut" or "nuts" it only returns "Cheney is a
nut"

This is what seems to be inside my index file:

 'cheney' 'hate' 'i' 'nut' 's' 

Here is the code, although it uses Hibernate, so I'm not sure whether it's
relevant or will mean anything to anyone here.

Thanks,
Kshanti

import javax.persistence.EntityManager;

import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.queryParser.QueryParser;
import org.hibernate.*;

public class LuceneSearch 
{

   public enum Similarity
   {
      KEYWORDS,
      HIGH,
      EXPAND,
      EXACT,
      
   }
   
   public static final String TEXT_FIELD = "TEXT";
   
   private static final String HIBERNATE_FILE = "META-INF/persistence.xml";

   private QueryParser _parser;

   private String[] _stopWords;

   private FullTextSession _textSession;

   private EntityManager _entityManager;
   
   private Similarity _defaultSimilarity=Similarity.KEYWORDS;
   
   private boolean _beenIndexed=false;
   
   public static boolean UseStemmer=true;
   
   public LuceneSearch(EntityManager entityMgr)
   {
      if (null == entityMgr)
         throw new IllegalArgumentException("entityMgr must not be null.");
      
      _entityManager = entityMgr;
      // Have to use sessions here
      try
      {
         Session session = (Session) _entityManager.getDelegate();
         _textSession = Search.createFullTextSession(session);
         initialize(null);
      }
      catch (ClassCastException ce)
      {
         // this error supposedly should not get thrown as getDelegate()
         // supposedly always returns a Session, but just in case.
         // (this was from examples on the web and the #hibernate channel so
         // needs to be verified
         ce.printStackTrace();
         // TODO: Wrap it up and do something else with it
         throw ce;
      }
   }

   private void initialize(String[] stopWords)
   {
      if (stopWords != null)
      {
         // first set up the parser with the new stop words
       
         if(UseStemmer)
         {
            _parser = new QueryParser(TEXT_FIELD, new
SnowballAnalyzer("English",stopWords));
         }
         else
         {
            _parser = new QueryParser(TEXT_FIELD, new
StandardAnalyzer(stopWords));
         }
      }
      else
      {  // default stop words
         if(UseStemmer)
         {
            _parser = new QueryParser(TEXT_FIELD, new
SnowballAnalyzer("English"));
         }
         else
         {
            _parser = new QueryParser(TEXT_FIELD, new StandardAnalyzer());
         }
      }
      
      _stopWords = stopWords;
   }

   /***
    * Run the indexer
    */
   public void index()
   {
    //now index
      
      Transaction tx = _textSession.beginTransaction();
      List<Prediction> predictions = _textSession.createQuery("from
Prediction as prediction").list();
      for(Prediction pred: predictions)
      {
         _textSession.index(pred);
      }
      tx.commit(); //index are written at commit time
      _beenIndexed=true;
   }
   
   public List<Prediction> searchPredictions(String queryStr)
   {
      // look in text column of predictions
      try
      {
         if (queryStr != null && !queryStr.equals(""))
         {
            // need to add the field name to the querystring
            queryStr = TEXT_FIELD + ":" + queryStr;
            org.apache.lucene.search.Query luceneQuery =
_parser.parse(queryStr);
            Query query = _textSession.createFullTextQuery(luceneQuery,
                  Prediction.class);
            List<Prediction> ret = query.list();
          //default fits the KEYWORDS similarity
            if (ret != null)
            {
               switch(_defaultSimilarity)
               {

                  case KEYWORDS:
                  case HIGH:
                  case EXPAND:
                  case EXACT:
                 
                     return ret;
                     
               }
               
            }
         }
      }
      catch (Exception pe)
      {// KKG TODO: Turn this into a better exception
         pe.printStackTrace();
      }
      return new ArrayList<Prediction>();
   }
}



-- 
View this message in context: http://www.nabble.com/Help-with-Snowball-Analyzer-tf4497960.html#a12827864
Sent from the Lucene - Java Users mailing list archive at Nabble.com.


---------------------------------------------------------------------
To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org
For additional commands, e-mail: java-user-help@lucene.apache.org


Mime
View raw message