lucene-java-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Doron Cohen <DOR...@il.ibm.com>
Subject Re: Lucene change field values to wrong ones when indexing
Date Thu, 14 Dec 2006 18:44:06 GMT
Two things I would check:

1) Converting pubDate to a String during indexing, for later date-range
filtering of search results, might not work well because terms are compared
as strings, so that, e.g., "9" > "1000000". You could use Lucene's DateTools
- there's an example in TestDateFilter -
http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/search/TestDateFilter.java?view=log

2) The synchronization on lock protects against closing indexSearcher while
calling indexSearcher.search(), but it does not protect the fetching of
results. So it is possible that an indexSearcher is closed (by add or
delete) before/while fetching those id fields. Even worse, if another
search arrives (if you have concurrent searches), the later search might
(re)open indexSearcher, and now, if there were deletes in between, you
would definitely fetch different docs because the internal docids have
changed - I think this would explain the change of the id field. It would
probably be better to reorganize the synchronization, so that an update
would only mark the searcher as needing a refresh, and would not close it
- the searching part would take care of reopening it.

Hope I got this right...
Doron

"Java Programmer" <jprogramista@gmail.com> wrote:

> Hello,
> I have a problem with my search code - I try to index some data while
> searching simultaneously. Everything goes fine until some amount of data
> has been indexed; then my fields are bugged.
> E.g. I have a field with the title indexed as "Nowitzki führt "Mavs" zum
> ersten Heimsieg" and inner id "15" (not the doc id, just a field called id).
> At the end of indexing this field disappears, and some other values
> appear in the id field. I provide a full listing of my program, without
> the AXIS part, which is responsible for data transmission. If you can
> look at my code, maybe the locking mechanism is wrong somewhere, or there
> is some other bug - please help me if you can.
>
> import java.io.IOException;
> import java.io.InputStream;
> import java.util.ArrayList;
> import java.util.HashMap;
> import java.util.List;
> import java.util.Map;
> import java.util.Properties;
>
> import javax.servlet.ServletContext;
> import javax.servlet.http.HttpServlet;
>
> import org.apache.axis.MessageContext;
> import org.apache.axis.transport.http.HTTPConstants;
> import org.apache.lucene.analysis.SimpleAnalyzer;
> import org.apache.lucene.document.Document;
> import org.apache.lucene.document.Field;
> import org.apache.lucene.document.Field.Index;
> import org.apache.lucene.document.Field.Store;
> import org.apache.lucene.index.IndexModifier;
> import org.apache.lucene.index.IndexWriter;
> import org.apache.lucene.index.Term;
> import org.apache.lucene.queryParser.MultiFieldQueryParser;
> import org.apache.lucene.queryParser.ParseException;
> import org.apache.lucene.search.BooleanClause;
> import org.apache.lucene.search.BooleanQuery;
> import org.apache.lucene.search.Hits;
> import org.apache.lucene.search.IndexSearcher;
> import org.apache.lucene.search.Query;
> import org.apache.lucene.search.RangeQuery;
> import org.apache.lucene.search.Sort;
>
> public class SNewsSearch {
>
>    private static SNewsSearch search = new SNewsSearch();
>
>    private static IndexSearcher indexSearcher;
>
>    private String indexLocation;
>
>    private String adminLogin;
>
>    private String adminPass;
>
>    private SNewsSearch(){
>       System.out.println("Constructor SNewsSearch");
>
>       Properties props = new Properties();
>       HttpServlet srv = (HttpServlet)
>
MessageContext.getCurrentContext().getProperty(HTTPConstants.MC_HTTP_SERVLET);

>       ServletContext context = srv.getServletContext ();
>       InputStream is = context.getResourceAsStream("/WEB-INF/lucene.
> properties");
>       try {
>          props.load(is);
>          System.out.println("Using index: " + props.
> getProperty("index.location"));
>          this.indexLocation = props.getProperty("index.location");
>          this.adminLogin = props.getProperty("admin.login");
>          this.adminPass = props.getProperty("admin.pass");
>       } catch (IOException e) {
>          e.printStackTrace();
>       }
>    }
>
>    public static SNewsSearch getInstance(){
>       return search;
>    }
>
>    public void setIndexLocation(String indexLocation) {
>       this.indexLocation = indexLocation;
>    }
>
>    private static Object lock = new Object();
>
>    public void addDocument(int id,String title, String body, int
> pubDate) throws IOException{
>       Document doc = new Document();
>       doc.add(new Field("id",Integer.toString(id),Store.YES,Index.
> UN_TOKENIZED));
>       doc.add(new Field("title",title,Store.NO,Index.TOKENIZED));
>       doc.add(new Field("body",body,Store.NO,Index.TOKENIZED));
>       doc.add(new Field("pubDate",Integer.toString(pubDate),Store.
> NO,Index.UN_TOKENIZED));
>       synchronized(lock){
>          IndexWriter indexWriter = new IndexWriter(indexLocation, new
> SimpleAnalyzer(), false);
>          indexWriter.addDocument(doc);
>          indexWriter.close();
>          if(indexSearcher!=null)
>             indexSearcher.close();
>          indexSearcher = null;
>       }
>    }
>
>    public Map<String,Object> search(String query, int pubDate, int page,
> int results, boolean reverse) throws IOException, ParseException{
>       MultiFieldQueryParser queryparser = new MultiFieldQueryParser(new
> String[]{"title","body"}, new SimpleAnalyzer());
>       Query q; // = queryparser.parse(query);
>       if(pubDate!=0){
>          BooleanQuery bq = new BooleanQuery();
>          bq.add(queryparser.parse(query),BooleanClause.Occur.MUST);
>          bq.add(new RangeQuery(
>                new Term("pubDate",Integer.toString(pubDate)),
>                new Term("pubDate","99999999"),
>                true),BooleanClause.Occur.MUST);
>          q = bq;
>       } else {
>          q = queryparser.parse(query);
>       }
>       Hits hits;
>       synchronized(lock){
>          if(indexSearcher==null)
>             indexSearcher = new IndexSearcher(indexLocation);
>
>          if(reverse)
>             hits = indexSearcher.search(q, new Sort("pubDate",true));
>          else
>             hits = indexSearcher.search(q);
>       }
>
>       List<Integer> output = new ArrayList<Integer>();
>       int i = page*results;
>       while(i<hits.length()){
>          if(i >= (page+1)*results) break;
>          Document doc = hits.doc(i);
>          output.add(Integer.parseInt(doc.get("id")));
>          i++;
>       }
>       Map<String,Object> searchResult = new HashMap<String,Object>();
>       searchResult.put("numberOfResults", hits.length());
>       searchResult.put("results", output);
>       return searchResult;
>    }
>
>    public void deleteDocument(int id) throws IOException{
>       synchronized(lock){
>          IndexModifier indexModifier = new IndexModifier(indexLocation,
new
> SimpleAnalyzer(), false);
>          indexModifier.deleteDocuments(new Term("id",
Integer.toString(id)));
>          indexModifier.close();
>          if(indexSearcher!=null)
>             indexSearcher.close();
>          indexSearcher = null;
>       }
>    }
>
>    public void setUp(String adminLogin, String adminPass) throws
IOException {
>       if(adminLogin.equals(this.adminLogin) && adminPass.
> equals(this.adminPass)){
>          synchronized(lock){
>             IndexWriter indexWriter = new IndexWriter(indexLocation, new
> SimpleAnalyzer(), true);
>             indexWriter.close();
>             if(indexSearcher!=null)
>                indexSearcher.close();
>             indexSearcher = null;
>          }
>       }
>    }
> }
>
> Best regards,
> Adrian
>
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org
> For additional commands, e-mail: java-user-help@lucene.apache.org
>


---------------------------------------------------------------------
To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org
For additional commands, e-mail: java-user-help@lucene.apache.org


Mime
View raw message