lucene-java-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "Erick Erickson" <erickerick...@gmail.com>
Subject Re: Problem with Escape characters in lucene demo search
Date Thu, 20 Dec 2007 00:41:04 GMT
I'm pretty sure StandardAnalyzer is stripping these off. Have you looked
in your index to see if "login" (with quotes) is even in there? I think
you'll find that it's not.

Best
Erick

On Dec 19, 2007 5:07 PM, Baljeet Dhaliwal <bsd@sfu.ca> wrote:

> Hi there
>
>
>
> I am having a problem using escape characters with the Lucene demo code. I
> used the following code for IndexFiles and SearchFiles. The code works
> fine
> for regular searching and also with wildcards. However, when I try to use
> escape characters, it somehow strips them off. For example, when I search
> for "login" (quotes included) using "\"login\"", or for that matter use any
> escape character, Lucene strips the escape characters and looks for the text
> only (login rather than "login").
>
>
>
> Can someone help, please?
>
>
>
> Cheers
>
> B
>
>
>
>
>
> IndexFiles.java
>
>
>
> package org.apache.lucene.demo;
>
>
>
> /**
>
>  * Licensed to the Apache Software Foundation (ASF) under one or more
>
>  * contributor license agreements.  See the NOTICE file distributed with
>
>  * this work for additional information regarding copyright ownership.
>
>  * The ASF licenses this file to You under the Apache License, Version 2.0
>
>  * (the "License"); you may not use this file except in compliance with
>
>  * the License.  You may obtain a copy of the License at
>
>  *
>
>  *     http://www.apache.org/licenses/LICENSE-2.0
>
>  *
>
>  * Unless required by applicable law or agreed to in writing, software
>
>  * distributed under the License is distributed on an "AS IS" BASIS,
>
>  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
> implied.
>
>  * See the License for the specific language governing permissions and
>
>  * limitations under the License.
>
>  */
>
>
>
> import org.apache.lucene.analysis.standard.StandardAnalyzer;
>
> import org.apache.lucene.document.Document;
>
> import org.apache.lucene.index.IndexWriter;
>
>
>
> import java.io.BufferedReader;
>
> import java.io.File;
>
> import java.io.FileNotFoundException;
>
> import java.io.IOException;
>
> import java.io.InputStreamReader;
>
> import java.util.Date;
>
>
>
> /** Index all text files under a directory. */
>
> public class IndexFiles {
>
>
>
>  private IndexFiles() {}
>
>
>
>  static final File INDEX_DIR = new File("index");
>
>
>
>  /** Index all text files under a directory. */
>
>  public static void main(String[] args) {
>
>    String usage = "java org.apache.lucene.demo.IndexFiles
> <root_directory>";
>
>    /*if (args.length == 0) {
>
>      System.err.println("Usage: " + usage);
>
>      System.exit(1);
>
>    }*/
>
>
>
>    /*if (INDEX_DIR.exists()) {
>
>      System.out.println("Cannot save index to '" +INDEX_DIR+ "' directory,
> please delete it first");
>
>      System.exit(1);
>
>    }*/
>
>
>
>    //final File docDir = new File(args[0]);
>
>    //File docDir = new File(args[0]);
>
>    File docDir = new File("F:/bsd-logs/logs");
>
>    if (!docDir.exists() || !docDir.canRead()) {
>
>      System.out.println("Document directory '" +docDir.getAbsolutePath()+
> "' does not exist or is not readable, please check the path");
>
>      System.exit(1);
>
>    }
>
>
>
>    Date start = new Date();
>
>    try {
>
>      System.out.println("Indexing to directory '" +INDEX_DIR+ "'...");
>
>      BufferedReader in = null;
>
>      int i = 0;
>
>      while (i==0){
>
>          IndexWriter writer = new IndexWriter(INDEX_DIR, new
> StandardAnalyzer(), true);
>
>
>
>                      indexDocs(writer, docDir);
>
>                      System.out.println("Optimizing...");
>
>                      writer.optimize();
>
>                      in = new BufferedReader(new
> InputStreamReader(System.in, "UTF-8"));
>
>                      i=1;
>
>                      /*System.out.println("Enter another directory: ");
>
>
>
>                        docDir = new File(in.readLine());
>
>                        writer.close();
>
>                        String args1[] =
> {"\\users\\baljeet\\documents\\rees\\programs\\
> <file:///\\users\baljeet\documents\rees\programs\> "};
>
>                        DeleteFiles.main(args1);*/
>
>     }
>
>
>
>
>
>
>
>      Date end = new Date();
>
>      System.out.println(end.getTime() - start.getTime() + " total
> milliseconds");
>
>
>
>    } catch (IOException e) {
>
>      System.out.println(" caught a " + e.getClass() +
>
>       "\n with message: " + e.getMessage());
>
>    }
>
>  }
>
>
>
>  static void indexDocs(IndexWriter writer, File file)
>
>    throws IOException {
>
>    // do not try to index files that cannot be read
>
>    if (file.canRead()) {
>
>      if (file.isDirectory()) {
>
>        String[] files = file.list();
>
>        // an IO error could occur
>
>        if (files != null) {
>
>          for (int i = 0; i < files.length; i++) {
>
>            indexDocs(writer, new File(file, files[i]));
>
>          }
>
>        }
>
>      } else {
>
>        System.out.println("adding " + file);
>
>        try {
>
>                Document doc = FileDocument.Document(file);
>
>                System.out.println("Path = "+doc.getField("path"));
>
>          writer.addDocument(FileDocument.Document(file));
>
>
>
>        }
>
>        // at least on windows, some temporary files raise this exception
> with an "access denied" message
>
>        // checking if the file can be read doesn't help
>
>        catch (FileNotFoundException fnfe) {
>
>          ;
>
>        }
>
>      }
>
>    }
>
>  }
>
>
>
> }
>
>
>
>
>
>
>
> SearchFiles.java
>
>
>
>
>
> package org.apache.lucene.demo;
>
>
>
> /**
>
>  * Licensed to the Apache Software Foundation (ASF) under one or more
>
>  * contributor license agreements.  See the NOTICE file distributed with
>
>  * this work for additional information regarding copyright ownership.
>
>  * The ASF licenses this file to You under the Apache License, Version 2.0
>
>  * (the "License"); you may not use this file except in compliance with
>
>  * the License.  You may obtain a copy of the License at
>
>  *
>
>  *     http://www.apache.org/licenses/LICENSE-2.0
>
>  *
>
>  * Unless required by applicable law or agreed to in writing, software
>
>  * distributed under the License is distributed on an "AS IS" BASIS,
>
>  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
> implied.
>
>  * See the License for the specific language governing permissions and
>
>  * limitations under the License.
>
>  */
>
>
>
> import org.apache.lucene.analysis.Analyzer;
>
> import org.apache.lucene.analysis.standard.StandardAnalyzer;
>
> import org.apache.lucene.document.Document;
>
> import org.apache.lucene.index.FilterIndexReader;
>
> import org.apache.lucene.index.IndexReader;
>
> import org.apache.lucene.queryParser.QueryParser;
>
> import org.apache.lucene.search.Hits;
>
> import org.apache.lucene.search.IndexSearcher;
>
> import org.apache.lucene.search.Query;
>
> import org.apache.lucene.search.Searcher;
>
>
>
> import java.io.BufferedReader;
>
> import java.io.FileReader;
>
> import java.io.IOException;
>
> import java.io.InputStreamReader;
>
> import java.util.Collection;
>
> import java.util.Date;
>
> import java.util.Hashtable;
>
> import java.util.Iterator;
>
>
>
> /** Simple command-line based search demo. */
>
> public class SearchFiles {
>
>
>
>  /** Use the norms from one field for all fields.  Norms are read into
> memory,
>
>   * using a byte of memory per document per searched field.  This can
> cause
>
>   * search of large collections with a large number of fields to run out
> of
>
>   * memory.  If all of the fields contain only a single token, then the
> norms
>
>   * are all identical, then single norm vector may be shared. */
>
>  public static class OneNormsReader extends FilterIndexReader {
>
>    private String field;
>
>
>
>    public OneNormsReader(IndexReader in, String field) {
>
>      super(in);
>
>      this.field = field;
>
>    }
>
>
>
>    public byte[] norms(String field) throws IOException {
>
>      return in.norms(this.field);
>
>    }
>
>  }
>
>
>
>  private SearchFiles() {}
>
>
>
>  /** Simple command-line based search demo. */
>
>  public static void main(String[] args) throws Exception {
>
>    String usage =
>
>      "Usage: java org.apache.lucene.demo.SearchFiles [-index dir] [-field
> f] [-repeat n] [-queries file] [-raw] [-norms field]";
>
>    if (args.length > 0 && ("-h".equals(args[0]) ||
> "-help".equals(args[0]))) {
>
>      System.out.println(usage);
>
>      System.exit(0);
>
>    }
>
>
>
>    String index = "index";
>
>    String field = "contents";
>
>    String queries = null;
>
>    int repeat = 0;
>
>    boolean raw = false;
>
>    String normsField = null;
>
>
>
>    for (int i = 0; i < args.length; i++) {
>
>      if ("-index".equals(args[i])) {
>
>        index = args[i+1];
>
>        i++;
>
>      } else if ("-field".equals(args[i])) {
>
>        field = args[i+1];
>
>        i++;
>
>      } else if ("-queries".equals(args[i])) {
>
>        queries = args[i+1];
>
>        i++;
>
>      } else if ("-repeat".equals(args[i])) {
>
>        repeat = Integer.parseInt(args[i+1]);
>
>        i++;
>
>      } else if ("-raw".equals(args[i])) {
>
>        raw = true;
>
>      } else if ("-norms".equals(args[i])) {
>
>        normsField = args[i+1];
>
>        i++;
>
>      }
>
>    }
>
>
>
>    IndexReader reader = IndexReader.open(index);
>
>
>
>    if (normsField != null)
>
>      reader = new OneNormsReader(reader, normsField);
>
>
>
>    Collection<String> h =
> reader.getFieldNames(IndexReader.FieldOption.INDEXED);
>
>    Iterator<String> it = h.iterator();
>
>    while (it.hasNext()){
>
>                System.out.println("Iterator Value = "+it.next());
>
>
>
>    }
>
>
>
>
>
>
>
>    Searcher searcher = new IndexSearcher(reader);
>
>    Analyzer analyzer = new StandardAnalyzer();
>
>
>
>    BufferedReader in = null;
>
>    if (queries != null) {
>
>      in = new BufferedReader(new FileReader(queries));
>
>    } else {
>
>      in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
>
>    }
>
>      QueryParser parser = new QueryParser(field, analyzer);
>
>    while (true) {
>
>      if (queries == null)                        // prompt the user
>
>        System.out.println("Enter query: ");
>
>
>
>      String line = in.readLine();
>
>
>
>      if (line == null || line.length() == -1)
>
>        break;
>
>
>
>      line = line.trim();
>
>      if (line.length() == 0)
>
>        break;
>
>      System.out.println("line 1= "+line);
>
>     // line = QueryParser.escape(line);
>
>      System.out.println("line 2= "+line);
>
>      Query query = parser.parse(line);
>
>
>
>      System.out.println("Searching for: " + query.toString(field));
>
>
>
>      Hits hits = searcher.search(query);
>
>
>
>
>
>      if (repeat > 0) {                           // repeat & time as
> benchmark
>
>        Date start = new Date();
>
>        for (int i = 0; i < repeat; i++) {
>
>          hits = searcher.search(query);
>
>        }
>
>        Date end = new Date();
>
>        System.out.println("Time: "+(end.getTime()-start.getTime())+"ms");
>
>      }
>
>
>
>
>
>      System.out.println(hits.length() + " total matching documents");
>
>
>
>      final int HITS_PER_PAGE = 10;
>
>      for (int start = 0; start < hits.length(); start += HITS_PER_PAGE) {
>
>        int end = Math.min(hits.length(), start + HITS_PER_PAGE);
>
>        for (int i = start; i < end; i++) {
>
>
>
>          if (raw) {                              // output raw format
>
>            System.out.println("doc="+hits.id(i)+" score="+hits.score(i));
>
>            continue;
>
>          }
>
>
>
>          Document doc = hits.doc(i);
>
>          String path = doc.get("path");
>
>          if (path != null) {
>
>            System.out.println((i+1) + ". " + path);
>
>            //getSearchedLine(path, "kvoll");
>
>            String title = doc.get("title");
>
>            if (title != null) {
>
>              System.out.println("   Title: " + doc.get("title"));
>
>            }
>
>          } else {
>
>            System.out.println((i+1) + ". " + "No path for this document");
>
>          }
>
>        }
>
>
>
>        if (queries != null)                      // non-interactive
>
>          break;
>
>
>
>        if (hits.length() > end) {
>
>          System.out.println("more (y/n) ? ");
>
>          line = in.readLine();
>
>          if (line.length() == 0 || line.charAt(0) == 'n')
>
>            break;
>
>        }
>
>      }
>
>    }
>
>    reader.close();
>
>  }
>
> }
>
>
>
>

Mime
  • Unnamed multipart/alternative (inline, None, 0 bytes)
View raw message