lucene-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ehatc...@apache.org
Subject cvs commit: jakarta-lucene-sandbox/contributions/lucli/src/lucli LuceneMethods.java Lucli.java
Date Mon, 26 Jan 2004 01:36:15 GMT
ehatcher    2004/01/25 17:36:15

  Modified:    contributions/lucli/src/lucli LuceneMethods.java Lucli.java
  Log:
  lucli code cleanup and a couple of minor enhancements/fixes
  
  Revision  Changes    Path
  1.2       +282 -273  jakarta-lucene-sandbox/contributions/lucli/src/lucli/LuceneMethods.java
  
  Index: LuceneMethods.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene-sandbox/contributions/lucli/src/lucli/LuceneMethods.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- LuceneMethods.java	11 Dec 2003 02:07:45 -0000	1.1
  +++ LuceneMethods.java	26 Jan 2004 01:36:15 -0000	1.2
  @@ -92,281 +92,290 @@
    * Parts addapted from Lucene demo. Various methods that interact with
    * Lucene and provide info about the index, search, etc.
    */
  +
   class LuceneMethods {
   
  -	private int numDocs;
  -	private String indexName; //directory of this index
  -	private long version; //version number of this index
  -	java.util.Iterator fieldIterator;
  -	Vector fields; //Fields as a vector
  -	Vector indexedFields; //Fields as a vector
  -	String fieldsArray[]; //Fields as an array
  -	Searcher searcher;
  -	Query query; //current query string
  -
  -	public LuceneMethods(String index) {
  -		indexName = index;
  -		message("Lucene CLI. Using directory:" + indexName);
  -	}
  -
  -
  -	public void info() throws java.io.IOException {
  -		IndexReader indexReader = IndexReader.open(indexName);
  -
  -
  -		getFieldInfo();
  -		numDocs= indexReader.numDocs();
  -		message("Index has " + numDocs + " documents ");
  -		message ("All Fields:" + fields.toString());
  -		message ("Indexed Fields:" + indexedFields.toString());
  -
  -		if (IndexReader.isLocked(indexName)) {
  -			message("Index is locked");
  -		}
  -		//IndexReader.getCurrentVersion(indexName);
  -		//System.out.println("Version:" + version);
  -
  -		indexReader.close();
  -	}
  -
  -
  -	public void search(String queryString, boolean explain, boolean showTokens) throws java.io.IOException, org.apache.lucene.queryParser.ParseException {
  -		BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
  -		Hits hits = initSearch(queryString);
  -		System.out.println(hits.length() + " total matching documents");
  -		Query explainQuery;
  -		if (explain) {
  -			query = explainQuery(queryString);
  -		}
  -
  -
  -		final int HITS_PER_PAGE = 10;
  -		message ("--------------------------------------");
  -		for (int start = 0; start < hits.length(); start += HITS_PER_PAGE) {
  -			int end = Math.min(hits.length(), start + HITS_PER_PAGE);
  -			for (int ii = start; ii < end; ii++) {
  -				Document doc = hits.doc(ii);
  -				message ("---------------- " + ii + " score:" + hits.score(ii) + "---------------------");
  -				printHit(doc);
  -				if (showTokens) {
  -					invertDocument(doc);
  -				}
  -				if (explain) {
  -					Explanation exp = searcher.explain(query, hits.id(ii));
  -					message("Explanation:" + exp.toString());
  -				}
  -			}
  -			message ("#################################################");
  -
  -			if (hits.length() > end) {
  -				System.out.print("more (y/n) ? ");
  -				queryString = in.readLine();
  -				if (queryString.length() == 0 || queryString.charAt(0) == 'n')
  -					break;
  -			}
  -		}
  -		searcher.close();
  -	}
  -
  -	private void printHit(Document doc) {
  -		for (int ii= 0; ii < fieldsArray.length; ii++) {
  -			String currField = fieldsArray[ii];
  -			String result = doc.get(currField);
  -			message(currField + ":" + result);
  -		}
  -		//another option is to just do message(doc);
  -	}
  -
  -	public void optimize () throws IOException{
  -		//open the index writer. False: don't create a new one
  -		IndexWriter indexWriter = new IndexWriter(indexName,  new StandardAnalyzer(), false);
  -		message("Starting to optimize index.");
  -		long start = System.currentTimeMillis();
  -		indexWriter.optimize();
  -		message("Done optimizing index. Took " + (System.currentTimeMillis() - start) + " msecs");
  -		indexWriter.close();
  -	}
  -
  -
  -	private Query explainQuery(String queryString) throws IOException, ParseException {
  -
  -		searcher = new IndexSearcher(indexName);
  -		Analyzer analyzer = new StandardAnalyzer();
  -		getFieldInfo();
  -
  -		BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
  -
  -		MultiFieldQueryParser parser = new  MultiFieldQueryParser(queryString, analyzer);
  -
  -		int arraySize = indexedFields.size();
  -		String indexedArray[] = new String[arraySize];
  -		for (int ii = 0; ii < arraySize; ii++) {
  -			indexedArray[ii] = (String) indexedFields.get(ii);
  -		}
  -		query = parser.parse(queryString, indexedArray, analyzer);
  -		System.out.println("Searching for: " + query.toString());
  -		return (query);
  -
  -	}
  -	private Hits initSearch(String queryString) throws IOException, ParseException {
  -
  -		searcher = new IndexSearcher(indexName);
  -		Analyzer analyzer = new StandardAnalyzer();
  -		getFieldInfo();
  -
  -		BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
  -
  -		MultiFieldQueryParser parser = new  MultiFieldQueryParser(queryString, analyzer);
  -
  -		int arraySize = fields.size();
  -		fieldsArray = new String[arraySize];
  -		for (int ii = 0; ii < arraySize; ii++) {
  -			fieldsArray[ii] = (String) fields.get(ii);
  -		}
  -		query = parser.parse(queryString, fieldsArray, analyzer);
  -		System.out.println("Searching for: " + query.toString());
  -		Hits hits = searcher.search(query);
  -		return (hits);
  -
  -	}
  -
  -	public void count(String queryString) throws java.io.IOException, ParseException {
  -		Hits hits = initSearch(queryString);
  -		System.out.println(hits.length() + " total documents");
  -		searcher.close();
  -	}
  -
  -	static public void message (String s) {
  -		System.out.println(s);
  -	}
  -
  -	private void getFieldInfo() throws IOException {
  -		IndexReader indexReader = IndexReader.open(indexName);
  -		fields = new Vector();
  -		indexedFields = new Vector();
  -
  -		//get the list of all field names
  -		fieldIterator = indexReader.getFieldNames().iterator();
  -		while (fieldIterator.hasNext()) {
  -			Object field = fieldIterator.next();
  -			if (field != null && !field.equals(""))
  -				fields.add(field.toString());
  -		}
  -		//
  -		//get the list of indexed field names
  -		fieldIterator = indexReader.getFieldNames(true).iterator();
  -		while (fieldIterator.hasNext()) {
  -			Object field = fieldIterator.next();
  -			if (field != null && !field.equals(""))
  -				indexedFields.add(field.toString());
  -		}
  -		indexReader.close();
  -	}
  -
  -
  -	// Copied from DocumentWriter
  -	// Tokenizes the fields of a document into Postings.
  -	private void invertDocument(Document doc)
  -		throws IOException {
  -
  -		Hashtable tokenHash = new Hashtable();
  -		final int maxFieldLength = 10000;
  -
  -		Analyzer analyzer = new StandardAnalyzer();
  -		Enumeration fields = doc.fields();
  -		while (fields.hasMoreElements()) {
  -			Field field = (Field) fields.nextElement();
  -			String fieldName = field.name();
  -
  -
  -			if (field.isIndexed()) {
  -				if (field.isTokenized()) {     // un-tokenized field
  -					Reader reader;        // find or make Reader
  -					if (field.readerValue() != null)
  -						reader = field.readerValue();
  -					else if (field.stringValue() != null)
  -						reader = new StringReader(field.stringValue());
  -					else
  -						throw new IllegalArgumentException
  -							("field must have either String or Reader value");
  -
  -					int position = 0;
  -					// Tokenize field and add to postingTable
  -					TokenStream stream = analyzer.tokenStream(fieldName, reader);
  -					try {
  -						for (Token t = stream.next(); t != null; t = stream.next()) {
  -							position += (t.getPositionIncrement() - 1);
  -							position++;
  -							String name = t.termText();
  -							Integer Count = (Integer)tokenHash.get(name);
  -							if (Count == null) { // not in there yet
  -								tokenHash.put(name, new Integer(1)); //first one
  -							} else {
  -								int count = Count.intValue();
  -								tokenHash.put(name, new Integer (count+1));
  -							}
  -							if (position > maxFieldLength) break;
  -						}
  -					} finally {
  -						stream.close();
  -					}
  -				}
  -
  -			}
  -		}
  -		Entry[] sortedHash = getSortedHashtableEntries(tokenHash);
  -		for (int ii = 0; ii < sortedHash.length && ii < 10; ii ++) {
  -			Entry currentEntry = sortedHash[ii];
  -			message((ii + 1) + ":" + currentEntry.getKey() + " " + currentEntry.getValue());
  -		}
  -	}
  -
  -
  -	/** Provides a list of the top terms of the index.
  -	 *
  -	 * @param field  - the name of the command or null for all of them.
  -	 */
  -	public void terms(String field) throws IOException {
  -		TreeMap termMap = new TreeMap();
  -		IndexReader indexReader = IndexReader.open(indexName);
  -		TermEnum terms = indexReader.terms();
  -		while (terms.next()) {
  -			Term term = terms.term();
  -			//message(term.field() + ":" + term.text() + " freq:" + terms.docFreq());
  -			//if we're either not looking by field or we're matching the specific field
  -			if ((field == null) || field.equals(term.field()))
  -				termMap.put(new Integer((0 - terms.docFreq())), term.field() + ":" + term.text());
  -		}
  -
  -		Iterator termIterator = termMap.keySet().iterator();
  -		for (int ii=0; termIterator.hasNext() && ii < 100; ii++) {
  -			Integer termFreq = (Integer) termIterator.next();
  -			String termDetails = (String) termMap.get(termFreq);
  -			message(termDetails + ": " + termFreq);
  -		}
  -		indexReader.close();
  -	}
  -
  -	/** Sort Hashtable values
  -	 * @param h the hashtable we're sorting
  -	 * from http://developer.java.sun.com/developer/qow/archive/170/index.jsp
  -	 */
  -
  -	public static Entry[]
  -		getSortedHashtableEntries(Hashtable h) {
  -			Set set = h.entrySet();
  -			Entry [] entries =
  -				(Entry[])set.toArray(
  -																 new Entry[set.size()]);
  -			Arrays.sort(entries, new Comparator() {
  -				public int compare(Object o1, Object o2) {
  -					Object v1 = ((Entry)o1).getValue();
  -					Object v2 = ((Entry)o2).getValue();
  -					return ((Comparable)v2).compareTo(v1); //descending order
  -				}
  -			});
  -			return entries;
  -		}
  +  private int numDocs;
  +  private String indexName; //directory of this index
  +  private long version; //version number of this index
  +  java.util.Iterator fieldIterator;
  +  Vector fields; //Fields as a vector
  +  Vector indexedFields; //Fields as a vector
  +  String fieldsArray[]; //Fields as an array
  +  Searcher searcher;
  +  Query query; //current query string
  +
  +  public LuceneMethods(String index) {
  +    indexName = index;
  +    message("Lucene CLI. Using directory:" + indexName);
  +  }
  +
  +
  +  public void info() throws java.io.IOException {
  +    IndexReader indexReader = IndexReader.open(indexName);
  +
  +
  +    getFieldInfo();
  +    numDocs = indexReader.numDocs();
  +    message("Index has " + numDocs + " documents ");
  +    message("All Fields:" + fields.toString());
  +    message("Indexed Fields:" + indexedFields.toString());
  +
  +    if (IndexReader.isLocked(indexName)) {
  +      message("Index is locked");
  +    }
  +    //IndexReader.getCurrentVersion(indexName);
  +    //System.out.println("Version:" + version);
  +
  +    indexReader.close();
  +  }
  +
  +
  +  public void search(String queryString, boolean explain, boolean showTokens) throws java.io.IOException, org.apache.lucene.queryParser.ParseException {
  +    BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
  +    Hits hits = initSearch(queryString);
  +    System.out.println(hits.length() + " total matching documents");
  +    if (explain) {
  +      query = explainQuery(queryString);
  +    }
  +
  +
  +    final int HITS_PER_PAGE = 10;
  +    message("--------------------------------------");
  +    for (int start = 0; start < hits.length(); start += HITS_PER_PAGE) {
  +      int end = Math.min(hits.length(), start + HITS_PER_PAGE);
  +      for (int ii = start; ii < end; ii++) {
  +        Document doc = hits.doc(ii);
  +        message("---------------- " + (ii + 1) + " score:" + hits.score(ii) + "---------------------");
  +        printHit(doc);
  +        if (showTokens) {
  +          invertDocument(doc);
  +        }
  +        if (explain) {
  +          Explanation exp = searcher.explain(query, hits.id(ii));
  +          message("Explanation:" + exp.toString());
  +        }
  +      }
  +      message("#################################################");
  +
  +      if (hits.length() > end) {
  +        System.out.print("more (y/n) ? ");
  +        queryString = in.readLine();
  +        if (queryString.length() == 0 || queryString.charAt(0) == 'n')
  +          break;
  +      }
  +    }
  +    searcher.close();
  +  }
  +
  +  /**
  +   * @todo Allow user to specify what field(s) to display
  +   */
  +  private void printHit(Document doc) {
  +    for (int ii = 0; ii < fieldsArray.length; ii++) {
  +      String currField = fieldsArray[ii];
  +      String[] result = doc.getValues(currField);
  +      for (int i = 0; i < result.length; i++) {
  +        message(currField + ":" + result[i]);
  +      }
  +    }
  +    //another option is to just do message(doc);
  +  }
  +
  +  public void optimize() throws IOException {
  +    //open the index writer. False: don't create a new one
  +    IndexWriter indexWriter = new IndexWriter(indexName, new StandardAnalyzer(), false);
  +    message("Starting to optimize index.");
  +    long start = System.currentTimeMillis();
  +    indexWriter.optimize();
  +    message("Done optimizing index. Took " + (System.currentTimeMillis() - start) + " msecs");
  +    indexWriter.close();
  +  }
  +
  +
  +  private Query explainQuery(String queryString) throws IOException, ParseException {
  +
  +    searcher = new IndexSearcher(indexName);
  +    Analyzer analyzer = new StandardAnalyzer();
  +    getFieldInfo();
  +
  +    BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
  +
  +    MultiFieldQueryParser parser = new MultiFieldQueryParser(queryString, analyzer);
  +
  +    int arraySize = indexedFields.size();
  +    String indexedArray[] = new String[arraySize];
  +    for (int ii = 0; ii < arraySize; ii++) {
  +      indexedArray[ii] = (String) indexedFields.get(ii);
  +    }
  +    query = parser.parse(queryString, indexedArray, analyzer);
  +    System.out.println("Searching for: " + query.toString());
  +    return (query);
  +
  +  }
  +
  +  /**
  +   * @todo Allow user to specify analyzer
  +   */
  +  private Hits initSearch(String queryString) throws IOException, ParseException {
  +
  +    searcher = new IndexSearcher(indexName);
  +    Analyzer analyzer = new StandardAnalyzer();
  +    getFieldInfo();
  +
  +    BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
  +
  +    MultiFieldQueryParser parser = new MultiFieldQueryParser(queryString, analyzer);
  +
  +    int arraySize = fields.size();
  +    fieldsArray = new String[arraySize];
  +    for (int ii = 0; ii < arraySize; ii++) {
  +      fieldsArray[ii] = (String) fields.get(ii);
  +    }
  +    query = parser.parse(queryString, fieldsArray, analyzer);
  +    System.out.println("Searching for: " + query.toString());
  +    Hits hits = searcher.search(query);
  +    return (hits);
  +
  +  }
  +
  +  public void count(String queryString) throws java.io.IOException, ParseException {
  +    Hits hits = initSearch(queryString);
  +    System.out.println(hits.length() + " total documents");
  +    searcher.close();
  +  }
  +
  +  static public void message(String s) {
  +    System.out.println(s);
  +  }
  +
  +  private void getFieldInfo() throws IOException {
  +    IndexReader indexReader = IndexReader.open(indexName);
  +    fields = new Vector();
  +    indexedFields = new Vector();
  +
  +    //get the list of all field names
  +    fieldIterator = indexReader.getFieldNames().iterator();
  +    while (fieldIterator.hasNext()) {
  +      Object field = fieldIterator.next();
  +      if (field != null && !field.equals(""))
  +        fields.add(field.toString());
  +    }
  +    //
  +    //get the list of indexed field names
  +    fieldIterator = indexReader.getFieldNames(true).iterator();
  +    while (fieldIterator.hasNext()) {
  +      Object field = fieldIterator.next();
  +      if (field != null && !field.equals(""))
  +        indexedFields.add(field.toString());
  +    }
  +    indexReader.close();
  +  }
  +
  +
  +  // Copied from DocumentWriter
  +  // Tokenizes the fields of a document into Postings.
  +  private void invertDocument(Document doc)
  +    throws IOException {
  +
  +    Hashtable tokenHash = new Hashtable();
  +    final int maxFieldLength = 10000;
  +
  +    Analyzer analyzer = new StandardAnalyzer();
  +    Enumeration fields = doc.fields();
  +    while (fields.hasMoreElements()) {
  +      Field field = (Field) fields.nextElement();
  +      String fieldName = field.name();
  +
  +
  +      if (field.isIndexed()) {
  +        if (field.isTokenized()) {     // un-tokenized field
  +          Reader reader;        // find or make Reader
  +          if (field.readerValue() != null)
  +            reader = field.readerValue();
  +          else if (field.stringValue() != null)
  +            reader = new StringReader(field.stringValue());
  +          else
  +            throw new IllegalArgumentException
  +              ("field must have either String or Reader value");
  +
  +          int position = 0;
  +          // Tokenize field and add to postingTable
  +          TokenStream stream = analyzer.tokenStream(fieldName, reader);
  +          try {
  +            for (Token t = stream.next(); t != null; t = stream.next()) {
  +              position += (t.getPositionIncrement() - 1);
  +              position++;
  +              String name = t.termText();
  +              Integer Count = (Integer) tokenHash.get(name);
  +              if (Count == null) { // not in there yet
  +                tokenHash.put(name, new Integer(1)); //first one
  +              } else {
  +                int count = Count.intValue();
  +                tokenHash.put(name, new Integer(count + 1));
  +              }
  +              if (position > maxFieldLength) break;
  +            }
  +          } finally {
  +            stream.close();
  +          }
  +        }
  +
  +      }
  +    }
  +    Entry[] sortedHash = getSortedHashtableEntries(tokenHash);
  +    for (int ii = 0; ii < sortedHash.length && ii < 10; ii++) {
  +      Entry currentEntry = sortedHash[ii];
  +      message((ii + 1) + ":" + currentEntry.getKey() + " " + currentEntry.getValue());
  +    }
  +  }
  +
  +
  +  /** Provides a list of the top terms of the index.
  +   *
  +   * @param field  - the name of the command or null for all of them.
  +   */
  +  public void terms(String field) throws IOException {
  +    TreeMap termMap = new TreeMap();
  +    IndexReader indexReader = IndexReader.open(indexName);
  +    TermEnum terms = indexReader.terms();
  +    while (terms.next()) {
  +      Term term = terms.term();
  +      //message(term.field() + ":" + term.text() + " freq:" + terms.docFreq());
  +      //if we're either not looking by field or we're matching the specific field
  +      if ((field == null) || field.equals(term.field()))
  +        termMap.put(term.field() + ":" + term.text(), new Integer((terms.docFreq())));
  +    }
  +
  +    Iterator termIterator = termMap.keySet().iterator();
  +    for (int ii = 0; termIterator.hasNext() && ii < 100; ii++) {
  +      String termDetails = (String) termIterator.next();
  +      Integer termFreq = (Integer) termMap.get(termDetails);
  +      message(termDetails + ": " + termFreq);
  +    }
  +    indexReader.close();
  +  }
  +
  +  /** Sort Hashtable values
  +   * @param h the hashtable we're sorting
  +   * from http://developer.java.sun.com/developer/qow/archive/170/index.jsp
  +   */
  +
  +  public static Entry[]
  +    getSortedHashtableEntries(Hashtable h) {
  +    Set set = h.entrySet();
  +    Entry[] entries =
  +      (Entry[]) set.toArray(
  +        new Entry[set.size()]);
  +    Arrays.sort(entries, new Comparator() {
  +      public int compare(Object o1, Object o2) {
  +        Object v1 = ((Entry) o1).getValue();
  +        Object v2 = ((Entry) o2).getValue();
  +        return ((Comparable) v2).compareTo(v1); //descending order
  +      }
  +    });
  +    return entries;
  +  }
   
   }
   
  
  
  
  1.3       +2 -2      jakarta-lucene-sandbox/contributions/lucli/src/lucli/Lucli.java
  
  Index: Lucli.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene-sandbox/contributions/lucli/src/lucli/Lucli.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- Lucli.java	25 Jan 2004 19:42:21 -0000	1.2
  +++ Lucli.java	26 Jan 2004 01:36:15 -0000	1.3
  @@ -69,7 +69,7 @@
   public class Lucli {
   
   	final static String DEFAULT_INDEX = "index"; //directory "index" under the current directory
  -	final static String HISTORYFILE = ".lucli"; //directory "index" under the current directory
  +	final static String HISTORYFILE = ".lucli"; //history file in user's home directory
   	public final static int MAX_TERMS = 100; //Maximum number of terms we're going to show
   
   	// List of commands
  @@ -352,7 +352,7 @@
   	}
   
   	private void usage() {
  -		message("Usage: lucli [-j]");
  +		message("Usage: lucli [-r]");
   		message("Arguments:");
   	message("\t-r: Provide tab completion and history using the GNU readline shared library");
   	}
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: lucene-dev-help@jakarta.apache.org


Mime
View raw message