lucene-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ehatc...@apache.org
Subject cvs commit: jakarta-lucene-sandbox/contributions/miscellaneous/src/java/org/apache/lucene/misc HighFreqTerms.java
Date Sun, 25 Jan 2004 13:16:00 GMT
ehatcher    2004/01/25 05:16:00

  Modified:    contributions/miscellaneous/src/java/org/apache/lucene/misc
                        HighFreqTerms.java
  Log:
  #26396 - HighFreqTerms fixup from Jean-François Halleux
  
  Revision  Changes    Path
  1.3       +54 -72    jakarta-lucene-sandbox/contributions/miscellaneous/src/java/org/apache/lucene/misc/HighFreqTerms.java
  
  Index: HighFreqTerms.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene-sandbox/contributions/miscellaneous/src/java/org/apache/lucene/misc/HighFreqTerms.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- HighFreqTerms.java	6 Jan 2004 01:17:37 -0000	1.2
  +++ HighFreqTerms.java	25 Jan 2004 13:16:00 -0000	1.3
  @@ -3,7 +3,7 @@
   /* ====================================================================
    * The Apache Software License, Version 1.1
    *
  - * Copyright (c) 2001 The Apache Software Foundation.  All rights
  + * Copyright (c) 2001,2004 The Apache Software Foundation.  All rights
    * reserved.
    *
    * Redistribution and use in source and binary forms, with or without
  @@ -54,10 +54,10 @@
    * <http://www.apache.org/>.
    */
   
  -import org.apache.lucene.util.PriorityQueue;
   import org.apache.lucene.index.IndexReader;
   import org.apache.lucene.index.Term;
   import org.apache.lucene.index.TermEnum;
  +import org.apache.lucene.util.PriorityQueue;
   
   /**
    * <code>HighFreqTerms</code> class extracts terms and their frequencies out
  @@ -65,77 +65,59 @@
    *
    * @version $Id$
    */
  -public class HighFreqTerms
  -{
  -    public static int numTerms = 100;
  -
  -    public static void main(String[] args) throws Exception
  -    {
  -        IndexReader reader = null;
  -        if (args.length == 1)
  -        {
  -            reader = IndexReader.open(args[0]);
  -        }
  -        else
  -        {
  -            usage();
  -            System.exit(1);
  -        }
  -
  -        TermInfoQueue tiq = new TermInfoQueue(numTerms);
  -        TermEnum terms = reader.terms();
  -
  -        int minFreq = 0;
  -        while (terms.next())
  -        {
  -            if (terms.docFreq() > minFreq)
  -            {
  -                tiq.put(new TermInfo(terms.term(), terms.docFreq()));
  -                if (tiq.size() > numTerms) 		     // if tiq overfull
  -                {
  -                    tiq.pop();				     // remove lowest in tiq
  -                    minFreq = ((TermInfo)tiq.top()).docFreq; // reset minFreq
  -                }
  -            }
  -        }
  -
  -        while (tiq.size() != 0)
  -        {
  -            TermInfo termInfo = (TermInfo)tiq.pop();
  -            System.out.println(termInfo.term + " " + termInfo.docFreq);
  -        }
  -
  -        reader.close();
  -    }
  -
  -    private static void usage()
  -    {
  -        System.out.println("\n\n" +
  -            "java org.apache.lucene.misc.HighFreqTerms <index dir>\n\n");
  -    }
  +public class HighFreqTerms {
  +	
  +	// The top numTerms will be displayed
  +	public static final int numTerms = 100;
  +
  +	public static void main(String[] args) throws Exception {
  +		IndexReader reader = null;
  +		if (args.length == 1) {
  +			reader = IndexReader.open(args[0]);
  +		} else {
  +			usage();
  +			System.exit(1);
  +		}
  +
  +		TermInfoQueue tiq = new TermInfoQueue(numTerms);
  +		TermEnum terms = reader.terms();
  +
  +		while (terms.next()) {
  +			tiq.insert(new TermInfo(terms.term(), terms.docFreq()));
  +		}
  +
  +		while (tiq.size() != 0) {
  +			TermInfo termInfo = (TermInfo) tiq.pop();
  +			System.out.println(termInfo.term + " " + termInfo.docFreq);
  +		}
  +
  +		reader.close();
  +	}
  +
  +	private static void usage() {
  +		System.out.println(
  +			"\n\n"
  +				+ "java org.apache.lucene.misc.HighFreqTerms <index dir>\n\n");
  +	}
   }
   
  -final class TermInfo
  -{
  -    TermInfo(Term t, int df)
  -    {
  -        term = t;
  -        docFreq = df;
  -    }
  -    int docFreq;
  -    Term term;
  +final class TermInfo {
  +	TermInfo(Term t, int df) {
  +		term = t;
  +		docFreq = df;
  +	}
  +	int docFreq;
  +	Term term;
   }
   
  -final class TermInfoQueue extends PriorityQueue
  -{
  -    TermInfoQueue(int size)
  -    {
  -        initialize(size);
  -    }
  -    protected final boolean lessThan(Object a, Object b)
  -    {
  -        TermInfo termInfoA = (TermInfo)a;
  -        TermInfo termInfoB = (TermInfo)b;
  -        return termInfoA.docFreq < termInfoB.docFreq;
  -    }
  +final class TermInfoQueue extends PriorityQueue {
  +	TermInfoQueue(int size) {
  +		initialize(size);
  +	}
  +
  +	protected final boolean lessThan(Object a, Object b) {
  +		TermInfo termInfoA = (TermInfo) a;
  +		TermInfo termInfoB = (TermInfo) b;
  +		return termInfoA.docFreq < termInfoB.docFreq;
  +	}
   }
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: lucene-dev-help@jakarta.apache.org


Mime
View raw message