lucene-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From cmarsch...@apache.org
Subject cvs commit: jakarta-lucene-sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher URLLengthFilter.java
Date Tue, 22 Oct 2002 15:17:06 GMT
cmarschner    2002/10/22 08:17:06

  Modified:    contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher
                        URLLengthFilter.java
  Log:
  added logging
  
  Revision  Changes    Path
  1.3       +17 -2     jakarta-lucene-sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/URLLengthFilter.java
  
  Index: URLLengthFilter.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene-sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/URLLengthFilter.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- URLLengthFilter.java	22 May 2002 23:09:17 -0000	1.2
  +++ URLLengthFilter.java	22 Oct 2002 15:17:06 -0000	1.3
  @@ -54,6 +54,8 @@
   
   package de.lanlab.larm.fetcher;
   
  +import de.lanlab.larm.util.*;
  +
   /**
    * kills URLs longer than X characters. Used to prevent endless loops where
    * the page contains the current URL + some extension
  @@ -80,13 +82,24 @@
   
       int maxLength;
   
  +//    URLLengthFilter()
  +//    {
  +//        maxLength = 0;
  +//    }
  +    SimpleLogger log;
   
       /**
        * Constructor for the URLLengthFilter object
        *
        * @param maxLength  max length of the _total_ URL (protocol+host+port+path)
        */
  -    public URLLengthFilter(int maxLength)
  +    public URLLengthFilter(int maxLength, SimpleLogger log)
  +    {
  +        this.maxLength = maxLength;
  +        this.log = log;
  +    }
  +
  +    public void setMaxLength(int maxLength)
       {
           this.maxLength = maxLength;
       }
  @@ -105,6 +118,8 @@
           if (file != null && file.length() > maxLength) // path + query
           {
               filtered++;
  +            //log.log("URLLengthFilter: URL " + m.getUrl() + " exceeds maxLength " + this.maxLength);
  +            log.log(message.toString());
               return null;
           }
           return message;
  
  
  

--
To unsubscribe, e-mail:   <mailto:lucene-dev-unsubscribe@jakarta.apache.org>
For additional commands, e-mail: <mailto:lucene-dev-help@jakarta.apache.org>


Mime
View raw message