lucene-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From cmarsch...@apache.org
Subject cvs commit: jakarta-lucene-sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util WebDocument.java
Date Tue, 18 Jun 2002 00:46:35 GMT
cmarschner    2002/06/17 17:46:35

  Modified:    contributions/webcrawler-LARM/src/de/lanlab/larm/util
                        WebDocument.java
  Log:
  changed web doc. to field/value pairs
  
  Revision  Changes    Path
  1.5       +47 -5     jakarta-lucene-sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/WebDocument.java
  
  Index: WebDocument.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene-sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/util/WebDocument.java,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- WebDocument.java	17 Jun 2002 14:16:12 -0000	1.4
  +++ WebDocument.java	18 Jun 2002 00:46:35 -0000	1.5
  @@ -56,6 +56,9 @@
   
   
   import java.net.URL;
  +import java.util.HashMap;
  +import java.util.Date;
  +import java.util.Set;
   import de.lanlab.larm.fetcher.URLMessage;
   import de.lanlab.larm.net.HostManager;
   
  @@ -65,20 +68,55 @@
   public class WebDocument extends URLMessage
   {
       protected String mimeType;
  -    protected byte[] document;
  +    // protected byte[] document;
       protected int resultCode;
       protected int size;
       protected String title;
  +    protected Date lastModified;
  +    HashMap fields;
   
  -    public  WebDocument(URL url, String mimeType, byte[] document, int resultCode, URL referer, int size, String title, HostManager hm)
  +    public  WebDocument(URL url, String mimeType, int resultCode, URL referer, int size, String title, Date lastModified, HostManager hm)
       {
           super(url, referer, false, null, hm);
           this.url = url;
           this.mimeType = mimeType;
  -        this.document = document;
  +        //this.document = document;
           this.resultCode = resultCode;
           this.size = size;
           this.title = title;
  +        this.lastModified = lastModified;
  +        this.fields = new HashMap(7);       // expect ~4 fields
  +    }
  +
  +    public Set getFieldNames()
  +    {
  +        return fields.keySet();
  +    }
  +
  +    public Object getField(String name)
  +    {
  +        return fields.get(name);
  +    }
  +
  +    public void addField(String name, Object value)
  +    {
  +        fields.put(name, value);
  +    }
  +
  +    public void removeField(String name)
  +    {
  +        fields.remove(name);
  +    }
  +
  +    public int getNumFields()
  +    {
  +        return fields.size();
  +    }
  +
  +
  +    public Date getLastModified()
  +    {
  +        return lastModified;
       }
   
       public String getTitle()
  @@ -101,11 +139,13 @@
           this.size = size;
       }
   
  -
  +/*
       public void setDocument(byte[] document)
       {
           this.document = document;
       }
  +*/
  +
       public int getResultCode()
       {
           return resultCode;
  @@ -116,10 +156,12 @@
           this.resultCode = resultCode;
       }
   
  +/*
       public byte[] getDocumentBytes()
       {
           return this.document;
       }
  +*/
   
       public void setUrl(URL url)
       {
  @@ -142,7 +184,7 @@
           this.resultCode + "\t" +
           this.mimeType + "\t" +
           this.size + "\t" +
  -        "\"" + this.title.replace('\t',' ').replace('\"', (char)0xff ).replace('\n',' ').replace('\r',' ') + "\"";
  +        "\"" + this.title.replace('\t',' ').replace('\"', (char)0xff ).replace('\n',' ').replace('\r',' ') + "\"\t" + (this.lastModified != null ? java.text.DateFormat.getDateTimeInstance(java.text.DateFormat.SHORT, java.text.DateFormat.SHORT).format(this.lastModified) : "");
       }
   
   
  
  
  

--
To unsubscribe, e-mail:   <mailto:lucene-dev-unsubscribe@jakarta.apache.org>
For additional commands, e-mail: <mailto:lucene-dev-help@jakarta.apache.org>


Mime
View raw message