nutch-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From doga...@apache.org
Subject svn commit: r563777 - in /lucene/nutch/trunk: CHANGES.txt src/java/org/apache/nutch/crawl/CrawlDatum.java src/java/org/apache/nutch/metadata/Metadata.java src/java/org/apache/nutch/parse/ParseData.java src/java/org/apache/nutch/protocol/Content.java
Date Wed, 08 Aug 2007 07:33:27 GMT
Author: dogacan
Date: Wed Aug  8 00:33:23 2007
New Revision: 563777

URL: http://svn.apache.org/viewvc?view=rev&rev=563777
Log:
NUTCH-535 - ParseData's contentMeta accumulates unnecessary values during parse.

Modified:
    lucene/nutch/trunk/CHANGES.txt
    lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDatum.java
    lucene/nutch/trunk/src/java/org/apache/nutch/metadata/Metadata.java
    lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseData.java
    lucene/nutch/trunk/src/java/org/apache/nutch/protocol/Content.java

Modified: lucene/nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/CHANGES.txt?view=diff&rev=563777&r1=563776&r2=563777
==============================================================================
--- lucene/nutch/trunk/CHANGES.txt (original)
+++ lucene/nutch/trunk/CHANGES.txt Wed Aug  8 00:33:23 2007
@@ -110,6 +110,9 @@
 36. NUTCH-533 - LinkDbMerger: url normalized is not updated in the key and 
     inlinks list. (Emmanuel Joke via dogacan)
 
+37. NUTCH-535 -ParseData's contentMeta accumulates unnecessary values during 
+    parse. (dogacan)
+
 Release 0.9 - 2007-04-02
 
  1. Changed log4j confiquration to log to stdout on commandline

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDatum.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDatum.java?view=diff&rev=563777&r1=563776&r2=563777
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDatum.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDatum.java Wed Aug  8 00:33:23 2007
@@ -130,7 +130,9 @@
     return false;
   }
 
-  public CrawlDatum() {}
+  public CrawlDatum() {
+    metaData = new MapWritable();
+  }
 
   public CrawlDatum(int status, float fetchInterval) {
     this.status = (byte)status;
@@ -230,17 +232,9 @@
       } else signature = null;
     }
     if (version > 3) {
+      metaData.clear();
       if (in.readBoolean()) {
-        if (metaData == null) {
-          metaData = new MapWritable(); 
-        } else {
-           metaData.clear();
-        }
         metaData.readFields(in);
-      } else {
-        if (metaData != null) {
-          metaData.clear(); // at least clear old meta data
-        }
       }
     }
     // translate status codes

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/metadata/Metadata.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/metadata/Metadata.java?view=diff&rev=563777&r1=563776&r2=563777
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/metadata/Metadata.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/metadata/Metadata.java Wed Aug  8 00:33:23 2007
@@ -162,6 +162,11 @@
   public int size() {
     return metadata.size();
   }
+  
+  /** Remove all mappings from metadata. */
+  public void clear() {
+    metadata.clear();
+  }
 
   public boolean equals(Object o) {
 

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseData.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseData.java?view=diff&rev=563777&r1=563776&r2=563777
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseData.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseData.java Wed Aug  8 00:33:23 2007
@@ -43,7 +43,10 @@
   private ParseStatus status;
   private byte version = VERSION;
   
-  public ParseData() {}
+  public ParseData() {
+    contentMeta = new Metadata();
+    parseMeta = new Metadata();
+  }
 
   public ParseData(ParseStatus status, String title, Outlink[] outlinks,
                    Metadata contentMeta) {
@@ -125,16 +128,16 @@
     
     if (version < 3) {
       int propertyCount = in.readInt();             // read metadata
-      contentMeta = new Metadata();
+      contentMeta.clear();
       for (int i = 0; i < propertyCount; i++) {
         contentMeta.add(Text.readString(in), Text.readString(in));
       }
     } else {
-      contentMeta = new Metadata();
+      contentMeta.clear();
       contentMeta.readFields(in);
     }
     if (version > 3) {
-      parseMeta = new Metadata();
+      parseMeta.clear();
       parseMeta.readFields(in);
     }
   }

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/protocol/Content.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/protocol/Content.java?view=diff&rev=563777&r1=563776&r2=563777
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/protocol/Content.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/protocol/Content.java Wed Aug  8 00:33:23 2007
@@ -126,6 +126,7 @@
   }
   
   public final void readFields(DataInput in) throws IOException {
+    metadata.clear();
     int sizeOrVersion = in.readInt();
     if (sizeOrVersion < 0) { // version
       version = sizeOrVersion;



Mime
View raw message