nutch-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From a.@apache.org
Subject svn commit: r397169 - in /lucene/nutch/trunk/src/java/org/apache/nutch/crawl: CrawlDatum.java CrawlDbReducer.java
Date Wed, 26 Apr 2006 10:54:58 GMT
Author: ab
Date: Wed Apr 26 03:54:53 2006
New Revision: 397169

URL: http://svn.apache.org/viewcvs?rev=397169&view=rev
Log:
Don't allow CrawlDatum.getMetaData() to return null. Underlying
MapWritable is lazily instantiated to minimize the number of
created objects.

Refactor CrawlDbReducer to use this assumption.

Add missing statements in CrawlDatum.equals() and CrawlDatum.hashCode()
that deal with metaData.

Modified:
    lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDatum.java
    lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDatum.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDatum.java?rev=397169&r1=397168&r2=397169&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDatum.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDatum.java Wed Apr 26 03:54:53 2006
@@ -121,11 +121,11 @@
    public void setMetaData(MapWritable mapWritable) {this.metaData = mapWritable; }
 
   /**
-   * returns a MapWritable if it was set or read @see readFields(DataInput), 
-   * returns null in case CrawlDatum was freshly generated or an empty map 
-   * in case CrawlDatum is a recycled instance.
+   * returns a MapWritable if it was set or read in @see readFields(DataInput), 
+   * returns empty map in case CrawlDatum was freshly created (lazily instantiated).
    */
   public MapWritable getMetaData() {
+    if (this.metaData == null) this.metaData = new MapWritable();
     return this.metaData;
   }
   
@@ -291,6 +291,7 @@
     buf.append("Retry interval: " + getFetchInterval() + " days\n");
     buf.append("Score: " + getScore() + "\n");
     buf.append("Signature: " + StringUtil.toHexString(getSignature()) + "\n");
+    buf.append("Metadata: " + (metaData != null ? metaData.toString() : "null") + "\n");
     return buf.toString();
   }
 
@@ -298,7 +299,7 @@
     if (!(o instanceof CrawlDatum))
       return false;
     CrawlDatum other = (CrawlDatum)o;
-    return
+    boolean res =
       (this.status == other.status) &&
       (this.fetchTime == other.fetchTime) &&
       (this.modifiedTime == other.modifiedTime) &&
@@ -306,6 +307,19 @@
       (this.fetchInterval == other.fetchInterval) &&
       (SignatureComparator._compare(this.signature, other.signature) == 0) &&
       (this.score == other.score);
+    if (!res) return res;
+    // allow zero-sized metadata to be equal to null metadata
+    if (this.metaData == null) {
+      if (other.metaData != null && other.metaData.size() > 0) return false;
+      else return true;
+    } else {
+      if (other.metaData == null) {
+        if (this.metaData.size() == 0) return true;
+        else return false;
+      } else {
+        return this.metaData.equals(other.metaData);
+      }
+    }
   }
 
   public int hashCode() {
@@ -316,6 +330,7 @@
                 signature[i+2] << 8 + signature[i+3]);
       }
     }
+    if (metaData != null) res ^= metaData.hashCode();
     return
       res ^ status ^
       ((int)fetchTime) ^

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java?rev=397169&r1=397168&r2=397169&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java Wed Apr 26 03:54:53 2006
@@ -67,11 +67,10 @@
     result.set(highest);
     if (old != null) {
       // copy metadata from old, if exists
-      if (old.getMetaData() != null) {
-        if (result.getMetaData() == null) result.setMetaData(new MapWritable());
+      if (old.getMetaData().size() > 0) {
         result.getMetaData().putAll(old.getMetaData());
         // overlay with new, if any
-        if (highest.getMetaData() != null)
+        if (highest.getMetaData().size() > 0)
           result.getMetaData().putAll(highest.getMetaData());
       }
       // set the most recent valid value of modifiedTime



Mime
View raw message