Return-Path: Delivered-To: apmail-lucene-nutch-commits-archive@www.apache.org Received: (qmail 27999 invoked from network); 26 Apr 2006 10:55:21 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (209.237.227.199) by minotaur.apache.org with SMTP; 26 Apr 2006 10:55:21 -0000 Received: (qmail 27720 invoked by uid 500); 26 Apr 2006 10:55:21 -0000 Delivered-To: apmail-lucene-nutch-commits-archive@lucene.apache.org Received: (qmail 27696 invoked by uid 500); 26 Apr 2006 10:55:21 -0000 Mailing-List: contact nutch-commits-help@lucene.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: nutch-dev@lucene.apache.org Delivered-To: mailing list nutch-commits@lucene.apache.org Received: (qmail 27685 invoked by uid 99); 26 Apr 2006 10:55:20 -0000 Received: from asf.osuosl.org (HELO asf.osuosl.org) (140.211.166.49) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 26 Apr 2006 03:55:20 -0700 X-ASF-Spam-Status: No, hits=-9.4 required=10.0 tests=ALL_TRUSTED,NO_REAL_NAME X-Spam-Check-By: apache.org Received: from [209.237.227.194] (HELO minotaur.apache.org) (209.237.227.194) by apache.org (qpsmtpd/0.29) with SMTP; Wed, 26 Apr 2006 03:55:20 -0700 Received: (qmail 27765 invoked by uid 65534); 26 Apr 2006 10:54:59 -0000 Message-ID: <20060426105459.27763.qmail@minotaur.apache.org> Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r397169 - in /lucene/nutch/trunk/src/java/org/apache/nutch/crawl: CrawlDatum.java CrawlDbReducer.java Date: Wed, 26 Apr 2006 10:54:58 -0000 To: nutch-commits@lucene.apache.org From: ab@apache.org X-Mailer: svnmailer-1.0.8 X-Virus-Checked: Checked by ClamAV on apache.org X-Spam-Rating: minotaur.apache.org 1.6.2 0/1000/N Author: ab Date: Wed Apr 26 03:54:53 2006 New Revision: 397169 URL: http://svn.apache.org/viewcvs?rev=397169&view=rev Log: Don't allow CrawlDatum.getMetaData() to return null. Underlying MapWritable is lazily instantiated to minimize the number of created objects. Refactor CrawlDbReducer to use this assumption. Add missing statements in CrawlDatum.equals() and CrawlDatum.hashCode() that deal with metaData. Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDatum.java lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDatum.java URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDatum.java?rev=397169&r1=397168&r2=397169&view=diff ============================================================================== --- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDatum.java (original) +++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDatum.java Wed Apr 26 03:54:53 2006 @@ -121,11 +121,11 @@ public void setMetaData(MapWritable mapWritable) {this.metaData = mapWritable; } /** - * returns a MapWritable if it was set or read @see readFields(DataInput), - * returns null in case CrawlDatum was freshly generated or an empty map - * in case CrawlDatum is a recycled instance. + * returns a MapWritable if it was set or read in @see readFields(DataInput), + * returns empty map in case CrawlDatum was freshly created (lazily instantiated). */ public MapWritable getMetaData() { + if (this.metaData == null) this.metaData = new MapWritable(); return this.metaData; } @@ -291,6 +291,7 @@ buf.append("Retry interval: " + getFetchInterval() + " days\n"); buf.append("Score: " + getScore() + "\n"); buf.append("Signature: " + StringUtil.toHexString(getSignature()) + "\n"); + buf.append("Metadata: " + (metaData != null ? metaData.toString() : "null") + "\n"); return buf.toString(); } @@ -298,7 +299,7 @@ if (!(o instanceof CrawlDatum)) return false; CrawlDatum other = (CrawlDatum)o; - return + boolean res = (this.status == other.status) && (this.fetchTime == other.fetchTime) && (this.modifiedTime == other.modifiedTime) && @@ -306,6 +307,19 @@ (this.fetchInterval == other.fetchInterval) && (SignatureComparator._compare(this.signature, other.signature) == 0) && (this.score == other.score); + if (!res) return res; + // allow zero-sized metadata to be equal to null metadata + if (this.metaData == null) { + if (other.metaData != null && other.metaData.size() > 0) return false; + else return true; + } else { + if (other.metaData == null) { + if (this.metaData.size() == 0) return true; + else return false; + } else { + return this.metaData.equals(other.metaData); + } + } } public int hashCode() { @@ -316,6 +330,7 @@ signature[i+2] << 8 + signature[i+3]); } } + if (metaData != null) res ^= metaData.hashCode(); return res ^ status ^ ((int)fetchTime) ^ Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java?rev=397169&r1=397168&r2=397169&view=diff ============================================================================== --- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java (original) +++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java Wed Apr 26 03:54:53 2006 @@ -67,11 +67,10 @@ result.set(highest); if (old != null) { // copy metadata from old, if exists - if (old.getMetaData() != null) { - if (result.getMetaData() == null) result.setMetaData(new MapWritable()); + if (old.getMetaData().size() > 0) { result.getMetaData().putAll(old.getMetaData()); // overlay with new, if any - if (highest.getMetaData() != null) + if (highest.getMetaData().size() > 0) result.getMetaData().putAll(highest.getMetaData()); } // set the most recent valid value of modifiedTime