Return-Path: Delivered-To: apmail-hadoop-core-commits-archive@www.apache.org Received: (qmail 85879 invoked from network); 13 May 2008 19:40:28 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.2) by minotaur.apache.org with SMTP; 13 May 2008 19:40:28 -0000 Received: (qmail 89011 invoked by uid 500); 13 May 2008 19:40:29 -0000 Delivered-To: apmail-hadoop-core-commits-archive@hadoop.apache.org Received: (qmail 88987 invoked by uid 500); 13 May 2008 19:40:29 -0000 Mailing-List: contact core-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: core-dev@hadoop.apache.org Delivered-To: mailing list core-commits@hadoop.apache.org Received: (qmail 88976 invoked by uid 99); 13 May 2008 19:40:29 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 13 May 2008 12:40:29 -0700 X-ASF-Spam-Status: No, hits=-2000.0 required=10.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 13 May 2008 19:39:51 +0000 Received: by eris.apache.org (Postfix, from userid 65534) id DEC7A2388A23; Tue, 13 May 2008 12:40:06 -0700 (PDT) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r655984 - in /hadoop/core/trunk: CHANGES.txt src/java/org/apache/hadoop/io/SequenceFile.java Date: Tue, 13 May 2008 19:40:06 -0000 To: core-commits@hadoop.apache.org From: ddas@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20080513194006.DEC7A2388A23@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: ddas Date: Tue May 13 12:40:06 2008 New Revision: 655984 URL: http://svn.apache.org/viewvc?rev=655984&view=rev Log: HADOOP-3365. Removes an unnecessary copy of the key from SegmentDescriptor to MergeQueue. Contributed by Devaraj Das. Modified: hadoop/core/trunk/CHANGES.txt hadoop/core/trunk/src/java/org/apache/hadoop/io/SequenceFile.java Modified: hadoop/core/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/core/trunk/CHANGES.txt?rev=655984&r1=655983&r2=655984&view=diff ============================================================================== --- hadoop/core/trunk/CHANGES.txt (original) +++ hadoop/core/trunk/CHANGES.txt Tue May 13 12:40:06 2008 @@ -225,6 +225,9 @@ HADOOP-3349. A file rename was incorrectly changing the name inside a lease record. (Tsz Wo (Nicholas), SZE via dhruba) + HADOOP-3365. Removes an unnecessary copy of the key from SegmentDescriptor + to MergeQueue. (Devaraj Das) + Release 0.17.0 - Unreleased INCOMPATIBLE CHANGES Modified: hadoop/core/trunk/src/java/org/apache/hadoop/io/SequenceFile.java URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/java/org/apache/hadoop/io/SequenceFile.java?rev=655984&r1=655983&r2=655984&view=diff ============================================================================== --- hadoop/core/trunk/src/java/org/apache/hadoop/io/SequenceFile.java (original) +++ hadoop/core/trunk/src/java/org/apache/hadoop/io/SequenceFile.java Tue May 13 12:40:06 2008 @@ -2690,6 +2690,7 @@ private Progress mergeProgress = new Progress(); private Path tmpDir; private Progressable progress = null; //handle to the progress reporting object + private SegmentDescriptor minSegment; //a TreeMap used to store the segments sorted by size (segment offset and //segment path name is used to break ties between segments of same sizes) @@ -2738,6 +2739,7 @@ while ((ms = (SegmentDescriptor)pop()) != null) { ms.cleanup(); } + minSegment = null; } public DataOutputBuffer getKey() throws IOException { return rawKey; @@ -2748,21 +2750,25 @@ public boolean next() throws IOException { if (size() == 0) return false; - SegmentDescriptor ms = (SegmentDescriptor)top(); - //save the raw key - rawKey.reset(); - rawKey.write(ms.getKey().getData(), 0, ms.getKey().getLength()); + int valLength; + if (minSegment != null) { + //minSegment is non-null for all invocations of next except the first + //one. For the first invocation, the priority queue is ready for use + //but for the subsequent invocations, first adjust the queue + adjustPriorityQueue(minSegment); + if (size() == 0) { + minSegment = null; + return false; + } + } + minSegment = (SegmentDescriptor)top(); + //save the raw key reference + rawKey = minSegment.getKey(); //load the raw value. Re-use the existing rawValue buffer - if (rawValue == null) - rawValue = ms.in.createValueBytes(); - int valLength = ms.nextRawValue(rawValue); - - if (ms.nextRawKey()) { - adjustTop(); - } else { - pop(); - ms.cleanup(); + if (rawValue == null) { + rawValue = minSegment.in.createValueBytes(); } + valLength = minSegment.nextRawValue(rawValue); if (progPerByte > 0) { totalBytesProcessed += rawKey.getLength() + valLength; mergeProgress.set(totalBytesProcessed * progPerByte); @@ -2774,6 +2780,14 @@ return mergeProgress; } + private void adjustPriorityQueue(SegmentDescriptor ms) throws IOException{ + if (ms.nextRawKey()) { + adjustTop(); + } else { + pop(); + ms.cleanup(); + } + } /** This is the single level merge that is called multiple times * depending on the factor size and the number of segments * @return RawKeyValueIterator