Return-Path: Delivered-To: apmail-lucene-hadoop-commits-archive@locus.apache.org Received: (qmail 21647 invoked from network); 31 Jul 2006 20:17:13 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (209.237.227.199) by minotaur.apache.org with SMTP; 31 Jul 2006 20:17:13 -0000 Received: (qmail 9689 invoked by uid 500); 31 Jul 2006 20:17:13 -0000 Delivered-To: apmail-lucene-hadoop-commits-archive@lucene.apache.org Received: (qmail 9666 invoked by uid 500); 31 Jul 2006 20:17:13 -0000 Mailing-List: contact hadoop-commits-help@lucene.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: hadoop-dev@lucene.apache.org Delivered-To: mailing list hadoop-commits@lucene.apache.org Received: (qmail 9657 invoked by uid 99); 31 Jul 2006 20:17:13 -0000 Received: from asf.osuosl.org (HELO asf.osuosl.org) (140.211.166.49) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 31 Jul 2006 13:17:13 -0700 X-ASF-Spam-Status: No, hits=-9.4 required=10.0 tests=ALL_TRUSTED,NO_REAL_NAME X-Spam-Check-By: apache.org Received-SPF: pass (asf.osuosl.org: local policy) Received: from [140.211.166.113] (HELO eris.apache.org) (140.211.166.113) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 31 Jul 2006 13:17:12 -0700 Received: by eris.apache.org (Postfix, from userid 65534) id 556FC1A981D; Mon, 31 Jul 2006 13:16:51 -0700 (PDT) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r427241 - in /lucene/hadoop/trunk: CHANGES.txt src/java/org/apache/hadoop/io/SequenceFile.java Date: Mon, 31 Jul 2006 20:16:50 -0000 To: hadoop-commits@lucene.apache.org From: cutting@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20060731201651.556FC1A981D@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org X-Spam-Rating: minotaur.apache.org 1.6.2 0/1000/N Author: cutting Date: Mon Jul 31 13:16:50 2006 New Revision: 427241 URL: http://svn.apache.org/viewvc?rev=427241&view=rev Log: HADOOP-287. Use quicksort instead of mergesort for in-memory sorting algorithm in SequenceFile. Contributed by Ben Reed. Modified: lucene/hadoop/trunk/CHANGES.txt lucene/hadoop/trunk/src/java/org/apache/hadoop/io/SequenceFile.java Modified: lucene/hadoop/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?rev=427241&r1=427240&r2=427241&view=diff ============================================================================== --- lucene/hadoop/trunk/CHANGES.txt (original) +++ lucene/hadoop/trunk/CHANGES.txt Mon Jul 31 13:16:50 2006 @@ -108,6 +108,9 @@ 30. HADOOP-362. Fix a problem where jobs hang when status messages are recieved out-of-order. (omalley via cutting) +31. HADOOP-287. Use quicksort in instead of mergesort for in-memory + sorting algorithm in SequenceFile. (Benjamin Reed via cutting) + Release 0.4.0 - 2006-06-28 Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/io/SequenceFile.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/io/SequenceFile.java?rev=427241&r1=427240&r2=427241&view=diff ============================================================================== --- lucene/hadoop/trunk/src/java/org/apache/hadoop/io/SequenceFile.java (original) +++ lucene/hadoop/trunk/src/java/org/apache/hadoop/io/SequenceFile.java Mon Jul 31 13:16:50 2006 @@ -570,7 +570,6 @@ private int[] starts = new int[1024]; private int[] pointers = new int[starts.length]; - private int[] pointersCopy = new int[starts.length]; private int[] keyLengths = new int[starts.length]; private int[] lengths = new int[starts.length]; @@ -644,7 +643,6 @@ int newLength = starts.length * 3 / 2; starts = grow(starts, newLength); pointers = grow(pointers, newLength); - pointersCopy = new int[newLength]; keyLengths = grow(keyLengths, newLength); lengths = grow(lengths, newLength); } @@ -682,48 +680,58 @@ } } + /** + * Ensure that pointers[p] <= pointers[r]. This function + * is used to make the choice of pivot closer to optimal. + * See "Median-of-Three Partitioning" in Sedgewick book. + */ + final private void fix(int p, int r) { + if (compare(pointers[p], pointers[r]) > 0) swap(pointers, p, r); + } + + private void quicksort(int p, int r) { + if (p-r < 13) { + for (int i=p; i<=r; i++) + for (int j=i; j>p && compare(pointers[j-1], pointers[j])>0; j--) + swap(pointers, j, j-1); + return; + } + // sort the first middle and last elements + fix(p, r); + fix(p, (p+r)/2); + fix((p+r)/2, r); + // Divide + int x = pointers[(p+r)/2]; + int i = p-1; + int j = r+1; + while(true) { + j--; + i++; + while(compare(pointers[j], x) > 0) j--; + while(compare(pointers[i], x) < 0) i++; + if (ilow && compare(dest[j-1], dest[j])>0; j--) - swap(dest, j, j-1); - return; - } - - // Recursively sort halves of dest into src - int mid = (low + high) >> 1; - mergeSort(dest, src, low, mid); - mergeSort(dest, src, mid, high); - - // If list is already sorted, just copy from src to dest. This is an - // optimization that results in faster sorts for nearly ordered lists. - if (compare(src[mid-1], src[mid]) <= 0) { - System.arraycopy(src, low, dest, low, length); - return; - } - - // Merge sorted halves (now in src) into dest - for(int i = low, p = low, q = mid; i < high; i++) { - if (q>=high || p