ant-notifications mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From bode...@apache.org
Subject svn commit: r1340895 - in /ant/core/trunk: WHATSNEW src/main/org/apache/tools/bzip2/BlockSort.java src/main/org/apache/tools/bzip2/CBZip2OutputStream.java src/tests/junit/org/apache/tools/bzip2/BlockSortTest.java
Date Mon, 21 May 2012 04:33:42 GMT
Author: bodewig
Date: Mon May 21 04:33:42 2012
New Revision: 1340895

URL: http://svn.apache.org/viewvc?rev=1340895&view=rev
Log:
merge bzip2 edge case improvement from Commons Compress

Added:
    ant/core/trunk/src/main/org/apache/tools/bzip2/BlockSort.java
      - copied, changed from r1340888, commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/compressors/bzip2/BlockSort.java
    ant/core/trunk/src/tests/junit/org/apache/tools/bzip2/BlockSortTest.java
      - copied, changed from r1340888, commons/proper/compress/trunk/src/test/java/org/apache/commons/compress/compressors/bzip2/BlockSortTest.java
Modified:
    ant/core/trunk/WHATSNEW
    ant/core/trunk/src/main/org/apache/tools/bzip2/CBZip2OutputStream.java   (contents, props
changed)

Modified: ant/core/trunk/WHATSNEW
URL: http://svn.apache.org/viewvc/ant/core/trunk/WHATSNEW?rev=1340895&r1=1340894&r2=1340895&view=diff
==============================================================================
--- ant/core/trunk/WHATSNEW (original)
+++ ant/core/trunk/WHATSNEW Mon May 21 04:33:42 2012
@@ -14,6 +14,10 @@ Fixed bugs:
  * Fixed some potential stream leaks.
    Bugzilla Reports 52738, 52740, 52742, 52743.
 
+ * Ported libbzip2's fallback sort algorithm to CBZip2OutputStream to
+   speed up compression in certain edge cases.  Merge from Commons
+   Compress.
+
 Other changes:
 --------------
 

Copied: ant/core/trunk/src/main/org/apache/tools/bzip2/BlockSort.java (from r1340888, commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/compressors/bzip2/BlockSort.java)
URL: http://svn.apache.org/viewvc/ant/core/trunk/src/main/org/apache/tools/bzip2/BlockSort.java?p2=ant/core/trunk/src/main/org/apache/tools/bzip2/BlockSort.java&p1=commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/compressors/bzip2/BlockSort.java&r1=1340888&r2=1340895&rev=1340895&view=diff
==============================================================================
--- commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/compressors/bzip2/BlockSort.java
(original)
+++ ant/core/trunk/src/main/org/apache/tools/bzip2/BlockSort.java Mon May 21 04:33:42 2012
@@ -16,13 +16,13 @@
  * specific language governing permissions and limitations
  * under the License.
  */
-package org.apache.commons.compress.compressors.bzip2;
+package org.apache.tools.bzip2;
 
 import java.util.BitSet;
 
 /**
  * Encapsulates the Burrows-Wheeler sorting algorithm needed by {@link
- * BZip2CompressorOutputStream}.
+ * CBZip2OutputStream}.
  *
  * <p>This class is based on a Java port of Julian Seward's
  * blocksort.c in his libbzip2</p>
@@ -146,11 +146,11 @@ class BlockSort {
      */
     private final char[] quadrant;
 
-    BlockSort(final BZip2CompressorOutputStream.Data data) {
+    BlockSort(final CBZip2OutputStream.Data data) {
         this.quadrant = data.sfmap;
     }
 
-    void blockSort(final BZip2CompressorOutputStream.Data data, final int last) {
+    void blockSort(final CBZip2OutputStream.Data data, final int last) {
         this.workLimit = WORK_FACTOR * last;
         this.workDone = 0;
         this.firstAttempt = true;
@@ -182,7 +182,7 @@ class BlockSort {
      * code, in particular deal with the fact that block starts at
      * offset 1 (in libbzip2 1.0.6 it starts at 0).
      */
-    final void fallbackSort(final BZip2CompressorOutputStream.Data data,
+    final void fallbackSort(final CBZip2OutputStream.Data data,
                             final int last) {
         data.block[0] = data.block[last + 1];
         fallbackSort(data.fmap, data.block, last + 1);
@@ -591,7 +591,7 @@ class BlockSort {
      * JIT compiler of the vm.
      * </p>
      */
-    private boolean mainSimpleSort(final BZip2CompressorOutputStream.Data dataShadow,
+    private boolean mainSimpleSort(final CBZip2OutputStream.Data dataShadow,
                                    final int lo, final int hi, final int d,
                                    final int lastShadow) {
         final int bigN = hi - lo + 1;
@@ -804,7 +804,7 @@ class BlockSort {
     /**
      * Method "mainQSort3", file "blocksort.c", BZip2 1.0.2
      */
-    private void mainQSort3(final BZip2CompressorOutputStream.Data dataShadow,
+    private void mainQSort3(final CBZip2OutputStream.Data dataShadow,
                             final int loSt, final int hiSt, final int dSt,
                             final int last) {
         final int[] stack_ll = this.stack_ll;
@@ -912,7 +912,7 @@ class BlockSort {
     private static final int SETMASK = (1 << 21);
     private static final int CLEARMASK = (~SETMASK);
 
-    final void mainSort(final BZip2CompressorOutputStream.Data dataShadow,
+    final void mainSort(final CBZip2OutputStream.Data dataShadow,
                         final int lastShadow) {
         final int[] runningOrder = this.mainSort_runningOrder;
         final int[] copy = this.mainSort_copy;

Modified: ant/core/trunk/src/main/org/apache/tools/bzip2/CBZip2OutputStream.java
URL: http://svn.apache.org/viewvc/ant/core/trunk/src/main/org/apache/tools/bzip2/CBZip2OutputStream.java?rev=1340895&r1=1340894&r2=1340895&view=diff
==============================================================================
--- ant/core/trunk/src/main/org/apache/tools/bzip2/CBZip2OutputStream.java (original)
+++ ant/core/trunk/src/main/org/apache/tools/bzip2/CBZip2OutputStream.java Mon May 21 04:33:42
2012
@@ -24,8 +24,8 @@
 
 package org.apache.tools.bzip2;
 
-import java.io.OutputStream;
 import java.io.IOException;
+import java.io.OutputStream;
 
 /**
  * An output stream that compresses into the BZip2 format (without the file
@@ -529,18 +529,11 @@ public class CBZip2OutputStream extends 
     private int last;
 
     /**
-     * Index in fmap[] of original string after sorting.
-     */
-    private int origPtr;
-
-    /**
      * Always: in the range 0 .. 9. The current block size is 100000 * this
      * number.
      */
     private final int blockSize100k;
 
-    private boolean blockRandomised;
-
     private int bsBuff;
     private int bsLive;
     private final CRC crc = new CRC();
@@ -549,14 +542,6 @@ public class CBZip2OutputStream extends 
 
     private int nMTF;
 
-    /*
-     * Used when sorting. If too many long comparisons happen, we stop sorting,
-     * randomise the block slightly, and try again.
-     */
-    private int workDone;
-    private int workLimit;
-    private boolean firstAttempt;
-
     private int currentChar = -1;
     private int runLength = 0;
 
@@ -567,7 +552,8 @@ public class CBZip2OutputStream extends 
     /**
      * All memory intensive stuff.
      */
-    private CBZip2OutputStream.Data data;
+    private Data data;
+    private BlockSort blockSorter;
 
     private OutputStream out;
 
@@ -592,7 +578,7 @@ public class CBZip2OutputStream extends 
      * Constructs a new <tt>CBZip2OutputStream</tt> with a blocksize of 900k.
      *
      * <p>
-     * <b>Attention: </b>The caller is resonsible to write the two BZip2 magic
+     * <b>Attention: </b>The caller is responsible to write the two BZip2 magic
      * bytes <tt>"BZ"</tt> to the specified stream prior to calling this
      * constructor.
      * </p>
@@ -613,7 +599,7 @@ public class CBZip2OutputStream extends 
      * Constructs a new <tt>CBZip2OutputStream</tt> with specified blocksize.
      *
      * <p>
-     * <b>Attention: </b>The caller is resonsible to write the two BZip2 magic
+     * <b>Attention: </b>The caller is responsible to write the two BZip2 magic
      * bytes <tt>"BZ"</tt> to the specified stream prior to calling this
      * constructor.
      * </p>
@@ -652,6 +638,8 @@ public class CBZip2OutputStream extends 
         init();
     }
 
+    /** {@inheritDoc} */
+    @Override
     public void write(final int b) throws IOException {
         if (this.out != null) {
             write0(b);
@@ -660,6 +648,19 @@ public class CBZip2OutputStream extends 
         }
     }
 
+    /**
+     * Writes the current byte to the buffer, run-length encoding it
+     * if it has been repeated at least four times (the first step
+     * RLEs sequences of four identical bytes).
+     *
+     * <p>Flushes the current block before writing data if it is
+     * full.</p>
+     *
+     * <p>"write to the buffer" means adding to data.buffer starting
+     * two steps "after" this.last - initially starting at index 1
+     * (not 0) - and updating this.last to point to the last index
+     * written minus 1.</p>
+     */
     private void writeRun() throws IOException {
         final int lastShadow = this.last;
 
@@ -717,6 +718,7 @@ public class CBZip2OutputStream extends 
     /**
      * Overriden to close the stream.
      */
+    @Override
     protected void finalize() throws Throwable {
         finish();
         super.finalize();
@@ -735,10 +737,12 @@ public class CBZip2OutputStream extends 
             } finally {
                 this.out = null;
                 this.data = null;
+                this.blockSorter = null;
             }
         }
     }
 
+    @Override
     public void close() throws IOException {
         if (out != null) {
             OutputStream outShadow = this.out;
@@ -747,6 +751,7 @@ public class CBZip2OutputStream extends 
         }
     }
 
+    @Override
     public void flush() throws IOException {
         OutputStream outShadow = this.out;
         if (outShadow != null) {
@@ -760,6 +765,7 @@ public class CBZip2OutputStream extends 
         // this.out.write('Z');
 
         this.data = new Data(this.blockSize100k);
+        this.blockSorter = new BlockSort(this.data);
 
         /*
          * Write `magic' bytes h indicating file-format == huffmanised, followed
@@ -821,12 +827,8 @@ public class CBZip2OutputStream extends 
         /* Now the block's CRC, so it is in a known place. */
         bsPutInt(this.blockCRC);
 
-        /* Now a single bit indicating randomisation. */
-        if (this.blockRandomised) {
-            bsW(1, 1);
-        } else {
-            bsW(1, 0);
-        }
+        /* Now a single bit indicating no randomisation. */
+        bsW(1, 0);
 
         /* Finally, block's contents proper. */
         moveToFrontCodeAndSend();
@@ -857,6 +859,7 @@ public class CBZip2OutputStream extends 
         return this.blockSize100k;
     }
 
+    @Override
     public void write(final byte[] buf, int offs, final int len)
         throws IOException {
         if (offs < 0) {
@@ -879,6 +882,10 @@ public class CBZip2OutputStream extends 
         }
     }
 
+    /**
+     * Keeps track of the last bytes written and implicitly performs
+     * run-length encoding as the first step of the bzip2 algorithm.
+     */
     private void write0(int b) throws IOException {
         if (this.currentChar != -1) {
             b &= 0xff;
@@ -990,7 +997,7 @@ public class CBZip2OutputStream extends 
         sendMTFValues6(nGroups, alphaSize);
 
         /* And finally, the block data proper */
-        sendMTFValues7(nSelectors);
+        sendMTFValues7();
     }
 
     private void sendMTFValues0(final int nGroups, final int alphaSize) {
@@ -1343,7 +1350,7 @@ public class CBZip2OutputStream extends 
         this.bsLive = bsLiveShadow;
     }
 
-    private void sendMTFValues7(final int nSelectors) throws IOException {
+    private void sendMTFValues7() throws IOException {
         final Data dataShadow = this.data;
         final byte[][] len = dataShadow.sendMTFValues_len;
         final int[][] code = dataShadow.sendMTFValues_code;
@@ -1391,545 +1398,22 @@ public class CBZip2OutputStream extends 
     }
 
     private void moveToFrontCodeAndSend() throws IOException {
-        bsW(24, this.origPtr);
+        bsW(24, this.data.origPtr);
         generateMTFValues();
         sendMTFValues();
     }
 
-    /**
-     * This is the most hammered method of this class.
-     *
-     * <p>
-     * This is the version using unrolled loops. Normally I never use such ones
-     * in Java code. The unrolling has shown a noticable performance improvement
-     * on JRE 1.4.2 (Linux i586 / HotSpot Client). Of course it depends on the
-     * JIT compiler of the vm.
-     * </p>
-     */
-    private boolean mainSimpleSort(final Data dataShadow, final int lo,
-                                   final int hi, final int d) {
-        final int bigN = hi - lo + 1;
-        if (bigN < 2) {
-            return this.firstAttempt && (this.workDone > this.workLimit);
-        }
-
-        int hp = 0;
-        while (INCS[hp] < bigN) {
-            hp++;
-        }
-
-        final int[] fmap = dataShadow.fmap;
-        final char[] quadrant = dataShadow.quadrant;
-        final byte[] block = dataShadow.block;
-        final int lastShadow = this.last;
-        final int lastPlus1 = lastShadow + 1;
-        final boolean firstAttemptShadow = this.firstAttempt;
-        final int workLimitShadow = this.workLimit;
-        int workDoneShadow = this.workDone;
-
-        // Following block contains unrolled code which could be shortened by
-        // coding it in additional loops.
-
-        HP: while (--hp >= 0) {
-            final int h = INCS[hp];
-            final int mj = lo + h - 1;
-
-            for (int i = lo + h; i <= hi;) {
-                // copy
-                for (int k = 3; (i <= hi) && (--k >= 0); i++) {
-                    final int v = fmap[i];
-                    final int vd = v + d;
-                    int j = i;
-
-                    // for (int a;
-                    // (j > mj) && mainGtU((a = fmap[j - h]) + d, vd,
-                    // block, quadrant, lastShadow);
-                    // j -= h) {
-                    // fmap[j] = a;
-                    // }
-                    //
-                    // unrolled version:
-
-                    // start inline mainGTU
-                    boolean onceRunned = false;
-                    int a = 0;
-
-                    HAMMER: while (true) {
-                        if (onceRunned) {
-                            fmap[j] = a;
-                            if ((j -= h) <= mj) {
-                                break HAMMER;
-                            }
-                        } else {
-                            onceRunned = true;
-                        }
-
-                        a = fmap[j - h];
-                        int i1 = a + d;
-                        int i2 = vd;
-
-                        // following could be done in a loop, but
-                        // unrolled it for performance:
-                        if (block[i1 + 1] == block[i2 + 1]) {
-                            if (block[i1 + 2] == block[i2 + 2]) {
-                                if (block[i1 + 3] == block[i2 + 3]) {
-                                    if (block[i1 + 4] == block[i2 + 4]) {
-                                        if (block[i1 + 5] == block[i2 + 5]) {
-                                            if (block[(i1 += 6)] == block[(i2 += 6)]) {
-                                                int x = lastShadow;
-                                                X: while (x > 0) {
-                                                    x -= 4;
-
-                                                    if (block[i1 + 1] == block[i2 + 1]) {
-                                                        if (quadrant[i1] == quadrant[i2])
{
-                                                            if (block[i1 + 2] == block[i2
+ 2]) {
-                                                                if (quadrant[i1 + 1] == quadrant[i2
+ 1]) {
-                                                                    if (block[i1 + 3] ==
block[i2 + 3]) {
-                                                                        if (quadrant[i1 +
2] == quadrant[i2 + 2]) {
-                                                                            if (block[i1
+ 4] == block[i2 + 4]) {
-                                                                                if (quadrant[i1
+ 3] == quadrant[i2 + 3]) {
-                                                                                    if ((i1
+= 4) >= lastPlus1) {
-                                                                                        i1
-= lastPlus1;
-                                                                                    }
-                                                                                    if ((i2
+= 4) >= lastPlus1) {
-                                                                                        i2
-= lastPlus1;
-                                                                                    }
-                                                                                    workDoneShadow++;
-                                                                                    continue
X;
-                                                                                } else if
((quadrant[i1 + 3] > quadrant[i2 + 3])) {
-                                                                                    continue
HAMMER;
-                                                                                } else {
-                                                                                    break
HAMMER;
-                                                                                }
-                                                                            } else if ((block[i1
+ 4] & 0xff) > (block[i2 + 4] & 0xff)) {
-                                                                                continue
HAMMER;
-                                                                            } else {
-                                                                                break HAMMER;
-                                                                            }
-                                                                        } else if ((quadrant[i1
+ 2] > quadrant[i2 + 2])) {
-                                                                            continue HAMMER;
-                                                                        } else {
-                                                                            break HAMMER;
-                                                                        }
-                                                                    } else if ((block[i1
+ 3] & 0xff) > (block[i2 + 3] & 0xff)) {
-                                                                        continue HAMMER;
-                                                                    } else {
-                                                                        break HAMMER;
-                                                                    }
-                                                                } else if ((quadrant[i1 +
1] > quadrant[i2 + 1])) {
-                                                                    continue HAMMER;
-                                                                } else {
-                                                                    break HAMMER;
-                                                                }
-                                                            } else if ((block[i1 + 2] &
0xff) > (block[i2 + 2] & 0xff)) {
-                                                                continue HAMMER;
-                                                            } else {
-                                                                break HAMMER;
-                                                            }
-                                                        } else if ((quadrant[i1] > quadrant[i2]))
{
-                                                            continue HAMMER;
-                                                        } else {
-                                                            break HAMMER;
-                                                        }
-                                                    } else if ((block[i1 + 1] & 0xff)
> (block[i2 + 1] & 0xff)) {
-                                                        continue HAMMER;
-                                                    } else {
-                                                        break HAMMER;
-                                                    }
-
-                                                }
-                                                break HAMMER;
-                                            } // while x > 0
-                                            else {
-                                                if ((block[i1] & 0xff) > (block[i2]
& 0xff)) {
-                                                    continue HAMMER;
-                                                } else {
-                                                    break HAMMER;
-                                                }
-                                            }
-                                        } else if ((block[i1 + 5] & 0xff) > (block[i2
+ 5] & 0xff)) {
-                                            continue HAMMER;
-                                        } else {
-                                            break HAMMER;
-                                        }
-                                    } else if ((block[i1 + 4] & 0xff) > (block[i2
+ 4] & 0xff)) {
-                                        continue HAMMER;
-                                    } else {
-                                        break HAMMER;
-                                    }
-                                } else if ((block[i1 + 3] & 0xff) > (block[i2 + 3]
& 0xff)) {
-                                    continue HAMMER;
-                                } else {
-                                    break HAMMER;
-                                }
-                            } else if ((block[i1 + 2] & 0xff) > (block[i2 + 2] &
0xff)) {
-                                continue HAMMER;
-                            } else {
-                                break HAMMER;
-                            }
-                        } else if ((block[i1 + 1] & 0xff) > (block[i2 + 1] & 0xff))
{
-                            continue HAMMER;
-                        } else {
-                            break HAMMER;
-                        }
-
-                    } // HAMMER
-                    // end inline mainGTU
-
-                    fmap[j] = v;
-                }
-
-                if (firstAttemptShadow && (i <= hi)
-                    && (workDoneShadow > workLimitShadow)) {
-                    break HP;
-                }
-            }
-        }
-
-        this.workDone = workDoneShadow;
-        return firstAttemptShadow && (workDoneShadow > workLimitShadow);
-    }
-
-    private static void vswap(int[] fmap, int p1, int p2, int n) {
-        n += p1;
-        while (p1 < n) {
-            int t = fmap[p1];
-            fmap[p1++] = fmap[p2];
-            fmap[p2++] = t;
-        }
-    }
-
-    private static byte med3(byte a, byte b, byte c) {
-        return (a < b) ? (b < c ? b : a < c ? c : a) : (b > c ? b : a > c
? c
-                                                        : a);
-    }
-
     private void blockSort() {
-        this.workLimit = WORK_FACTOR * this.last;
-        this.workDone = 0;
-        this.blockRandomised = false;
-        this.firstAttempt = true;
-        mainSort();
-
-        if (this.firstAttempt && (this.workDone > this.workLimit)) {
-            randomiseBlock();
-            this.workLimit = this.workDone = 0;
-            this.firstAttempt = false;
-            mainSort();
-        }
-
-        int[] fmap = this.data.fmap;
-        this.origPtr = -1;
-        for (int i = 0, lastShadow = this.last; i <= lastShadow; i++) {
-            if (fmap[i] == 0) {
-                this.origPtr = i;
-                break;
-            }
-        }
-
-        // assert (this.origPtr != -1) : this.origPtr;
+        blockSorter.blockSort(data, last);
     }
 
-    /**
-     * Method "mainQSort3", file "blocksort.c", BZip2 1.0.2
+    /*
+     * Performs Move-To-Front on the Burrows-Wheeler transformed
+     * buffer, storing the MTFed data in data.sfmap in RUNA/RUNB
+     * run-length-encoded form.
+     *
+     * <p>Keeps track of byte frequencies in data.mtfFreq at the same time.</p>
      */
-    private void mainQSort3(final Data dataShadow, final int loSt,
-                            final int hiSt, final int dSt) {
-        final int[] stack_ll = dataShadow.stack_ll;
-        final int[] stack_hh = dataShadow.stack_hh;
-        final int[] stack_dd = dataShadow.stack_dd;
-        final int[] fmap = dataShadow.fmap;
-        final byte[] block = dataShadow.block;
-
-        stack_ll[0] = loSt;
-        stack_hh[0] = hiSt;
-        stack_dd[0] = dSt;
-
-        for (int sp = 1; --sp >= 0;) {
-            final int lo = stack_ll[sp];
-            final int hi = stack_hh[sp];
-            final int d = stack_dd[sp];
-
-            if ((hi - lo < SMALL_THRESH) || (d > DEPTH_THRESH)) {
-                if (mainSimpleSort(dataShadow, lo, hi, d)) {
-                    return;
-                }
-            } else {
-                final int d1 = d + 1;
-                final int med = med3(block[fmap[lo] + d1],
-                                     block[fmap[hi] + d1], block[fmap[(lo + hi) >>>
1] + d1]) & 0xff;
-
-                int unLo = lo;
-                int unHi = hi;
-                int ltLo = lo;
-                int gtHi = hi;
-
-                while (true) {
-                    while (unLo <= unHi) {
-                        final int n = ((int) block[fmap[unLo] + d1] & 0xff)
-                            - med;
-                        if (n == 0) {
-                            final int temp = fmap[unLo];
-                            fmap[unLo++] = fmap[ltLo];
-                            fmap[ltLo++] = temp;
-                        } else if (n < 0) {
-                            unLo++;
-                        } else {
-                            break;
-                        }
-                    }
-
-                    while (unLo <= unHi) {
-                        final int n = ((int) block[fmap[unHi] + d1] & 0xff)
-                            - med;
-                        if (n == 0) {
-                            final int temp = fmap[unHi];
-                            fmap[unHi--] = fmap[gtHi];
-                            fmap[gtHi--] = temp;
-                        } else if (n > 0) {
-                            unHi--;
-                        } else {
-                            break;
-                        }
-                    }
-
-                    if (unLo <= unHi) {
-                        final int temp = fmap[unLo];
-                        fmap[unLo++] = fmap[unHi];
-                        fmap[unHi--] = temp;
-                    } else {
-                        break;
-                    }
-                }
-
-                if (gtHi < ltLo) {
-                    stack_ll[sp] = lo;
-                    stack_hh[sp] = hi;
-                    stack_dd[sp] = d1;
-                    sp++;
-                } else {
-                    int n = ((ltLo - lo) < (unLo - ltLo)) ? (ltLo - lo)
-                        : (unLo - ltLo);
-                    vswap(fmap, lo, unLo - n, n);
-                    int m = ((hi - gtHi) < (gtHi - unHi)) ? (hi - gtHi)
-                        : (gtHi - unHi);
-                    vswap(fmap, unLo, hi - m + 1, m);
-
-                    n = lo + unLo - ltLo - 1;
-                    m = hi - (gtHi - unHi) + 1;
-
-                    stack_ll[sp] = lo;
-                    stack_hh[sp] = n;
-                    stack_dd[sp] = d;
-                    sp++;
-
-                    stack_ll[sp] = n + 1;
-                    stack_hh[sp] = m - 1;
-                    stack_dd[sp] = d1;
-                    sp++;
-
-                    stack_ll[sp] = m;
-                    stack_hh[sp] = hi;
-                    stack_dd[sp] = d;
-                    sp++;
-                }
-            }
-        }
-    }
-
-    private void mainSort() {
-        final Data dataShadow = this.data;
-        final int[] runningOrder = dataShadow.mainSort_runningOrder;
-        final int[] copy = dataShadow.mainSort_copy;
-        final boolean[] bigDone = dataShadow.mainSort_bigDone;
-        final int[] ftab = dataShadow.ftab;
-        final byte[] block = dataShadow.block;
-        final int[] fmap = dataShadow.fmap;
-        final char[] quadrant = dataShadow.quadrant;
-        final int lastShadow = this.last;
-        final int workLimitShadow = this.workLimit;
-        final boolean firstAttemptShadow = this.firstAttempt;
-
-        // Set up the 2-byte frequency table
-        for (int i = 65537; --i >= 0;) {
-            ftab[i] = 0;
-        }
-
-        /*
-         * In the various block-sized structures, live data runs from 0 to
-         * last+NUM_OVERSHOOT_BYTES inclusive. First, set up the overshoot area
-         * for block.
-         */
-        for (int i = 0; i < NUM_OVERSHOOT_BYTES; i++) {
-            block[lastShadow + i + 2] = block[(i % (lastShadow + 1)) + 1];
-        }
-        for (int i = lastShadow + NUM_OVERSHOOT_BYTES +1; --i >= 0;) {
-            quadrant[i] = 0;
-        }
-        block[0] = block[lastShadow + 1];
-
-        // Complete the initial radix sort:
-
-        int c1 = block[0] & 0xff;
-        for (int i = 0; i <= lastShadow; i++) {
-            final int c2 = block[i + 1] & 0xff;
-            ftab[(c1 << 8) + c2]++;
-            c1 = c2;
-        }
-
-        for (int i = 1; i <= 65536; i++)
-            ftab[i] += ftab[i - 1];
-
-        c1 = block[1] & 0xff;
-        for (int i = 0; i < lastShadow; i++) {
-            final int c2 = block[i + 2] & 0xff;
-            fmap[--ftab[(c1 << 8) + c2]] = i;
-            c1 = c2;
-        }
-
-        fmap[--ftab[((block[lastShadow + 1] & 0xff) << 8) + (block[1] & 0xff)]]
= lastShadow;
-
-        /*
-         * Now ftab contains the first loc of every small bucket. Calculate the
-         * running order, from smallest to largest big bucket.
-         */
-        for (int i = 256; --i >= 0;) {
-            bigDone[i] = false;
-            runningOrder[i] = i;
-        }
-
-        for (int h = 364; h != 1;) {
-            h /= 3;
-            for (int i = h; i <= 255; i++) {
-                final int vv = runningOrder[i];
-                final int a = ftab[(vv + 1) << 8] - ftab[vv << 8];
-                final int b = h - 1;
-                int j = i;
-                for (int ro = runningOrder[j - h]; (ftab[(ro + 1) << 8] - ftab[ro <<
8]) > a; ro = runningOrder[j
-                                                                                        
                       - h]) {
-                    runningOrder[j] = ro;
-                    j -= h;
-                    if (j <= b) {
-                        break;
-                    }
-                }
-                runningOrder[j] = vv;
-            }
-        }
-
-        /*
-         * The main sorting loop.
-         */
-        for (int i = 0; i <= 255; i++) {
-            /*
-             * Process big buckets, starting with the least full.
-             */
-            final int ss = runningOrder[i];
-
-            // Step 1:
-            /*
-             * Complete the big bucket [ss] by quicksorting any unsorted small
-             * buckets [ss, j]. Hopefully previous pointer-scanning phases have
-             * already completed many of the small buckets [ss, j], so we don't
-             * have to sort them at all.
-             */
-            for (int j = 0; j <= 255; j++) {
-                final int sb = (ss << 8) + j;
-                final int ftab_sb = ftab[sb];
-                if ((ftab_sb & SETMASK) != SETMASK) {
-                    final int lo = ftab_sb & CLEARMASK;
-                    final int hi = (ftab[sb + 1] & CLEARMASK) - 1;
-                    if (hi > lo) {
-                        mainQSort3(dataShadow, lo, hi, 2);
-                        if (firstAttemptShadow
-                            && (this.workDone > workLimitShadow)) {
-                            return;
-                        }
-                    }
-                    ftab[sb] = ftab_sb | SETMASK;
-                }
-            }
-
-            // Step 2:
-            // Now scan this big bucket so as to synthesise the
-            // sorted order for small buckets [t, ss] for all t != ss.
-
-            for (int j = 0; j <= 255; j++) {
-                copy[j] = ftab[(j << 8) + ss] & CLEARMASK;
-            }
-
-            for (int j = ftab[ss << 8] & CLEARMASK, hj = (ftab[(ss + 1) <<
8] & CLEARMASK); j < hj; j++) {
-                final int fmap_j = fmap[j];
-                c1 = block[fmap_j] & 0xff;
-                if (!bigDone[c1]) {
-                    fmap[copy[c1]] = (fmap_j == 0) ? lastShadow : (fmap_j - 1);
-                    copy[c1]++;
-                }
-            }
-
-            for (int j = 256; --j >= 0;)
-                ftab[(j << 8) + ss] |= SETMASK;
-
-            // Step 3:
-            /*
-             * The ss big bucket is now done. Record this fact, and update the
-             * quadrant descriptors. Remember to update quadrants in the
-             * overshoot area too, if necessary. The "if (i < 255)" test merely
-             * skips this updating for the last bucket processed, since updating
-             * for the last bucket is pointless.
-             */
-            bigDone[ss] = true;
-
-            if (i < 255) {
-                final int bbStart = ftab[ss << 8] & CLEARMASK;
-                final int bbSize = (ftab[(ss + 1) << 8] & CLEARMASK) - bbStart;
-                int shifts = 0;
-
-                while ((bbSize >> shifts) > 65534) {
-                    shifts++;
-                }
-
-                for (int j = 0; j < bbSize; j++) {
-                    final int a2update = fmap[bbStart + j];
-                    final char qVal = (char) (j >> shifts);
-                    quadrant[a2update] = qVal;
-                    if (a2update < NUM_OVERSHOOT_BYTES) {
-                        quadrant[a2update + lastShadow + 1] = qVal;
-                    }
-                }
-            }
-
-        }
-    }
-
-    private void randomiseBlock() {
-        final boolean[] inUse = this.data.inUse;
-        final byte[] block = this.data.block;
-        final int lastShadow = this.last;
-
-        for (int i = 256; --i >= 0;)
-            inUse[i] = false;
-
-        int rNToGo = 0;
-        int rTPos = 0;
-        for (int i = 0, j = 1; i <= lastShadow; i = j, j++) {
-            if (rNToGo == 0) {
-                rNToGo = (char) BZip2Constants.rNums[rTPos];
-                if (++rTPos == 512) {
-                    rTPos = 0;
-                }
-            }
-
-            rNToGo--;
-            block[j] ^= ((rNToGo == 1) ? 1 : 0);
-
-            // handle 16 bit signed numbers
-            inUse[block[j] & 0xff] = true;
-        }
-
-        this.blockRandomised = true;
-    }
-
     private void generateMTFValues() {
         final int lastShadow = this.last;
         final Data dataShadow = this.data;
@@ -2033,9 +1517,10 @@ public class CBZip2OutputStream extends 
         this.nMTF = wr + 1;
     }
 
-    private static final class Data extends Object {
+    static final class Data extends Object {
 
         // with blockSize 900k
+        /* maps unsigned byte => "does it occur in block" */
         final boolean[] inUse = new boolean[256]; // 256 byte
         final byte[] unseqToSeq = new byte[256]; // 256 byte
         final int[] mtfFreq = new int[MAX_ALPHA_SIZE]; // 1032 byte
@@ -2054,23 +1539,19 @@ public class CBZip2OutputStream extends 
         final byte[] sendMTFValues2_pos = new byte[N_GROUPS]; // 6 byte
         final boolean[] sentMTFValues4_inUse16 = new boolean[16]; // 16 byte
 
-        final int[] stack_ll = new int[QSORT_STACK_SIZE]; // 4000 byte
-        final int[] stack_hh = new int[QSORT_STACK_SIZE]; // 4000 byte
-        final int[] stack_dd = new int[QSORT_STACK_SIZE]; // 4000 byte
-
-        final int[] mainSort_runningOrder = new int[256]; // 1024 byte
-        final int[] mainSort_copy = new int[256]; // 1024 byte
-        final boolean[] mainSort_bigDone = new boolean[256]; // 256 byte
-
         final int[] heap = new int[MAX_ALPHA_SIZE + 2]; // 1040 byte
         final int[] weight = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte
         final int[] parent = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte
 
-        final int[] ftab = new int[65537]; // 262148 byte
         // ------------
         // 333408 byte
 
+        /* holds the RLEd block of original data starting at index 1.
+         * After sorting the last byte added to the buffer is at index
+         * 0. */
         final byte[] block; // 900021 byte
+        /* maps index in Burrows-Wheeler transformed block => index of
+         * byte in original block */
         final int[] fmap; // 3600000 byte
         final char[] sfmap; // 3600000 byte
         // ------------
@@ -2078,11 +1559,12 @@ public class CBZip2OutputStream extends 
         // ============
 
         /**
-         * Array instance identical to sfmap, both are used only
-         * temporarily and indepently, so we do not need to allocate
-         * additional memory.
+         * Index of original line in Burrows-Wheeler table.
+         *
+         * <p>This is the index in fmap that points to the last byte
+         * of the original data.</p>
          */
-        final char[] quadrant;
+        int origPtr;
 
         Data(int blockSize100k) {
             super();
@@ -2091,7 +1573,6 @@ public class CBZip2OutputStream extends 
             this.block = new byte[(n + 1 + NUM_OVERSHOOT_BYTES)];
             this.fmap = new int[n];
             this.sfmap = new char[2 * n];
-            this.quadrant = this.sfmap;
         }
 
     }

Propchange: ant/core/trunk/src/main/org/apache/tools/bzip2/CBZip2OutputStream.java
------------------------------------------------------------------------------
--- svn:mergeinfo (added)
+++ svn:mergeinfo Mon May 21 04:33:42 2012
@@ -0,0 +1 @@
+/commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/compressors/bzip2/BZip2CompressorOutputStream.java:1332540-1340799

Copied: ant/core/trunk/src/tests/junit/org/apache/tools/bzip2/BlockSortTest.java (from r1340888,
commons/proper/compress/trunk/src/test/java/org/apache/commons/compress/compressors/bzip2/BlockSortTest.java)
URL: http://svn.apache.org/viewvc/ant/core/trunk/src/tests/junit/org/apache/tools/bzip2/BlockSortTest.java?p2=ant/core/trunk/src/tests/junit/org/apache/tools/bzip2/BlockSortTest.java&p1=commons/proper/compress/trunk/src/test/java/org/apache/commons/compress/compressors/bzip2/BlockSortTest.java&r1=1340888&r2=1340895&rev=1340895&view=diff
==============================================================================
--- commons/proper/compress/trunk/src/test/java/org/apache/commons/compress/compressors/bzip2/BlockSortTest.java
(original)
+++ ant/core/trunk/src/tests/junit/org/apache/tools/bzip2/BlockSortTest.java Mon May 21 04:33:42
2012
@@ -16,7 +16,7 @@
  * specific language governing permissions and limitations
  * under the License.
  */
-package org.apache.commons.compress.compressors.bzip2;
+package org.apache.tools.bzip2;
 
 import org.junit.Test;
 
@@ -123,7 +123,7 @@ public class BlockSortTest {
 
     @Test
     public void testFallbackSort() {
-        BZip2CompressorOutputStream.Data data = new BZip2CompressorOutputStream.Data(1);
+        CBZip2OutputStream.Data data = new CBZip2OutputStream.Data(1);
         BlockSort s = new BlockSort(data);
         int[] fmap = new int[FIXTURE.length];
         s.fallbackSort(fmap, FIXTURE, FIXTURE.length);
@@ -134,7 +134,7 @@ public class BlockSortTest {
         return setUpFixture(FIXTURE);
     }
 
-    private void assertFixtureSorted(BZip2CompressorOutputStream.Data data) {
+    private void assertFixtureSorted(CBZip2OutputStream.Data data) {
         assertFixtureSorted(data, FIXTURE, FIXTURE_BWT);
     }
 
@@ -142,17 +142,17 @@ public class BlockSortTest {
         return setUpFixture(FIXTURE2);
     }
 
-    private void assertFixture2Sorted(BZip2CompressorOutputStream.Data data) {
+    private void assertFixture2Sorted(CBZip2OutputStream.Data data) {
         assertFixtureSorted(data, FIXTURE2, FIXTURE2_BWT);
     }
 
     private DS setUpFixture(byte[] fixture) {
-        BZip2CompressorOutputStream.Data data = new BZip2CompressorOutputStream.Data(1);
+        CBZip2OutputStream.Data data = new CBZip2OutputStream.Data(1);
         System.arraycopy(fixture, 0, data.block, 1, fixture.length);
         return new DS(data, new BlockSort(data));
     }
 
-    private void assertFixtureSorted(BZip2CompressorOutputStream.Data data,
+    private void assertFixtureSorted(CBZip2OutputStream.Data data,
                                      byte[] fixture, byte[] fixtureBwt) {
         assertEquals(fixture[fixture.length - 1], data.block[0]);
         for (int i = 0; i < fixture.length; i++) {
@@ -161,9 +161,9 @@ public class BlockSortTest {
     }
 
     private static class DS {
-        private final BZip2CompressorOutputStream.Data data;
+        private final CBZip2OutputStream.Data data;
         private final BlockSort s;
-        DS(BZip2CompressorOutputStream.Data data, BlockSort s) {
+        DS(CBZip2OutputStream.Data data, BlockSort s) {
             this.data = data;
             this.s = s;
         }



Mime
View raw message