Return-Path: Delivered-To: apmail-jakarta-commons-dev-archive@apache.org Received: (qmail 33650 invoked from network); 8 Jul 2002 22:16:05 -0000 Received: from unknown (HELO nagoya.betaversion.org) (192.18.49.131) by daedalus.apache.org with SMTP; 8 Jul 2002 22:16:05 -0000 Received: (qmail 7152 invoked by uid 97); 8 Jul 2002 22:16:11 -0000 Delivered-To: qmlist-jakarta-archive-commons-dev@jakarta.apache.org Received: (qmail 7128 invoked by uid 97); 8 Jul 2002 22:16:10 -0000 Mailing-List: contact commons-dev-help@jakarta.apache.org; run by ezmlm Precedence: bulk List-Unsubscribe: List-Subscribe: List-Help: List-Post: List-Id: "Jakarta Commons Developers List" Reply-To: "Jakarta Commons Developers List" Delivered-To: mailing list commons-dev@jakarta.apache.org Received: (qmail 7104 invoked by uid 97); 8 Jul 2002 22:16:09 -0000 X-Antivirus: nagoya (v4198 created Apr 24 2002) Date: 8 Jul 2002 22:15:44 -0000 Message-ID: <20020708221544.54482.qmail@icarus.apache.org> From: nicolaken@apache.org To: jakarta-commons-sandbox-cvs@apache.org Subject: cvs commit: jakarta-commons-sandbox/io/src/java/org/apache/commons/io/compress/bzip2 BZip2Constants.java CBZip2InputStream.java CBZip2OutputStream.java CRC.java package.html X-Spam-Rating: daedalus.apache.org 1.6.2 0/1000/N X-Spam-Rating: daedalus.apache.org 1.6.2 0/1000/N nicolaken 2002/07/08 15:15:44 Added: io/src/java/org/apache/commons/io/compress/bzip2 BZip2Constants.java CBZip2InputStream.java CBZip2OutputStream.java CRC.java package.html Log: New bzip2 classes from Avalon excalibur. Revision Changes Path 1.1 jakarta-commons-sandbox/io/src/java/org/apache/commons/io/compress/bzip2/BZip2Constants.java Index: BZip2Constants.java =================================================================== /* * Copyright (C) The Apache Software Foundation. All rights reserved. * * This software is published under the terms of the Apache Software License * version 1.1, a copy of which has been included with this distribution in * the LICENSE.txt file. */ package org.apache.commons.io.compress.bzip2; /** * Base class for both the compress and decompress classes. Holds common arrays, * and static data. * * @author Keiron Liddle */ interface BZip2Constants { int BASE_BLOCK_SIZE = 100000; int MAX_ALPHA_SIZE = 258; int MAX_CODE_LEN = 23; int RUNA = 0; int RUNB = 1; int N_GROUPS = 6; int G_SIZE = 50; int N_ITERS = 4; int MAX_SELECTORS = ( 2 + ( 900000 / G_SIZE ) ); int NUM_OVERSHOOT_BYTES = 20; int[] RAND_NUMS = new int[] { 619, 720, 127, 481, 931, 816, 813, 233, 566, 247, 985, 724, 205, 454, 863, 491, 741, 242, 949, 214, 733, 859, 335, 708, 621, 574, 73, 654, 730, 472, 419, 436, 278, 496, 867, 210, 399, 680, 480, 51, 878, 465, 811, 169, 869, 675, 611, 697, 867, 561, 862, 687, 507, 283, 482, 129, 807, 591, 733, 623, 150, 238, 59, 379, 684, 877, 625, 169, 643, 105, 170, 607, 520, 932, 727, 476, 693, 425, 174, 647, 73, 122, 335, 530, 442, 853, 695, 249, 445, 515, 909, 545, 703, 919, 874, 474, 882, 500, 594, 612, 641, 801, 220, 162, 819, 984, 589, 513, 495, 799, 161, 604, 958, 533, 221, 400, 386, 867, 600, 782, 382, 596, 414, 171, 516, 375, 682, 485, 911, 276, 98, 553, 163, 354, 666, 933, 424, 341, 533, 870, 227, 730, 475, 186, 263, 647, 537, 686, 600, 224, 469, 68, 770, 919, 190, 373, 294, 822, 808, 206, 184, 943, 795, 384, 383, 461, 404, 758, 839, 887, 715, 67, 618, 276, 204, 918, 873, 777, 604, 560, 951, 160, 578, 722, 79, 804, 96, 409, 713, 940, 652, 934, 970, 447, 318, 353, 859, 672, 112, 785, 645, 863, 803, 350, 139, 93, 354, 99, 820, 908, 609, 772, 154, 274, 580, 184, 79, 626, 630, 742, 653, 282, 762, 623, 680, 81, 927, 626, 789, 125, 411, 521, 938, 300, 821, 78, 343, 175, 128, 250, 170, 774, 972, 275, 999, 639, 495, 78, 352, 126, 857, 956, 358, 619, 580, 124, 737, 594, 701, 612, 669, 112, 134, 694, 363, 992, 809, 743, 168, 974, 944, 375, 748, 52, 600, 747, 642, 182, 862, 81, 344, 805, 988, 739, 511, 655, 814, 334, 249, 515, 897, 955, 664, 981, 649, 113, 974, 459, 893, 228, 433, 837, 553, 268, 926, 240, 102, 654, 459, 51, 686, 754, 806, 760, 493, 403, 415, 394, 687, 700, 946, 670, 656, 610, 738, 392, 760, 799, 887, 653, 978, 321, 576, 617, 626, 502, 894, 679, 243, 440, 680, 879, 194, 572, 640, 724, 926, 56, 204, 700, 707, 151, 457, 449, 797, 195, 791, 558, 945, 679, 297, 59, 87, 824, 713, 663, 412, 693, 342, 606, 134, 108, 571, 364, 631, 212, 174, 643, 304, 329, 343, 97, 430, 751, 497, 314, 983, 374, 822, 928, 140, 206, 73, 263, 980, 736, 876, 478, 430, 305, 170, 514, 364, 692, 829, 82, 855, 953, 676, 246, 369, 970, 294, 750, 807, 827, 150, 790, 288, 923, 804, 378, 215, 828, 592, 281, 565, 555, 710, 82, 896, 831, 547, 261, 524, 462, 293, 465, 502, 56, 661, 821, 976, 991, 658, 869, 905, 758, 745, 193, 768, 550, 608, 933, 378, 286, 215, 979, 792, 961, 61, 688, 793, 644, 986, 403, 106, 366, 905, 644, 372, 567, 466, 434, 645, 210, 389, 550, 919, 135, 780, 773, 635, 389, 707, 100, 626, 958, 165, 504, 920, 176, 193, 713, 857, 265, 203, 50, 668, 108, 645, 990, 626, 197, 510, 357, 358, 850, 858, 364, 936, 638 }; } 1.1 jakarta-commons-sandbox/io/src/java/org/apache/commons/io/compress/bzip2/CBZip2InputStream.java Index: CBZip2InputStream.java =================================================================== /* * Copyright (C) The Apache Software Foundation. All rights reserved. * * This software is published under the terms of the Apache Software License * version 1.1, a copy of which has been included with this distribution in * the LICENSE.txt file. */ package org.apache.commons.io.compress.bzip2; import java.io.IOException; import java.io.InputStream; /** * An input stream that decompresses from the BZip2 format (without the file * header chars) to be read as any other stream. * * @author Keiron Liddle */ public class CBZip2InputStream extends InputStream implements BZip2Constants { private static final int START_BLOCK_STATE = 1; private static final int RAND_PART_A_STATE = 2; private static final int RAND_PART_B_STATE = 3; private static final int RAND_PART_C_STATE = 4; private static final int NO_RAND_PART_A_STATE = 5; private static final int NO_RAND_PART_B_STATE = 6; private static final int NO_RAND_PART_C_STATE = 7; private CRC m_crc = new CRC(); private boolean[] m_inUse = new boolean[ 256 ]; private char[] m_seqToUnseq = new char[ 256 ]; private char[] m_unseqToSeq = new char[ 256 ]; private char[] m_selector = new char[ MAX_SELECTORS ]; private char[] m_selectorMtf = new char[ MAX_SELECTORS ]; /* * freq table collected to save a pass over the data * during decompression. */ private int[] m_unzftab = new int[ 256 ]; private int[][] m_limit = new int[ N_GROUPS ][ MAX_ALPHA_SIZE ]; private int[][] m_base = new int[ N_GROUPS ][ MAX_ALPHA_SIZE ]; private int[][] m_perm = new int[ N_GROUPS ][ MAX_ALPHA_SIZE ]; private int[] m_minLens = new int[ N_GROUPS ]; private boolean m_streamEnd; private int m_currentChar = -1; private int m_currentState = START_BLOCK_STATE; private int m_rNToGo; private int m_rTPos; private int m_tPos; private int i2; private int count; private int chPrev; private int ch2; private int j2; private char z; private boolean m_blockRandomised; /* * always: in the range 0 .. 9. * The current block size is 100000 * this number. */ private int m_blockSize100k; private int m_bsBuff; private int m_bsLive; private InputStream m_input; private int m_computedBlockCRC; private int m_computedCombinedCRC; /* * index of the last char in the block, so * the block size == last + 1. */ private int m_last; private char[] m_ll8; private int m_nInUse; /* * index in zptr[] of original string after sorting. */ private int m_origPtr; private int m_storedBlockCRC; private int m_storedCombinedCRC; private int[] m_tt; public CBZip2InputStream( final InputStream input ) { bsSetStream( input ); initialize(); initBlock(); setupBlock(); } private static void badBlockHeader() { cadvise(); } private static void blockOverrun() { cadvise(); } private static void cadvise() { System.out.println( "CRC Error" ); //throw new CCoruptionError(); } private static void compressedStreamEOF() { cadvise(); } private static void crcError() { cadvise(); } public int read() { if( m_streamEnd ) { return -1; } else { int retChar = m_currentChar; switch( m_currentState ) { case START_BLOCK_STATE: break; case RAND_PART_A_STATE: break; case RAND_PART_B_STATE: setupRandPartB(); break; case RAND_PART_C_STATE: setupRandPartC(); break; case NO_RAND_PART_A_STATE: break; case NO_RAND_PART_B_STATE: setupNoRandPartB(); break; case NO_RAND_PART_C_STATE: setupNoRandPartC(); break; default: break; } return retChar; } } private void setDecompressStructureSizes( int newSize100k ) { if( !( 0 <= newSize100k && newSize100k <= 9 && 0 <= m_blockSize100k && m_blockSize100k <= 9 ) ) { // throw new IOException("Invalid block size"); } m_blockSize100k = newSize100k; if( newSize100k == 0 ) { return; } int n = BASE_BLOCK_SIZE * newSize100k; m_ll8 = new char[ n ]; m_tt = new int[ n ]; } private void setupBlock() { int[] cftab = new int[ 257 ]; char ch; cftab[ 0 ] = 0; for( int i = 1; i <= 256; i++ ) { cftab[ i ] = m_unzftab[ i - 1 ]; } for( int i = 1; i <= 256; i++ ) { cftab[ i ] += cftab[ i - 1 ]; } for( int i = 0; i <= m_last; i++ ) { ch = m_ll8[ i ]; m_tt[ cftab[ ch ] ] = i; cftab[ ch ]++; } cftab = null; m_tPos = m_tt[ m_origPtr ]; count = 0; i2 = 0; ch2 = 256; /* * not a char and not EOF */ if( m_blockRandomised ) { m_rNToGo = 0; m_rTPos = 0; setupRandPartA(); } else { setupNoRandPartA(); } } private void setupNoRandPartA() { if( i2 <= m_last ) { chPrev = ch2; ch2 = m_ll8[ m_tPos ]; m_tPos = m_tt[ m_tPos ]; i2++; m_currentChar = ch2; m_currentState = NO_RAND_PART_B_STATE; m_crc.updateCRC( ch2 ); } else { endBlock(); initBlock(); setupBlock(); } } private void setupNoRandPartB() { if( ch2 != chPrev ) { m_currentState = NO_RAND_PART_A_STATE; count = 1; setupNoRandPartA(); } else { count++; if( count >= 4 ) { z = m_ll8[ m_tPos ]; m_tPos = m_tt[ m_tPos ]; m_currentState = NO_RAND_PART_C_STATE; j2 = 0; setupNoRandPartC(); } else { m_currentState = NO_RAND_PART_A_STATE; setupNoRandPartA(); } } } private void setupNoRandPartC() { if( j2 < z ) { m_currentChar = ch2; m_crc.updateCRC( ch2 ); j2++; } else { m_currentState = NO_RAND_PART_A_STATE; i2++; count = 0; setupNoRandPartA(); } } private void setupRandPartA() { if( i2 <= m_last ) { chPrev = ch2; ch2 = m_ll8[ m_tPos ]; m_tPos = m_tt[ m_tPos ]; if( m_rNToGo == 0 ) { m_rNToGo = RAND_NUMS[ m_rTPos ]; m_rTPos++; if( m_rTPos == 512 ) { m_rTPos = 0; } } m_rNToGo--; ch2 ^= ( ( m_rNToGo == 1 ) ? 1 : 0 ); i2++; m_currentChar = ch2; m_currentState = RAND_PART_B_STATE; m_crc.updateCRC( ch2 ); } else { endBlock(); initBlock(); setupBlock(); } } private void setupRandPartB() { if( ch2 != chPrev ) { m_currentState = RAND_PART_A_STATE; count = 1; setupRandPartA(); } else { count++; if( count >= 4 ) { z = m_ll8[ m_tPos ]; m_tPos = m_tt[ m_tPos ]; if( m_rNToGo == 0 ) { m_rNToGo = RAND_NUMS[ m_rTPos ]; m_rTPos++; if( m_rTPos == 512 ) { m_rTPos = 0; } } m_rNToGo--; z ^= ( ( m_rNToGo == 1 ) ? 1 : 0 ); j2 = 0; m_currentState = RAND_PART_C_STATE; setupRandPartC(); } else { m_currentState = RAND_PART_A_STATE; setupRandPartA(); } } } private void setupRandPartC() { if( j2 < z ) { m_currentChar = ch2; m_crc.updateCRC( ch2 ); j2++; } else { m_currentState = RAND_PART_A_STATE; i2++; count = 0; setupRandPartA(); } } private void getAndMoveToFrontDecode() { int nextSym; int limitLast = BASE_BLOCK_SIZE * m_blockSize100k; m_origPtr = readVariableSizedInt( 24 ); recvDecodingTables(); int EOB = m_nInUse + 1; int groupNo = -1; int groupPos = 0; /* * Setting up the unzftab entries here is not strictly * necessary, but it does save having to do it later * in a separate pass, and so saves a block's worth of * cache misses. */ for( int i = 0; i <= 255; i++ ) { m_unzftab[ i ] = 0; } final char[] yy = new char[ 256 ]; for( int i = 0; i <= 255; i++ ) { yy[ i ] = (char)i; } m_last = -1; int zt; int zn; int zvec; int zj; groupNo++; groupPos = G_SIZE - 1; zt = m_selector[ groupNo ]; zn = m_minLens[ zt ]; zvec = bsR( zn ); while( zvec > m_limit[ zt ][ zn ] ) { zn++; while( m_bsLive < 1 ) { int zzi; char thech = 0; try { thech = (char)m_input.read(); } catch( IOException e ) { compressedStreamEOF(); } if( thech == -1 ) { compressedStreamEOF(); } zzi = thech; m_bsBuff = ( m_bsBuff << 8 ) | ( zzi & 0xff ); m_bsLive += 8; } zj = ( m_bsBuff >> ( m_bsLive - 1 ) ) & 1; m_bsLive--; zvec = ( zvec << 1 ) | zj; } nextSym = m_perm[ zt ][ zvec - m_base[ zt ][ zn ] ]; while( true ) { if( nextSym == EOB ) { break; } if( nextSym == RUNA || nextSym == RUNB ) { char ch; int s = -1; int N = 1; do { if( nextSym == RUNA ) { s = s + ( 0 + 1 ) * N; } else// if( nextSym == RUNB ) { s = s + ( 1 + 1 ) * N; } N = N * 2; if( groupPos == 0 ) { groupNo++; groupPos = G_SIZE; } groupPos--; zt = m_selector[ groupNo ]; zn = m_minLens[ zt ]; zvec = bsR( zn ); while( zvec > m_limit[ zt ][ zn ] ) { zn++; while( m_bsLive < 1 ) { int zzi; char thech = 0; try { thech = (char)m_input.read(); } catch( IOException e ) { compressedStreamEOF(); } if( thech == -1 ) { compressedStreamEOF(); } zzi = thech; m_bsBuff = ( m_bsBuff << 8 ) | ( zzi & 0xff ); m_bsLive += 8; } zj = ( m_bsBuff >> ( m_bsLive - 1 ) ) & 1; m_bsLive--; zvec = ( zvec << 1 ) | zj; } nextSym = m_perm[ zt ][ zvec - m_base[ zt ][ zn ] ]; } while( nextSym == RUNA || nextSym == RUNB ); s++; ch = m_seqToUnseq[ yy[ 0 ] ]; m_unzftab[ ch ] += s; while( s > 0 ) { m_last++; m_ll8[ m_last ] = ch; s--; } if( m_last >= limitLast ) { blockOverrun(); } continue; } else { char tmp; m_last++; if( m_last >= limitLast ) { blockOverrun(); } tmp = yy[ nextSym - 1 ]; m_unzftab[ m_seqToUnseq[ tmp ] ]++; m_ll8[ m_last ] = m_seqToUnseq[ tmp ]; /* * This loop is hammered during decompression, * hence the unrolling. * for (j = nextSym-1; j > 0; j--) yy[j] = yy[j-1]; */ int j = nextSym - 1; for( ; j > 3; j -= 4 ) { yy[ j ] = yy[ j - 1 ]; yy[ j - 1 ] = yy[ j - 2 ]; yy[ j - 2 ] = yy[ j - 3 ]; yy[ j - 3 ] = yy[ j - 4 ]; } for( ; j > 0; j-- ) { yy[ j ] = yy[ j - 1 ]; } yy[ 0 ] = tmp; if( groupPos == 0 ) { groupNo++; groupPos = G_SIZE; } groupPos--; zt = m_selector[ groupNo ]; zn = m_minLens[ zt ]; zvec = bsR( zn ); while( zvec > m_limit[ zt ][ zn ] ) { zn++; while( m_bsLive < 1 ) { char ch = 0; try { ch = (char)m_input.read(); } catch( IOException e ) { compressedStreamEOF(); } m_bsBuff = ( m_bsBuff << 8 ) | ( ch & 0xff ); m_bsLive += 8; } zj = ( m_bsBuff >> ( m_bsLive - 1 ) ) & 1; m_bsLive--; zvec = ( zvec << 1 ) | zj; } nextSym = m_perm[ zt ][ zvec - m_base[ zt ][ zn ] ]; continue; } } } private void bsFinishedWithStream() { m_input = null; } private int readVariableSizedInt( final int numBits ) { return bsR( numBits ); } private char readUnsignedChar() { return (char)bsR( 8 ); } private int readInt() { int u = 0; u = ( u << 8 ) | bsR( 8 ); u = ( u << 8 ) | bsR( 8 ); u = ( u << 8 ) | bsR( 8 ); u = ( u << 8 ) | bsR( 8 ); return u; } private int bsR( final int n ) { while( m_bsLive < n ) { char ch = 0; try { ch = (char)m_input.read(); } catch( final IOException ioe ) { compressedStreamEOF(); } if( ch == -1 ) { compressedStreamEOF(); } m_bsBuff = ( m_bsBuff << 8 ) | ( ch & 0xff ); m_bsLive += 8; } final int result = ( m_bsBuff >> ( m_bsLive - n ) ) & ( ( 1 << n ) - 1 ); m_bsLive -= n; return result; } private void bsSetStream( final InputStream input ) { m_input = input; m_bsLive = 0; m_bsBuff = 0; } private void complete() { m_storedCombinedCRC = readInt(); if( m_storedCombinedCRC != m_computedCombinedCRC ) { crcError(); } bsFinishedWithStream(); m_streamEnd = true; } private void endBlock() { m_computedBlockCRC = m_crc.getFinalCRC(); /* * A bad CRC is considered a fatal error. */ if( m_storedBlockCRC != m_computedBlockCRC ) { crcError(); } m_computedCombinedCRC = ( m_computedCombinedCRC << 1 ) | ( m_computedCombinedCRC >>> 31 ); m_computedCombinedCRC ^= m_computedBlockCRC; } private void hbCreateDecodeTables( final int[] limit, final int[] base, final int[] perm, final char[] length, final int minLen, final int maxLen, final int alphaSize ) { int pp = 0; for( int i = minLen; i <= maxLen; i++ ) { for( int j = 0; j < alphaSize; j++ ) { if( length[ j ] == i ) { perm[ pp ] = j; pp++; } } } for( int i = 0; i < MAX_CODE_LEN; i++ ) { base[ i ] = 0; } for( int i = 0; i < alphaSize; i++ ) { base[ length[ i ] + 1 ]++; } for( int i = 1; i < MAX_CODE_LEN; i++ ) { base[ i ] += base[ i - 1 ]; } for( int i = 0; i < MAX_CODE_LEN; i++ ) { limit[ i ] = 0; } int vec = 0; for( int i = minLen; i <= maxLen; i++ ) { vec += ( base[ i + 1 ] - base[ i ] ); limit[ i ] = vec - 1; vec <<= 1; } for( int i = minLen + 1; i <= maxLen; i++ ) { base[ i ] = ( ( limit[ i - 1 ] + 1 ) << 1 ) - base[ i ]; } } private void initBlock() { final char magic1 = readUnsignedChar(); final char magic2 = readUnsignedChar(); final char magic3 = readUnsignedChar(); final char magic4 = readUnsignedChar(); final char magic5 = readUnsignedChar(); final char magic6 = readUnsignedChar(); if( magic1 == 0x17 && magic2 == 0x72 && magic3 == 0x45 && magic4 == 0x38 && magic5 == 0x50 && magic6 == 0x90 ) { complete(); return; } if( magic1 != 0x31 || magic2 != 0x41 || magic3 != 0x59 || magic4 != 0x26 || magic5 != 0x53 || magic6 != 0x59 ) { badBlockHeader(); m_streamEnd = true; return; } m_storedBlockCRC = readInt(); if( bsR( 1 ) == 1 ) { m_blockRandomised = true; } else { m_blockRandomised = false; } // currBlockNo++; getAndMoveToFrontDecode(); m_crc.initialiseCRC(); m_currentState = START_BLOCK_STATE; } private void initialize() { final char magic3 = readUnsignedChar(); final char magic4 = readUnsignedChar(); if( magic3 != 'h' || magic4 < '1' || magic4 > '9' ) { bsFinishedWithStream(); m_streamEnd = true; return; } setDecompressStructureSizes( magic4 - '0' ); m_computedCombinedCRC = 0; } private void makeMaps() { m_nInUse = 0; for( int i = 0; i < 256; i++ ) { if( m_inUse[ i ] ) { m_seqToUnseq[ m_nInUse ] = (char)i; m_unseqToSeq[ i ] = (char)m_nInUse; m_nInUse++; } } } private void recvDecodingTables() { buildInUseTable(); makeMaps(); final int alphaSize = m_nInUse + 2; /* * Now the selectors */ final int groupCount = bsR( 3 ); final int selectorCount = bsR( 15 ); for( int i = 0; i < selectorCount; i++ ) { int run = 0; while( bsR( 1 ) == 1 ) { run++; } m_selectorMtf[ i ] = (char)run; } /* * Undo the MTF values for the selectors. */ final char[] pos = new char[ N_GROUPS ]; for( char v = 0; v < groupCount; v++ ) { pos[ v ] = v; } for( int i = 0; i < selectorCount; i++ ) { int v = m_selectorMtf[ i ]; final char tmp = pos[ v ]; while( v > 0 ) { pos[ v ] = pos[ v - 1 ]; v--; } pos[ 0 ] = tmp; m_selector[ i ] = tmp; } final char[][] len = new char[ N_GROUPS ][ MAX_ALPHA_SIZE ]; /* * Now the coding tables */ for( int i = 0; i < groupCount; i++ ) { int curr = bsR( 5 ); for( int j = 0; j < alphaSize; j++ ) { while( bsR( 1 ) == 1 ) { if( bsR( 1 ) == 0 ) { curr++; } else { curr--; } } len[ i ][ j ] = (char)curr; } } /* * Create the Huffman decoding tables */ for( int k = 0; k < groupCount; k++ ) { int minLen = 32; int maxLen = 0; for( int i = 0; i < alphaSize; i++ ) { if( len[ k ][ i ] > maxLen ) { maxLen = len[ k ][ i ]; } if( len[ k ][ i ] < minLen ) { minLen = len[ k ][ i ]; } } hbCreateDecodeTables( m_limit[ k ], m_base[ k ], m_perm[ k ], len[ k ], minLen, maxLen, alphaSize ); m_minLens[ k ] = minLen; } } private void buildInUseTable() { final boolean[] inUse16 = new boolean[ 16 ]; /* * Receive the mapping table */ for( int i = 0; i < 16; i++ ) { if( bsR( 1 ) == 1 ) { inUse16[ i ] = true; } else { inUse16[ i ] = false; } } for( int i = 0; i < 256; i++ ) { m_inUse[ i ] = false; } for( int i = 0; i < 16; i++ ) { if( inUse16[ i ] ) { for( int j = 0; j < 16; j++ ) { if( bsR( 1 ) == 1 ) { m_inUse[ i * 16 + j ] = true; } } } } } } 1.1 jakarta-commons-sandbox/io/src/java/org/apache/commons/io/compress/bzip2/CBZip2OutputStream.java Index: CBZip2OutputStream.java =================================================================== /* * Copyright (C) The Apache Software Foundation. All rights reserved. * * This software is published under the terms of the Apache Software License * version 1.1, a copy of which has been included with this distribution in * the LICENSE.txt file. */ package org.apache.commons.io.compress.bzip2; import java.io.IOException; import java.io.OutputStream; /** * An output stream that compresses into the BZip2 format (without the file * header chars) into another stream. TODO: Update to BZip2 1.0.1 * * @author Keiron Liddle */ public class CBZip2OutputStream extends OutputStream implements BZip2Constants { private static final int LOWER_BYTE_MASK = 0x000000ff; private static final int UPPER_BYTE_MASK = 0xffffff00; private static final int SETMASK = ( 1 << 21 ); private static final int CLEARMASK = ( ~SETMASK ); private static final int GREATER_ICOST = 15; private static final int LESSER_ICOST = 0; private static final int SMALL_THRESH = 20; private static final int DEPTH_THRESH = 10; /* * If you are ever unlucky/improbable enough * to get a stack overflow whilst sorting, * increase the following constant and try * again. In practice I have never seen the * stack go above 27 elems, so the following * limit seems very generous. */ private static final int QSORT_STACK_SIZE = 1000; private CRC m_crc = new CRC(); private boolean[] m_inUse = new boolean[ 256 ]; private char[] m_seqToUnseq = new char[ 256 ]; private char[] m_unseqToSeq = new char[ 256 ]; private char[] m_selector = new char[ MAX_SELECTORS ]; private char[] m_selectorMtf = new char[ MAX_SELECTORS ]; private int[] m_mtfFreq = new int[ MAX_ALPHA_SIZE ]; private int m_currentChar = -1; private int m_runLength; private boolean m_closed; /* * Knuth's increments seem to work better * than Incerpi-Sedgewick here. Possibly * because the number of elems to sort is * usually small, typically <= 20. */ private int[] m_incs = new int[] { 1, 4, 13, 40, 121, 364, 1093, 3280, 9841, 29524, 88573, 265720, 797161, 2391484 }; private boolean m_blockRandomised; /* * always: in the range 0 .. 9. * The current block size is 100000 * this number. */ private int m_blockSize100k; private int m_bsBuff; private int m_bsLive; /* * index of the last char in the block, so * the block size == last + 1. */ private int m_last; /* * index in zptr[] of original string after sorting. */ private int m_origPtr; private int m_allowableBlockSize; private char[] m_block; private int m_blockCRC; private int m_combinedCRC; private OutputStream m_bsStream; private boolean m_firstAttempt; private int[] m_ftab; private int m_nInUse; private int m_nMTF; private int[] m_quadrant; private short[] m_szptr; private int m_workDone; /* * Used when sorting. If too many long comparisons * happen, we stop sorting, randomise the block * slightly, and try again. */ private int m_workFactor; private int m_workLimit; private int[] m_zptr; public CBZip2OutputStream( final OutputStream output ) throws IOException { this( output, 9 ); } public CBZip2OutputStream( final OutputStream output, final int blockSize ) throws IOException { bsSetStream( output ); m_workFactor = 50; int outBlockSize = blockSize; if( outBlockSize > 9 ) { outBlockSize = 9; } if( outBlockSize < 1 ) { outBlockSize = 1; } m_blockSize100k = outBlockSize; allocateCompressStructures(); initialize(); initBlock(); } private static void hbMakeCodeLengths( char[] len, int[] freq, int alphaSize, int maxLen ) { /* * Nodes and heap entries run from 1. Entry 0 * for both the heap and nodes is a sentinel. */ int nNodes; /* * Nodes and heap entries run from 1. Entry 0 * for both the heap and nodes is a sentinel. */ int nHeap; /* * Nodes and heap entries run from 1. Entry 0 * for both the heap and nodes is a sentinel. */ int n1; /* * Nodes and heap entries run from 1. Entry 0 * for both the heap and nodes is a sentinel. */ int n2; /* * Nodes and heap entries run from 1. Entry 0 * for both the heap and nodes is a sentinel. */ int i; /* * Nodes and heap entries run from 1. Entry 0 * for both the heap and nodes is a sentinel. */ int j; /* * Nodes and heap entries run from 1. Entry 0 * for both the heap and nodes is a sentinel. */ int k; boolean tooLong; int[] heap = new int[ MAX_ALPHA_SIZE + 2 ]; int[] weights = new int[ MAX_ALPHA_SIZE * 2 ]; int[] parent = new int[ MAX_ALPHA_SIZE * 2 ]; for( i = 0; i < alphaSize; i++ ) { weights[ i + 1 ] = ( freq[ i ] == 0 ? 1 : freq[ i ] ) << 8; } while( true ) { nNodes = alphaSize; nHeap = 0; heap[ 0 ] = 0; weights[ 0 ] = 0; parent[ 0 ] = -2; for( i = 1; i <= alphaSize; i++ ) { parent[ i ] = -1; nHeap++; heap[ nHeap ] = i; { int zz; int tmp; zz = nHeap; tmp = heap[ zz ]; while( weights[ tmp ] < weights[ heap[ zz >> 1 ] ] ) { heap[ zz ] = heap[ zz >> 1 ]; zz >>= 1; } heap[ zz ] = tmp; } } if( !( nHeap < ( MAX_ALPHA_SIZE + 2 ) ) ) { panic(); } while( nHeap > 1 ) { n1 = heap[ 1 ]; heap[ 1 ] = heap[ nHeap ]; nHeap--; { int zz = 0; int yy = 0; int tmp = 0; zz = 1; tmp = heap[ zz ]; while( true ) { yy = zz << 1; if( yy > nHeap ) { break; } if( yy < nHeap && weights[ heap[ yy + 1 ] ] < weights[ heap[ yy ] ] ) { yy++; } if( weights[ tmp ] < weights[ heap[ yy ] ] ) { break; } heap[ zz ] = heap[ yy ]; zz = yy; } heap[ zz ] = tmp; } n2 = heap[ 1 ]; heap[ 1 ] = heap[ nHeap ]; nHeap--; { int zz = 0; int yy = 0; int tmp = 0; zz = 1; tmp = heap[ zz ]; while( true ) { yy = zz << 1; if( yy > nHeap ) { break; } if( yy < nHeap && weights[ heap[ yy + 1 ] ] < weights[ heap[ yy ] ] ) { yy++; } if( weights[ tmp ] < weights[ heap[ yy ] ] ) { break; } heap[ zz ] = heap[ yy ]; zz = yy; } heap[ zz ] = tmp; } nNodes++; parent[ n1 ] = nNodes; parent[ n2 ] = nNodes; final int v1 = weights[ n1 ]; final int v2 = weights[ n2 ]; final int weight = calculateWeight( v1, v2 ); weights[ nNodes ] = weight; parent[ nNodes ] = -1; nHeap++; heap[ nHeap ] = nNodes; { int zz = 0; int tmp = 0; zz = nHeap; tmp = heap[ zz ]; while( weights[ tmp ] < weights[ heap[ zz >> 1 ] ] ) { heap[ zz ] = heap[ zz >> 1 ]; zz >>= 1; } heap[ zz ] = tmp; } } if( !( nNodes < ( MAX_ALPHA_SIZE * 2 ) ) ) { panic(); } tooLong = false; for( i = 1; i <= alphaSize; i++ ) { j = 0; k = i; while( parent[ k ] >= 0 ) { k = parent[ k ]; j++; } len[ i - 1 ] = (char)j; if( j > maxLen ) { tooLong = true; } } if( !tooLong ) { break; } for( i = 1; i < alphaSize; i++ ) { j = weights[ i ] >> 8; j = 1 + ( j / 2 ); weights[ i ] = j << 8; } } } private static int calculateWeight( final int v1, final int v2 ) { final int upper = ( v1 & UPPER_BYTE_MASK ) + ( v2 & UPPER_BYTE_MASK ); final int v1Lower = ( v1 & LOWER_BYTE_MASK ); final int v2Lower = ( v2 & LOWER_BYTE_MASK ); final int nnnn = ( v1Lower > v2Lower ) ? v1Lower : v2Lower; return upper | ( 1 + nnnn ); } private static void panic() { System.out.println( "panic" ); //throw new CError(); } public void close() throws IOException { if( m_closed ) { return; } if( m_runLength > 0 ) { writeRun(); } m_currentChar = -1; endBlock(); endCompression(); m_closed = true; super.close(); m_bsStream.close(); } public void finalize() throws Throwable { close(); } public void flush() throws IOException { super.flush(); m_bsStream.flush(); } /** * modified by Oliver Merkel, 010128 * * @param bv Description of Parameter * @exception java.io.IOException Description of Exception */ public void write( int bv ) throws IOException { int b = ( 256 + bv ) % 256; if( m_currentChar != -1 ) { if( m_currentChar == b ) { m_runLength++; if( m_runLength > 254 ) { writeRun(); m_currentChar = -1; m_runLength = 0; } } else { writeRun(); m_runLength = 1; m_currentChar = b; } } else { m_currentChar = b; m_runLength++; } } private void allocateCompressStructures() { int n = BASE_BLOCK_SIZE * m_blockSize100k; m_block = new char[ ( n + 1 + NUM_OVERSHOOT_BYTES ) ]; m_quadrant = new int[ ( n + NUM_OVERSHOOT_BYTES ) ]; m_zptr = new int[ n ]; m_ftab = new int[ 65537 ]; if( m_block == null || m_quadrant == null || m_zptr == null || m_ftab == null ) { //int totalDraw = (n + 1 + NUM_OVERSHOOT_BYTES) + (n + NUM_OVERSHOOT_BYTES) + n + 65537; //compressOutOfMemory ( totalDraw, n ); } /* * The back end needs a place to store the MTF values * whilst it calculates the coding tables. We could * put them in the zptr array. However, these values * will fit in a short, so we overlay szptr at the * start of zptr, in the hope of reducing the number * of cache misses induced by the multiple traversals * of the MTF values when calculating coding tables. * Seems to improve compression speed by about 1%. */ // szptr = zptr; m_szptr = new short[ 2 * n ]; } private void bsFinishedWithStream() throws IOException { while( m_bsLive > 0 ) { int ch = ( m_bsBuff >> 24 ); try { m_bsStream.write( ch );// write 8-bit } catch( IOException e ) { throw e; } m_bsBuff <<= 8; m_bsLive -= 8; } } private void bsPutIntVS( int numBits, int c ) throws IOException { bsW( numBits, c ); } private void bsPutUChar( int c ) throws IOException { bsW( 8, c ); } private void bsPutint( int u ) throws IOException { bsW( 8, ( u >> 24 ) & 0xff ); bsW( 8, ( u >> 16 ) & 0xff ); bsW( 8, ( u >> 8 ) & 0xff ); bsW( 8, u & 0xff ); } private void bsSetStream( OutputStream f ) { m_bsStream = f; m_bsLive = 0; m_bsBuff = 0; } private void bsW( int n, int v ) throws IOException { while( m_bsLive >= 8 ) { int ch = ( m_bsBuff >> 24 ); try { m_bsStream.write( ch );// write 8-bit } catch( IOException e ) { throw e; } m_bsBuff <<= 8; m_bsLive -= 8; } m_bsBuff |= ( v << ( 32 - m_bsLive - n ) ); m_bsLive += n; } private void doReversibleTransformation() { int i; m_workLimit = m_workFactor * m_last; m_workDone = 0; m_blockRandomised = false; m_firstAttempt = true; mainSort(); if( m_workDone > m_workLimit && m_firstAttempt ) { randomiseBlock(); m_workLimit = 0; m_workDone = 0; m_blockRandomised = true; m_firstAttempt = false; mainSort(); } m_origPtr = -1; for( i = 0; i <= m_last; i++ ) { if( m_zptr[ i ] == 0 ) { m_origPtr = i; break; } } ; if( m_origPtr == -1 ) { panic(); } } private void endBlock() throws IOException { m_blockCRC = m_crc.getFinalCRC(); m_combinedCRC = ( m_combinedCRC << 1 ) | ( m_combinedCRC >>> 31 ); m_combinedCRC ^= m_blockCRC; /* * sort the block and establish posn of original string */ doReversibleTransformation(); /* * A 6-byte block header, the value chosen arbitrarily * as 0x314159265359 :-). A 32 bit value does not really * give a strong enough guarantee that the value will not * appear by chance in the compressed datastream. Worst-case * probability of this event, for a 900k block, is about * 2.0e-3 for 32 bits, 1.0e-5 for 40 bits and 4.0e-8 for 48 bits. * For a compressed file of size 100Gb -- about 100000 blocks -- * only a 48-bit marker will do. NB: normal compression/ * decompression do *not* rely on these statistical properties. * They are only important when trying to recover blocks from * damaged files. */ bsPutUChar( 0x31 ); bsPutUChar( 0x41 ); bsPutUChar( 0x59 ); bsPutUChar( 0x26 ); bsPutUChar( 0x53 ); bsPutUChar( 0x59 ); /* * Now the block's CRC, so it is in a known place. */ bsPutint( m_blockCRC ); /* * Now a single bit indicating randomisation. */ if( m_blockRandomised ) { bsW( 1, 1 ); } else { bsW( 1, 0 ); } /* * Finally, block's contents proper. */ moveToFrontCodeAndSend(); } private void endCompression() throws IOException { /* * Now another magic 48-bit number, 0x177245385090, to * indicate the end of the last block. (sqrt(pi), if * you want to know. I did want to use e, but it contains * too much repetition -- 27 18 28 18 28 46 -- for me * to feel statistically comfortable. Call me paranoid.) */ bsPutUChar( 0x17 ); bsPutUChar( 0x72 ); bsPutUChar( 0x45 ); bsPutUChar( 0x38 ); bsPutUChar( 0x50 ); bsPutUChar( 0x90 ); bsPutint( m_combinedCRC ); bsFinishedWithStream(); } private boolean fullGtU( int i1, int i2 ) { int k; char c1; char c2; int s1; int s2; c1 = m_block[ i1 + 1 ]; c2 = m_block[ i2 + 1 ]; if( c1 != c2 ) { return ( c1 > c2 ); } i1++; i2++; c1 = m_block[ i1 + 1 ]; c2 = m_block[ i2 + 1 ]; if( c1 != c2 ) { return ( c1 > c2 ); } i1++; i2++; c1 = m_block[ i1 + 1 ]; c2 = m_block[ i2 + 1 ]; if( c1 != c2 ) { return ( c1 > c2 ); } i1++; i2++; c1 = m_block[ i1 + 1 ]; c2 = m_block[ i2 + 1 ]; if( c1 != c2 ) { return ( c1 > c2 ); } i1++; i2++; c1 = m_block[ i1 + 1 ]; c2 = m_block[ i2 + 1 ]; if( c1 != c2 ) { return ( c1 > c2 ); } i1++; i2++; c1 = m_block[ i1 + 1 ]; c2 = m_block[ i2 + 1 ]; if( c1 != c2 ) { return ( c1 > c2 ); } i1++; i2++; k = m_last + 1; do { c1 = m_block[ i1 + 1 ]; c2 = m_block[ i2 + 1 ]; if( c1 != c2 ) { return ( c1 > c2 ); } s1 = m_quadrant[ i1 ]; s2 = m_quadrant[ i2 ]; if( s1 != s2 ) { return ( s1 > s2 ); } i1++; i2++; c1 = m_block[ i1 + 1 ]; c2 = m_block[ i2 + 1 ]; if( c1 != c2 ) { return ( c1 > c2 ); } s1 = m_quadrant[ i1 ]; s2 = m_quadrant[ i2 ]; if( s1 != s2 ) { return ( s1 > s2 ); } i1++; i2++; c1 = m_block[ i1 + 1 ]; c2 = m_block[ i2 + 1 ]; if( c1 != c2 ) { return ( c1 > c2 ); } s1 = m_quadrant[ i1 ]; s2 = m_quadrant[ i2 ]; if( s1 != s2 ) { return ( s1 > s2 ); } i1++; i2++; c1 = m_block[ i1 + 1 ]; c2 = m_block[ i2 + 1 ]; if( c1 != c2 ) { return ( c1 > c2 ); } s1 = m_quadrant[ i1 ]; s2 = m_quadrant[ i2 ]; if( s1 != s2 ) { return ( s1 > s2 ); } i1++; i2++; if( i1 > m_last ) { i1 -= m_last; i1--; } ; if( i2 > m_last ) { i2 -= m_last; i2--; } ; k -= 4; m_workDone++; } while( k >= 0 ); return false; } private void generateMTFValues() { char[] yy = new char[ 256 ]; int i; int j; char tmp; char tmp2; int zPend; int wr; int EOB; makeMaps(); EOB = m_nInUse + 1; for( i = 0; i <= EOB; i++ ) { m_mtfFreq[ i ] = 0; } wr = 0; zPend = 0; for( i = 0; i < m_nInUse; i++ ) { yy[ i ] = (char)i; } for( i = 0; i <= m_last; i++ ) { char ll_i; ll_i = m_unseqToSeq[ m_block[ m_zptr[ i ] ] ]; j = 0; tmp = yy[ j ]; while( ll_i != tmp ) { j++; tmp2 = tmp; tmp = yy[ j ]; yy[ j ] = tmp2; } ; yy[ 0 ] = tmp; if( j == 0 ) { zPend++; } else { if( zPend > 0 ) { zPend--; while( true ) { switch( zPend % 2 ) { case 0: m_szptr[ wr ] = (short)RUNA; wr++; m_mtfFreq[ RUNA ]++; break; case 1: m_szptr[ wr ] = (short)RUNB; wr++; m_mtfFreq[ RUNB ]++; break; } ; if( zPend < 2 ) { break; } zPend = ( zPend - 2 ) / 2; } ; zPend = 0; } m_szptr[ wr ] = (short)( j + 1 ); wr++; m_mtfFreq[ j + 1 ]++; } } if( zPend > 0 ) { zPend--; while( true ) { switch( zPend % 2 ) { case 0: m_szptr[ wr ] = (short)RUNA; wr++; m_mtfFreq[ RUNA ]++; break; case 1: m_szptr[ wr ] = (short)RUNB; wr++; m_mtfFreq[ RUNB ]++; break; } if( zPend < 2 ) { break; } zPend = ( zPend - 2 ) / 2; } } m_szptr[ wr ] = (short)EOB; wr++; m_mtfFreq[ EOB ]++; m_nMTF = wr; } private void hbAssignCodes( int[] code, char[] length, int minLen, int maxLen, int alphaSize ) { int n; int vec; int i; vec = 0; for( n = minLen; n <= maxLen; n++ ) { for( i = 0; i < alphaSize; i++ ) { if( length[ i ] == n ) { code[ i ] = vec; vec++; } } ; vec <<= 1; } } private void initBlock() { // blockNo++; m_crc.initialiseCRC(); m_last = -1; // ch = 0; for( int i = 0; i < 256; i++ ) { m_inUse[ i ] = false; } /* * 20 is just a paranoia constant */ m_allowableBlockSize = BASE_BLOCK_SIZE * m_blockSize100k - 20; } private void initialize() throws IOException { /* * Write `magic' bytes h indicating file-format == huffmanised, * followed by a digit indicating blockSize100k. */ bsPutUChar( 'h' ); bsPutUChar( '0' + m_blockSize100k ); m_combinedCRC = 0; } private void mainSort() { int i; int j; int ss; int sb; int[] runningOrder = new int[ 256 ]; int[] copy = new int[ 256 ]; boolean[] bigDone = new boolean[ 256 ]; int c1; int c2; /* * In the various block-sized structures, live data runs * from 0 to last+NUM_OVERSHOOT_BYTES inclusive. First, * set up the overshoot area for block. */ // if (verbosity >= 4) fprintf ( stderr, " sort initialise ...\n" ); for( i = 0; i < NUM_OVERSHOOT_BYTES; i++ ) { m_block[ m_last + i + 2 ] = m_block[ ( i % ( m_last + 1 ) ) + 1 ]; } for( i = 0; i <= m_last + NUM_OVERSHOOT_BYTES; i++ ) { m_quadrant[ i ] = 0; } m_block[ 0 ] = m_block[ m_last + 1 ]; if( m_last < 4000 ) { /* * Use simpleSort(), since the full sorting mechanism * has quite a large constant overhead. */ for( i = 0; i <= m_last; i++ ) { m_zptr[ i ] = i; } m_firstAttempt = false; m_workDone = 0; m_workLimit = 0; simpleSort( 0, m_last, 0 ); } else { for( i = 0; i <= 255; i++ ) { bigDone[ i ] = false; } for( i = 0; i <= 65536; i++ ) { m_ftab[ i ] = 0; } c1 = m_block[ 0 ]; for( i = 0; i <= m_last; i++ ) { c2 = m_block[ i + 1 ]; m_ftab[ ( c1 << 8 ) + c2 ]++; c1 = c2; } for( i = 1; i <= 65536; i++ ) { m_ftab[ i ] += m_ftab[ i - 1 ]; } c1 = m_block[ 1 ]; for( i = 0; i < m_last; i++ ) { c2 = m_block[ i + 2 ]; j = ( c1 << 8 ) + c2; c1 = c2; m_ftab[ j ]--; m_zptr[ m_ftab[ j ] ] = i; } j = ( ( m_block[ m_last + 1 ] ) << 8 ) + ( m_block[ 1 ] ); m_ftab[ j ]--; m_zptr[ m_ftab[ j ] ] = m_last; /* * Now ftab contains the first loc of every small bucket. * Calculate the running order, from smallest to largest * big bucket. */ for( i = 0; i <= 255; i++ ) { runningOrder[ i ] = i; } { int vv; int h = 1; do { h = 3 * h + 1; } while( h <= 256 ); do { h = h / 3; for( i = h; i <= 255; i++ ) { vv = runningOrder[ i ]; j = i; while( ( m_ftab[ ( ( runningOrder[ j - h ] ) + 1 ) << 8 ] - m_ftab[ ( runningOrder[ j - h ] ) << 8 ] ) > ( m_ftab[ ( ( vv ) + 1 ) << 8 ] - m_ftab[ ( vv ) << 8 ] ) ) { runningOrder[ j ] = runningOrder[ j - h ]; j = j - h; if( j <= ( h - 1 ) ) { break; } } runningOrder[ j ] = vv; } } while( h != 1 ); } /* * The main sorting loop. */ for( i = 0; i <= 255; i++ ) { /* * Process big buckets, starting with the least full. */ ss = runningOrder[ i ]; /* * Complete the big bucket [ss] by quicksorting * any unsorted small buckets [ss, j]. Hopefully * previous pointer-scanning phases have already * completed many of the small buckets [ss, j], so * we don't have to sort them at all. */ for( j = 0; j <= 255; j++ ) { sb = ( ss << 8 ) + j; if( !( ( m_ftab[ sb ] & SETMASK ) == SETMASK ) ) { int lo = m_ftab[ sb ] & CLEARMASK; int hi = ( m_ftab[ sb + 1 ] & CLEARMASK ) - 1; if( hi > lo ) { qSort3( lo, hi, 2 ); if( m_workDone > m_workLimit && m_firstAttempt ) { return; } } m_ftab[ sb ] |= SETMASK; } } /* * The ss big bucket is now done. Record this fact, * and update the quadrant descriptors. Remember to * update quadrants in the overshoot area too, if * necessary. The "if (i < 255)" test merely skips * this updating for the last bucket processed, since * updating for the last bucket is pointless. */ bigDone[ ss ] = true; if( i < 255 ) { int bbStart = m_ftab[ ss << 8 ] & CLEARMASK; int bbSize = ( m_ftab[ ( ss + 1 ) << 8 ] & CLEARMASK ) - bbStart; int shifts = 0; while( ( bbSize >> shifts ) > 65534 ) { shifts++; } for( j = 0; j < bbSize; j++ ) { int a2update = m_zptr[ bbStart + j ]; int qVal = ( j >> shifts ); m_quadrant[ a2update ] = qVal; if( a2update < NUM_OVERSHOOT_BYTES ) { m_quadrant[ a2update + m_last + 1 ] = qVal; } } if( !( ( ( bbSize - 1 ) >> shifts ) <= 65535 ) ) { panic(); } } /* * Now scan this big bucket so as to synthesise the * sorted order for small buckets [t, ss] for all t != ss. */ for( j = 0; j <= 255; j++ ) { copy[ j ] = m_ftab[ ( j << 8 ) + ss ] & CLEARMASK; } for( j = m_ftab[ ss << 8 ] & CLEARMASK; j < ( m_ftab[ ( ss + 1 ) << 8 ] & CLEARMASK ); j++ ) { c1 = m_block[ m_zptr[ j ] ]; if( !bigDone[ c1 ] ) { m_zptr[ copy[ c1 ] ] = m_zptr[ j ] == 0 ? m_last : m_zptr[ j ] - 1; copy[ c1 ]++; } } for( j = 0; j <= 255; j++ ) { m_ftab[ ( j << 8 ) + ss ] |= SETMASK; } } } } private void makeMaps() { int i; m_nInUse = 0; for( i = 0; i < 256; i++ ) { if( m_inUse[ i ] ) { m_seqToUnseq[ m_nInUse ] = (char)i; m_unseqToSeq[ i ] = (char)m_nInUse; m_nInUse++; } } } private char med3( char a, char b, char c ) { char t; if( a > b ) { t = a; a = b; b = t; } if( b > c ) { t = b; b = c; c = t; } if( a > b ) { b = a; } return b; } private void moveToFrontCodeAndSend() throws IOException { bsPutIntVS( 24, m_origPtr ); generateMTFValues(); sendMTFValues(); } private void qSort3( int loSt, int hiSt, int dSt ) { int unLo; int unHi; int ltLo; int gtHi; int med; int n; int m; int sp; int lo; int hi; int d; StackElem[] stack = new StackElem[ QSORT_STACK_SIZE ]; for( int count = 0; count < QSORT_STACK_SIZE; count++ ) { stack[ count ] = new StackElem(); } sp = 0; stack[ sp ].m_ll = loSt; stack[ sp ].m_hh = hiSt; stack[ sp ].m_dd = dSt; sp++; while( sp > 0 ) { if( sp >= QSORT_STACK_SIZE ) { panic(); } sp--; lo = stack[ sp ].m_ll; hi = stack[ sp ].m_hh; d = stack[ sp ].m_dd; if( hi - lo < SMALL_THRESH || d > DEPTH_THRESH ) { simpleSort( lo, hi, d ); if( m_workDone > m_workLimit && m_firstAttempt ) { return; } continue; } med = med3( m_block[ m_zptr[ lo ] + d + 1 ], m_block[ m_zptr[ hi ] + d + 1 ], m_block[ m_zptr[ ( lo + hi ) >> 1 ] + d + 1 ] ); unLo = lo; ltLo = lo; unHi = hi; gtHi = hi; while( true ) { while( true ) { if( unLo > unHi ) { break; } n = m_block[ m_zptr[ unLo ] + d + 1 ] - med; if( n == 0 ) { int temp = 0; temp = m_zptr[ unLo ]; m_zptr[ unLo ] = m_zptr[ ltLo ]; m_zptr[ ltLo ] = temp; ltLo++; unLo++; continue; } ; if( n > 0 ) { break; } unLo++; } while( true ) { if( unLo > unHi ) { break; } n = m_block[ m_zptr[ unHi ] + d + 1 ] - med; if( n == 0 ) { int temp = 0; temp = m_zptr[ unHi ]; m_zptr[ unHi ] = m_zptr[ gtHi ]; m_zptr[ gtHi ] = temp; gtHi--; unHi--; continue; } ; if( n < 0 ) { break; } unHi--; } if( unLo > unHi ) { break; } int temp = 0; temp = m_zptr[ unLo ]; m_zptr[ unLo ] = m_zptr[ unHi ]; m_zptr[ unHi ] = temp; unLo++; unHi--; } if( gtHi < ltLo ) { stack[ sp ].m_ll = lo; stack[ sp ].m_hh = hi; stack[ sp ].m_dd = d + 1; sp++; continue; } n = ( ( ltLo - lo ) < ( unLo - ltLo ) ) ? ( ltLo - lo ) : ( unLo - ltLo ); vswap( lo, unLo - n, n ); m = ( ( hi - gtHi ) < ( gtHi - unHi ) ) ? ( hi - gtHi ) : ( gtHi - unHi ); vswap( unLo, hi - m + 1, m ); n = lo + unLo - ltLo - 1; m = hi - ( gtHi - unHi ) + 1; stack[ sp ].m_ll = lo; stack[ sp ].m_hh = n; stack[ sp ].m_dd = d; sp++; stack[ sp ].m_ll = n + 1; stack[ sp ].m_hh = m - 1; stack[ sp ].m_dd = d + 1; sp++; stack[ sp ].m_ll = m; stack[ sp ].m_hh = hi; stack[ sp ].m_dd = d; sp++; } } private void randomiseBlock() { int i; int rNToGo = 0; int rTPos = 0; for( i = 0; i < 256; i++ ) { m_inUse[ i ] = false; } for( i = 0; i <= m_last; i++ ) { if( rNToGo == 0 ) { rNToGo = (char)RAND_NUMS[ rTPos ]; rTPos++; if( rTPos == 512 ) { rTPos = 0; } } rNToGo--; m_block[ i + 1 ] ^= ( ( rNToGo == 1 ) ? 1 : 0 ); // handle 16 bit signed numbers m_block[ i + 1 ] &= 0xFF; m_inUse[ m_block[ i + 1 ] ] = true; } } private void sendMTFValues() throws IOException { char[][] len = new char[ N_GROUPS ][ MAX_ALPHA_SIZE ]; int v; int t; int i; int j; int gs; int ge; int bt; int bc; int iter; int nSelectors = 0; int alphaSize; int minLen; int maxLen; int selCtr; int nGroups; alphaSize = m_nInUse + 2; for( t = 0; t < N_GROUPS; t++ ) { for( v = 0; v < alphaSize; v++ ) { len[ t ][ v ] = (char)GREATER_ICOST; } } /* * Decide how many coding tables to use */ if( m_nMTF <= 0 ) { panic(); } if( m_nMTF < 200 ) { nGroups = 2; } else if( m_nMTF < 600 ) { nGroups = 3; } else if( m_nMTF < 1200 ) { nGroups = 4; } else if( m_nMTF < 2400 ) { nGroups = 5; } else { nGroups = 6; } { /* * Generate an initial set of coding tables */ int nPart; int remF; int tFreq; int aFreq; nPart = nGroups; remF = m_nMTF; gs = 0; while( nPart > 0 ) { tFreq = remF / nPart; ge = gs - 1; aFreq = 0; while( aFreq < tFreq && ge < alphaSize - 1 ) { ge++; aFreq += m_mtfFreq[ ge ]; } if( ge > gs && nPart != nGroups && nPart != 1 && ( ( nGroups - nPart ) % 2 == 1 ) ) { aFreq -= m_mtfFreq[ ge ]; ge--; } for( v = 0; v < alphaSize; v++ ) { if( v >= gs && v <= ge ) { len[ nPart - 1 ][ v ] = (char)LESSER_ICOST; } else { len[ nPart - 1 ][ v ] = (char)GREATER_ICOST; } } nPart--; gs = ge + 1; remF -= aFreq; } } int[][] rfreq = new int[ N_GROUPS ][ MAX_ALPHA_SIZE ]; int[] fave = new int[ N_GROUPS ]; short[] cost = new short[ N_GROUPS ]; /* * Iterate up to N_ITERS times to improve the tables. */ for( iter = 0; iter < N_ITERS; iter++ ) { for( t = 0; t < nGroups; t++ ) { fave[ t ] = 0; } for( t = 0; t < nGroups; t++ ) { for( v = 0; v < alphaSize; v++ ) { rfreq[ t ][ v ] = 0; } } nSelectors = 0; gs = 0; while( true ) { /* * Set group start & end marks. */ if( gs >= m_nMTF ) { break; } ge = gs + G_SIZE - 1; if( ge >= m_nMTF ) { ge = m_nMTF - 1; } /* * Calculate the cost of this group as coded * by each of the coding tables. */ for( t = 0; t < nGroups; t++ ) { cost[ t ] = 0; } if( nGroups == 6 ) { short cost0 = 0; short cost1 = 0; short cost2 = 0; short cost3 = 0; short cost4 = 0; short cost5 = 0; for( i = gs; i <= ge; i++ ) { short icv = m_szptr[ i ]; cost0 += len[ 0 ][ icv ]; cost1 += len[ 1 ][ icv ]; cost2 += len[ 2 ][ icv ]; cost3 += len[ 3 ][ icv ]; cost4 += len[ 4 ][ icv ]; cost5 += len[ 5 ][ icv ]; } cost[ 0 ] = cost0; cost[ 1 ] = cost1; cost[ 2 ] = cost2; cost[ 3 ] = cost3; cost[ 4 ] = cost4; cost[ 5 ] = cost5; } else { for( i = gs; i <= ge; i++ ) { short icv = m_szptr[ i ]; for( t = 0; t < nGroups; t++ ) { cost[ t ] += len[ t ][ icv ]; } } } /* * Find the coding table which is best for this group, * and record its identity in the selector table. */ bc = 999999999; bt = -1; for( t = 0; t < nGroups; t++ ) { if( cost[ t ] < bc ) { bc = cost[ t ]; bt = t; } } ; fave[ bt ]++; m_selector[ nSelectors ] = (char)bt; nSelectors++; /* * Increment the symbol frequencies for the selected table. */ for( i = gs; i <= ge; i++ ) { rfreq[ bt ][ m_szptr[ i ] ]++; } gs = ge + 1; } /* * Recompute the tables based on the accumulated frequencies. */ for( t = 0; t < nGroups; t++ ) { hbMakeCodeLengths( len[ t ], rfreq[ t ], alphaSize, 20 ); } } rfreq = null; fave = null; cost = null; if( !( nGroups < 8 ) ) { panic(); } if( !( nSelectors < 32768 && nSelectors <= ( 2 + ( 900000 / G_SIZE ) ) ) ) { panic(); } { /* * Compute MTF values for the selectors. */ char[] pos = new char[ N_GROUPS ]; char ll_i; char tmp2; char tmp; for( i = 0; i < nGroups; i++ ) { pos[ i ] = (char)i; } for( i = 0; i < nSelectors; i++ ) { ll_i = m_selector[ i ]; j = 0; tmp = pos[ j ]; while( ll_i != tmp ) { j++; tmp2 = tmp; tmp = pos[ j ]; pos[ j ] = tmp2; } pos[ 0 ] = tmp; m_selectorMtf[ i ] = (char)j; } } int[][] code = new int[ N_GROUPS ][ MAX_ALPHA_SIZE ]; /* * Assign actual codes for the tables. */ for( t = 0; t < nGroups; t++ ) { minLen = 32; maxLen = 0; for( i = 0; i < alphaSize; i++ ) { if( len[ t ][ i ] > maxLen ) { maxLen = len[ t ][ i ]; } if( len[ t ][ i ] < minLen ) { minLen = len[ t ][ i ]; } } if( maxLen > 20 ) { panic(); } if( minLen < 1 ) { panic(); } hbAssignCodes( code[ t ], len[ t ], minLen, maxLen, alphaSize ); } { /* * Transmit the mapping table. */ boolean[] inUse16 = new boolean[ 16 ]; for( i = 0; i < 16; i++ ) { inUse16[ i ] = false; for( j = 0; j < 16; j++ ) { if( m_inUse[ i * 16 + j ] ) { inUse16[ i ] = true; } } } for( i = 0; i < 16; i++ ) { if( inUse16[ i ] ) { bsW( 1, 1 ); } else { bsW( 1, 0 ); } } for( i = 0; i < 16; i++ ) { if( inUse16[ i ] ) { for( j = 0; j < 16; j++ ) { if( m_inUse[ i * 16 + j ] ) { bsW( 1, 1 ); } else { bsW( 1, 0 ); } } } } } /* * Now the selectors. */ bsW( 3, nGroups ); bsW( 15, nSelectors ); for( i = 0; i < nSelectors; i++ ) { for( j = 0; j < m_selectorMtf[ i ]; j++ ) { bsW( 1, 1 ); } bsW( 1, 0 ); } for( t = 0; t < nGroups; t++ ) { int curr = len[ t ][ 0 ]; bsW( 5, curr ); for( i = 0; i < alphaSize; i++ ) { while( curr < len[ t ][ i ] ) { bsW( 2, 2 ); curr++; /* * 10 */ } while( curr > len[ t ][ i ] ) { bsW( 2, 3 ); curr--; /* * 11 */ } bsW( 1, 0 ); } } /* * And finally, the block data proper */ selCtr = 0; gs = 0; while( true ) { if( gs >= m_nMTF ) { break; } ge = gs + G_SIZE - 1; if( ge >= m_nMTF ) { ge = m_nMTF - 1; } for( i = gs; i <= ge; i++ ) { bsW( len[ m_selector[ selCtr ] ][ m_szptr[ i ] ], code[ m_selector[ selCtr ] ][ m_szptr[ i ] ] ); } gs = ge + 1; selCtr++; } if( !( selCtr == nSelectors ) ) { panic(); } } private void simpleSort( int lo, int hi, int d ) { int i; int j; int h; int bigN; int hp; int v; bigN = hi - lo + 1; if( bigN < 2 ) { return; } hp = 0; while( m_incs[ hp ] < bigN ) { hp++; } hp--; for( ; hp >= 0; hp-- ) { h = m_incs[ hp ]; i = lo + h; while( true ) { /* * copy 1 */ if( i > hi ) { break; } v = m_zptr[ i ]; j = i; while( fullGtU( m_zptr[ j - h ] + d, v + d ) ) { m_zptr[ j ] = m_zptr[ j - h ]; j = j - h; if( j <= ( lo + h - 1 ) ) { break; } } m_zptr[ j ] = v; i++; /* * copy 2 */ if( i > hi ) { break; } v = m_zptr[ i ]; j = i; while( fullGtU( m_zptr[ j - h ] + d, v + d ) ) { m_zptr[ j ] = m_zptr[ j - h ]; j = j - h; if( j <= ( lo + h - 1 ) ) { break; } } m_zptr[ j ] = v; i++; /* * copy 3 */ if( i > hi ) { break; } v = m_zptr[ i ]; j = i; while( fullGtU( m_zptr[ j - h ] + d, v + d ) ) { m_zptr[ j ] = m_zptr[ j - h ]; j = j - h; if( j <= ( lo + h - 1 ) ) { break; } } m_zptr[ j ] = v; i++; if( m_workDone > m_workLimit && m_firstAttempt ) { return; } } } } private void vswap( int p1, int p2, int n ) { int temp = 0; while( n > 0 ) { temp = m_zptr[ p1 ]; m_zptr[ p1 ] = m_zptr[ p2 ]; m_zptr[ p2 ] = temp; p1++; p2++; n--; } } private void writeRun() throws IOException { if( m_last < m_allowableBlockSize ) { m_inUse[ m_currentChar ] = true; for( int i = 0; i < m_runLength; i++ ) { m_crc.updateCRC( (char)m_currentChar ); } switch( m_runLength ) { case 1: m_last++; m_block[ m_last + 1 ] = (char)m_currentChar; break; case 2: m_last++; m_block[ m_last + 1 ] = (char)m_currentChar; m_last++; m_block[ m_last + 1 ] = (char)m_currentChar; break; case 3: m_last++; m_block[ m_last + 1 ] = (char)m_currentChar; m_last++; m_block[ m_last + 1 ] = (char)m_currentChar; m_last++; m_block[ m_last + 1 ] = (char)m_currentChar; break; default: m_inUse[ m_runLength - 4 ] = true; m_last++; m_block[ m_last + 1 ] = (char)m_currentChar; m_last++; m_block[ m_last + 1 ] = (char)m_currentChar; m_last++; m_block[ m_last + 1 ] = (char)m_currentChar; m_last++; m_block[ m_last + 1 ] = (char)m_currentChar; m_last++; m_block[ m_last + 1 ] = (char)( m_runLength - 4 ); break; } } else { endBlock(); initBlock(); writeRun(); } } private static class StackElem { int m_dd; int m_hh; int m_ll; } } 1.1 jakarta-commons-sandbox/io/src/java/org/apache/commons/io/compress/bzip2/CRC.java Index: CRC.java =================================================================== /* * Copyright (C) The Apache Software Foundation. All rights reserved. * * This software is published under the terms of the Apache Software License * version 1.1, a copy of which has been included with this distribution in * the LICENSE.txt file. */ package org.apache.commons.io.compress.bzip2; /** * A simple class the hold and calculate the CRC for sanity checking of the * data. * * @author Keiron Liddle */ class CRC { private static int[] CRC32_TABLE = new int[] { 0x00000000, 0x04c11db7, 0x09823b6e, 0x0d4326d9, 0x130476dc, 0x17c56b6b, 0x1a864db2, 0x1e475005, 0x2608edb8, 0x22c9f00f, 0x2f8ad6d6, 0x2b4bcb61, 0x350c9b64, 0x31cd86d3, 0x3c8ea00a, 0x384fbdbd, 0x4c11db70, 0x48d0c6c7, 0x4593e01e, 0x4152fda9, 0x5f15adac, 0x5bd4b01b, 0x569796c2, 0x52568b75, 0x6a1936c8, 0x6ed82b7f, 0x639b0da6, 0x675a1011, 0x791d4014, 0x7ddc5da3, 0x709f7b7a, 0x745e66cd, 0x9823b6e0, 0x9ce2ab57, 0x91a18d8e, 0x95609039, 0x8b27c03c, 0x8fe6dd8b, 0x82a5fb52, 0x8664e6e5, 0xbe2b5b58, 0xbaea46ef, 0xb7a96036, 0xb3687d81, 0xad2f2d84, 0xa9ee3033, 0xa4ad16ea, 0xa06c0b5d, 0xd4326d90, 0xd0f37027, 0xddb056fe, 0xd9714b49, 0xc7361b4c, 0xc3f706fb, 0xceb42022, 0xca753d95, 0xf23a8028, 0xf6fb9d9f, 0xfbb8bb46, 0xff79a6f1, 0xe13ef6f4, 0xe5ffeb43, 0xe8bccd9a, 0xec7dd02d, 0x34867077, 0x30476dc0, 0x3d044b19, 0x39c556ae, 0x278206ab, 0x23431b1c, 0x2e003dc5, 0x2ac12072, 0x128e9dcf, 0x164f8078, 0x1b0ca6a1, 0x1fcdbb16, 0x018aeb13, 0x054bf6a4, 0x0808d07d, 0x0cc9cdca, 0x7897ab07, 0x7c56b6b0, 0x71159069, 0x75d48dde, 0x6b93dddb, 0x6f52c06c, 0x6211e6b5, 0x66d0fb02, 0x5e9f46bf, 0x5a5e5b08, 0x571d7dd1, 0x53dc6066, 0x4d9b3063, 0x495a2dd4, 0x44190b0d, 0x40d816ba, 0xaca5c697, 0xa864db20, 0xa527fdf9, 0xa1e6e04e, 0xbfa1b04b, 0xbb60adfc, 0xb6238b25, 0xb2e29692, 0x8aad2b2f, 0x8e6c3698, 0x832f1041, 0x87ee0df6, 0x99a95df3, 0x9d684044, 0x902b669d, 0x94ea7b2a, 0xe0b41de7, 0xe4750050, 0xe9362689, 0xedf73b3e, 0xf3b06b3b, 0xf771768c, 0xfa325055, 0xfef34de2, 0xc6bcf05f, 0xc27dede8, 0xcf3ecb31, 0xcbffd686, 0xd5b88683, 0xd1799b34, 0xdc3abded, 0xd8fba05a, 0x690ce0ee, 0x6dcdfd59, 0x608edb80, 0x644fc637, 0x7a089632, 0x7ec98b85, 0x738aad5c, 0x774bb0eb, 0x4f040d56, 0x4bc510e1, 0x46863638, 0x42472b8f, 0x5c007b8a, 0x58c1663d, 0x558240e4, 0x51435d53, 0x251d3b9e, 0x21dc2629, 0x2c9f00f0, 0x285e1d47, 0x36194d42, 0x32d850f5, 0x3f9b762c, 0x3b5a6b9b, 0x0315d626, 0x07d4cb91, 0x0a97ed48, 0x0e56f0ff, 0x1011a0fa, 0x14d0bd4d, 0x19939b94, 0x1d528623, 0xf12f560e, 0xf5ee4bb9, 0xf8ad6d60, 0xfc6c70d7, 0xe22b20d2, 0xe6ea3d65, 0xeba91bbc, 0xef68060b, 0xd727bbb6, 0xd3e6a601, 0xdea580d8, 0xda649d6f, 0xc423cd6a, 0xc0e2d0dd, 0xcda1f604, 0xc960ebb3, 0xbd3e8d7e, 0xb9ff90c9, 0xb4bcb610, 0xb07daba7, 0xae3afba2, 0xaafbe615, 0xa7b8c0cc, 0xa379dd7b, 0x9b3660c6, 0x9ff77d71, 0x92b45ba8, 0x9675461f, 0x8832161a, 0x8cf30bad, 0x81b02d74, 0x857130c3, 0x5d8a9099, 0x594b8d2e, 0x5408abf7, 0x50c9b640, 0x4e8ee645, 0x4a4ffbf2, 0x470cdd2b, 0x43cdc09c, 0x7b827d21, 0x7f436096, 0x7200464f, 0x76c15bf8, 0x68860bfd, 0x6c47164a, 0x61043093, 0x65c52d24, 0x119b4be9, 0x155a565e, 0x18197087, 0x1cd86d30, 0x029f3d35, 0x065e2082, 0x0b1d065b, 0x0fdc1bec, 0x3793a651, 0x3352bbe6, 0x3e119d3f, 0x3ad08088, 0x2497d08d, 0x2056cd3a, 0x2d15ebe3, 0x29d4f654, 0xc5a92679, 0xc1683bce, 0xcc2b1d17, 0xc8ea00a0, 0xd6ad50a5, 0xd26c4d12, 0xdf2f6bcb, 0xdbee767c, 0xe3a1cbc1, 0xe760d676, 0xea23f0af, 0xeee2ed18, 0xf0a5bd1d, 0xf464a0aa, 0xf9278673, 0xfde69bc4, 0x89b8fd09, 0x8d79e0be, 0x803ac667, 0x84fbdbd0, 0x9abc8bd5, 0x9e7d9662, 0x933eb0bb, 0x97ffad0c, 0xafb010b1, 0xab710d06, 0xa6322bdf, 0xa2f33668, 0xbcb4666d, 0xb8757bda, 0xb5365d03, 0xb1f740b4 }; private int m_globalCrc; protected CRC() { initialiseCRC(); } int getFinalCRC() { return ~m_globalCrc; } void initialiseCRC() { m_globalCrc = 0xffffffff; } void updateCRC( final int inCh ) { int temp = ( m_globalCrc >> 24 ) ^ inCh; if( temp < 0 ) { temp = 256 + temp; } m_globalCrc = ( m_globalCrc << 8 ) ^ CRC32_TABLE[ temp ]; } } 1.1 jakarta-commons-sandbox/io/src/java/org/apache/commons/io/compress/bzip2/package.html Index: package.html ===================================================================

Streams that compress and decompress the BZip2 format (without the file header chars). Originally derived from code in the ant project.

-- To unsubscribe, e-mail: For additional commands, e-mail: