commons-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From bode...@apache.org
Subject commons-compress git commit: allow LZ77 algorithm to be tuned
Date Sun, 26 Mar 2017 16:00:41 GMT
Repository: commons-compress
Updated Branches:
  refs/heads/master 5170392a5 -> 043f42b65


allow LZ77 algorithm to be tuned

niceLen and maxCandidates are used in the same way as zlib's deflate
algorithm. The configured values roughly correspond to zlib's
compression levels 5 (tunedForSpeed), 7 (default) and
9 (tunedForCompressionRatio).


Project: http://git-wip-us.apache.org/repos/asf/commons-compress/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-compress/commit/043f42b6
Tree: http://git-wip-us.apache.org/repos/asf/commons-compress/tree/043f42b6
Diff: http://git-wip-us.apache.org/repos/asf/commons-compress/diff/043f42b6

Branch: refs/heads/master
Commit: 043f42b65d9c67fd064d2ba638b246389ad3a26d
Parents: 5170392
Author: Stefan Bodewig <bodewig@apache.org>
Authored: Sun Mar 26 17:57:14 2017 +0200
Committer: Stefan Bodewig <bodewig@apache.org>
Committed: Sun Mar 26 17:57:14 2017 +0200

----------------------------------------------------------------------
 .../compressors/lz77support/LZ77Compressor.java |  6 +-
 .../compressors/lz77support/Parameters.java     | 75 +++++++++++++++++++-
 .../lz4/BlockLZ4CompressorRoundtripTest.java    | 29 +++++++-
 .../lz77support/LZ77CompressorTest.java         |  2 +-
 .../compressors/snappy/SnappyRoundtripTest.java | 20 ++++++
 5 files changed, 125 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-compress/blob/043f42b6/src/main/java/org/apache/commons/compress/compressors/lz77support/LZ77Compressor.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/compress/compressors/lz77support/LZ77Compressor.java
b/src/main/java/org/apache/commons/compress/compressors/lz77support/LZ77Compressor.java
index 487dee2..c284f4c 100644
--- a/src/main/java/org/apache/commons/compress/compressors/lz77support/LZ77Compressor.java
+++ b/src/main/java/org/apache/commons/compress/compressors/lz77support/LZ77Compressor.java
@@ -479,7 +479,9 @@ public class LZ77Compressor {
         int longestMatchLength = minLength - 1;
         final int maxPossibleLength = Math.min(params.getMaxBackReferenceLength(), lookahead);
         final int minIndex = Math.max(0, currentPosition - params.getMaxOffset());
-        while (matchHead >= minIndex) {
+        final int niceBackReferenceLength = Math.min(maxPossibleLength, params.getNiceBackReferenceLength());
+        final int maxCandidates = params.getMaxCandidates();
+        for (int candidates = 0; candidates < maxCandidates && matchHead >=
minIndex; candidates++) {
             int currentLength = 0;
             for (int i = 0; i < maxPossibleLength; i++) {
                 if (window[matchHead + i] != window[currentPosition + i]) {
@@ -490,7 +492,7 @@ public class LZ77Compressor {
             if (currentLength > longestMatchLength) {
                 longestMatchLength = currentLength;
                 matchStart = matchHead;
-                if (currentLength == maxPossibleLength) {
+                if (currentLength >= niceBackReferenceLength) {
                     // no need to search any further
                     break;
                 }

http://git-wip-us.apache.org/repos/asf/commons-compress/blob/043f42b6/src/main/java/org/apache/commons/compress/compressors/lz77support/Parameters.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/compress/compressors/lz77support/Parameters.java
b/src/main/java/org/apache/commons/compress/compressors/lz77support/Parameters.java
index 3842dd9..e4e9e47 100644
--- a/src/main/java/org/apache/commons/compress/compressors/lz77support/Parameters.java
+++ b/src/main/java/org/apache/commons/compress/compressors/lz77support/Parameters.java
@@ -52,6 +52,7 @@ public final class Parameters {
     public static class Builder {
         private final int windowSize;
         private int minBackReferenceLength, maxBackReferenceLength, maxOffset, maxLiteralLength;
+        private Integer niceBackReferenceLength, maxCandidates;
 
         private Builder(int windowSize) {
             if (windowSize < 2 || !isPowerOfTwo(windowSize)) {
@@ -150,24 +151,78 @@ public final class Parameters {
         }
 
         /**
+         * Sets the "nice length" of a back-reference.
+         *
+         * <p>When a back-references if this size has been found, stop searching for
longer back-references.</p>
+         *
+         * <p>This settings can be used to tune the tradeoff between compression speed
and compression ratio.</p>
+         */
+        public Builder withNiceBackReferenceLength(int niceLen) {
+            niceBackReferenceLength = niceLen;
+            return this;
+        }
+
+        /**
+         * Sets the maximum number of back-reference candidates that should be consulted.
+         *
+         * <p>This settings can be used to tune the tradeoff between compression speed
and compression ratio.</p>
+         */
+        public Builder withMaxNumberOfCandidates(int maxCandidates) {
+            this.maxCandidates = maxCandidates;
+            return this;
+        }
+
+        /**
+         * Changes the default setting for "nice back-reference length" and "maximum number
of candidates" for improved
+         * compression speed at the cost of compression ratio.
+         *
+         * <p>Use this method after configuring "maximum back-reference length".</p>
+         */
+        public Builder tunedForSpeed() {
+            niceBackReferenceLength = Math.max(minBackReferenceLength, maxBackReferenceLength
/ 8);
+            maxCandidates = Math.max(32, windowSize / 1024);
+            return this;
+        }
+
+        /**
+         * Changes the default setting for "nice back-reference length" and "maximum number
of candidates" for improved
+         * compression ratio at the cost of compression speed.
+         *
+         * <p>Use this method after configuring "maximum back-reference length".</p>
+         */
+        public Builder tunedForCompressionRatio() {
+            niceBackReferenceLength = maxBackReferenceLength;
+            maxCandidates = Math.max(32, windowSize / 16);
+            return this;
+        }
+
+        /**
          * Creates the {@link Parameters} instance.
          * @return the configured {@link Parameters} instance.
          */
         public Parameters build() {
+            // default settings tuned for a compromise of good compression and acceptable
speed
+            int niceLen = niceBackReferenceLength != null ? niceBackReferenceLength
+                : Math.max(minBackReferenceLength, maxBackReferenceLength / 2);
+            int candidates = maxCandidates != null ? maxCandidates : Math.max(256, windowSize
/ 128);
+
             return new Parameters(windowSize, minBackReferenceLength, maxBackReferenceLength,
-                maxOffset, maxLiteralLength);
+                maxOffset, maxLiteralLength, niceLen, candidates);
         }
     }
 
-    private final int windowSize, minBackReferenceLength, maxBackReferenceLength, maxOffset,
maxLiteralLength;
+    private final int windowSize, minBackReferenceLength, maxBackReferenceLength, maxOffset,
maxLiteralLength,
+        niceBackReferenceLength, maxCandidates;
 
     private Parameters(int windowSize, int minBackReferenceLength, int maxBackReferenceLength,
int maxOffset,
-        int maxLiteralLength) {
+        int maxLiteralLength, int niceBackReferenceLength, int maxCandidates) {
         this.windowSize = windowSize;
         this.minBackReferenceLength = minBackReferenceLength;
         this.maxBackReferenceLength = maxBackReferenceLength;
         this.maxOffset = maxOffset;
         this.maxLiteralLength = maxLiteralLength;
+        this.niceBackReferenceLength = niceBackReferenceLength;
+        this.maxCandidates = maxCandidates;
     }
 
     /**
@@ -207,6 +262,20 @@ public final class Parameters {
         return maxLiteralLength;
     }
 
+    /**
+     * Gets the length of a back-reference that is considered nice enough to stop searching
for longer ones.
+     */
+    public int getNiceBackReferenceLength() {
+        return niceBackReferenceLength;
+    }
+
+    /**
+     * Gets the maximum number of back-reference candidates to consider.
+     */
+    public int getMaxCandidates() {
+        return maxCandidates;
+    }
+
     private static final boolean isPowerOfTwo(int x) {
         // pre-condition: x > 0
         return (x & (x - 1)) == 0;

http://git-wip-us.apache.org/repos/asf/commons-compress/blob/043f42b6/src/test/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorRoundtripTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorRoundtripTest.java
b/src/test/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorRoundtripTest.java
index 780d4ce..da4941d 100644
--- a/src/test/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorRoundtripTest.java
+++ b/src/test/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorRoundtripTest.java
@@ -22,22 +22,48 @@ import java.io.File;
 import java.io.FileInputStream;
 import java.io.FileOutputStream;
 import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collection;
 import org.apache.commons.compress.AbstractTestCase;
+import org.apache.commons.compress.compressors.lz77support.Parameters;
 import org.apache.commons.compress.utils.IOUtils;
 import org.junit.Assert;
 import org.junit.Test;
+import org.junit.runners.Parameterized;
+import org.junit.runner.RunWith;
 
+@RunWith(Parameterized.class)
 public final class BlockLZ4CompressorRoundtripTest extends AbstractTestCase {
 
+    @org.junit.runners.Parameterized.Parameters(name = "using {0}")
+    public static Collection<Object[]> factory() {
+        return Arrays.asList(new Object[][] {
+                new Object[] { "default", BlockLZ4CompressorOutputStream.createParameterBuilder().build()
},
+                new Object[] { "tuned for speed",
+                    BlockLZ4CompressorOutputStream.createParameterBuilder().tunedForSpeed().build()
},
+                new Object[] { "tuned for compression ratio",
+                    BlockLZ4CompressorOutputStream.createParameterBuilder().tunedForCompressionRatio().build()
}
+            });
+    }
+
+    private final String config;
+    private final Parameters params;
+
+    public BlockLZ4CompressorRoundtripTest(String config, Parameters params) {
+        this.config = config;
+        this.params = params;
+    }
+
     private void roundTripTest(String testFile) throws IOException {
         File input = getFile(testFile);
         long start = System.currentTimeMillis();
         final File outputSz = new File(dir, input.getName() + ".block.lz4");
         try (FileInputStream is = new FileInputStream(input);
              FileOutputStream os = new FileOutputStream(outputSz);
-             BlockLZ4CompressorOutputStream los = new BlockLZ4CompressorOutputStream(os))
{
+             BlockLZ4CompressorOutputStream los = new BlockLZ4CompressorOutputStream(os,
params)) {
             IOUtils.copy(is, los);
         }
+        System.err.println("Configuration: " + config);
         System.err.println(input.getName() + " written, uncompressed bytes: " + input.length()
             + ", compressed bytes: " + outputSz.length() + " after " + (System.currentTimeMillis()
- start) + "ms");
         start = System.currentTimeMillis();
@@ -62,6 +88,7 @@ public final class BlockLZ4CompressorRoundtripTest extends AbstractTestCase
{
         roundTripTest("lorem-ipsum.txt.gz");
     }
 
+    // yields no compression at all
     @Test
     public void biggerFileRoundtrip() throws IOException {
         roundTripTest("COMPRESS-256.7z");

http://git-wip-us.apache.org/repos/asf/commons-compress/blob/043f42b6/src/test/java/org/apache/commons/compress/compressors/lz77support/LZ77CompressorTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/compress/compressors/lz77support/LZ77CompressorTest.java
b/src/test/java/org/apache/commons/compress/compressors/lz77support/LZ77CompressorTest.java
index 9ac0c14..3163295 100644
--- a/src/test/java/org/apache/commons/compress/compressors/lz77support/LZ77CompressorTest.java
+++ b/src/test/java/org/apache/commons/compress/compressors/lz77support/LZ77CompressorTest.java
@@ -149,7 +149,6 @@ public class LZ77CompressorTest {
         List<LZ77Compressor.Block> blocks = compress(newParameters(8), BLA);
         assertSize(6, blocks);
         assertLiteralBlock("Blah b", blocks.get(0));
-        assertEquals(LZ77Compressor.BackReference.class, blocks.get(1).getClass());
         assertBackReference(5, 7, blocks.get(1));
         assertBackReference(5, 3, blocks.get(2));
         assertBackReference(5, 7, blocks.get(3));
@@ -347,6 +346,7 @@ public class LZ77CompressorTest {
             .withMaxBackReferenceLength(maxBackReferenceLength)
             .withMaxOffset(maxOffset)
             .withMaxLiteralLength(maxLiteralLength)
+            .tunedForCompressionRatio()
             .build();
     }
 }

http://git-wip-us.apache.org/repos/asf/commons-compress/blob/043f42b6/src/test/java/org/apache/commons/compress/compressors/snappy/SnappyRoundtripTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/compress/compressors/snappy/SnappyRoundtripTest.java
b/src/test/java/org/apache/commons/compress/compressors/snappy/SnappyRoundtripTest.java
index 427c8d0..8b96309 100644
--- a/src/test/java/org/apache/commons/compress/compressors/snappy/SnappyRoundtripTest.java
+++ b/src/test/java/org/apache/commons/compress/compressors/snappy/SnappyRoundtripTest.java
@@ -61,15 +61,35 @@ public final class SnappyRoundtripTest extends AbstractTestCase {
     // should yield decent compression
     @Test
     public void blaTarRoundtrip() throws IOException {
+        System.err.println("Configuration: default");
         roundTripTest("bla.tar");
     }
 
+    @Test
+    public void blaTarRoundtripTunedForSpeed() throws IOException {
+        System.err.println("Configuration: tuned for speed");
+        roundTripTest(getFile("bla.tar"),
+            SnappyCompressorOutputStream.createParameterBuilder(SnappyCompressorInputStream.DEFAULT_BLOCK_SIZE)
+                .tunedForSpeed()
+                .build());
+    }
+
+    @Test
+    public void blaTarRoundtripTunedForCompressionRatio() throws IOException {
+        System.err.println("Configuration: tuned for compression ratio");
+        roundTripTest(getFile("bla.tar"),
+            SnappyCompressorOutputStream.createParameterBuilder(SnappyCompressorInputStream.DEFAULT_BLOCK_SIZE)
+                .tunedForCompressionRatio()
+                .build());
+    }
+
     // yields no compression at all
     @Test
     public void gzippedLoremIpsumRoundtrip() throws IOException {
         roundTripTest("lorem-ipsum.txt.gz");
     }
 
+    // yields no compression at all
     @Test
     public void biggerFileRoundtrip() throws IOException {
         roundTripTest("COMPRESS-256.7z");


Mime
View raw message