commons-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From bode...@apache.org
Subject [2/2] commons-compress git commit: COMPRESS-271 read-support for LZ4 block format
Date Mon, 16 Jan 2017 18:57:01 GMT
COMPRESS-271 read-support for LZ4 block format


Project: http://git-wip-us.apache.org/repos/asf/commons-compress/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-compress/commit/56e82da9
Tree: http://git-wip-us.apache.org/repos/asf/commons-compress/tree/56e82da9
Diff: http://git-wip-us.apache.org/repos/asf/commons-compress/diff/56e82da9

Branch: refs/heads/master
Commit: 56e82da90f1064c23dd630cf0066231567da3ed6
Parents: 6871295
Author: Stefan Bodewig <bodewig@apache.org>
Authored: Mon Jan 16 19:56:02 2017 +0100
Committer: Stefan Bodewig <bodewig@apache.org>
Committed: Mon Jan 16 19:56:02 2017 +0100

----------------------------------------------------------------------
 .../compressors/CompressorStreamFactory.java    |  18 +-
 .../lz4/BlockLZ4CompressorInputStream.java      | 295 +++++++++++++++++++
 .../compress/compressors/lz4/package.html       |  37 +++
 src/site/xdoc/limitations.xml                   |   8 +
 .../lz4/BlockLZ4CompressorInputStreamTest.java  |  53 ++++
 5 files changed, 409 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-compress/blob/56e82da9/src/main/java/org/apache/commons/compress/compressors/CompressorStreamFactory.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/compress/compressors/CompressorStreamFactory.java
b/src/main/java/org/apache/commons/compress/compressors/CompressorStreamFactory.java
index d28c9b8..85709af 100644
--- a/src/main/java/org/apache/commons/compress/compressors/CompressorStreamFactory.java
+++ b/src/main/java/org/apache/commons/compress/compressors/CompressorStreamFactory.java
@@ -37,6 +37,7 @@ import org.apache.commons.compress.compressors.deflate.DeflateCompressorInputStr
 import org.apache.commons.compress.compressors.deflate.DeflateCompressorOutputStream;
 import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
 import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream;
+import org.apache.commons.compress.compressors.lz4.BlockLZ4CompressorInputStream;
 import org.apache.commons.compress.compressors.lzma.LZMACompressorInputStream;
 import org.apache.commons.compress.compressors.lzma.LZMACompressorOutputStream;
 import org.apache.commons.compress.compressors.lzma.LZMAUtils;
@@ -159,6 +160,14 @@ public class CompressorStreamFactory implements CompressorStreamProvider
{
     public static final String DEFLATE = "deflate";
 
     /**
+     * Constant (value {@value}) used to identify the block LZ4
+     * compression method. Not supported as an output stream type.
+     *
+     * @since 1.14
+     */
+    public static final String LZ4_BLOCK = "lz4-block";
+
+    /**
      * Constructs a new sorted map from input stream provider names to provider
      * objects.
      *
@@ -420,8 +429,9 @@ public class CompressorStreamFactory implements CompressorStreamProvider
{
      * @param name
      *            of the compressor, i.e. {@value #GZIP}, {@value #BZIP2},
      *            {@value #XZ}, {@value #LZMA}, {@value #PACK200},
-     *            {@value #SNAPPY_RAW}, {@value #SNAPPY_FRAMED}, {@value #Z} or
-     *            {@value #DEFLATE}
+     *            {@value #SNAPPY_RAW}, {@value #SNAPPY_FRAMED}, {@value #Z},
+     *            {@value #LZ4_BLOCK}
+     *            or {@value #DEFLATE}
      * @param in
      *            the input stream
      * @return compressor input stream
@@ -480,6 +490,10 @@ public class CompressorStreamFactory implements CompressorStreamProvider
{
                 return new DeflateCompressorInputStream(in);
             }
 
+            if (LZ4_BLOCK.equalsIgnoreCase(name)) {
+                return new BlockLZ4CompressorInputStream(in);
+            }
+
         } catch (final IOException e) {
             throw new CompressorException("Could not create CompressorInputStream.", e);
         }

http://git-wip-us.apache.org/repos/asf/commons-compress/blob/56e82da9/src/main/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorInputStream.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorInputStream.java
b/src/main/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorInputStream.java
new file mode 100644
index 0000000..635dec9
--- /dev/null
+++ b/src/main/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorInputStream.java
@@ -0,0 +1,295 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.commons.compress.compressors.lz4;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.commons.compress.compressors.CompressorInputStream;
+import org.apache.commons.compress.utils.ByteUtils;
+import org.apache.commons.compress.utils.IOUtils;
+
+/**
+ * CompressorInputStream for the LZ4 block format.
+ *
+ * <p>Decompressed bytes are staged in an internal buffer of three
+ * times the window size (64 KiB) so that copies can refer back to
+ * data that has already been handed out to the caller. Once the read
+ * position has moved past two window sizes the buffer content is
+ * slid towards the start by one window size.</p>
+ *
+ * @see <a href="http://lz4.github.io/lz4/lz4_Block_format.html">LZ4 Block Format Description</a>
+ * @since 1.14
+ */
+public class BlockLZ4CompressorInputStream extends CompressorInputStream {
+
+    private static final int WINDOW_SIZE = 1 << 16;
+    private static final int SIZE_BITS = 4;
+    /** Mask selecting the low nibble of a block's first byte (the copy size). */
+    private static final int COPY_SIZE_MASK = (1 << SIZE_BITS) - 1;
+    /** Mask selecting the high nibble of a block's first byte (the literal size). */
+    private static final int LITERAL_SIZE_MASK = COPY_SIZE_MASK << SIZE_BITS;
+
+    /** Buffer decompressed bytes are written to so copies can refer back to them. */
+    private final byte[] buf = new byte[3 * WINDOW_SIZE];
+
+    /** Index one past the last byte that has been written to the buffer. */
+    private int writeIndex;
+
+    /** Index of the next byte of the buffer to hand out to the caller. */
+    private int readIndex;
+
+    /** The underlying stream to read compressed data from. */
+    private final InputStream in;
+
+    /** Number of bytes still to be produced by the current literal or copy. */
+    private long bytesRemaining;
+
+    /** Copy-size part of the block starting byte. */
+    private int nextCopySize;
+
+    /** Offset of the current copy, counted backwards from {@link #writeIndex}. */
+    private int copyOffset;
+
+    /** Current state of the stream. */
+    private State state = State.NO_BLOCK;
+
+    /** Number of uncompressed bytes handed out so far. */
+    private int size = 0;
+
+    // used in no-arg read method
+    private final byte[] oneByte = new byte[1];
+
+    /** Adapter that lets ByteUtils pull single bytes from the compressed stream. */
+    private final ByteUtils.ByteSupplier supplier = new ByteUtils.ByteSupplier() {
+        @Override
+        public int getAsByte() throws IOException {
+            return readOneByte();
+        }
+    };
+
+    /**
+     * Creates a new LZ4 input stream.
+     *
+     * @param is
+     *            An InputStream to read compressed data from
+     *
+     * @throws IOException if reading fails
+     */
+    public BlockLZ4CompressorInputStream(final InputStream is) throws IOException {
+        this.in = is;
+        writeIndex = 0;
+        readIndex = 0;
+        bytesRemaining = 0;
+    }
+
+    /** {@inheritDoc} */
+    @Override
+    public int read() throws IOException {
+        return read(oneByte, 0, 1) == -1 ? -1 : oneByte[0] & 0xFF;
+    }
+
+    /** {@inheritDoc} */
+    @Override
+    public void close() throws IOException {
+        in.close();
+    }
+
+    /**
+     * Returns the number of decompressed bytes that are buffered but
+     * have not been handed out yet.
+     */
+    @Override
+    public int available() {
+        return writeIndex - readIndex;
+    }
+
+    /**
+     * Reads up to {@code len} decompressed bytes into {@code b}.
+     *
+     * <p>A single call never crosses the boundary between a literal
+     * and a copy, so fewer than {@code len} bytes may be returned
+     * even if more data follows.</p>
+     *
+     * @param b the buffer to read into
+     * @param off the offset inside {@code b} to start writing at
+     * @param len the maximum number of bytes to read
+     * @return the number of bytes read, 0 if {@code len} is 0, or -1
+     *         once the end of the stream has been reached
+     * @throws IOException if the underlying stream fails, ends
+     *         prematurely or contains an invalid copy offset
+     */
+    @Override
+    public int read(final byte[] b, final int off, final int len) throws IOException {
+        if (len == 0) {
+            // don't consume any input for an empty request
+            return 0;
+        }
+        switch (state) {
+        case EOF:
+            return -1;
+        case NO_BLOCK:
+            readSizes();
+            /*FALLTHROUGH*/
+        case IN_LITERAL:
+            final int litLen = readLiteral(b, off, len);
+            if (bytesRemaining == 0) {
+                state = State.LOOKING_FOR_COPY;
+            }
+            // an empty literal yields no bytes; continue with the
+            // copy instead of returning 0 for a non-empty request
+            return litLen > 0 ? litLen : read(b, off, len);
+        case LOOKING_FOR_COPY:
+            if (!initializeCopy()) {
+                state = State.EOF;
+                return -1;
+            }
+            /*FALLTHROUGH*/
+        case IN_COPY:
+            final int copyLen = readCopy(b, off, len);
+            if (bytesRemaining == 0) {
+                state = State.NO_BLOCK;
+            }
+            return copyLen > 0 ? copyLen : read(b, off, len);
+        default:
+            throw new IOException("Unknown stream state " + state);
+        }
+    }
+
+    /**
+     * Get the uncompressed size of the stream
+     *
+     * @return the number of uncompressed bytes read from this stream
+     *         so far
+     */
+    public int getSize() {
+        return size;
+    }
+
+    /**
+     * Reads the first byte of a block and the optional extra length
+     * bytes of its literal, then switches to the IN_LITERAL state.
+     */
+    private void readSizes() throws IOException {
+        final int nextBlock = readOneByte();
+        if (nextBlock == -1) {
+            throw new IOException("Premature end of stream while looking for next block");
+        }
+        nextCopySize = nextBlock & COPY_SIZE_MASK;
+        long literalSizePart = (nextBlock & LITERAL_SIZE_MASK) >> SIZE_BITS;
+        if (literalSizePart == COPY_SIZE_MASK) {
+            // a nibble of 15 means the length continues in the following bytes
+            literalSizePart += readSizeBytes();
+        }
+        bytesRemaining = literalSizePart;
+        state = State.IN_LITERAL;
+    }
+
+    /**
+     * Sums up length continuation bytes; the sequence ends with the
+     * first byte that is not 255.
+     */
+    private long readSizeBytes() throws IOException {
+        long accum = 0;
+        int nextByte;
+        do {
+            nextByte = readOneByte();
+            if (nextByte == -1) {
+                throw new IOException("Premature end of stream while parsing length");
+            }
+            accum += nextByte;
+        } while (nextByte == 255);
+        return accum;
+    }
+
+    /** Buffers as much of the current literal as needed and hands it out. */
+    private int readLiteral(final byte[] b, final int off, final int len) throws IOException {
+        final int avail = available();
+        if (len > avail) {
+            tryToReadLiteral(len - avail);
+        }
+        return readFromBuffer(b, off, len);
+    }
+
+    /**
+     * Reads up to {@code bytesToRead} literal bytes from the
+     * underlying stream into the buffer, limited by the rest of the
+     * literal and the free space at the end of the buffer.
+     */
+    private void tryToReadLiteral(final int bytesToRead) throws IOException {
+        final int reallyTryToRead = (int) Math.min(Math.min(bytesToRead, bytesRemaining),
+                                                   buf.length - writeIndex);
+        final int bytesRead = reallyTryToRead > 0
+            ? IOUtils.readFully(in, buf, writeIndex, reallyTryToRead)
+            : 0 /* happens for bytesRemaining == 0 */;
+        count(bytesRead);
+        if (reallyTryToRead != bytesRead) {
+            throw new IOException("Premature end of stream reading literal");
+        }
+        writeIndex += reallyTryToRead;
+        bytesRemaining -= reallyTryToRead;
+    }
+
+    /**
+     * Copies buffered bytes to the caller's array and slides the
+     * buffer once the read position has passed two window sizes.
+     */
+    private int readFromBuffer(final byte[] b, final int off, final int len) throws IOException {
+        final int readable = Math.min(len, available());
+        if (readable > 0) {
+            System.arraycopy(buf, readIndex, b, off, readable);
+            readIndex += readable;
+            if (readIndex > 2 * WINDOW_SIZE) {
+                slideBuffer();
+            }
+        }
+        size += readable;
+        return readable;
+    }
+
+    /**
+     * Drops the oldest window from the buffer by moving everything
+     * behind it to the start.
+     */
+    private void slideBuffer() {
+        // Everything up to writeIndex must be moved, not just a single
+        // window: this is only called when readIndex (and therefore
+        // writeIndex) is beyond 2 * WINDOW_SIZE and the most recently
+        // written bytes are exactly the ones later copies refer to.
+        System.arraycopy(buf, WINDOW_SIZE, buf, 0, writeIndex - WINDOW_SIZE);
+        writeIndex -= WINDOW_SIZE;
+        readIndex -= WINDOW_SIZE;
+    }
+
+    /**
+     * Reads the offset and length of the copy following a literal.
+     *
+     * @return false if there is no more copy - this means this is the
+     * last block of the stream.
+     * @throws IOException if the stream ends although a copy is
+     * expected or the offset doesn't point into data that has been
+     * decompressed so far
+     */
+    private boolean initializeCopy() throws IOException {
+        try {
+            copyOffset = (int) ByteUtils.fromLittleEndian(supplier, 2);
+        } catch (final IOException ex) {
+            if (nextCopySize == 0) { // the last block has no copy
+                return false;
+            }
+            throw ex;
+        }
+        if (copyOffset == 0 || copyOffset > writeIndex) {
+            // corrupt input: would otherwise surface as an
+            // ArithmeticException or ArrayIndexOutOfBoundsException
+            throw new IOException("Invalid copy offset " + copyOffset + ", only "
+                                  + writeIndex + " bytes are available for copies");
+        }
+        long copySize = nextCopySize;
+        if (nextCopySize == COPY_SIZE_MASK) {
+            copySize += readSizeBytes();
+        }
+        bytesRemaining = copySize + 4; // minimal match length 4 is encoded as 0
+        state = State.IN_COPY;
+        return true;
+    }
+
+    /** Expands as much of the current copy as needed and hands it out. */
+    private int readCopy(final byte[] b, final int off, final int len) throws IOException {
+        final int avail = available();
+        if (len > avail) {
+            tryToCopy(len - avail);
+        }
+        return readFromBuffer(b, off, len);
+    }
+
+    /**
+     * Appends up to {@code bytesToCopy} bytes of the current copy to
+     * the buffer, repeating the referenced bytes if the copy is
+     * longer than its offset.
+     */
+    private void tryToCopy(final int bytesToCopy) throws IOException {
+        // this will fit into the buffer without sliding and not
+        // require more than is available inside the copy
+        final int copy = (int) Math.min(Math.min(bytesToCopy, bytesRemaining),
+                                        buf.length - writeIndex);
+        if (copy == 0) {
+            // NOP
+        } else if (copyOffset == 1) { // pretty common special case
+            final byte last = buf[writeIndex - 1];
+            for (int i = 0; i < copy; i++) {
+                buf[writeIndex++] = last;
+            }
+        } else if (copy < copyOffset) {
+            // source and target don't overlap
+            System.arraycopy(buf, writeIndex - copyOffset, buf, writeIndex, copy);
+            writeIndex += copy;
+        } else {
+            // source and target overlap: repeat the last copyOffset
+            // bytes as often as they fit, then add the remainder
+            final int fullRots = copy / copyOffset;
+            for (int i = 0; i < fullRots; i++) {
+                System.arraycopy(buf, writeIndex - copyOffset, buf, writeIndex, copyOffset);
+                writeIndex += copyOffset;
+            }
+
+            final int pad = copy - (copyOffset * fullRots);
+            if (pad > 0) {
+                System.arraycopy(buf, writeIndex - copyOffset, buf, writeIndex, pad);
+                writeIndex += pad;
+            }
+        }
+        bytesRemaining -= copy;
+    }
+
+    /**
+     * Reads a single byte from the underlying stream and counts it.
+     *
+     * @return the byte as a value between 0 and 255 or -1 at the end
+     * of the stream
+     */
+    private int readOneByte() throws IOException {
+        final int b = in.read();
+        if (b != -1) {
+            count(1);
+            return b & 0xFF;
+        }
+        return -1;
+    }
+
+    private enum State {
+        NO_BLOCK, IN_LITERAL, LOOKING_FOR_COPY, IN_COPY, EOF
+    }
+}

http://git-wip-us.apache.org/repos/asf/commons-compress/blob/56e82da9/src/main/java/org/apache/commons/compress/compressors/lz4/package.html
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/compress/compressors/lz4/package.html b/src/main/java/org/apache/commons/compress/compressors/lz4/package.html
new file mode 100644
index 0000000..54de62b
--- /dev/null
+++ b/src/main/java/org/apache/commons/compress/compressors/lz4/package.html
@@ -0,0 +1,37 @@
+<html>
+<!--
+
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+-->
+  <body>
+    <p>Provides stream classes for the
+      <a href="http://lz4.github.io/lz4/">LZ4</a>
+      algorithm.</p>
+
+    <p>The block LZ4 format which only contains the compressed data is
+      supported by the <code>BlockLZ4Compressor*putStream</code>
+      classes while the frame format is implemented
+      by <code>FramedLZ4Compressor*putStream</code>.  The
+      implementation in Commons Compress is based on the
+      specifications "Last revised: 2015-03-26" for the block format
+      and version "1.5.1 (31/03/2015)" for the frame format.</p>
+
+    <p>Only the frame format can be auto-detected; this means you have
+      to specify the format explicitly if you want to read a block LZ4
+      stream via <code>CompressorStreamFactory</code>.</p>
+  </body>
+</html>

http://git-wip-us.apache.org/repos/asf/commons-compress/blob/56e82da9/src/site/xdoc/limitations.xml
----------------------------------------------------------------------
diff --git a/src/site/xdoc/limitations.xml b/src/site/xdoc/limitations.xml
index 45c3a27..fd0c07e 100644
--- a/src/site/xdoc/limitations.xml
+++ b/src/site/xdoc/limitations.xml
@@ -106,6 +106,14 @@
          MANIFEST</li>
        </ul>
      </section>
+     <section name="LZ4">
+       <ul>
+         <li>In theory LZ4 compressed streams can contain literals and
+         copies of arbitrary length while Commons Compress only
+         supports sizes up to 2<sup>63</sup> - 1 (i.e. &#x2248; 9.2
+         EB).</li>
+       </ul>
+     </section>
      <section name="LZMA">
        <ul>
          <li>the format requires the otherwise optional <a

http://git-wip-us.apache.org/repos/asf/commons-compress/blob/56e82da9/src/test/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorInputStreamTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorInputStreamTest.java
b/src/test/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorInputStreamTest.java
new file mode 100644
index 0000000..1c1155a
--- /dev/null
+++ b/src/test/java/org/apache/commons/compress/compressors/lz4/BlockLZ4CompressorInputStreamTest.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.commons.compress.compressors.lz4;
+
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import org.apache.commons.compress.AbstractTestCase;
+import org.apache.commons.compress.compressors.CompressorStreamFactory;
+import org.apache.commons.compress.utils.IOUtils;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Checks that the block LZ4 sample archive decompresses to the same
+ * bytes as the uncompressed tar it was created from.
+ */
+public class BlockLZ4CompressorInputStreamTest extends AbstractTestCase {
+
+    /** Reads the sample by instantiating the stream class directly. */
+    @Test
+    public void readBlaLz4() throws IOException {
+        try (InputStream decompressed =
+                 new BlockLZ4CompressorInputStream(new FileInputStream(getFile("bla.tar.block_lz4")));
+            FileInputStream reference = new FileInputStream(getFile("bla.tar"))) {
+            assertSameContent(reference, decompressed);
+        }
+    }
+
+    /** Reads the sample via the factory using the explicit format name. */
+    @Test
+    public void readBlaLz4ViaFactory() throws Exception {
+        try (InputStream decompressed = new CompressorStreamFactory()
+                 .createCompressorInputStream(CompressorStreamFactory.LZ4_BLOCK,
+                                              new FileInputStream(getFile("bla.tar.block_lz4")));
+            FileInputStream reference = new FileInputStream(getFile("bla.tar"))) {
+            assertSameContent(reference, decompressed);
+        }
+    }
+
+    /** Drains the reference first, then the decompressed stream, and compares the bytes. */
+    private static void assertSameContent(final InputStream reference, final InputStream decompressed)
+        throws IOException {
+        final byte[] expected = IOUtils.toByteArray(reference);
+        final byte[] actual = IOUtils.toByteArray(decompressed);
+        Assert.assertArrayEquals(expected, actual);
+    }
+}


Mime
View raw message