commons-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From bode...@apache.org
Subject [2/2] commons-compress git commit: COMPRESS-352 add support for IWA files
Date Sun, 22 May 2016 13:02:17 GMT
COMPRESS-352 add support for IWA files

Apple has created a Snappy dialect used in iWork archives.

The test case is copied from Tika.


Project: http://git-wip-us.apache.org/repos/asf/commons-compress/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-compress/commit/18daf66b
Tree: http://git-wip-us.apache.org/repos/asf/commons-compress/tree/18daf66b
Diff: http://git-wip-us.apache.org/repos/asf/commons-compress/diff/18daf66b

Branch: refs/heads/master
Commit: 18daf66b2ccf4c7df9618c9a7067ab56bfa96593
Parents: cfd5387
Author: Stefan Bodewig <bodewig@apache.org>
Authored: Sun May 22 15:00:14 2016 +0200
Committer: Stefan Bodewig <bodewig@apache.org>
Committed: Sun May 22 15:01:20 2016 +0200

----------------------------------------------------------------------
 .../FramedSnappyCompressorInputStream.java      |  32 ++++++++++--
 .../compressors/snappy/FramedSnappyDialect.java |  52 +++++++++++++++++++
 .../FramedSnappyCompressorInputStreamTest.java  |  27 ++++++++++
 src/test/resources/testNumbersNew.numbers       | Bin 0 -> 179147 bytes
 4 files changed, 107 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-compress/blob/18daf66b/src/main/java/org/apache/commons/compress/compressors/snappy/FramedSnappyCompressorInputStream.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/compress/compressors/snappy/FramedSnappyCompressorInputStream.java b/src/main/java/org/apache/commons/compress/compressors/snappy/FramedSnappyCompressorInputStream.java
index 2ef2dcc..96ac7f9 100644
--- a/src/main/java/org/apache/commons/compress/compressors/snappy/FramedSnappyCompressorInputStream.java
+++ b/src/main/java/org/apache/commons/compress/compressors/snappy/FramedSnappyCompressorInputStream.java
@@ -58,6 +58,8 @@ public class FramedSnappyCompressorInputStream extends CompressorInputStream {
 
     /** The underlying stream to read compressed data from */
     private final PushbackInputStream in;
+    /** The dialect to expect */
+    private final FramedSnappyDialect dialect;
 
     private SnappyCompressorInputStream currentCompressedChunk;
 
@@ -71,14 +73,31 @@ public class FramedSnappyCompressorInputStream extends CompressorInputStream {
     private final PureJavaCrc32C checksum = new PureJavaCrc32C();
 
     /**
+     * Constructs a new input stream that decompresses
+     * snappy-framed-compressed data from the specified input stream
+     * using the {@link FramedSnappyDialect#STANDARD} dialect.
+     * @param in  the InputStream from which to read the compressed data
+     * @throws IOException if reading fails
+     */
+    public FramedSnappyCompressorInputStream(final InputStream in) throws IOException {
+        this(in, FramedSnappyDialect.STANDARD);
+    }
+
+    /**
      * Constructs a new input stream that decompresses snappy-framed-compressed data
      * from the specified input stream.
      * @param in  the InputStream from which to read the compressed data
+     * @param dialect the dialect used by the compressed stream
      * @throws IOException if reading fails
      */
-    public FramedSnappyCompressorInputStream(final InputStream in) throws IOException {
+    public FramedSnappyCompressorInputStream(final InputStream in,
+                                             final FramedSnappyDialect dialect)
+        throws IOException {
         this.in = new PushbackInputStream(in, 1);
-        readStreamIdentifier();
+        this.dialect = dialect;
+        if (dialect.hasStreamIdentifier()) {
+            readStreamIdentifier();
+        }
     }
 
     /** {@inheritDoc} */
@@ -182,8 +201,13 @@ public class FramedSnappyCompressorInputStream extends CompressorInputStream {
             uncompressedBytesRemaining = readSize() - 4 /* CRC */;
             expectedChecksum = unmask(readCrc());
         } else if (type == COMPRESSED_CHUNK_TYPE) {
-            final long size = readSize() - 4 /* CRC */;
-            expectedChecksum = unmask(readCrc());
+            boolean expectChecksum = dialect.usesChecksumWithCompressedChunks();
+            final long size = readSize() - (expectChecksum ? 4 : 0);
+            if (expectChecksum) {
+                expectedChecksum = unmask(readCrc());
+            } else {
+                expectedChecksum = -1;
+            }
             currentCompressedChunk =
                 new SnappyCompressorInputStream(new BoundedInputStream(in, size));
             // constructor reads uncompressed size

http://git-wip-us.apache.org/repos/asf/commons-compress/blob/18daf66b/src/main/java/org/apache/commons/compress/compressors/snappy/FramedSnappyDialect.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/compress/compressors/snappy/FramedSnappyDialect.java b/src/main/java/org/apache/commons/compress/compressors/snappy/FramedSnappyDialect.java
new file mode 100644
index 0000000..1f0d2b8
--- /dev/null
+++ b/src/main/java/org/apache/commons/compress/compressors/snappy/FramedSnappyDialect.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.commons.compress.compressors.snappy;
+
+/**
+ * Dialects of the framing format that {@link FramedSnappyCompressorInputStream} can deal with.
+ * @since 1.12
+ */
+public enum FramedSnappyDialect {
+    /**
+     * The standard as defined by the <a
+     * href="https://github.com/google/snappy/blob/master/framing_format.txt">Snappy
+     * framing format description</a>
+     */
+    STANDARD(true, true),
+    /**
+     * The format used by Apple's iWork Archives (.iwa files).
+     */
+    IWORK_ARCHIVE(false, false);
+
+    private final boolean streamIdentifier, checksumWithCompressedChunks;
+
+    private FramedSnappyDialect(boolean hasStreamIdentifier,
+                                boolean usesChecksumWithCompressedChunks) {
+        this.streamIdentifier = hasStreamIdentifier;
+        this.checksumWithCompressedChunks = usesChecksumWithCompressedChunks;
+    }
+
+    boolean hasStreamIdentifier() {
+        return streamIdentifier;
+    }
+
+    boolean usesChecksumWithCompressedChunks() {
+        return checksumWithCompressedChunks;
+    }
+}

http://git-wip-us.apache.org/repos/asf/commons-compress/blob/18daf66b/src/test/java/org/apache/commons/compress/compressors/snappy/FramedSnappyCompressorInputStreamTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/compress/compressors/snappy/FramedSnappyCompressorInputStreamTest.java b/src/test/java/org/apache/commons/compress/compressors/snappy/FramedSnappyCompressorInputStreamTest.java
index 3126c4d..b12ddeb 100644
--- a/src/test/java/org/apache/commons/compress/compressors/snappy/FramedSnappyCompressorInputStreamTest.java
+++ b/src/test/java/org/apache/commons/compress/compressors/snappy/FramedSnappyCompressorInputStreamTest.java
@@ -28,6 +28,7 @@ import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import org.apache.commons.compress.AbstractTestCase;
+import org.apache.commons.compress.archivers.zip.ZipFile;
 import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
 import org.apache.commons.compress.utils.IOUtils;
 import org.junit.Test;
@@ -170,6 +171,32 @@ public final class FramedSnappyCompressorInputStreamTest
         testChecksumUnmasking(0xffffc757l);
     }
 
+    @Test
+    public void readIWAFile() throws Exception {
+        final ZipFile zip = new ZipFile(getFile("testNumbersNew.numbers"));
+        try {
+            InputStream is = zip.getInputStream(zip.getEntry("Index/Document.iwa"));
+            try {
+                final FramedSnappyCompressorInputStream in =
+                    new FramedSnappyCompressorInputStream(is, FramedSnappyDialect.IWORK_ARCHIVE);
+                FileOutputStream out = null;
+                try {
+                    out = new FileOutputStream(new File(dir, "snappyIWATest.raw"));
+                    IOUtils.copy(in, out);
+                } finally {
+                    if (out != null) {
+                        out.close();
+                    }
+                    in.close();
+                }
+            } finally {
+                is.close();
+            }
+        } finally {
+            zip.close();
+        }
+    }
+
     private void testChecksumUnmasking(final long x) {
         assertEquals(Long.toHexString(x),
                      Long.toHexString(FramedSnappyCompressorInputStream

http://git-wip-us.apache.org/repos/asf/commons-compress/blob/18daf66b/src/test/resources/testNumbersNew.numbers
----------------------------------------------------------------------
diff --git a/src/test/resources/testNumbersNew.numbers b/src/test/resources/testNumbersNew.numbers
new file mode 100644
index 0000000..3f9a013
Binary files /dev/null and b/src/test/resources/testNumbersNew.numbers differ


Mime
View raw message