poi-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kiwiwi...@apache.org
Subject svn commit: r1804854 - in /poi: site/src/documentation/content/xdocs/ trunk/src/java/org/apache/poi/poifs/filesystem/ trunk/src/java/org/apache/poi/poifs/macros/ trunk/src/java/org/apache/poi/poifs/storage/ trunk/src/java/org/apache/poi/sl/usermodel/ t...
Date Fri, 11 Aug 2017 20:47:48 GMT
Author: kiwiwings
Date: Fri Aug 11 20:47:48 2017
New Revision: 1804854

URL: http://svn.apache.org/viewvc?rev=1804854&view=rev
Log:
#61381 - PushbackInputStreams passed to ZipHelper may not hold 8 bytes

Added:
    poi/trunk/src/java/org/apache/poi/poifs/filesystem/FileMagic.java   (with props)
Modified:
    poi/site/src/documentation/content/xdocs/status.xml
    poi/trunk/src/java/org/apache/poi/poifs/filesystem/DocumentFactoryHelper.java
    poi/trunk/src/java/org/apache/poi/poifs/filesystem/NPOIFSFileSystem.java
    poi/trunk/src/java/org/apache/poi/poifs/filesystem/OPOIFSFileSystem.java
    poi/trunk/src/java/org/apache/poi/poifs/filesystem/POIFSFileSystem.java
    poi/trunk/src/java/org/apache/poi/poifs/macros/VBAMacroReader.java
    poi/trunk/src/java/org/apache/poi/poifs/storage/HeaderBlock.java
    poi/trunk/src/java/org/apache/poi/sl/usermodel/SlideShowFactory.java
    poi/trunk/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java
    poi/trunk/src/ooxml/java/org/apache/poi/openxml4j/opc/internal/ZipHelper.java
    poi/trunk/src/ooxml/java/org/apache/poi/openxml4j/util/ZipSecureFile.java
    poi/trunk/src/ooxml/java/org/apache/poi/ss/usermodel/WorkbookFactory.java
    poi/trunk/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFObjectData.java
    poi/trunk/src/ooxml/testcases/org/apache/poi/TestDetectAsOOXML.java
    poi/trunk/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestPackage.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocumentCore.java
    poi/trunk/src/testcases/org/apache/poi/poifs/filesystem/TestOfficeXMLException.java

Modified: poi/site/src/documentation/content/xdocs/status.xml
URL: http://svn.apache.org/viewvc/poi/site/src/documentation/content/xdocs/status.xml?rev=1804854&r1=1804853&r2=1804854&view=diff
==============================================================================
--- poi/site/src/documentation/content/xdocs/status.xml (original)
+++ poi/site/src/documentation/content/xdocs/status.xml Fri Aug 11 20:47:48 2017
@@ -61,6 +61,7 @@
         <summary-item>Removal of deprecated classes and methods that were marked for removal in v3.17</summary-item>
       </summary>
       <actions>
+        <action dev="PD" type="fix" fixes-bug="61381" module="POIFS">PushbackInputStreams passed to ZipHelper may not hold 8 bytes</action>
         <action dev="PD" type="fix" fixes-bug="58975" module="SS Common">Support formula evaluation with functions containing more than 127 arguments</action>
         <action dev="PD" type="fix" fixes-bug="60422" module="SS Common">Fix issue with number formatting in non-default locales</action>
         <action dev="PD" type="fix" fixes-bug="61048" module="SXSSF">Fix issue where carriage returns were being escaped as line feeds</action>

Modified: poi/trunk/src/java/org/apache/poi/poifs/filesystem/DocumentFactoryHelper.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/java/org/apache/poi/poifs/filesystem/DocumentFactoryHelper.java?rev=1804854&r1=1804853&r2=1804854&view=diff
==============================================================================
--- poi/trunk/src/java/org/apache/poi/poifs/filesystem/DocumentFactoryHelper.java (original)
+++ poi/trunk/src/java/org/apache/poi/poifs/filesystem/DocumentFactoryHelper.java Fri Aug 11 20:47:48 2017
@@ -17,22 +17,22 @@
 
 package org.apache.poi.poifs.filesystem;
 
-import org.apache.poi.EncryptedDocumentException;
-import org.apache.poi.poifs.common.POIFSConstants;
-import org.apache.poi.poifs.crypt.Decryptor;
-import org.apache.poi.poifs.crypt.EncryptionInfo;
-import org.apache.poi.util.IOUtils;
-
 import java.io.FilterInputStream;
 import java.io.IOException;
 import java.io.InputStream;
-import java.io.PushbackInputStream;
 import java.security.GeneralSecurityException;
 
+import org.apache.poi.EncryptedDocumentException;
+import org.apache.poi.poifs.crypt.Decryptor;
+import org.apache.poi.poifs.crypt.EncryptionInfo;
+import org.apache.poi.util.Internal;
+import org.apache.poi.util.Removal;
+
 /**
  * A small base class for the various factories, e.g. WorkbookFactory,
  * SlideShowFactory to combine common code here.
  */
+@Internal
 public class DocumentFactoryHelper {
     /**
      * Wrap the OLE2 data in the NPOIFSFileSystem into a decrypted stream by using
@@ -81,36 +81,19 @@ public class DocumentFactoryHelper {
 
     /**
      * Checks that the supplied InputStream (which MUST
-     *  support mark and reset, or be a PushbackInputStream)
-     *  has a OOXML (zip) header at the start of it.
-     * If your InputStream does not support mark / reset,
-     *  then wrap it in a PushBackInputStream, then be
+     *  support mark and reset) has a OOXML (zip) header at the start of it.<p>
+     *  
+     * If unsure if your InputStream does support mark / reset,
+     *  use {@link FileMagic#prepareToCheckMagic(InputStream)} to wrap it and make
      *  sure to always use that, and not the original!
-     * @param inp An InputStream which supports either mark/reset, or is a PushbackInputStream
+     *  
+     * @param inp An InputStream which supports mark/reset
+     *
+     * @deprecated in 3.17-beta2, use {@link FileMagic#valueOf(InputStream)} == FileMagic.OOXML instead
      */
+    @Deprecated
+    @Removal(version="4.0")
     public static boolean hasOOXMLHeader(InputStream inp) throws IOException {
-        // We want to peek at the first 4 bytes
-        inp.mark(4);
-
-        byte[] header = new byte[4];
-        int bytesRead = IOUtils.readFully(inp, header);
-
-        // Wind back those 4 bytes
-        if(inp instanceof PushbackInputStream) {
-            PushbackInputStream pin = (PushbackInputStream)inp;
-            pin.unread(header, 0, bytesRead);
-        } else {
-            inp.reset();
-        }
-
-        // Did it match the ooxml zip signature?
-        return (
-                bytesRead == 4 &&
-                        header[0] == POIFSConstants.OOXML_FILE_HEADER[0] &&
-                        header[1] == POIFSConstants.OOXML_FILE_HEADER[1] &&
-                        header[2] == POIFSConstants.OOXML_FILE_HEADER[2] &&
-                        header[3] == POIFSConstants.OOXML_FILE_HEADER[3]
-        );
+        return FileMagic.valueOf(inp) == FileMagic.OOXML;
     }
-
 }

Added: poi/trunk/src/java/org/apache/poi/poifs/filesystem/FileMagic.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/java/org/apache/poi/poifs/filesystem/FileMagic.java?rev=1804854&view=auto
==============================================================================
--- poi/trunk/src/java/org/apache/poi/poifs/filesystem/FileMagic.java (added)
+++ poi/trunk/src/java/org/apache/poi/poifs/filesystem/FileMagic.java Fri Aug 11 20:47:48 2017
@@ -0,0 +1,155 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.poifs.filesystem;
+
+import static org.apache.poi.poifs.common.POIFSConstants.OOXML_FILE_HEADER;
+import static org.apache.poi.poifs.common.POIFSConstants.RAW_XML_FILE_HEADER;
+
+import java.io.BufferedInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.poi.poifs.storage.HeaderBlockConstants;
+import org.apache.poi.util.IOUtils;
+import org.apache.poi.util.LittleEndian;
+import org.apache.poi.util.LocaleUtil;
+
+/**
+ * The file magic number, i.e. the file identification based on the first bytes
+ * of the file
+ */
+public enum FileMagic {
+    /** OLE2 / BIFF8+ stream used for Office 97 and higher documents */
+    OLE2(HeaderBlockConstants._signature),
+    /** OOXML / ZIP stream */
+    OOXML(OOXML_FILE_HEADER),
+    /** XML file */
+    XML(RAW_XML_FILE_HEADER),
+    /** BIFF2 raw stream - for Excel 2 */
+    BIFF2(new byte[]{
+        0x09, 0x00, // sid=0x0009
+        0x04, 0x00, // size=0x0004
+        0x00, 0x00, // unused
+        0x70, 0x00  // 0x70 = multiple values
+    }),
+    /** BIFF3 raw stream - for Excel 3 */
+    BIFF3(new byte[]{
+        0x09, 0x02, // sid=0x0209
+        0x06, 0x00, // size=0x0006
+        0x00, 0x00, // unused
+        0x70, 0x00  // 0x70 = multiple values
+    }),
+    /** BIFF4 raw stream - for Excel 4 */
+    BIFF4(new byte[]{
+        0x09, 0x04, // sid=0x0409
+        0x06, 0x00, // size=0x0006
+        0x00, 0x00, // unused
+        0x70, 0x00  // 0x70 = multiple values
+    },new byte[]{
+        0x09, 0x04, // sid=0x0409
+        0x06, 0x00, // size=0x0006
+        0x00, 0x00, // unused
+        0x00, 0x01
+    }),
+    /** Old MS Write raw stream */
+    MSWRITE(
+        new byte[]{0x31, (byte)0xbe, 0x00, 0x00 },
+        new byte[]{0x32, (byte)0xbe, 0x00, 0x00 }),
+    /** RTF document */
+    RTF("{\\rtf"),
+    /** PDF document */
+    PDF("%PDF"),
+    // keep UNKNOWN always as last enum!
+    /** UNKNOWN magic */
+    UNKNOWN(new byte[0]);
+    
+    final byte[][] magic;
+    
+    FileMagic(long magic) {
+        this.magic = new byte[1][8];
+        LittleEndian.putLong(this.magic[0], 0, magic);
+    }
+    
+    FileMagic(byte[]... magic) {
+        this.magic = magic;
+    }
+    
+    FileMagic(String magic) {
+        this(magic.getBytes(LocaleUtil.CHARSET_1252));
+    }
+
+    public static FileMagic valueOf(byte[] magic) {
+        for (FileMagic fm : values()) {
+            int i=0;
+            boolean found = true;
+            for (byte[] ma : fm.magic) {
+                for (byte m : ma) {
+                    byte d = magic[i++];
+                    if (!(d == m || (m == 0x70 && (d == 0x10 || d == 0x20 || d == 0x40)))) {
+                        found = false;
+                        break;
+                    }
+                }
+                if (found) {
+                    return fm;
+                }
+            }
+        }
+        return UNKNOWN;
+    }
+
+    /**
+     * Get the file magic of the supplied InputStream (which MUST
+     *  support mark and reset).<p>
+     *
+     * If unsure if your InputStream does support mark / reset,
+     *  use {@link #prepareToCheckMagic(InputStream)} to wrap it and make
+     *  sure to always use that, and not the original!<p>
+     *
+     * Note that a result of {@link FileMagic#UNKNOWN} does not rule out a ZIP stream;
+     *  it could also mean that the ZIP stream has leading junk bytes
+     *
+     * @param inp An InputStream which supports mark/reset
+     */
+    public static FileMagic valueOf(InputStream inp) throws IOException {
+        if (!inp.markSupported()) {
+            throw new IOException("getFileMagic() only operates on streams which support mark(int)");
+        }
+
+        // Grab the first 8 bytes
+        byte[] data = IOUtils.peekFirst8Bytes(inp);
+
+        return FileMagic.valueOf(data);
+    }
+
+
+    /**
+     * Checks if an {@link InputStream} can be reset (i.e. used for checking the header magic) and wraps it if not
+     *
+     * @param stream stream to be checked for wrapping
+     * @return a mark enabled stream
+     */
+    public static InputStream prepareToCheckMagic(InputStream stream) {
+        if (stream.markSupported()) {
+            return stream;
+        }
+        // we used to process the data via a PushbackInputStream, but user code could provide a too small one
+        // so we use a BufferedInputStream instead now
+        return new BufferedInputStream(stream);
+    }
+}
\ No newline at end of file

Propchange: poi/trunk/src/java/org/apache/poi/poifs/filesystem/FileMagic.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: poi/trunk/src/java/org/apache/poi/poifs/filesystem/NPOIFSFileSystem.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/java/org/apache/poi/poifs/filesystem/NPOIFSFileSystem.java?rev=1804854&r1=1804853&r2=1804854&view=diff
==============================================================================
--- poi/trunk/src/java/org/apache/poi/poifs/filesystem/NPOIFSFileSystem.java (original)
+++ poi/trunk/src/java/org/apache/poi/poifs/filesystem/NPOIFSFileSystem.java Fri Aug 11 20:47:48 2017
@@ -19,6 +19,7 @@
 
 package org.apache.poi.poifs.filesystem;
 
+import java.io.ByteArrayInputStream;
 import java.io.Closeable;
 import java.io.File;
 import java.io.FileInputStream;
@@ -26,7 +27,6 @@ import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
-import java.io.PushbackInputStream;
 import java.nio.ByteBuffer;
 import java.nio.channels.Channels;
 import java.nio.channels.FileChannel;
@@ -51,14 +51,13 @@ import org.apache.poi.poifs.storage.BATB
 import org.apache.poi.poifs.storage.BlockAllocationTableReader;
 import org.apache.poi.poifs.storage.BlockAllocationTableWriter;
 import org.apache.poi.poifs.storage.HeaderBlock;
-import org.apache.poi.poifs.storage.HeaderBlockConstants;
 import org.apache.poi.poifs.storage.HeaderBlockWriter;
 import org.apache.poi.util.CloseIgnoringInputStream;
 import org.apache.poi.util.IOUtils;
 import org.apache.poi.util.Internal;
-import org.apache.poi.util.LongField;
 import org.apache.poi.util.POILogFactory;
 import org.apache.poi.util.POILogger;
+import org.apache.poi.util.Removal;
 
 /**
  * <p>This is the main class of the POIFS system; it manages the entire
@@ -353,44 +352,38 @@ public class NPOIFSFileSystem extends Bl
 
     /**
      * Checks that the supplied InputStream (which MUST
-     *  support mark and reset, or be a PushbackInputStream)
-     *  has a POIFS (OLE2) header at the start of it.
-     * If your InputStream does not support mark / reset,
-     *  then wrap it in a PushBackInputStream, then be
-     *  sure to always use that and not the original!
+     *  support mark and reset) has a POIFS (OLE2) header at the start of it.
+     * If unsure if your InputStream does support mark / reset,
+     *  use {@link FileMagic#prepareToCheckMagic(InputStream)} to wrap it and make
+     *  sure to always use that, and not the original!
      *  
      *  After the method call, the InputStream is at the
      *  same position as of the time of entering the method.
      *  
-     * @param inp An InputStream which supports either mark/reset, or is a PushbackInputStream
+     * @param inp An InputStream which supports mark/reset
+     * 
+     * @deprecated in 3.17-beta2, use {@link FileMagic#valueOf(InputStream)} == {@link FileMagic#OLE2} instead
      */
+    @Deprecated
+    @Removal(version="4.0")
     public static boolean hasPOIFSHeader(InputStream inp) throws IOException {
-        // We want to peek at the first 8 bytes
-        inp.mark(8);
-
-        byte[] header = new byte[8];
-        int bytesRead = IOUtils.readFully(inp, header);
-        LongField signature = new LongField(HeaderBlockConstants._signature_offset, header);
-
-        // Wind back those 8 bytes
-        if(inp instanceof PushbackInputStream) {
-            PushbackInputStream pin = (PushbackInputStream)inp;
-            pin.unread(header, 0, bytesRead);
-        } else {
-            inp.reset();
-        }
-
-        // Did it match the signature?
-        return (signature.get() == HeaderBlockConstants._signature);
+        return FileMagic.valueOf(inp) == FileMagic.OLE2;
     }
     
     /**
      * Checks if the supplied first 8 bytes of a stream / file
      *  has a POIFS (OLE2) header.
+     * 
+     * @deprecated in 3.17-beta2, use {@link FileMagic#valueOf(byte[])} == {@link FileMagic#OLE2} instead
      */
+    @Deprecated
+    @Removal(version="4.0")
     public static boolean hasPOIFSHeader(byte[] header8Bytes) {
-        LongField signature = new LongField(HeaderBlockConstants._signature_offset, header8Bytes);
-        return (signature.get() == HeaderBlockConstants._signature);
+        try {
+            return hasPOIFSHeader(new ByteArrayInputStream(header8Bytes));
+        } catch (IOException e) {
+            throw new RuntimeException("invalid header check", e);
+        }
     }
     
     /**

Modified: poi/trunk/src/java/org/apache/poi/poifs/filesystem/OPOIFSFileSystem.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/java/org/apache/poi/poifs/filesystem/OPOIFSFileSystem.java?rev=1804854&r1=1804853&r2=1804854&view=diff
==============================================================================
--- poi/trunk/src/java/org/apache/poi/poifs/filesystem/OPOIFSFileSystem.java (original)
+++ poi/trunk/src/java/org/apache/poi/poifs/filesystem/OPOIFSFileSystem.java Fri Aug 11 20:47:48 2017
@@ -42,16 +42,14 @@ import org.apache.poi.poifs.storage.Bloc
 import org.apache.poi.poifs.storage.BlockList;
 import org.apache.poi.poifs.storage.BlockWritable;
 import org.apache.poi.poifs.storage.HeaderBlock;
-import org.apache.poi.poifs.storage.HeaderBlockConstants;
 import org.apache.poi.poifs.storage.HeaderBlockWriter;
 import org.apache.poi.poifs.storage.RawDataBlockList;
 import org.apache.poi.poifs.storage.SmallBlockTableReader;
 import org.apache.poi.poifs.storage.SmallBlockTableWriter;
 import org.apache.poi.util.CloseIgnoringInputStream;
-import org.apache.poi.util.IOUtils;
-import org.apache.poi.util.LongField;
 import org.apache.poi.util.POILogFactory;
 import org.apache.poi.util.POILogger;
+import org.apache.poi.util.Removal;
 
 /**
  * <p>This is the main class of the POIFS system; it manages the entire
@@ -200,27 +198,34 @@ public class OPOIFSFileSystem
 
     /**
      * Checks that the supplied InputStream (which MUST
-     *  support mark and reset, or be a PushbackInputStream)
-     *  has a POIFS (OLE2) header at the start of it.
-     * If your InputStream does not support mark / reset,
-     *  then wrap it in a PushBackInputStream, then be
+     *  support mark and reset) has a POIFS (OLE2) header at the start of it.
+     * If unsure if your InputStream does support mark / reset,
+     *  use {@link FileMagic#prepareToCheckMagic(InputStream)} to wrap it and make
      *  sure to always use that, and not the original!
-     * @param inp An InputStream which supports either mark/reset, or is a PushbackInputStream
+     *  
+     *  After the method call, the InputStream is at the
+     *  same position as of the time of entering the method.
+     *  
+     * @param inp An InputStream which supports mark/reset
+     * 
+     * @deprecated in 3.17-beta2, use {@link FileMagic#valueOf(InputStream)} == {@link FileMagic#OLE2} instead
      */
+    @Deprecated
+    @Removal(version="4.0")
     public static boolean hasPOIFSHeader(InputStream inp) throws IOException {
-        // We want to peek at the first 8 bytes
-        byte[] header = IOUtils.peekFirst8Bytes(inp);
-        return hasPOIFSHeader(header);
+        return NPOIFSFileSystem.hasPOIFSHeader(inp);
     }
+
     /**
      * Checks if the supplied first 8 bytes of a stream / file
      *  has a POIFS (OLE2) header.
+     * 
+     * @deprecated in 3.17-beta2, use {@link FileMagic#valueOf(byte[])} == {@link FileMagic#OLE2} instead
      */
+    @Deprecated
+    @Removal(version="4.0")
     public static boolean hasPOIFSHeader(byte[] header8Bytes) {
-        LongField signature = new LongField(HeaderBlockConstants._signature_offset, header8Bytes);
-
-        // Did it match the signature?
-        return (signature.get() == HeaderBlockConstants._signature);
+        return NPOIFSFileSystem.hasPOIFSHeader(header8Bytes);
     }
 
     /**

Modified: poi/trunk/src/java/org/apache/poi/poifs/filesystem/POIFSFileSystem.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/java/org/apache/poi/poifs/filesystem/POIFSFileSystem.java?rev=1804854&r1=1804853&r2=1804854&view=diff
==============================================================================
--- poi/trunk/src/java/org/apache/poi/poifs/filesystem/POIFSFileSystem.java (original)
+++ poi/trunk/src/java/org/apache/poi/poifs/filesystem/POIFSFileSystem.java Fri Aug 11 20:47:48 2017
@@ -115,27 +115,6 @@ public class POIFSFileSystem
     }
     
     /**
-     * Checks that the supplied InputStream (which MUST
-     *  support mark and reset, or be a PushbackInputStream)
-     *  has a POIFS (OLE2) header at the start of it.
-     * If your InputStream does not support mark / reset,
-     *  then wrap it in a PushBackInputStream, then be
-     *  sure to always use that, and not the original!
-     * @param inp An InputStream which supports either mark/reset, or is a PushbackInputStream
-     */
-    public static boolean hasPOIFSHeader(InputStream inp) throws IOException {
-        return NPOIFSFileSystem.hasPOIFSHeader(inp);
-    }
-
-    /**
-     * Checks if the supplied first 8 bytes of a stream / file
-     *  has a POIFS (OLE2) header.
-     */
-    public static boolean hasPOIFSHeader(byte[] header8Bytes) {
-        return NPOIFSFileSystem.hasPOIFSHeader(header8Bytes);
-    }
-    
-    /**
      * Creates a new {@link POIFSFileSystem} in a new {@link File}.
      * Use {@link #POIFSFileSystem(File)} to open an existing File,
      *  this should only be used to create a new empty filesystem.

Modified: poi/trunk/src/java/org/apache/poi/poifs/macros/VBAMacroReader.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/java/org/apache/poi/poifs/macros/VBAMacroReader.java?rev=1804854&r1=1804853&r2=1804854&view=diff
==============================================================================
--- poi/trunk/src/java/org/apache/poi/poifs/macros/VBAMacroReader.java (original)
+++ poi/trunk/src/java/org/apache/poi/poifs/macros/VBAMacroReader.java Fri Aug 11 20:47:48 2017
@@ -17,8 +17,8 @@
 
 package org.apache.poi.poifs.macros;
 
-import static org.apache.poi.util.StringUtil.startsWithIgnoreCase;
 import static org.apache.poi.util.StringUtil.endsWithIgnoreCase;
+import static org.apache.poi.util.StringUtil.startsWithIgnoreCase;
 
 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
@@ -27,7 +27,6 @@ import java.io.File;
 import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStream;
-import java.io.PushbackInputStream;
 import java.nio.charset.Charset;
 import java.util.HashMap;
 import java.util.Map;
@@ -38,6 +37,7 @@ import org.apache.poi.poifs.filesystem.D
 import org.apache.poi.poifs.filesystem.DocumentInputStream;
 import org.apache.poi.poifs.filesystem.DocumentNode;
 import org.apache.poi.poifs.filesystem.Entry;
+import org.apache.poi.poifs.filesystem.FileMagic;
 import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
 import org.apache.poi.poifs.filesystem.OfficeXmlFileException;
 import org.apache.poi.util.CodePageUtil;
@@ -67,13 +67,12 @@ public class VBAMacroReader implements C
     private NPOIFSFileSystem fs;
     
     public VBAMacroReader(InputStream rstream) throws IOException {
-        PushbackInputStream stream = new PushbackInputStream(rstream, 8);
-        byte[] header8 = IOUtils.peekFirst8Bytes(stream);
-
-        if (NPOIFSFileSystem.hasPOIFSHeader(header8)) {
-            fs = new NPOIFSFileSystem(stream);
+        InputStream is = FileMagic.prepareToCheckMagic(rstream);
+        FileMagic fm = FileMagic.valueOf(is);
+        if (fm == FileMagic.OLE2) {
+            fs = new NPOIFSFileSystem(is);
         } else {
-            openOOXML(stream);
+            openOOXML(is);
         }
     }
     

Modified: poi/trunk/src/java/org/apache/poi/poifs/storage/HeaderBlock.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/java/org/apache/poi/poifs/storage/HeaderBlock.java?rev=1804854&r1=1804853&r2=1804854&view=diff
==============================================================================
--- poi/trunk/src/java/org/apache/poi/poifs/storage/HeaderBlock.java (original)
+++ poi/trunk/src/java/org/apache/poi/poifs/storage/HeaderBlock.java Fri Aug 11 20:47:48 2017
@@ -26,6 +26,7 @@ import java.util.Arrays;
 import org.apache.poi.hssf.OldExcelFormatException;
 import org.apache.poi.poifs.common.POIFSBigBlockSize;
 import org.apache.poi.poifs.common.POIFSConstants;
+import org.apache.poi.poifs.filesystem.FileMagic;
 import org.apache.poi.poifs.filesystem.NotOLE2FileException;
 import org.apache.poi.poifs.filesystem.OfficeXmlFileException;
 import org.apache.poi.util.HexDump;
@@ -40,41 +41,6 @@ import org.apache.poi.util.ShortField;
  * The block containing the archive header
  */
 public final class HeaderBlock implements HeaderBlockConstants {
-    private static final byte[] MAGIC_BIFF2 = {
-        0x09, 0x00, // sid=0x0009
-        0x04, 0x00, // size=0x0004
-        0x00, 0x00, // unused
-        0x70, 0x00  // 0x70 = multiple values
-    };
-
-    private static final byte[] MAGIC_BIFF3 = {
-        0x09, 0x02, // sid=0x0209
-        0x06, 0x00, // size=0x0006
-        0x00, 0x00, // unused
-        0x70, 0x00  // 0x70 = multiple values
-    };
-
-    private static final byte[] MAGIC_BIFF4a = {
-        0x09, 0x04, // sid=0x0409
-        0x06, 0x00, // size=0x0006
-        0x00, 0x00, // unused
-        0x70, 0x00  // 0x70 = multiple values
-    };
-
-    private static final byte[] MAGIC_BIFF4b = {
-        0x09, 0x04, // sid=0x0409
-        0x06, 0x00, // size=0x0006
-        0x00, 0x00, // unused
-        0x00, 0x01
-    };
-    
-    private static final byte[] MAGIC_MSWRITEa = {
-        0x31, (byte)0xbe, 0x00, 0x00
-    };
-    private static final byte[] MAGIC_MSWRITEb = {
-        0x32, (byte)0xbe, 0x00, 0x00
-    };
-
     private static final byte _default_value = ( byte ) 0xFF;
 
     /**
@@ -151,53 +117,35 @@ public final class HeaderBlock implement
 	   this._data = data.clone();
 	   
 		// verify signature
-		long signature = LittleEndian.getLong(_data, _signature_offset);
-
-		if (signature != _signature) {
-			// Is it one of the usual suspects?
-			if (cmp(POIFSConstants.OOXML_FILE_HEADER, data)) {
-				throw new OfficeXmlFileException("The supplied data appears to be in the Office 2007+ XML. "
-			        + "You are calling the part of POI that deals with OLE2 Office Documents. "
-			        + "You need to call a different part of POI to process this data (eg XSSF instead of HSSF)");
-			}
-			
-            if (cmp(POIFSConstants.RAW_XML_FILE_HEADER, data)) {
-                throw new NotOLE2FileException("The supplied data appears to be a raw XML file. "
-                    + "Formats such as Office 2003 XML are not supported");
-            }
-            
-            // Old MS Write raw stream
-            if (cmp(MAGIC_MSWRITEa, data) || cmp(MAGIC_MSWRITEb, data)) {
-                throw new NotOLE2FileException("The supplied data appears to be in the old MS Write format. "
-                    + "Apache POI doesn't currently support this format");
-            }
-
-            // BIFF2 raw stream
-            if (cmp(MAGIC_BIFF2, data)) {
-                throw new OldExcelFormatException("The supplied data appears to be in BIFF2 format. "
-                    + "HSSF only supports the BIFF8 format, try OldExcelExtractor");
-            }
-            
-            // BIFF3 raw stream
-            if (cmp(MAGIC_BIFF3, data)) {
-                throw new OldExcelFormatException("The supplied data appears to be in BIFF3 format. "
-                    + "HSSF only supports the BIFF8 format, try OldExcelExtractor");
-            }
-            
-            // BIFF4 raw stream
-            if (cmp(MAGIC_BIFF4a, data) || cmp(MAGIC_BIFF4b, data)) {
-                throw new OldExcelFormatException("The supplied data appears to be in BIFF4 format. "
-                    + "HSSF only supports the BIFF8 format, try OldExcelExtractor");
-            }
-
-			// Give a generic error if the OLE2 signature isn't found
-			throw new NotOLE2FileException("Invalid header signature; read "
-				                  + HexDump.longToHex(signature) + ", expected "
-				                  + HexDump.longToHex(_signature) + " - Your file appears "
-				                  + "not to be a valid OLE2 document");
-		}
-
-
+	   FileMagic fm = FileMagic.valueOf(data);
+	   
+	   switch (fm) {
+	   case OLE2:
+	       break;
+	   case OOXML:
+           throw new OfficeXmlFileException("The supplied data appears to be in the Office 2007+ XML. "
+               + "You are calling the part of POI that deals with OLE2 Office Documents. "
+               + "You need to call a different part of POI to process this data (eg XSSF instead of HSSF)");
+	   case XML:
+           throw new NotOLE2FileException("The supplied data appears to be a raw XML file. "
+               + "Formats such as Office 2003 XML are not supported");
+	   case MSWRITE:
+           throw new NotOLE2FileException("The supplied data appears to be in the old MS Write format. "
+               + "Apache POI doesn't currently support this format");
+       case BIFF2:
+       case BIFF3:
+       case BIFF4:
+           throw new OldExcelFormatException("The supplied data appears to be in "+fm+" format. "
+               + "HSSF only supports the BIFF8 format, try OldExcelExtractor");
+	   default:
+           // Give a generic error if the OLE2 signature isn't found
+	       String exp = HexDump.longToHex(_signature);
+	       String act = HexDump.longToHex(LittleEndian.getLong(data, 0));
+           throw new NotOLE2FileException(
+               "Invalid header signature; read " + act + ", expected " + exp +
+               " - Your file appears not to be a valid OLE2 document");
+	   }
+	   
 		// Figure out our block size
 		if (_data[30] == 12) {
 			this.bigBlockSize = POIFSConstants.LARGER_BIG_BLOCK_SIZE_DETAILS;
@@ -434,15 +382,4 @@ public final class HeaderBlock implement
          stream.write(0);
       }
    }
-   
-   private static boolean cmp(byte[] magic, byte[] data) {
-       int i=0;
-       for (byte m : magic) {
-           byte d = data[i++];
-           if (!(d == m || (m == 0x70 && (d == 0x10 || d == 0x20 || d == 0x40)))) {
-               return false;
-           }
-       }
-       return true;
-   }
 }

Modified: poi/trunk/src/java/org/apache/poi/sl/usermodel/SlideShowFactory.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/java/org/apache/poi/sl/usermodel/SlideShowFactory.java?rev=1804854&r1=1804853&r2=1804854&view=diff
==============================================================================
--- poi/trunk/src/java/org/apache/poi/sl/usermodel/SlideShowFactory.java (original)
+++ poi/trunk/src/java/org/apache/poi/sl/usermodel/SlideShowFactory.java Fri Aug 11 20:47:48 2017
@@ -20,7 +20,6 @@ import java.io.File;
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.InputStream;
-import java.io.PushbackInputStream;
 import java.lang.reflect.InvocationTargetException;
 import java.lang.reflect.Method;
 
@@ -30,6 +29,7 @@ import org.apache.poi.hssf.record.crypto
 import org.apache.poi.poifs.crypt.Decryptor;
 import org.apache.poi.poifs.filesystem.DirectoryNode;
 import org.apache.poi.poifs.filesystem.DocumentFactoryHelper;
+import org.apache.poi.poifs.filesystem.FileMagic;
 import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
 import org.apache.poi.poifs.filesystem.OfficeXmlFileException;
 import org.apache.poi.util.IOUtils;
@@ -94,9 +94,7 @@ public class SlideShowFactory {
      * Creates the appropriate HSLFSlideShow / XMLSlideShow from
      *  the given InputStream.
      *
-     * <p>Your input stream MUST either support mark/reset, or
-     *  be wrapped as a {@link PushbackInputStream}! Note that
-     *  using an {@link InputStream} has a higher memory footprint
+     * <p>Note that using an {@link InputStream} has a higher memory footprint
      *  than using a {@link File}.</p>
      *
      * <p>Note that in order to properly release resources the
@@ -118,9 +116,8 @@ public class SlideShowFactory {
     /**
      * Creates the appropriate HSLFSlideShow / XMLSlideShow from
      *  the given InputStream, which may be password protected.
-     * <p>Your input stream MUST either support mark/reset, or
-     *  be wrapped as a {@link PushbackInputStream}! Note that
-     *  using an {@link InputStream} has a higher memory footprint
+     *  
+     * <p>Note that using an {@link InputStream} has a higher memory footprint
      *  than using a {@link File}.</p>
      *
      * <p>Note that in order to properly release resources the
@@ -137,23 +134,18 @@ public class SlideShowFactory {
      *  @throws EncryptedDocumentException If the wrong password is given for a protected file
      */
     public static SlideShow<?,?> create(InputStream inp, String password) throws IOException, EncryptedDocumentException {
-        // If clearly doesn't do mark/reset, wrap up
-        if (! inp.markSupported()) {
-            inp = new PushbackInputStream(inp, 8);
-        }
-
-        // Ensure that there is at least some data there
-        byte[] header8 = IOUtils.peekFirst8Bytes(inp);
-
-        // Try to create
-        if (NPOIFSFileSystem.hasPOIFSHeader(header8)) {
-            NPOIFSFileSystem fs = new NPOIFSFileSystem(inp);
+        InputStream is = FileMagic.prepareToCheckMagic(inp);
+        FileMagic fm = FileMagic.valueOf(is);
+        
+        switch (fm) {
+        case OLE2:
+            NPOIFSFileSystem fs = new NPOIFSFileSystem(is);
             return create(fs, password);
+        case OOXML:
+            return createXSLFSlideShow(is);
+        default:
+            throw new IllegalArgumentException("Your InputStream was neither an OLE2 stream, nor an OOXML stream");
         }
-        if (DocumentFactoryHelper.hasOOXMLHeader(inp)) {
-            return createXSLFSlideShow(inp);
-        }
-        throw new IllegalArgumentException("Your InputStream was neither an OLE2 stream, nor an OOXML stream");
     }
 
     /**

Modified: poi/trunk/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java?rev=1804854&r1=1804853&r2=1804854&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java Fri Aug 11 20:47:48 2017
@@ -21,7 +21,6 @@ import java.io.File;
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.InputStream;
-import java.io.PushbackInputStream;
 import java.util.ArrayList;
 import java.util.Iterator;
 
@@ -45,8 +44,8 @@ import org.apache.poi.poifs.crypt.Decryp
 import org.apache.poi.poifs.crypt.EncryptionInfo;
 import org.apache.poi.poifs.filesystem.DirectoryEntry;
 import org.apache.poi.poifs.filesystem.DirectoryNode;
-import org.apache.poi.poifs.filesystem.DocumentFactoryHelper;
 import org.apache.poi.poifs.filesystem.Entry;
+import org.apache.poi.poifs.filesystem.FileMagic;
 import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
 import org.apache.poi.poifs.filesystem.NotOLE2FileException;
 import org.apache.poi.poifs.filesystem.OPOIFSFileSystem;
@@ -175,21 +174,20 @@ public class ExtractorFactory {
      }
 
     public static POITextExtractor createExtractor(InputStream inp) throws IOException, OpenXML4JException, XmlException {
-        // Figure out the kind of stream
-        // If clearly doesn't do mark/reset, wrap up
-        if (! inp.markSupported()) {
-            inp = new PushbackInputStream(inp, 8);
-        }
+        InputStream is = FileMagic.prepareToCheckMagic(inp);
 
-        if (NPOIFSFileSystem.hasPOIFSHeader(inp)) {
-            NPOIFSFileSystem fs = new NPOIFSFileSystem(inp);
+        FileMagic fm = FileMagic.valueOf(is);
+        
+        switch (fm) {
+        case OLE2:
+            NPOIFSFileSystem fs = new NPOIFSFileSystem(is);
             boolean isEncrypted = fs.getRoot().hasEntry(Decryptor.DEFAULT_POIFS_ENTRY); 
             return isEncrypted ? createEncyptedOOXMLExtractor(fs) : createExtractor(fs);
+        case OOXML:
+            return createExtractor(OPCPackage.open(is));
+        default:
+            throw new IllegalArgumentException("Your InputStream was neither an OLE2 stream, nor an OOXML stream");
         }
-        if (DocumentFactoryHelper.hasOOXMLHeader(inp)) {
-            return createExtractor(OPCPackage.open(inp));
-        }
-        throw new IllegalArgumentException("Your InputStream was neither an OLE2 stream, nor an OOXML stream");
     }
 
     /**

Modified: poi/trunk/src/ooxml/java/org/apache/poi/openxml4j/opc/internal/ZipHelper.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/openxml4j/opc/internal/ZipHelper.java?rev=1804854&r1=1804853&r2=1804854&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/openxml4j/opc/internal/ZipHelper.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/openxml4j/opc/internal/ZipHelper.java Fri Aug 11 20:47:48 2017
@@ -22,7 +22,6 @@ import java.io.FileInputStream;
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.InputStream;
-import java.io.PushbackInputStream;
 import java.net.URI;
 import java.net.URISyntaxException;
 import java.util.Enumeration;
@@ -38,12 +37,11 @@ import org.apache.poi.openxml4j.opc.Pack
 import org.apache.poi.openxml4j.opc.ZipPackage;
 import org.apache.poi.openxml4j.util.ZipSecureFile;
 import org.apache.poi.openxml4j.util.ZipSecureFile.ThresholdInputStream;
-import org.apache.poi.poifs.common.POIFSConstants;
-import org.apache.poi.poifs.storage.HeaderBlockConstants;
-import org.apache.poi.util.IOUtils;
-import org.apache.poi.util.LittleEndian;
+import org.apache.poi.poifs.filesystem.FileMagic;
+import org.apache.poi.util.Internal;
 import org.apache.poi.util.Removal;
 
+@Internal
 public final class ZipHelper {
     /**
      * Forward slash use to convert part name between OPC and zip item naming
@@ -172,59 +170,29 @@ public final class ZipHelper {
      * Warning - this will consume the first few bytes of the stream,
      *  you should push-back or reset the stream after use!
      */
-    public static void verifyZipHeader(InputStream stream) 
-            throws NotOfficeXmlFileException, IOException {
-        // Grab the first 8 bytes
-        byte[] data = new byte[8];
-        IOUtils.readFully(stream, data);
-        
-        // OLE2?
-        long signature = LittleEndian.getLong(data);
-        if (signature == HeaderBlockConstants._signature) {
+    public static void verifyZipHeader(InputStream stream) throws NotOfficeXmlFileException, IOException {
+        InputStream is = FileMagic.prepareToCheckMagic(stream);
+        FileMagic fm = FileMagic.valueOf(is);
+
+        switch (fm) {
+        case OLE2:
             throw new OLE2NotOfficeXmlFileException(
                 "The supplied data appears to be in the OLE2 Format. " +
                 "You are calling the part of POI that deals with OOXML "+
                 "(Office Open XML) Documents. You need to call a different " +
                 "part of POI to process this data (eg HSSF instead of XSSF)");
-        }
-        
-        // Raw XML?
-        byte[] RAW_XML_FILE_HEADER = POIFSConstants.RAW_XML_FILE_HEADER;
-        if (data[0] == RAW_XML_FILE_HEADER[0] &&
-            data[1] == RAW_XML_FILE_HEADER[1] &&
-            data[2] == RAW_XML_FILE_HEADER[2] &&
-            data[3] == RAW_XML_FILE_HEADER[3] &&
-            data[4] == RAW_XML_FILE_HEADER[4]) {
+        case XML:
             throw new NotOfficeXmlFileException(
                 "The supplied data appears to be a raw XML file. " +
                 "Formats such as Office 2003 XML are not supported");
+        default:
+        case OOXML:
+        case UNKNOWN:
+            // Don't check for a Zip header, as to maintain backwards
+            //  compatibility we need to let them seek over junk at the
+            //  start before beginning processing.
+            break;
         }
-
-        // Don't check for a Zip header, as to maintain backwards
-        //  compatibility we need to let them seek over junk at the
-        //  start before beginning processing.
-        
-        // Put things back
-        if (stream instanceof PushbackInputStream) {
-            ((PushbackInputStream)stream).unread(data);
-        } else if (stream.markSupported()) {
-            stream.reset();
-        } else if (stream instanceof FileInputStream) {
-            // File open check, about to be closed, nothing to do
-        } else {
-            // Oh dear... I hope you know what you're doing!
-        }
-    }
-    
-    private static InputStream prepareToCheckHeader(InputStream stream) {
-        if (stream instanceof PushbackInputStream) {
-            return stream;
-        }
-        if (stream.markSupported()) {
-            stream.mark(8);
-            return stream;
-        }
-        return new PushbackInputStream(stream, 8);
     }
 
     /**
@@ -237,7 +205,7 @@ public final class ZipHelper {
     @SuppressWarnings("resource")
     public static ThresholdInputStream openZipStream(InputStream stream) throws IOException {
         // Peek at the first few bytes to sanity check
-        InputStream checkedStream = prepareToCheckHeader(stream);
+        InputStream checkedStream = FileMagic.prepareToCheckMagic(stream);
         verifyZipHeader(checkedStream);
         
         // Open as a proper zip stream

Modified: poi/trunk/src/ooxml/java/org/apache/poi/openxml4j/util/ZipSecureFile.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/openxml4j/util/ZipSecureFile.java?rev=1804854&r1=1804853&r2=1804854&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/openxml4j/util/ZipSecureFile.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/openxml4j/util/ZipSecureFile.java Fri Aug 11 20:47:48 2017
@@ -198,10 +198,11 @@ public class ZipSecureFile extends ZipFi
 
     public static class ThresholdInputStream extends PushbackInputStream {
         long counter = 0;
+        long markPos = 0;
         ThresholdInputStream cis;
 
         public ThresholdInputStream(InputStream is, ThresholdInputStream cis) {
-            super(is,1);
+            super(is);
             this.cis = cis;
         }
 
@@ -225,14 +226,15 @@ public class ZipSecureFile extends ZipFi
 
         @Override
         public long skip(long n) throws IOException {
-            counter = 0;
-            return in.skip(n);
+            long s = in.skip(n);
+            counter += s;
+            return s;
         }
 
         @Override
         public synchronized void reset() throws IOException {
-            counter = 0;
-            in.reset();
+            counter = markPos;
+            super.reset();
         }
 
         public void advance(int advance) throws IOException {
@@ -263,10 +265,10 @@ public class ZipSecureFile extends ZipFi
             }
 
             // one of the limits was reached, report it
-            throw new IOException("Zip bomb detected! The file would exceed the max. ratio of compressed file size to the size of the expanded data. "
-                    + "This may indicate that the file is used to inflate memory usage and thus could pose a security risk. "
-                    + "You can adjust this limit via ZipSecureFile.setMinInflateRatio() if you need to work with files which exceed this limit. "
-                    + "Counter: " + counter + ", cis.counter: " + cis.counter + ", ratio: " + (((double)cis.counter)/counter)
+            throw new IOException("Zip bomb detected! The file would exceed the max. ratio of compressed file size to the size of the expanded data.\n"
+                    + "This may indicate that the file is used to inflate memory usage and thus could pose a security risk.\n"
+                    + "You can adjust this limit via ZipSecureFile.setMinInflateRatio() if you need to work with files which exceed this limit.\n"
+                    + "Counter: " + counter + ", cis.counter: " + cis.counter + ", ratio: " + ratio + "\n"
                     + "Limits: MIN_INFLATE_RATIO: " + MIN_INFLATE_RATIO);
         }
 
@@ -322,6 +324,7 @@ public class ZipSecureFile extends ZipFi
 
         @Override
         public synchronized void mark(int readlimit) {
+            markPos = counter;
             in.mark(readlimit);
         }
     }

Modified: poi/trunk/src/ooxml/java/org/apache/poi/ss/usermodel/WorkbookFactory.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/ss/usermodel/WorkbookFactory.java?rev=1804854&r1=1804853&r2=1804854&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/ss/usermodel/WorkbookFactory.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/ss/usermodel/WorkbookFactory.java Fri Aug 11 20:47:48 2017
@@ -16,11 +16,11 @@
 ==================================================================== */
 package org.apache.poi.ss.usermodel;
 
+import java.io.BufferedInputStream;
 import java.io.File;
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.InputStream;
-import java.io.PushbackInputStream;
 
 import org.apache.poi.EmptyFileException;
 import org.apache.poi.EncryptedDocumentException;
@@ -32,6 +32,7 @@ import org.apache.poi.openxml4j.opc.Pack
 import org.apache.poi.poifs.crypt.Decryptor;
 import org.apache.poi.poifs.filesystem.DirectoryNode;
 import org.apache.poi.poifs.filesystem.DocumentFactoryHelper;
+import org.apache.poi.poifs.filesystem.FileMagic;
 import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
 import org.apache.poi.poifs.filesystem.OfficeXmlFileException;
 import org.apache.poi.poifs.filesystem.POIFSFileSystem;
@@ -127,7 +128,7 @@ public class WorkbookFactory {
      *  the given InputStream.
      *
      * <p>Your input stream MUST either support mark/reset, or
-     *  be wrapped as a {@link PushbackInputStream}! Note that
+     *  be wrapped as a {@link BufferedInputStream}! Note that
      *  using an {@link InputStream} has a higher memory footprint
      *  than using a {@link File}.</p>
      *
@@ -150,16 +151,15 @@ public class WorkbookFactory {
 
     /**
      * Creates the appropriate HSSFWorkbook / XSSFWorkbook from
-     *  the given InputStream, which may be password protected.
-     * <p>Your input stream MUST either support mark/reset, or
-     *  be wrapped as a {@link PushbackInputStream}! Note that
-     *  using an {@link InputStream} has a higher memory footprint
-     *  than using a {@link File}.</p>
+     *  the given InputStream, which may be password protected.<p>
+     *  
+     * Note that using an {@link InputStream} has a higher memory footprint
+     *  than using a {@link File}.<p>
      *
-     * <p>Note that in order to properly release resources the
+     * Note that in order to properly release resources the
      *  Workbook should be closed after use. Note also that loading
      *  from an InputStream requires more memory than loading
-     *  from a File, so prefer {@link #create(File)} where possible.</p>
+     *  from a File, so prefer {@link #create(File)} where possible.
      *
      *  @param inp The {@link InputStream} to read data from.
      *  @param password The password that should be used or null if no password is necessary.
@@ -172,23 +172,19 @@ public class WorkbookFactory {
      *  @throws EmptyFileException If an empty stream is given
      */
     public static Workbook create(InputStream inp, String password) throws IOException, InvalidFormatException, EncryptedDocumentException {
-        // If clearly doesn't do mark/reset, wrap up
-        if (! inp.markSupported()) {
-            inp = new PushbackInputStream(inp, 8);
-        }
-
-        // Ensure that there is at least some data there
-        byte[] header8 = IOUtils.peekFirst8Bytes(inp);
-
-        // Try to create
-        if (NPOIFSFileSystem.hasPOIFSHeader(header8)) {
-            NPOIFSFileSystem fs = new NPOIFSFileSystem(inp);
+        InputStream is = FileMagic.prepareToCheckMagic(inp);
+        
+        FileMagic fm = FileMagic.valueOf(is);
+        
+        switch (fm) {
+        case OLE2:
+            NPOIFSFileSystem fs = new NPOIFSFileSystem(is);
             return create(fs, password);
+        case OOXML:
+            return new XSSFWorkbook(OPCPackage.open(is));
+        default:
+            throw new InvalidFormatException("Your InputStream was neither an OLE2 stream, nor an OOXML stream");
         }
-        if (DocumentFactoryHelper.hasOOXMLHeader(inp)) {
-            return new XSSFWorkbook(OPCPackage.open(inp));
-        }
-        throw new InvalidFormatException("Your InputStream was neither an OLE2 stream, nor an OOXML stream");
     }
 
     /**

Modified: poi/trunk/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFObjectData.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFObjectData.java?rev=1804854&r1=1804853&r2=1804854&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFObjectData.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFObjectData.java Fri Aug 11 20:47:48 2017
@@ -20,7 +20,6 @@ package org.apache.poi.xssf.usermodel;
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
-import java.io.PushbackInputStream;
 
 import javax.xml.namespace.QName;
 
@@ -29,7 +28,7 @@ import org.apache.poi.POIXMLException;
 import org.apache.poi.openxml4j.opc.PackagePart;
 import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
 import org.apache.poi.poifs.filesystem.DirectoryEntry;
-import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
+import org.apache.poi.poifs.filesystem.FileMagic;
 import org.apache.poi.poifs.filesystem.POIFSFileSystem;
 import org.apache.poi.ss.usermodel.ObjectData;
 import org.apache.poi.util.IOUtils;
@@ -161,17 +160,8 @@ public class XSSFObjectData extends XSSF
         InputStream is = null;
         try {
             is = getObjectPart().getInputStream();
-
-            // If clearly doesn't do mark/reset, wrap up
-            if (! is.markSupported()) {
-                is = new PushbackInputStream(is, 8);
-            }
-
-            // Ensure that there is at least some data there
-            byte[] header8 = IOUtils.peekFirst8Bytes(is);
-
-            // Try to create
-            return NPOIFSFileSystem.hasPOIFSHeader(header8);
+            is = FileMagic.prepareToCheckMagic(is);
+            return FileMagic.valueOf(is) == FileMagic.OLE2;
         } catch (IOException e) {
             LOG.log(POILogger.WARN, "can't determine if directory entry exists", e);
             return false;

Modified: poi/trunk/src/ooxml/testcases/org/apache/poi/TestDetectAsOOXML.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/TestDetectAsOOXML.java?rev=1804854&r1=1804853&r2=1804854&view=diff
==============================================================================
--- poi/trunk/src/ooxml/testcases/org/apache/poi/TestDetectAsOOXML.java (original)
+++ poi/trunk/src/ooxml/testcases/org/apache/poi/TestDetectAsOOXML.java Fri Aug 11 20:47:48 2017
@@ -19,68 +19,70 @@
 
 package org.apache.poi;
 
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
 import java.io.ByteArrayInputStream;
+import java.io.IOException;
 import java.io.InputStream;
-import java.io.PushbackInputStream;
-import java.util.Arrays;
-
-import junit.framework.TestCase;
 
 import org.apache.poi.hssf.HSSFTestDataSamples;
+import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
 import org.apache.poi.openxml4j.opc.OPCPackage;
 import org.apache.poi.poifs.filesystem.DocumentFactoryHelper;
+import org.apache.poi.poifs.filesystem.FileMagic;
+import org.apache.poi.util.IOUtils;
+import org.junit.Test;
 
 /**
  * Class to test that HXF correctly detects OOXML
  *  documents
  */
-public class TestDetectAsOOXML extends TestCase
-{
-	public void testOpensProperly() throws Exception
-	{
+public class TestDetectAsOOXML {
+    @Test
+	public void testOpensProperly() throws IOException, InvalidFormatException {
         OPCPackage.open(HSSFTestDataSamples.openSampleFileStream("sample.xlsx"));
 	}
 	
-	public void testDetectAsPOIFS() throws Exception {
-		InputStream in;
-		
-		// ooxml file is
-		in = new PushbackInputStream(
-				HSSFTestDataSamples.openSampleFileStream("SampleSS.xlsx"), 10
-		);
-		assertTrue(DocumentFactoryHelper.hasOOXMLHeader(in));
-		in.close();
-		
-		// xls file isn't
-		in = new PushbackInputStream(
-				HSSFTestDataSamples.openSampleFileStream("SampleSS.xls"), 10
-		);
-		assertFalse(DocumentFactoryHelper.hasOOXMLHeader(in));
-		in.close();
-		
-		// text file isn't
-		in = new PushbackInputStream(
-				HSSFTestDataSamples.openSampleFileStream("SampleSS.txt"), 10
-		);
-		assertFalse(DocumentFactoryHelper.hasOOXMLHeader(in));
-		in.close();
+    @Test
+	public void testDetectAsPOIFS() throws IOException {
+	    Object fileAndMagic[][] = {
+            { "SampleSS.xlsx", FileMagic.OOXML },
+            { "SampleSS.xls", FileMagic.OLE2 },
+            { "SampleSS.txt", FileMagic.UNKNOWN }
+	    };
+
+	    for (Object fm[] : fileAndMagic) {
+	        InputStream is = HSSFTestDataSamples.openSampleFileStream((String)fm[0]);
+	        is = FileMagic.prepareToCheckMagic(is);
+	        FileMagic act = FileMagic.valueOf(is);
+	        
+	        if (act == FileMagic.OOXML) {
+	            assertTrue(DocumentFactoryHelper.hasOOXMLHeader(is));
+	        }
+	        
+	        assertEquals("file magic failed for "+fm[0], fm[1], act);
+	        is.close();
+	    }
 	}
     
+    @Test
     public void testFileCorruption() throws Exception {
 	    
 	    // create test InputStream
-	    byte[] testData = { (byte)1, (byte)2, (byte)3 };
+	    byte[] testData = { 1, 2, 3 };
         ByteArrayInputStream testInput = new ByteArrayInputStream(testData);
+        InputStream is = FileMagic.prepareToCheckMagic(testInput);
         
         // detect header
-        InputStream in = new PushbackInputStream(testInput, 10);
-        assertFalse(DocumentFactoryHelper.hasOOXMLHeader(in));
+        assertFalse(DocumentFactoryHelper.hasOOXMLHeader(is));
         
         // check if InputStream is still intact
-        byte[] test = new byte[3];
-        assertEquals(3, in.read(test));
-        assertTrue(Arrays.equals(testData, test));
-        assertEquals(-1, in.read());
-        in.close();
+        byte[] act = IOUtils.toByteArray(is);
+        assertArrayEquals(testData, act);
+        assertEquals(-1, is.read());
+        is.close();
 	}
 }

Modified: poi/trunk/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestPackage.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestPackage.java?rev=1804854&r1=1804853&r2=1804854&view=diff
==============================================================================
--- poi/trunk/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestPackage.java (original)
+++ poi/trunk/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestPackage.java Fri Aug 11 20:47:48 2017
@@ -17,11 +17,50 @@
 
 package org.apache.poi.openxml4j.opc;
 
-import org.apache.poi.*;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.io.BufferedInputStream;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.PushbackInputStream;
+import java.lang.reflect.InvocationTargetException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.Enumeration;
+import java.util.HashMap;
+import java.util.List;
+import java.util.TreeMap;
+import java.util.regex.Pattern;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipFile;
+import java.util.zip.ZipOutputStream;
+
+import org.apache.poi.EncryptedDocumentException;
+import org.apache.poi.POIDataSamples;
+import org.apache.poi.POITestCase;
+import org.apache.poi.POITextExtractor;
+import org.apache.poi.POIXMLException;
+import org.apache.poi.UnsupportedFileFormatException;
 import org.apache.poi.extractor.ExtractorFactory;
 import org.apache.poi.hssf.HSSFTestDataSamples;
 import org.apache.poi.openxml4j.OpenXML4JTestDataSamples;
-import org.apache.poi.openxml4j.exceptions.*;
+import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
+import org.apache.poi.openxml4j.exceptions.InvalidOperationException;
+import org.apache.poi.openxml4j.exceptions.NotOfficeXmlFileException;
+import org.apache.poi.openxml4j.exceptions.ODFNotOfficeXmlFileException;
+import org.apache.poi.openxml4j.exceptions.OLE2NotOfficeXmlFileException;
+import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
 import org.apache.poi.openxml4j.opc.internal.ContentTypeManager;
 import org.apache.poi.openxml4j.opc.internal.FileHelper;
 import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart;
@@ -29,7 +68,11 @@ import org.apache.poi.openxml4j.opc.inte
 import org.apache.poi.openxml4j.util.ZipSecureFile;
 import org.apache.poi.ss.usermodel.Workbook;
 import org.apache.poi.ss.usermodel.WorkbookFactory;
-import org.apache.poi.util.*;
+import org.apache.poi.util.DocumentHelper;
+import org.apache.poi.util.IOUtils;
+import org.apache.poi.util.POILogFactory;
+import org.apache.poi.util.POILogger;
+import org.apache.poi.util.TempFile;
 import org.apache.poi.xssf.XSSFTestDataSamples;
 import org.apache.xmlbeans.XmlException;
 import org.junit.Ignore;
@@ -39,21 +82,6 @@ import org.w3c.dom.Element;
 import org.w3c.dom.NodeList;
 import org.xml.sax.SAXException;
 
-import java.io.*;
-import java.lang.reflect.InvocationTargetException;
-import java.net.URI;
-import java.net.URISyntaxException;
-import java.util.Enumeration;
-import java.util.HashMap;
-import java.util.List;
-import java.util.TreeMap;
-import java.util.regex.Pattern;
-import java.util.zip.ZipEntry;
-import java.util.zip.ZipFile;
-import java.util.zip.ZipOutputStream;
-
-import static org.junit.Assert.*;
-
 public final class TestPackage {
     private static final POILogger logger = POILogFactory.getLogger(TestPackage.class);
 
@@ -947,20 +975,32 @@ public final class TestPackage {
     }
     
     // bug 60128
-    @Test
+    @Test(expected=NotOfficeXmlFileException.class)
     public void testCorruptFile() throws IOException, InvalidFormatException {
-        OPCPackage pkg = null;
         File file = OpenXML4JTestDataSamples.getSampleFile("invalid.xlsx");
+        OPCPackage.open(file, PackageAccess.READ);
+    }
+
+    // bug 61381
+    @Test
+    public void testTooShortFilterStreams() throws IOException, InvalidFormatException {
+        File xssf = OpenXML4JTestDataSamples.getSampleFile("sample.xlsx");
+        File hssf = POIDataSamples.getSpreadSheetInstance().getFile("SampleSS.xls");
+        
+        InputStream isList[] = {
+            new PushbackInputStream(new FileInputStream(xssf), 2),
+            new BufferedInputStream(new FileInputStream(xssf), 2),
+            new PushbackInputStream(new FileInputStream(hssf), 2),
+            new BufferedInputStream(new FileInputStream(hssf), 2),
+        };
+        
         try {
-            pkg = OPCPackage.open(file, PackageAccess.READ);
-        } catch (NotOfficeXmlFileException e) {
-            /*System.out.println(e.getClass().getName());
-            System.out.println(e.getMessage());
-            e.printStackTrace();*/
-            // ignore exception
+            for (InputStream is : isList) {
+                WorkbookFactory.create(is).close();
+            }
         } finally {
-            if (pkg != null) {
-                pkg.close();
+            for (InputStream is : isList) {
+                IOUtils.closeQuietly(is);
             }
         }
     }

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocumentCore.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocumentCore.java?rev=1804854&r1=1804853&r2=1804854&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocumentCore.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocumentCore.java Fri Aug 11 20:47:48 2017
@@ -20,7 +20,6 @@ package org.apache.poi.hwpf;
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
-import java.io.PushbackInputStream;
 import java.security.GeneralSecurityException;
 
 import org.apache.poi.EncryptedDocumentException;
@@ -47,6 +46,7 @@ import org.apache.poi.poifs.filesystem.D
 import org.apache.poi.poifs.filesystem.DirectoryNode;
 import org.apache.poi.poifs.filesystem.DocumentEntry;
 import org.apache.poi.poifs.filesystem.DocumentInputStream;
+import org.apache.poi.poifs.filesystem.FileMagic;
 import org.apache.poi.poifs.filesystem.POIFSFileSystem;
 import org.apache.poi.util.BoundedInputStream;
 import org.apache.poi.util.IOUtils;
@@ -116,22 +116,14 @@ public abstract class HWPFDocumentCore e
      *  POIFSFileSystem from it, and returns that.
      */
     public static POIFSFileSystem verifyAndBuildPOIFS(InputStream istream) throws IOException {
-    	// Open a PushbackInputStream, so we can peek at the first few bytes
-    	PushbackInputStream pis = new PushbackInputStream(istream,6);
-    	byte[] first6 = IOUtils.toByteArray(pis, 6);
-
-    	// Does it start with {\rtf ? If so, it's really RTF
-    	if(first6[0] == '{' && first6[1] == '\\' && first6[2] == 'r'
-    		&& first6[3] == 't' && first6[4] == 'f') {
-    		throw new IllegalArgumentException("The document is really a RTF file");
-    	} else if(first6[0] == '%' && first6[1] == 'P' && first6[2] == 'D' && first6[3] == 'F' ) {
-    		throw new IllegalArgumentException("The document is really a PDF file");
-    	}
-
-    	// OK, so it's neither RTF nor PDF
-    	// Open a POIFSFileSystem on the (pushed back) stream
-    	pis.unread(first6);
-    	return new POIFSFileSystem(pis);
+        InputStream is = FileMagic.prepareToCheckMagic(istream);
+        FileMagic fm = FileMagic.valueOf(is);
+
+        if (fm != FileMagic.OLE2) {
+            throw new IllegalArgumentException("The document is really a "+fm+" file");
+        }
+
+        return new POIFSFileSystem(is);
     }
 
     /**

Modified: poi/trunk/src/testcases/org/apache/poi/poifs/filesystem/TestOfficeXMLException.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/testcases/org/apache/poi/poifs/filesystem/TestOfficeXMLException.java?rev=1804854&r1=1804853&r2=1804854&view=diff
==============================================================================
--- poi/trunk/src/testcases/org/apache/poi/poifs/filesystem/TestOfficeXMLException.java (original)
+++ poi/trunk/src/testcases/org/apache/poi/poifs/filesystem/TestOfficeXMLException.java Fri Aug 11 20:47:48 2017
@@ -22,7 +22,6 @@ import static org.apache.poi.POITestCase
 import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.io.InputStream;
-import java.io.PushbackInputStream;
 import java.util.Arrays;
 
 import org.apache.poi.hssf.HSSFTestDataSamples;
@@ -86,8 +85,9 @@ public class TestOfficeXMLException exte
 		// text file isn't
 		confirmIsPOIFS("SampleSS.txt", false);
 	}
+	
 	private void confirmIsPOIFS(String sampleFileName, boolean expectedResult) throws IOException {
-		InputStream in  = new PushbackInputStream(openSampleStream(sampleFileName), 10);
+		InputStream in  = FileMagic.prepareToCheckMagic(openSampleStream(sampleFileName));
 		try {
     		boolean actualResult;
     		try {
@@ -108,7 +108,7 @@ public class TestOfficeXMLException exte
         InputStream testInput = new ByteArrayInputStream(testData);
         
         // detect header
-        InputStream in = new PushbackInputStream(testInput, 10);
+        InputStream in = FileMagic.prepareToCheckMagic(testInput);
         assertFalse(POIFSFileSystem.hasPOIFSHeader(in));
         
         // check if InputStream is still intact
@@ -126,7 +126,7 @@ public class TestOfficeXMLException exte
         InputStream testInput = new ByteArrayInputStream(testData);
         
         // detect header
-        InputStream in = new PushbackInputStream(testInput, 10);
+        InputStream in = FileMagic.prepareToCheckMagic(testInput);
         assertFalse(OPOIFSFileSystem.hasPOIFSHeader(in));
 
         // check if InputStream is still intact



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@poi.apache.org
For additional commands, e-mail: commits-help@poi.apache.org


Mime
View raw message