poi-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kiwiwi...@apache.org
Subject svn commit: r1829653 [1/2] - in /poi: site/src/documentation/content/xdocs/ site/src/documentation/content/xdocs/slideshow/ trunk/src/integrationtest/org/apache/poi/ trunk/src/integrationtest/org/apache/poi/stress/ trunk/src/java/org/apache/poi/ trunk/...
Date Fri, 20 Apr 2018 12:53:00 GMT
Author: kiwiwings
Date: Fri Apr 20 12:52:59 2018
New Revision: 1829653

URL: http://svn.apache.org/viewvc?rev=1829653&view=rev
Log:
#62319 - Decommission XSLF-/PowerPointExtractor

Modified:
    poi/site/src/documentation/content/xdocs/slideshow/quick-guide.xml
    poi/site/src/documentation/content/xdocs/status.xml
    poi/site/src/documentation/content/xdocs/text-extraction.xml
    poi/trunk/src/integrationtest/org/apache/poi/TestAllFiles.java
    poi/trunk/src/integrationtest/org/apache/poi/stress/XSLFFileHandler.java
    poi/trunk/src/java/org/apache/poi/POIOLE2TextExtractor.java
    poi/trunk/src/java/org/apache/poi/POITextExtractor.java
    poi/trunk/src/java/org/apache/poi/extractor/OLE2ExtractorFactory.java
    poi/trunk/src/java/org/apache/poi/poifs/filesystem/DocumentFactoryHelper.java
    poi/trunk/src/java/org/apache/poi/sl/extractor/SlideShowExtractor.java
    poi/trunk/src/java/org/apache/poi/sl/usermodel/SlideShow.java
    poi/trunk/src/java/org/apache/poi/sl/usermodel/SlideShowFactory.java
    poi/trunk/src/ooxml/java/org/apache/poi/POIXMLTextExtractor.java
    poi/trunk/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java
    poi/trunk/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFPowerPointExtractor.java
    poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/XMLSlideShow.java
    poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFPlaceholderDetails.java
    poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFSlide.java
    poi/trunk/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java
    poi/trunk/src/ooxml/testcases/org/apache/poi/poifs/crypt/TestHxxFEncryption.java
    poi/trunk/src/ooxml/testcases/org/apache/poi/xslf/TestXSLFBugs.java
    poi/trunk/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java
    poi/trunk/src/scratchpad/src/org/apache/poi/extractor/OLE2ScratchpadExtractorFactory.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFSlideShow.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFSlideShowFactory.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hslf/usermodel/HSLFSlideShowImpl.java
    poi/trunk/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java
    poi/trunk/src/scratchpad/testcases/org/apache/poi/hslf/usermodel/TestBugs.java
    poi/trunk/test-data/slideshow/SampleShow.pptx

Modified: poi/site/src/documentation/content/xdocs/slideshow/quick-guide.xml
URL: http://svn.apache.org/viewvc/poi/site/src/documentation/content/xdocs/slideshow/quick-guide.xml?rev=1829653&r1=1829652&r2=1829653&view=diff
==============================================================================
--- poi/site/src/documentation/content/xdocs/slideshow/quick-guide.xml (original)
+++ poi/site/src/documentation/content/xdocs/slideshow/quick-guide.xml Fri Apr 20 12:52:59 2018
@@ -31,10 +31,9 @@
     <body>
         <section><title>Basic Text Extraction</title>
         <p>For basic text extraction, make use of 
-<code>org.apache.poi.hslf.extractor.PowerPointExtractor</code>. It accepts a file or an input
-stream. The <code>getText()</code> method can be used to get the text from the slides, and the <code>getNotes()</code> method can be used to get the text
-from the notes. Finally, <code>getText(true,true)</code> will get the text
-from both.
+			<code>org.apache.poi.sl.extractor.SlideShowExtractor</code>.
+			It accepts a slideshow which can be created from a file or stream via <code>org.apache.poi.sl.usermodel.SlideShowFactory</code>.
+			The <code>getText()</code> method can be used to get the text from the slides.
 		</p>
 		</section>
 		
@@ -121,7 +120,7 @@ The paragraph formatting is defined in t
   		<li><code>org.apache.poi.hslf.usermodel.HSLFTextRun</code>
   Holds a run of text, all having the same character stylings. It is possible to modify text, and/or text stylings.
   		</li>
-  		<li><code>org.apache.poi.hslf.extractor.PowerPointExtractor</code>
+  		<li><code>org.apache.poi.sl.extractor.SlideShowExtractor</code>
   Uses the model code to allow extraction of text from files
 		</li>
 		<li><code>org.apache.poi.hslf.extractor.QuickButCruddyTextExtractor</code>

Modified: poi/site/src/documentation/content/xdocs/status.xml
URL: http://svn.apache.org/viewvc/poi/site/src/documentation/content/xdocs/status.xml?rev=1829653&r1=1829652&r2=1829653&view=diff
==============================================================================
--- poi/site/src/documentation/content/xdocs/status.xml (original)
+++ poi/site/src/documentation/content/xdocs/status.xml Fri Apr 20 12:52:59 2018
@@ -68,6 +68,7 @@
         <summary-item>Provide new ooxml-schemas-1.4.jar</summary-item>
       </summary>
       <actions>
+        <action dev="PD" type="add" fixes-bug="62319" breaks-compatibility="true" module="SL Common">Decommission XSLF-/PowerPointExtractor</action>
         <action dev="PD" type="add" fixes-bug="62092" module="SL Common">Text not extracted from grouped text shapes in HSLF</action>
 		<action dev="PD" type="add" fixes-bug="62159" module="OPC">Support XML signature over windows certificate store</action>
         <action dev="PD" type="add" fixes-bug="57369" module="XDDF">Add support for major and minor units on chart axes</action>

Modified: poi/site/src/documentation/content/xdocs/text-extraction.xml
URL: http://svn.apache.org/viewvc/poi/site/src/documentation/content/xdocs/text-extraction.xml?rev=1829653&r1=1829652&r2=1829653&view=diff
==============================================================================
--- poi/site/src/documentation/content/xdocs/text-extraction.xml (original)
+++ poi/site/src/documentation/content/xdocs/text-extraction.xml Fri Apr 20 12:52:59 2018
@@ -107,11 +107,11 @@
     </section>
 
     <section><title>PowerPoint</title>
-     <p>For .ppt files, in scratchpad there is 
-      <em>org.apache.poi.hslf.extractor.PowerPointExtractor</em>, which 
+     <p>For .ppt and .pptx files, there is a common extractor
+      <em>org.apache.poi.sl.extractor.SlideShowExtractor</em>, which
       will return text for your slideshow, optionally restricted to just
-      slides text or notes text. For .pptx files, the class to use is
-      <em>org.apache.poi.xslf.extractor.XSLFPowerPointExtractor</em></p>
+      slides text or notes text. For .ppt files you need to add poi-scratchpad.jar,
+      and for .pptx files poi-ooxml.jar and its dependencies are needed.</p>
     </section>
 
     <section><title>Publisher</title>

Modified: poi/trunk/src/integrationtest/org/apache/poi/TestAllFiles.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/integrationtest/org/apache/poi/TestAllFiles.java?rev=1829653&r1=1829652&r2=1829653&view=diff
==============================================================================
--- poi/trunk/src/integrationtest/org/apache/poi/TestAllFiles.java (original)
+++ poi/trunk/src/integrationtest/org/apache/poi/TestAllFiles.java Fri Apr 20 12:52:59 2018
@@ -330,8 +330,6 @@ public class TestAllFiles {
     );
 
     private static final Set<String> IGNORED = unmodifiableHashSet(
-        // need JDK8+ - https://bugs.openjdk.java.net/browse/JDK-8038081
-        "slideshow/42474-2.ppt",
         // OPC handler works / XSSF handler fails
         "spreadsheet/57181.xlsm",
         "spreadsheet/61300.xls"//intentionally fuzzed -- used to cause infinite loop

Modified: poi/trunk/src/integrationtest/org/apache/poi/stress/XSLFFileHandler.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/integrationtest/org/apache/poi/stress/XSLFFileHandler.java?rev=1829653&r1=1829652&r2=1829653&view=diff
==============================================================================
--- poi/trunk/src/integrationtest/org/apache/poi/stress/XSLFFileHandler.java (original)
+++ poi/trunk/src/integrationtest/org/apache/poi/stress/XSLFFileHandler.java Fri Apr 20 12:52:59 2018
@@ -24,6 +24,7 @@ import java.io.FileInputStream;
 import java.io.InputStream;
 
 import org.apache.poi.extractor.ExtractorFactory;
+import org.apache.poi.sl.extractor.SlideShowExtractor;
 import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
 import org.apache.poi.xslf.usermodel.XMLSlideShow;
 import org.apache.poi.xslf.usermodel.XSLFSlideShow;
@@ -53,12 +54,19 @@ public class XSLFFileHandler extends Sli
         
         // additionally try the other getText() methods
 
-		try (XSLFPowerPointExtractor extractor = (XSLFPowerPointExtractor) ExtractorFactory.createExtractor(file)) {
+		try (SlideShowExtractor extractor = ExtractorFactory.createExtractor(file)) {
 			assertNotNull(extractor);
+			extractor.setSlidesByDefault(true);
+			extractor.setNotesByDefault(true);
+			extractor.setMasterByDefault(true);
 
-			assertNotNull(extractor.getText(true, true, true));
-			assertEquals("With all options disabled we should not get text",
-					"", extractor.getText(false, false, false));
+			assertNotNull(extractor.getText());
+
+			extractor.setSlidesByDefault(false);
+			extractor.setNotesByDefault(false);
+			extractor.setMasterByDefault(false);
+
+			assertEquals("With all options disabled we should not get text", "", extractor.getText());
 		}
     }
 

Modified: poi/trunk/src/java/org/apache/poi/POIOLE2TextExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/java/org/apache/poi/POIOLE2TextExtractor.java?rev=1829653&r1=1829652&r2=1829653&view=diff
==============================================================================
--- poi/trunk/src/java/org/apache/poi/POIOLE2TextExtractor.java (original)
+++ poi/trunk/src/java/org/apache/poi/POIOLE2TextExtractor.java Fri Apr 20 12:52:59 2018
@@ -105,6 +105,7 @@ public abstract class POIOLE2TextExtract
      *
      * @return the underlying POIDocument
      */
+    @Override
     public POIDocument getDocument() {
         return document;
     }

Modified: poi/trunk/src/java/org/apache/poi/POITextExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/java/org/apache/poi/POITextExtractor.java?rev=1829653&r1=1829652&r2=1829653&view=diff
==============================================================================
--- poi/trunk/src/java/org/apache/poi/POITextExtractor.java (original)
+++ poi/trunk/src/java/org/apache/poi/POITextExtractor.java Fri Apr 20 12:52:59 2018
@@ -74,4 +74,9 @@ public abstract class POITextExtractor i
 		    fsToClose.close();
 		}
 	}
+
+	/**
+	 * @return the processed document
+	 */
+	public abstract Object getDocument();
 }

Modified: poi/trunk/src/java/org/apache/poi/extractor/OLE2ExtractorFactory.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/java/org/apache/poi/extractor/OLE2ExtractorFactory.java?rev=1829653&r1=1829652&r2=1829653&view=diff
==============================================================================
--- poi/trunk/src/java/org/apache/poi/extractor/OLE2ExtractorFactory.java (original)
+++ poi/trunk/src/java/org/apache/poi/extractor/OLE2ExtractorFactory.java Fri Apr 20 12:52:59 2018
@@ -115,26 +115,23 @@ public class OLE2ExtractorFactory {
         return threadPreferEventExtractors.get();
     }
 
-    public static POIOLE2TextExtractor createExtractor(POIFSFileSystem fs) throws IOException {
-        // Only ever an OLE2 one from the root of the FS
-        return (POIOLE2TextExtractor)createExtractor(fs.getRoot());
+    public static <T extends POITextExtractor> T createExtractor(POIFSFileSystem fs) throws IOException {
+        return (T)createExtractor(fs.getRoot());
     }
-    public static POIOLE2TextExtractor createExtractor(NPOIFSFileSystem fs) throws IOException {
-        // Only ever an OLE2 one from the root of the FS
-        return (POIOLE2TextExtractor)createExtractor(fs.getRoot());
+    public static <T extends POITextExtractor> T createExtractor(NPOIFSFileSystem fs) throws IOException {
+        return (T)createExtractor(fs.getRoot());
     }
-    public static POIOLE2TextExtractor createExtractor(OPOIFSFileSystem fs) throws IOException {
-        // Only ever an OLE2 one from the root of the FS
-        return (POIOLE2TextExtractor)createExtractor(fs.getRoot());
+    public static <T extends POITextExtractor> T createExtractor(OPOIFSFileSystem fs) throws IOException {
+        return (T)createExtractor(fs.getRoot());
     }
 
-    public static POITextExtractor createExtractor(InputStream input) throws IOException {
+    public static <T extends POITextExtractor> T createExtractor(InputStream input) throws IOException {
         Class<?> cls = getOOXMLClass();
         if (cls != null) {
             // Use Reflection to get us the full OOXML-enabled version
             try {
                 Method m = cls.getDeclaredMethod("createExtractor", InputStream.class);
-                return (POITextExtractor)m.invoke(null, input);
+                return (T)m.invoke(null, input);
             } catch (IllegalArgumentException iae) {
                 throw iae;
             } catch (Exception e) {

Modified: poi/trunk/src/java/org/apache/poi/poifs/filesystem/DocumentFactoryHelper.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/java/org/apache/poi/poifs/filesystem/DocumentFactoryHelper.java?rev=1829653&r1=1829652&r2=1829653&view=diff
==============================================================================
--- poi/trunk/src/java/org/apache/poi/poifs/filesystem/DocumentFactoryHelper.java (original)
+++ poi/trunk/src/java/org/apache/poi/poifs/filesystem/DocumentFactoryHelper.java Fri Apr 20 12:52:59 2018
@@ -44,8 +44,30 @@ public class DocumentFactoryHelper {
      * @throws IOException If an error occurs while decrypting or if the password does not match
      */
     public static InputStream getDecryptedStream(final NPOIFSFileSystem fs, String password)
+    throws IOException {
+        // wrap the stream in a FilterInputStream to close the NPOIFSFileSystem
+        // as well when the resulting OPCPackage is closed
+        return new FilterInputStream(getDecryptedStream(fs.getRoot(), password)) {
+            @Override
+            public void close() throws IOException {
+                fs.close();
+                super.close();
+            }
+        };
+    }
+
+    /**
+     * Wrap the OLE2 data of the DirectoryNode into a decrypted stream by using
+     * the given password.
+     *
+     * @param root The OLE2 directory node for the document
+     * @param password The password, null if the default password should be used
+     * @return A stream for reading the decrypted data
+     * @throws IOException If an error occurs while decrypting or if the password does not match
+     */
+    public static InputStream getDecryptedStream(final DirectoryNode root, String password)
             throws IOException {
-        EncryptionInfo info = new EncryptionInfo(fs);
+        EncryptionInfo info = new EncryptionInfo(root);
         Decryptor d = Decryptor.getInstance(info);
 
         try {
@@ -58,21 +80,11 @@ public class DocumentFactoryHelper {
             }
 
             if (passwordCorrect) {
-                // wrap the stream in a FilterInputStream to close the NPOIFSFileSystem
-                // as well when the resulting OPCPackage is closed
-                return new FilterInputStream(d.getDataStream(fs.getRoot())) {
-                    @Override
-                    public void close() throws IOException {
-                        fs.close();
-
-                        super.close();
-                    }
-                };
+                return d.getDataStream(root);
+            } else if (password != null) {
+                throw new EncryptedDocumentException("Password incorrect");
             } else {
-                if (password != null)
-                    throw new EncryptedDocumentException("Password incorrect");
-                else
-                    throw new EncryptedDocumentException("The supplied spreadsheet is protected, but no password was supplied");
+                throw new EncryptedDocumentException("The supplied spreadsheet is protected, but no password was supplied");
             }
         } catch (GeneralSecurityException e) {
             throw new IOException(e);

Modified: poi/trunk/src/java/org/apache/poi/sl/extractor/SlideShowExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/java/org/apache/poi/sl/extractor/SlideShowExtractor.java?rev=1829653&r1=1829652&r2=1829653&view=diff
==============================================================================
--- poi/trunk/src/java/org/apache/poi/sl/extractor/SlideShowExtractor.java (original)
+++ poi/trunk/src/java/org/apache/poi/sl/extractor/SlideShowExtractor.java Fri Apr 20 12:52:59 2018
@@ -1,3 +1,20 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
 package org.apache.poi.sl.extractor;
 
 import java.util.ArrayList;
@@ -49,6 +66,16 @@ public class SlideShowExtractor<
     }
 
     /**
+     * Returns the opened document
+     *
+     * @return the opened document
+     */
+    @Override
+    public final Object getDocument() {
+        return slideshow.getPersistDocument();
+    }
+
+    /**
      * Should a call to getText() return slide text? Default is yes
      */
     public void setSlidesByDefault(final boolean slidesByDefault) {
@@ -219,7 +246,6 @@ public class SlideShowExtractor<
             return;
         }
         for (final P para : paraList) {
-            final int oldLen = sb.length();
             for (final TextRun tr : para) {
                 final String str = tr.getRawText().replace("\r", "");
                 final String newStr;

Modified: poi/trunk/src/java/org/apache/poi/sl/usermodel/SlideShow.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/java/org/apache/poi/sl/usermodel/SlideShow.java?rev=1829653&r1=1829652&r2=1829653&view=diff
==============================================================================
--- poi/trunk/src/java/org/apache/poi/sl/usermodel/SlideShow.java (original)
+++ poi/trunk/src/java/org/apache/poi/sl/usermodel/SlideShow.java Fri Apr 20 12:52:59 2018
@@ -126,4 +126,13 @@ public interface SlideShow<
      * @since POI 4.0.0
      */
     POITextExtractor getMetadataTextExtractor();
+
+    /**
+     * @return the instance which handles the persisting of the slideshow,
+     * which is either a subclass of {@link org.apache.poi.POIDocument}
+     * or {@link org.apache.poi.POIXMLDocument}
+     *
+     * @since POI 4.0.0
+     */
+    Object getPersistDocument();
 }

Modified: poi/trunk/src/java/org/apache/poi/sl/usermodel/SlideShowFactory.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/java/org/apache/poi/sl/usermodel/SlideShowFactory.java?rev=1829653&r1=1829652&r2=1829653&view=diff
==============================================================================
--- poi/trunk/src/java/org/apache/poi/sl/usermodel/SlideShowFactory.java (original)
+++ poi/trunk/src/java/org/apache/poi/sl/usermodel/SlideShowFactory.java Fri Apr 20 12:52:59 2018
@@ -60,13 +60,40 @@ public class SlideShowFactory {
      * @throws IOException if an error occurs while reading the data
      */
     public static SlideShow<?,?> create(final NPOIFSFileSystem fs, String password) throws IOException {
-        DirectoryNode root = fs.getRoot();
+        return create(fs.getRoot(), password);
+    }
 
+    /**
+     * Creates a SlideShow from the given DirectoryNode.
+     *
+     * @param root The {@link DirectoryNode} to start reading the document from
+     *
+     * @return The created SlideShow
+     *
+     * @throws IOException if an error occurs while reading the data
+     */
+    public static SlideShow<?,?> create(final DirectoryNode root) throws IOException {
+        return create(root, null);
+    }
+
+
+    /**
+     * Creates a SlideShow from the given DirectoryNode, which may
+     * be password protected
+     *
+     * @param root The {@link DirectoryNode} to start reading the document from
+     * @param password The password that should be used or null if no password is necessary.
+     *
+     * @return The created SlideShow
+     *
+     * @throws IOException if an error occurs while reading the data
+     */
+    public static SlideShow<?,?> create(final DirectoryNode root, String password) throws IOException {
         // Encrypted OOXML files go inside OLE2 containers, is this one?
         if (root.hasEntry(Decryptor.DEFAULT_POIFS_ENTRY)) {
             InputStream stream = null;
             try {
-                stream = DocumentFactoryHelper.getDecryptedStream(fs, password);
+                stream = DocumentFactoryHelper.getDecryptedStream(root, password);
 
                 return createXSLFSlideShow(stream);
             } finally {
@@ -82,7 +109,7 @@ public class SlideShowFactory {
             passwordSet = true;
         }
         try {
-            return createHSLFSlideShow(fs);
+            return createHSLFSlideShow(root);
         } finally {
             if (passwordSet) {
                 Biff8EncryptionKey.setCurrentUserPassword(null);

Modified: poi/trunk/src/ooxml/java/org/apache/poi/POIXMLTextExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/POIXMLTextExtractor.java?rev=1829653&r1=1829652&r2=1829653&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/POIXMLTextExtractor.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/POIXMLTextExtractor.java Fri Apr 20 12:52:59 2018
@@ -68,6 +68,7 @@ public abstract class POIXMLTextExtracto
 	 * 
 	 * @return the opened document
 	 */
+	@Override
 	public final POIXMLDocument getDocument() {
 		return _document;
 	}

Modified: poi/trunk/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java?rev=1829653&r1=1829652&r2=1829653&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java Fri Apr 20 12:52:59 2018
@@ -51,6 +51,7 @@ import org.apache.poi.poifs.filesystem.N
 import org.apache.poi.poifs.filesystem.OPOIFSFileSystem;
 import org.apache.poi.poifs.filesystem.OfficeXmlFileException;
 import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.poi.sl.extractor.SlideShowExtractor;
 import org.apache.poi.util.IOUtils;
 import org.apache.poi.util.NotImplemented;
 import org.apache.poi.util.POILogFactory;
@@ -58,6 +59,7 @@ import org.apache.poi.util.POILogger;
 import org.apache.poi.util.Removal;
 import org.apache.poi.xdgf.extractor.XDGFVisioExtractor;
 import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
+import org.apache.poi.xslf.usermodel.XMLSlideShow;
 import org.apache.poi.xslf.usermodel.XSLFRelation;
 import org.apache.poi.xslf.usermodel.XSLFSlideShow;
 import org.apache.poi.xssf.extractor.XSSFBEventBasedExcelExtractor;
@@ -127,20 +129,20 @@ public class ExtractorFactory {
          return OLE2ExtractorFactory.getPreferEventExtractor();
     }
 
-    public static POITextExtractor createExtractor(File f) throws IOException, OpenXML4JException, XmlException {
+    public static <T extends POITextExtractor> T createExtractor(File f) throws IOException, OpenXML4JException, XmlException {
         NPOIFSFileSystem fs = null;
         try {
             fs = new NPOIFSFileSystem(f);
             if (fs.getRoot().hasEntry(Decryptor.DEFAULT_POIFS_ENTRY)) {
-                return createEncryptedOOXMLExtractor(fs);
+                return (T)createEncryptedOOXMLExtractor(fs);
             }
-            POIOLE2TextExtractor extractor = createExtractor(fs);
+            POITextExtractor extractor = createExtractor(fs);
             extractor.setFilesystem(fs);
-            return extractor;
+            return (T)extractor;
         } catch (OfficeXmlFileException e) {
             // ensure file-handle release
             IOUtils.closeQuietly(fs);
-            return createExtractor(OPCPackage.open(f.toString(), PackageAccess.READ));
+            return (T)createExtractor(OPCPackage.open(f.toString(), PackageAccess.READ));
         } catch (NotOLE2FileException ne) {
             // ensure file-handle release
             IOUtils.closeQuietly(fs);
@@ -179,7 +181,7 @@ public class ExtractorFactory {
      * @throws XmlException If an XML parsing error occurs.
      * @throws IllegalArgumentException If no matching file type could be found.
      */
-    public static POIXMLTextExtractor createExtractor(OPCPackage pkg) throws IOException, OpenXML4JException, XmlException {
+    public static POITextExtractor createExtractor(OPCPackage pkg) throws IOException, OpenXML4JException, XmlException {
         try {
             // Check for the normal Office core document
             PackageRelationshipCollection core;
@@ -226,13 +228,13 @@ public class ExtractorFactory {
             // Is it XSLF?
             for (XSLFRelation rel : XSLFPowerPointExtractor.SUPPORTED_TYPES) {
                 if ( rel.getContentType().equals( contentType ) ) {
-                    return new XSLFPowerPointExtractor(pkg);
+                    return new SlideShowExtractor(new XMLSlideShow(pkg));
                 }
             }
      
             // special handling for SlideShow-Theme-files, 
             if (XSLFRelation.THEME_MANAGER.getContentType().equals(contentType)) {
-                return new XSLFPowerPointExtractor(new XSLFSlideShow(pkg));
+                return new SlideShowExtractor(new XMLSlideShow(pkg));
             }
 
             // How about xlsb?
@@ -252,28 +254,28 @@ public class ExtractorFactory {
         }
     }
 
-    public static POIOLE2TextExtractor createExtractor(POIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
-        return OLE2ExtractorFactory.createExtractor(fs);
+    public static <T extends POITextExtractor> T createExtractor(POIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
+        return createExtractor(fs.getRoot());
     }
-    public static POIOLE2TextExtractor createExtractor(NPOIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
-        return OLE2ExtractorFactory.createExtractor(fs);
+    public static <T extends POITextExtractor> T createExtractor(NPOIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
+        return createExtractor(fs.getRoot());
     }
-    public static POIOLE2TextExtractor createExtractor(OPOIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
-        return OLE2ExtractorFactory.createExtractor(fs);
+    public static <T extends POITextExtractor> T createExtractor(OPOIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
+        return createExtractor(fs.getRoot());
     }
 
-    public static POITextExtractor createExtractor(DirectoryNode poifsDir) throws IOException, OpenXML4JException, XmlException
+    public static <T extends POITextExtractor> T createExtractor(DirectoryNode poifsDir) throws IOException, OpenXML4JException, XmlException
     {
         // First, check for OOXML
         for (String entryName : poifsDir.getEntryNames()) {
             if (entryName.equals("Package")) {
                 OPCPackage pkg = OPCPackage.open(poifsDir.createDocumentInputStream("Package"));
-                return createExtractor(pkg);
+                return (T)createExtractor(pkg);
             }
         }
 
         // If not, ask the OLE2 code to check, with Scratchpad if possible
-        return OLE2ExtractorFactory.createExtractor(poifsDir);
+        return (T)OLE2ExtractorFactory.createExtractor(poifsDir);
     }
 
     /**
@@ -403,7 +405,7 @@ public class ExtractorFactory {
         throw new IllegalStateException("Not yet supported");
     }
     
-    private static POIXMLTextExtractor createEncryptedOOXMLExtractor(NPOIFSFileSystem fs)
+    private static POITextExtractor createEncryptedOOXMLExtractor(NPOIFSFileSystem fs)
     throws IOException {
         String pass = Biff8EncryptionKey.getCurrentUserPassword();
         if (pass == null) {

Modified: poi/trunk/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFPowerPointExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFPowerPointExtractor.java?rev=1829653&r1=1829652&r2=1829653&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFPowerPointExtractor.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFPowerPointExtractor.java Fri Apr 20 12:52:59 2018
@@ -37,7 +37,7 @@ import org.apache.xmlbeans.XmlException;
  * @deprecated use {@link SlideShowExtractor}
  */
 @Deprecated
-@Removal(version="4.2.0")
+@Removal(version="5.0.0")
 public class XSLFPowerPointExtractor extends POIXMLTextExtractor {
     public static final XSLFRelation[] SUPPORTED_TYPES = new XSLFRelation[]{
             XSLFRelation.MAIN, XSLFRelation.MACRO, XSLFRelation.MACRO_TEMPLATE,

Modified: poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/XMLSlideShow.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/XMLSlideShow.java?rev=1829653&r1=1829652&r2=1829653&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/XMLSlideShow.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/XMLSlideShow.java Fri Apr 20 12:52:59 2018
@@ -631,4 +631,9 @@ public class XMLSlideShow extends POIXML
     public POIXMLPropertiesTextExtractor getMetadataTextExtractor() {
         return new POIXMLPropertiesTextExtractor(this);
     }
+
+    @Override
+    public Object getPersistDocument() {
+        return this;
+    }
 }

Modified: poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFPlaceholderDetails.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFPlaceholderDetails.java?rev=1829653&r1=1829652&r2=1829653&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFPlaceholderDetails.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFPlaceholderDetails.java Fri Apr 20 12:52:59 2018
@@ -1,3 +1,20 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
 package org.apache.poi.xslf.usermodel;
 
 import static org.apache.poi.xslf.usermodel.XSLFShape.PML_NS;

Modified: poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFSlide.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFSlide.java?rev=1829653&r1=1829652&r2=1829653&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFSlide.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFSlide.java Fri Apr 20 12:52:59 2018
@@ -182,10 +182,18 @@ implements Slide<XSLFShape,XSLFTextParag
      */
     public XSLFCommentAuthors getCommentAuthorsPart() {
         if(_commentAuthors == null) {
+            // first scan the slide relations
             for (POIXMLDocumentPart p : getRelations()) {
                 if (p instanceof XSLFCommentAuthors) {
                     _commentAuthors = (XSLFCommentAuthors)p;
                     return _commentAuthors;
+                }
+            }
+            // then scan the presentation relations
+            for (POIXMLDocumentPart p : getSlideShow().getRelations()) {
+                if (p instanceof XSLFCommentAuthors) {
+                    _commentAuthors = (XSLFCommentAuthors)p;
+                    return _commentAuthors;
                 }
             }
         }

Modified: poi/trunk/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java?rev=1829653&r1=1829652&r2=1829653&view=diff
==============================================================================
--- poi/trunk/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java (original)
+++ poi/trunk/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java Fri Apr 20 12:52:59 2018
@@ -27,16 +27,15 @@ import static org.junit.Assert.fail;
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.IOException;
+import java.util.Locale;
 
 import org.apache.poi.POIDataSamples;
 import org.apache.poi.POIOLE2TextExtractor;
 import org.apache.poi.POITextExtractor;
-import org.apache.poi.POIXMLException;
 import org.apache.poi.POIXMLTextExtractor;
 import org.apache.poi.UnsupportedFileFormatException;
 import org.apache.poi.hdgf.extractor.VisioTextExtractor;
 import org.apache.poi.hpbf.extractor.PublisherTextExtractor;
-import org.apache.poi.hslf.extractor.PowerPointExtractor;
 import org.apache.poi.hsmf.extractor.OutlookTextExtactor;
 import org.apache.poi.hssf.HSSFTestDataSamples;
 import org.apache.poi.hssf.OldExcelFormatException;
@@ -44,18 +43,20 @@ import org.apache.poi.hssf.extractor.Eve
 import org.apache.poi.hssf.extractor.ExcelExtractor;
 import org.apache.poi.hwpf.extractor.Word6Extractor;
 import org.apache.poi.hwpf.extractor.WordExtractor;
+import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
 import org.apache.poi.openxml4j.opc.OPCPackage;
 import org.apache.poi.openxml4j.opc.PackageAccess;
 import org.apache.poi.poifs.filesystem.OPOIFSFileSystem;
 import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.poi.sl.extractor.SlideShowExtractor;
 import org.apache.poi.util.POILogFactory;
 import org.apache.poi.util.POILogger;
 import org.apache.poi.xdgf.extractor.XDGFVisioExtractor;
-import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
+import org.apache.poi.xssf.extractor.XSSFBEventBasedExcelExtractor;
 import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor;
 import org.apache.poi.xssf.extractor.XSSFExcelExtractor;
 import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
-import org.junit.BeforeClass;
+import org.apache.xmlbeans.XmlException;
 import org.junit.Test;
 
 /**
@@ -65,34 +66,39 @@ public class TestExtractorFactory {
 
     private static final POILogger LOG = POILogFactory.getLogger(TestExtractorFactory.class);
 
-    private static File txt;
+    private static final POIDataSamples ssTests = POIDataSamples.getSpreadSheetInstance();
+    private static final File xls = getFileAndCheck(ssTests, "SampleSS.xls");
+    private static final File xlsx = getFileAndCheck(ssTests, "SampleSS.xlsx");
+    private static final File xlsxStrict = getFileAndCheck(ssTests, "SampleSS.strict.xlsx");
+    private static final File xltx = getFileAndCheck(ssTests, "test.xltx");
+    private static final File xlsEmb = getFileAndCheck(ssTests, "excel_with_embeded.xls");
+    private static final File xlsb = getFileAndCheck(ssTests, "testVarious.xlsb");
+
+    private static final POIDataSamples wpTests = POIDataSamples.getDocumentInstance();
+    private static final File doc = getFileAndCheck(wpTests, "SampleDoc.doc");
+    private static final File doc6 = getFileAndCheck(wpTests, "Word6.doc");
+    private static final File doc95 = getFileAndCheck(wpTests, "Word95.doc");
+    private static final File docx = getFileAndCheck(wpTests, "SampleDoc.docx");
+    private static final File dotx = getFileAndCheck(wpTests, "test.dotx");
+    private static final File docEmb = getFileAndCheck(wpTests, "word_with_embeded.doc");
+    private static final File docEmbOOXML = getFileAndCheck(wpTests, "word_with_embeded_ooxml.doc");
+
+    private static final POIDataSamples slTests = POIDataSamples.getSlideShowInstance();
+    private static final File ppt = getFileAndCheck(slTests, "SampleShow.ppt");
+    private static final File pptx = getFileAndCheck(slTests, "SampleShow.pptx");
+    private static final File txt = getFileAndCheck(slTests, "SampleShow.txt");
+
+    private static final POIDataSamples olTests = POIDataSamples.getHSMFInstance();
+    private static final File msg = getFileAndCheck(olTests, "quick.msg");
+    private static final File msgEmb = getFileAndCheck(olTests, "attachment_test_msg.msg");
+    private static final File msgEmbMsg = getFileAndCheck(olTests, "attachment_msg_pdf.msg");
+
+    private static final POIDataSamples dgTests = POIDataSamples.getDiagramInstance();
+    private static final File vsd = getFileAndCheck(dgTests, "Test_Visio-Some_Random_Text.vsd");
+    private static final File vsdx = getFileAndCheck(dgTests, "test.vsdx");
 
-    private static File xls;
-    private static File xlsx;
-    private static File xlsxStrict;
-    private static File xltx;
-    private static File xlsEmb;
-    private static File xlsb;
-
-    private static File doc;
-    private static File doc6;
-    private static File doc95;
-    private static File docx;
-    private static File dotx;
-    private static File docEmb;
-    private static File docEmbOOXML;
-
-    private static File ppt;
-    private static File pptx;
-
-    private static File msg;
-    private static File msgEmb;
-    private static File msgEmbMsg;
-
-    private static File vsd;
-    private static File vsdx;
-
-    private static File pub;
+    private static POIDataSamples pubTests = POIDataSamples.getPublisherInstance();
+    private static File pub = getFileAndCheck(pubTests, "Simple.pub");
 
     private static File getFileAndCheck(POIDataSamples samples, String name) {
         File file = samples.getFile(name);
@@ -104,595 +110,133 @@ public class TestExtractorFactory {
         return file;
     }
 
-    @BeforeClass
-    public static void setUp() throws Exception {
+    private static final Object[] TEST_SET = {
+        "Excel", xls, ExcelExtractor.class, 200,
+        "Excel - xlsx", xlsx, XSSFExcelExtractor.class, 200,
+        "Excel - xltx", xltx, XSSFExcelExtractor.class, -1,
+        "Excel - xlsb", xlsb, XSSFBEventBasedExcelExtractor.class, -1,
+        "Word", doc, WordExtractor.class, 120,
+        "Word - docx", docx, XWPFWordExtractor.class, 120,
+        "Word - dotx", dotx, XWPFWordExtractor.class, -1,
+        "Word 6", doc6, Word6Extractor.class, 20,
+        "Word 95", doc95, Word6Extractor.class, 120,
+        "PowerPoint", ppt, SlideShowExtractor.class, 120,
+        "PowerPoint - pptx", pptx, SlideShowExtractor.class, 120,
+        "Visio", vsd, VisioTextExtractor.class, 50,
+        "Visio - vsdx", vsdx, XDGFVisioExtractor.class, 20,
+        "Publisher", pub, PublisherTextExtractor.class, 50,
+        "Outlook msg", msg, OutlookTextExtactor.class, 50,
+
+        // TODO Support OOXML-Strict, see bug #57699
+        // xlsxStrict
+    };
 
-        POIDataSamples ssTests = POIDataSamples.getSpreadSheetInstance();
-        xls = getFileAndCheck(ssTests, "SampleSS.xls");
-        xlsx = getFileAndCheck(ssTests, "SampleSS.xlsx");
-        xlsxStrict = getFileAndCheck(ssTests, "SampleSS.strict.xlsx");
-        xltx = getFileAndCheck(ssTests, "test.xltx");
-        xlsEmb = getFileAndCheck(ssTests, "excel_with_embeded.xls");
-        xlsb = getFileAndCheck(ssTests, "testVarious.xlsb");
-
-        POIDataSamples wpTests = POIDataSamples.getDocumentInstance();
-        doc = getFileAndCheck(wpTests, "SampleDoc.doc");
-        doc6 = getFileAndCheck(wpTests, "Word6.doc");
-        doc95 = getFileAndCheck(wpTests, "Word95.doc");
-        docx = getFileAndCheck(wpTests, "SampleDoc.docx");
-        dotx = getFileAndCheck(wpTests, "test.dotx");
-        docEmb = getFileAndCheck(wpTests, "word_with_embeded.doc");
-        docEmbOOXML = getFileAndCheck(wpTests, "word_with_embeded_ooxml.doc");
-
-        POIDataSamples slTests = POIDataSamples.getSlideShowInstance();
-        ppt = getFileAndCheck(slTests, "SampleShow.ppt");
-        pptx = getFileAndCheck(slTests, "SampleShow.pptx");
-        txt = getFileAndCheck(slTests, "SampleShow.txt");
-
-        POIDataSamples dgTests = POIDataSamples.getDiagramInstance();
-        vsd = getFileAndCheck(dgTests, "Test_Visio-Some_Random_Text.vsd");
-        vsdx = getFileAndCheck(dgTests, "test.vsdx");
-
-        POIDataSamples pubTests = POIDataSamples.getPublisherInstance();
-        pub = getFileAndCheck(pubTests, "Simple.pub");
-
-        POIDataSamples olTests = POIDataSamples.getHSMFInstance();
-        msg = getFileAndCheck(olTests, "quick.msg");
-        msgEmb = getFileAndCheck(olTests, "attachment_test_msg.msg");
-        msgEmbMsg = getFileAndCheck(olTests, "attachment_msg_pdf.msg");
+    @FunctionalInterface
+    interface FunctionEx<T, R> {
+        R apply(T t) throws IOException, OpenXML4JException, XmlException;
     }
 
+
     @Test
     public void testFile() throws Exception {
-        // Excel
-        POITextExtractor xlsExtractor = ExtractorFactory.createExtractor(xls);
-        assertNotNull("Had empty extractor for " + xls, xlsExtractor);
-        assertTrue("Expected instanceof ExcelExtractor, but had: " + xlsExtractor.getClass(), 
-                xlsExtractor
-                instanceof ExcelExtractor
-        );
-        assertTrue(
-                xlsExtractor.getText().length() > 200
-        );
-        xlsExtractor.close();
-
-        POITextExtractor extractor = ExtractorFactory.createExtractor(xlsx);
-        assertTrue(
-                extractor.getClass().getName(),
-                extractor
-                instanceof XSSFExcelExtractor
-        );
-        extractor.close();
-
-        extractor = ExtractorFactory.createExtractor(xlsx);
-        assertTrue(
-                extractor.getText().length() > 200
-        );
-        extractor.close();
-
-        extractor = ExtractorFactory.createExtractor(xltx);
-        assertTrue(
-                extractor.getClass().getName(),
-                extractor
-                instanceof XSSFExcelExtractor
-        );
-        extractor.close();
-
-        extractor = ExtractorFactory.createExtractor(xlsb);
-        assertContains(extractor.getText(), "test");
-        extractor.close();
-
-
-        extractor = ExtractorFactory.createExtractor(xltx);
-        assertContains(extractor.getText(), "test");
-        extractor.close();
-
-        // TODO Support OOXML-Strict, see bug #57699
-        try {
-            /*extractor =*/ ExtractorFactory.createExtractor(xlsxStrict);
-            fail("OOXML-Strict isn't yet supported");
-        } catch (POIXMLException e) {
-            // Expected, for now
+        for (int i = 0; i < TEST_SET.length; i += 4) {
+            try (POITextExtractor ext = ExtractorFactory.createExtractor((File) TEST_SET[i + 1])) {
+                testExtractor(ext, (String) TEST_SET[i], (Class) TEST_SET[i + 2], (Integer) TEST_SET[i + 3]);
+            }
         }
-//        extractor = ExtractorFactory.createExtractor(xlsxStrict);
-//        assertTrue(
-//                extractor
-//                instanceof XSSFExcelExtractor
-//        );
-//        extractor.close();
-//
-//        extractor = ExtractorFactory.createExtractor(xlsxStrict);
-//        assertTrue(
-//                extractor.getText().contains("test")
-//        );
-//        extractor.close();
-
-
-        // Word
-        extractor = ExtractorFactory.createExtractor(doc);
-        assertTrue(
-                extractor
-                instanceof WordExtractor
-        );
-        assertTrue(
-                extractor.getText().length() > 120
-        );
-        extractor.close();
-
-        extractor = ExtractorFactory.createExtractor(doc6);
-        assertTrue(
-                extractor
-                instanceof Word6Extractor
-        );
-        assertTrue(
-                extractor.getText().length() > 20
-        );
-        extractor.close();
-
-        extractor = ExtractorFactory.createExtractor(doc95);
-        assertTrue(
-                extractor
-                instanceof Word6Extractor
-        );
-        assertTrue(
-                extractor.getText().length() > 120
-        );
-        extractor.close();
-
-        extractor = ExtractorFactory.createExtractor(docx);
-        assertTrue(
-                extractor instanceof XWPFWordExtractor
-        );
-        extractor.close();
-
-        extractor = ExtractorFactory.createExtractor(docx);
-        assertTrue(
-                extractor.getText().length() > 120
-        );
-        extractor.close();
-
-        extractor = ExtractorFactory.createExtractor(dotx);
-        assertTrue(
-                extractor instanceof XWPFWordExtractor
-        );
-        extractor.close();
-
-        extractor = ExtractorFactory.createExtractor(dotx);
-        assertContains(extractor.getText(), "Test");
-        extractor.close();
-
-        // PowerPoint (PPT)
-        extractor = ExtractorFactory.createExtractor(ppt);
-        assertTrue(
-                extractor
-                instanceof PowerPointExtractor
-        );
-        assertTrue(
-                extractor.getText().length() > 120
-        );
-        extractor.close();
-
-        // PowerPoint (PPTX)
-        extractor = ExtractorFactory.createExtractor(pptx);
-        assertTrue(
-                extractor
-                instanceof XSLFPowerPointExtractor
-        );
-        assertTrue(
-                extractor.getText().length() > 120
-        );
-        extractor.close();
-
-        // Visio - binary
-        extractor = ExtractorFactory.createExtractor(vsd);
-        assertTrue(
-                extractor
-                instanceof VisioTextExtractor
-        );
-        assertTrue(
-                extractor.getText().length() > 50
-        );
-        extractor.close();
-
-        // Visio - vsdx
-        extractor = ExtractorFactory.createExtractor(vsdx);
-        assertTrue(
-                extractor
-                instanceof XDGFVisioExtractor
-        );
-        assertTrue(
-                extractor.getText().length() > 20
-        );
-        extractor.close();
-
-        // Publisher
-        extractor = ExtractorFactory.createExtractor(pub);
-        assertTrue(
-                extractor
-                instanceof PublisherTextExtractor
-        );
-        assertTrue(
-                extractor.getText().length() > 50
-        );
-        extractor.close();
-
-        // Outlook msg
-        extractor = ExtractorFactory.createExtractor(msg);
-        assertTrue(
-                extractor
-                instanceof OutlookTextExtactor
-        );
-        assertTrue(
-                extractor.getText().length() > 50
-        );
-        extractor.close();
+    }
 
+    @Test(expected = IllegalArgumentException.class)
+    public void testFileInvalid() throws Exception {
         // Text
-        try {
-            ExtractorFactory.createExtractor(txt);
-            fail("expected IllegalArgumentException");
-        } catch(IllegalArgumentException e) {
-            // Good
-        }
+        try (POITextExtractor te = ExtractorFactory.createExtractor(txt)) {}
     }
 
     @Test
     public void testInputStream() throws Exception {
-        // Excel
-        POITextExtractor extractor = ExtractorFactory.createExtractor(new FileInputStream(xls));
-        assertTrue(
-                extractor
-                instanceof ExcelExtractor
-        );
-        assertTrue(
-                extractor.getText().length() > 200
-        );
-        extractor.close();
-
-        extractor = ExtractorFactory.createExtractor(new FileInputStream(xlsx));
-        assertTrue(
-                extractor.getClass().getName(),
-                extractor
-                instanceof XSSFExcelExtractor
-        );
-        assertTrue(
-                extractor.getText().length() > 200
-        );
-        // TODO Support OOXML-Strict, see bug #57699
-//        assertTrue(
-//                ExtractorFactory.createExtractor(new FileInputStream(xlsxStrict))
-//                instanceof XSSFExcelExtractor
-//        );
-//        assertTrue(
-//                ExtractorFactory.createExtractor(new FileInputStream(xlsxStrict)).getText().length() > 200
-//        );
-        extractor.close();
-
-        // Word
-        extractor = ExtractorFactory.createExtractor(new FileInputStream(doc));
-        assertTrue(
-                extractor.getClass().getName(),
-                extractor
-                instanceof WordExtractor
-        );
-        assertTrue(
-                extractor.getText().length() > 120
-        );
-        extractor.close();
-
-        extractor = ExtractorFactory.createExtractor(new FileInputStream(doc6));
-        assertTrue(
-                extractor.getClass().getName(),
-                extractor
-                instanceof Word6Extractor
-        );
-        assertTrue(
-                extractor.getText().length() > 20
-        );
-        extractor.close();
-
-        extractor = ExtractorFactory.createExtractor(new FileInputStream(doc95));
-        assertTrue(
-                extractor.getClass().getName(),
-                extractor
-                instanceof Word6Extractor
-        );
-        assertTrue(
-                extractor.getText().length() > 120
-        );
-        extractor.close();
-
-        extractor = ExtractorFactory.createExtractor(new FileInputStream(docx));
-        assertTrue(
-                extractor
-                instanceof XWPFWordExtractor
-        );
-        assertTrue(
-                extractor.getText().length() > 120
-        );
-        extractor.close();
-
-        // PowerPoint
-        extractor = ExtractorFactory.createExtractor(new FileInputStream(ppt));
-        assertTrue(
-                extractor
-                instanceof PowerPointExtractor
-        );
-        assertTrue(
-                extractor.getText().length() > 120
-        );
-        extractor.close();
-
-        extractor = ExtractorFactory.createExtractor(new FileInputStream(pptx));
-        assertTrue(
-                extractor
-                instanceof XSLFPowerPointExtractor
-        );
-        assertTrue(
-                extractor.getText().length() > 120
-        );
-        extractor.close();
-
-        // Visio
-        extractor = ExtractorFactory.createExtractor(new FileInputStream(vsd));
-        assertTrue(
-                extractor
-                instanceof VisioTextExtractor
-        );
-        assertTrue(
-                extractor.getText().length() > 50
-        );
-        extractor.close();
-
-        // Visio - vsdx
-        extractor = ExtractorFactory.createExtractor(new FileInputStream(vsdx));
-        assertTrue(
-                extractor
-                instanceof XDGFVisioExtractor
-        );
-        assertTrue(
-                extractor.getText().length() > 20
-        );
-        extractor.close();
-        
-        // Publisher
-        extractor = ExtractorFactory.createExtractor(new FileInputStream(pub));
-        assertTrue(
-                extractor
-                instanceof PublisherTextExtractor
-        );
-        assertTrue(
-                extractor.getText().length() > 50
-        );
-        extractor.close();
-
-        // Outlook msg
-        extractor = ExtractorFactory.createExtractor(new FileInputStream(msg));
-        assertTrue(
-                extractor
-                instanceof OutlookTextExtactor
-        );
-        assertTrue(
-                extractor.getText().length() > 50
-        );
-        extractor.close();
+        testStream((f) -> ExtractorFactory.createExtractor(f), true);
+    }
 
-        // Text
-        try (FileInputStream stream = new FileInputStream(txt)) {
-            ExtractorFactory.createExtractor(stream);
-            fail("expected IllegalArgumentException");
-        } catch(IllegalArgumentException e) {
-            // Good
-        }
+    @Test(expected = IllegalArgumentException.class)
+    public void testInputStreamInvalid() throws Exception {
+        testInvalid((f) -> ExtractorFactory.createExtractor(f));
     }
 
     @Test
     public void testPOIFS() throws Exception {
-        // Excel
-        assertTrue(
-                ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls)))
-                instanceof ExcelExtractor
-        );
-        assertTrue(
-                ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls))).getText().length() > 200
-        );
-
-        // Word
-        assertTrue(
-                ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(doc)))
-                instanceof WordExtractor
-        );
-        assertTrue(
-                ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(doc))).getText().length() > 120
-        );
-
-        assertTrue(
-                ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(doc6)))
-                instanceof Word6Extractor
-        );
-        assertTrue(
-                ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(doc6))).getText().length() > 20
-        );
-
-        assertTrue(
-                ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(doc95)))
-                instanceof Word6Extractor
-        );
-        assertTrue(
-                ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(doc95))).getText().length() > 120
-        );
-
-        // PowerPoint
-        assertTrue(
-                ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(ppt)))
-                instanceof PowerPointExtractor
-        );
-        assertTrue(
-                ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(ppt))).getText().length() > 120
-        );
-
-        // Visio
-        assertTrue(
-                ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(vsd)))
-                instanceof VisioTextExtractor
-        );
-        assertTrue(
-                ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(vsd))).getText().length() > 50
-        );
-
-        // Publisher
-        assertTrue(
-                ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(pub)))
-                instanceof PublisherTextExtractor
-        );
-        assertTrue(
-                ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(pub))).getText().length() > 50
-        );
-
-        // Outlook msg
-        assertTrue(
-                ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(msg)))
-                instanceof OutlookTextExtactor
-        );
-        assertTrue(
-                ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(msg))).getText().length() > 50
-        );
-
-        // Text
-        try {
-            ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(txt)));
-            fail("expected IllegalArgumentException");
-        } catch(IOException e) {
-            // Good
-        }
+        testStream((f) -> ExtractorFactory.createExtractor(new POIFSFileSystem(f)), false);
     }
 
+    @Test(expected = IOException.class)
+    public void testPOIFSInvalid() throws Exception {
+        testInvalid((f) -> ExtractorFactory.createExtractor(new POIFSFileSystem(f)));
+    }
 
     @Test
     public void testOPOIFS() throws Exception {
-        // Excel
-        assertTrue(
-                ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(xls)))
-                        instanceof ExcelExtractor
-        );
-        assertTrue(
-                ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(xls))).getText().length() > 200
-        );
-
-        // Word
-        assertTrue(
-                ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(doc)))
-                        instanceof WordExtractor
-        );
-        assertTrue(
-                ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(doc))).getText().length() > 120
-        );
-
-        assertTrue(
-                ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(doc6)))
-                        instanceof Word6Extractor
-        );
-        assertTrue(
-                ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(doc6))).getText().length() > 20
-        );
-
-        assertTrue(
-                ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(doc95)))
-                        instanceof Word6Extractor
-        );
-        assertTrue(
-                ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(doc95))).getText().length() > 120
-        );
+        testStream((f) -> ExtractorFactory.createExtractor(new OPOIFSFileSystem(f)), false);
+    }
 
-        // PowerPoint
-        assertTrue(
-                ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(ppt)))
-                        instanceof PowerPointExtractor
-        );
-        assertTrue(
-                ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(ppt))).getText().length() > 120
-        );
+    @Test(expected = IOException.class)
+    public void testOPOIFSInvalid() throws Exception {
+        testInvalid((f) -> ExtractorFactory.createExtractor(new OPOIFSFileSystem(f)));
+    }
 
-        // Visio
-        assertTrue(
-                ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(vsd)))
-                        instanceof VisioTextExtractor
-        );
-        assertTrue(
-                ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(vsd))).getText().length() > 50
-        );
 
-        // Publisher
-        assertTrue(
-                ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(pub)))
-                        instanceof PublisherTextExtractor
-        );
-        assertTrue(
-                ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(pub))).getText().length() > 50
-        );
+    private void testStream(final FunctionEx<FileInputStream, POITextExtractor> poifsIS, final boolean loadOOXML)
+    throws IOException, OpenXML4JException, XmlException {
+        for (int i = 0; i < TEST_SET.length; i += 4) {
+            File testFile = (File) TEST_SET[i + 1];
+            if (!loadOOXML && (testFile.getName().endsWith("x") || testFile.getName().endsWith("xlsb"))) {
+                continue;
+            }
+            try (FileInputStream fis = new FileInputStream(testFile);
+                 POITextExtractor ext = poifsIS.apply(fis)) {
+                testExtractor(ext, (String) TEST_SET[i], (Class) TEST_SET[i + 2], (Integer) TEST_SET[i + 3]);
+            } catch (IllegalArgumentException e) {
+                fail("failed to process "+testFile);
+            }
+        }
+    }
 
-        // Outlook msg
-        assertTrue(
-                ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(msg)))
-                        instanceof OutlookTextExtactor
-        );
-        assertTrue(
-                ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(msg))).getText().length() > 50
-        );
+    private void testExtractor(final POITextExtractor ext, final String testcase, final Class extrClass, final Integer minLength) {
+        assertTrue("invalid extractor for " + testcase, extrClass.isInstance(ext));
+        final String actual = ext.getText();
+        if (minLength == -1) {
+            assertContains(actual.toLowerCase(Locale.ROOT), "test");
+        } else {
+            assertTrue("extracted content too short for " + testcase, actual.length() > minLength);
+        }
+    }
 
+    private void testInvalid(FunctionEx<FileInputStream, POITextExtractor> poifs) throws IOException, OpenXML4JException, XmlException {
         // Text
-        try {
-            ExtractorFactory.createExtractor(new OPOIFSFileSystem(new FileInputStream(txt)));
-            fail("expected IllegalArgumentException");
-        } catch(IOException e) {
-            // Good
+        try (FileInputStream fis = new FileInputStream(txt);
+             POITextExtractor te = poifs.apply(fis)) {
         }
     }
 
     @Test
     public void testPackage() throws Exception {
-        // Excel
-        POIXMLTextExtractor extractor = ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString(), PackageAccess.READ));
-        assertTrue(extractor instanceof XSSFExcelExtractor);
-        extractor.close();
-        extractor = ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString()));
-        assertTrue(extractor.getText().length() > 200);
-        extractor.close();
-
-        // Word
-        extractor = ExtractorFactory.createExtractor(OPCPackage.open(docx.toString()));
-        assertTrue(extractor instanceof XWPFWordExtractor);
-        extractor.close();
-
-        extractor = ExtractorFactory.createExtractor(OPCPackage.open(docx.toString()));
-        assertTrue(extractor.getText().length() > 120);
-        extractor.close();
-
-        // PowerPoint
-        extractor = ExtractorFactory.createExtractor(OPCPackage.open(pptx.toString()));
-        assertTrue(extractor instanceof XSLFPowerPointExtractor);
-        extractor.close();
+        for (int i = 0; i < TEST_SET.length; i += 4) {
+            final File testFile = (File) TEST_SET[i + 1];
+            if (!testFile.getName().endsWith("x")) {
+                continue;
+            }
 
-        extractor = ExtractorFactory.createExtractor(OPCPackage.open(pptx.toString()));
-        assertTrue(extractor.getText().length() > 120);
-        extractor.close();
-        
-        // Visio
-        extractor = ExtractorFactory.createExtractor(OPCPackage.open(vsdx.toString()));
-        assertTrue(extractor instanceof XDGFVisioExtractor);
-        assertTrue(extractor.getText().length() > 20);
-        extractor.close();
+            try (final OPCPackage pkg = OPCPackage.open(testFile, PackageAccess.READ);
+                 final POITextExtractor ext = ExtractorFactory.createExtractor(pkg)) {
+                testExtractor(ext, (String) TEST_SET[i], (Class) TEST_SET[i + 2], (Integer) TEST_SET[i + 3]);
+                pkg.revert();
+            }
+        }
+    }
 
+    @Test(expected = UnsupportedFileFormatException.class)
+    public void testPackageInvalid() throws Exception {
         // Text
-        try {
-            ExtractorFactory.createExtractor(OPCPackage.open(txt.toString()));
-            fail("TestExtractorFactory.testPackage() failed on " + txt);
-        } catch(UnsupportedFileFormatException e) {
-            // Good
-        } catch (Exception e) {
-            LOG.log(POILogger.WARN, "TestExtractorFactory.testPackage() failed on " + txt);
-            throw e;
-        }
+        try (final OPCPackage pkg = OPCPackage.open(txt, PackageAccess.READ);
+             final POITextExtractor te = ExtractorFactory.createExtractor(pkg)) {}
     }
 
     @Test
@@ -781,142 +325,49 @@ public class TestExtractorFactory {
      *  does poifs embedded, but will do ooxml ones
      *  at some point.
      */
-    @SuppressWarnings("deprecation")
     @Test
     public void testEmbedded() throws Exception {
-        POIOLE2TextExtractor ext;
-        POITextExtractor[] embeds;
-
-        // No embeddings
-        ext = (POIOLE2TextExtractor)
-                ExtractorFactory.createExtractor(xls);
-        embeds = ExtractorFactory.getEmbededDocsTextExtractors(ext);
-        assertEquals(0, embeds.length);
-        ext.close();
-
-        // No embeddings
-        ext = (POIOLE2TextExtractor)
-                ExtractorFactory.createExtractor(xls);
-        embeds = ExtractorFactory.getEmbeddedDocsTextExtractors(ext);
-        assertEquals(0, embeds.length);
-        ext.close();
-
-        // Excel
-        ext = (POIOLE2TextExtractor)
-                ExtractorFactory.createExtractor(xlsEmb);
-        embeds = ExtractorFactory.getEmbededDocsTextExtractors(ext);
-        assertNotNull(embeds);
-        ext.close();
-
-        // Excel
-        ext = (POIOLE2TextExtractor)
-                ExtractorFactory.createExtractor(xlsEmb);
-        embeds = ExtractorFactory.getEmbeddedDocsTextExtractors(ext);
-
-        assertEquals(6, embeds.length);
-        int numWord = 0, numXls = 0, numPpt = 0, numMsg = 0, numWordX;
-        for (POITextExtractor embed : embeds) {
-            assertTrue(embed.getText().length() > 20);
-
-            if (embed instanceof PowerPointExtractor) numPpt++;
-            else if (embed instanceof ExcelExtractor) numXls++;
-            else if (embed instanceof WordExtractor) numWord++;
-            else if (embed instanceof OutlookTextExtactor) numMsg++;
-        }
-        assertEquals(2, numPpt);
-        assertEquals(2, numXls);
-        assertEquals(2, numWord);
-        assertEquals(0, numMsg);
-        ext.close();
-
-        // Word
-        ext = (POIOLE2TextExtractor)
-                ExtractorFactory.createExtractor(docEmb);
-        embeds = ExtractorFactory.getEmbededDocsTextExtractors(ext);
-
-        numWord = 0; numXls = 0; numPpt = 0; numMsg = 0;
-        assertEquals(4, embeds.length);
-        for (POITextExtractor embed : embeds) {
-            assertTrue(embed.getText().length() > 20);
-            if (embed instanceof PowerPointExtractor) numPpt++;
-            else if (embed instanceof ExcelExtractor) numXls++;
-            else if (embed instanceof WordExtractor) numWord++;
-            else if (embed instanceof OutlookTextExtactor) numMsg++;
-        }
-        assertEquals(1, numPpt);
-        assertEquals(2, numXls);
-        assertEquals(1, numWord);
-        assertEquals(0, numMsg);
-        ext.close();
-
-        // Word which contains an OOXML file
-        ext = (POIOLE2TextExtractor)
-                ExtractorFactory.createExtractor(docEmbOOXML);
-        embeds = ExtractorFactory.getEmbededDocsTextExtractors(ext);
-
-        numWord = 0; numXls = 0; numPpt = 0; numMsg = 0; numWordX = 0;
-        assertEquals(3, embeds.length);
-        for (POITextExtractor embed : embeds) {
-            assertTrue(embed.getText().length() > 20);
-            if (embed instanceof PowerPointExtractor) numPpt++;
-            else if (embed instanceof ExcelExtractor) numXls++;
-            else if (embed instanceof WordExtractor) numWord++;
-            else if (embed instanceof OutlookTextExtactor) numMsg++;
-            else if (embed instanceof XWPFWordExtractor) numWordX++;
-        }
-        assertEquals(1, numPpt);
-        assertEquals(1, numXls);
-        assertEquals(0, numWord);
-        assertEquals(1, numWordX);
-        assertEquals(0, numMsg);
-        ext.close();
-
-        // Outlook
-        ext = (OutlookTextExtactor)
-                ExtractorFactory.createExtractor(msgEmb);
-        embeds = ExtractorFactory.getEmbededDocsTextExtractors(ext);
-
-        numWord = 0; numXls = 0; numPpt = 0; numMsg = 0;
-        assertEquals(1, embeds.length);
-        for (POITextExtractor embed : embeds) {
-            assertTrue(embed.getText().length() > 20);
-            if (embed instanceof PowerPointExtractor) numPpt++;
-            else if (embed instanceof ExcelExtractor) numXls++;
-            else if (embed instanceof WordExtractor) numWord++;
-            else if (embed instanceof OutlookTextExtactor) numMsg++;
-        }
-        assertEquals(0, numPpt);
-        assertEquals(0, numXls);
-        assertEquals(1, numWord);
-        assertEquals(0, numMsg);
-        ext.close();
-
-        // Outlook with another outlook file in it
-        ext = (OutlookTextExtactor)
-                ExtractorFactory.createExtractor(msgEmbMsg);
-        embeds = ExtractorFactory.getEmbededDocsTextExtractors(ext);
-
-        numWord = 0; numXls = 0; numPpt = 0; numMsg = 0;
-        assertEquals(1, embeds.length);
-        for (POITextExtractor embed : embeds) {
-            assertTrue(embed.getText().length() > 20);
-            if (embed instanceof PowerPointExtractor) numPpt++;
-            else if (embed instanceof ExcelExtractor) numXls++;
-            else if (embed instanceof WordExtractor) numWord++;
-            else if (embed instanceof OutlookTextExtactor) numMsg++;
+        final Object[] testObj = {
+            "No embeddings", xls, "0-0-0-0-0-0",
+            "Excel", xlsEmb, "6-2-2-2-0-0",
+            "Word", docEmb, "4-1-2-1-0-0",
+            "Word which contains an OOXML file", docEmbOOXML, "3-0-1-1-0-1",
+            "Outlook", msgEmb, "1-1-0-0-0-0",
+            "Outlook with another outlook file in it", msgEmbMsg, "1-0-0-0-1-0",
+        };
+
+        for (int i=0; i<testObj.length; i+=3) {
+            try (final POIOLE2TextExtractor ext = ExtractorFactory.createExtractor((File)testObj[i+1])) {
+                final POITextExtractor[] embeds = ExtractorFactory.getEmbeddedDocsTextExtractors(ext);
+
+                int numWord = 0, numXls = 0, numPpt = 0, numMsg = 0, numWordX = 0;
+                for (POITextExtractor embed : embeds) {
+                    assertTrue(embed.getText().length() > 20);
+                    if (embed instanceof SlideShowExtractor) {
+                        numPpt++;
+                    } else if (embed instanceof ExcelExtractor) {
+                        numXls++;
+                    } else if (embed instanceof WordExtractor) {
+                        numWord++;
+                    } else if (embed instanceof OutlookTextExtactor) {
+                        numMsg++;
+                    } else if (embed instanceof XWPFWordExtractor) {
+                        numWordX++;
+                    }
+                }
+
+                final String actual = embeds.length+"-"+numWord+"-"+numXls+"-"+numPpt+"-"+numMsg+"-"+numWordX;
+                final String expected = (String)testObj[i+2];
+                assertEquals("invalid number of embeddings - "+testObj[i], expected, actual);
+            }
         }
-        assertEquals(0, numPpt);
-        assertEquals(0, numXls);
-        assertEquals(0, numWord);
-        assertEquals(1, numMsg);
-        ext.close();
 
         // TODO - PowerPoint
         // TODO - Publisher
         // TODO - Visio
     }
 
-    private static final String[] EXPECTED_FAILURES = new String[] {
+    private static final String[] EXPECTED_FAILURES = {
         // password protected files
         "spreadsheet/password.xls",
         "spreadsheet/protected_passtika.xlsx",
@@ -1018,37 +469,26 @@ public class TestExtractorFactory {
      *  #59074 - Excel 95 files should give a helpful message, not just 
      *   "No supported documents found in the OLE2 stream"
      */
-    @Test
+    @Test(expected = OldExcelFormatException.class)
     public void bug59074() throws Exception {
-        try {
-            ExtractorFactory.createExtractor(
-                    POIDataSamples.getSpreadSheetInstance().getFile("59074.xls"));
-            fail("Old excel formats not supported via ExtractorFactory");
-        } catch (OldExcelFormatException e) {
-            // expected here
-        }
+        ExtractorFactory.createExtractor(
+                POIDataSamples.getSpreadSheetInstance().getFile("59074.xls"));
     }
 
     @SuppressWarnings("deprecation")
-    @Test
-    public void testGetEmbeddedFromXMLExtractor() {
-        try {
-            // currently not implemented
-            ExtractorFactory.getEmbededDocsTextExtractors((POIXMLTextExtractor)null);
-            fail("Unsupported currently");
-        } catch (IllegalStateException e) {
-            // expected here
-        }
+    @Test(expected = IllegalStateException.class)
+    public void testGetEmbedFromXMLExtractor() {
+        // currently not implemented
+        ExtractorFactory.getEmbededDocsTextExtractors((POIXMLTextExtractor) null);
+    }
 
-        try {
-            // currently not implemented
-            ExtractorFactory.getEmbeddedDocsTextExtractors((POIXMLTextExtractor)null);
-            fail("Unsupported currently");
-        } catch (IllegalStateException e) {
-            // expected here
-        }
+    @SuppressWarnings("deprecation")
+    @Test(expected = IllegalStateException.class)
+    public void testGetEmbeddedFromXMLExtractor() {
+        // currently not implemented
+        ExtractorFactory.getEmbeddedDocsTextExtractors((POIXMLTextExtractor)null);
     }
-    
+
     // This bug is currently open. This test will fail with "expected error not thrown" when the bug has been fixed.
     // When this happens, change this from @Test(expected=...) to @Test
     // bug 45565: text within TextBoxes is extracted by ExcelExtractor and WordExtractor

Modified: poi/trunk/src/ooxml/testcases/org/apache/poi/poifs/crypt/TestHxxFEncryption.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/poifs/crypt/TestHxxFEncryption.java?rev=1829653&r1=1829652&r2=1829653&view=diff
==============================================================================
--- poi/trunk/src/ooxml/testcases/org/apache/poi/poifs/crypt/TestHxxFEncryption.java (original)
+++ poi/trunk/src/ooxml/testcases/org/apache/poi/poifs/crypt/TestHxxFEncryption.java Fri Apr 20 12:52:59 2018
@@ -120,10 +120,10 @@ public class TestHxxFEncryption {
     public void newPassword(String newPass) throws IOException, OpenXML4JException, XmlException {
         Biff8EncryptionKey.setCurrentUserPassword(password);
         File f = sampleDir.getFile(file);
-        POIOLE2TextExtractor te1 = (POIOLE2TextExtractor)ExtractorFactory.createExtractor(f);
+        POITextExtractor te1 = ExtractorFactory.createExtractor(f);
         Biff8EncryptionKey.setCurrentUserPassword(newPass);
         ByteArrayOutputStream bos = new ByteArrayOutputStream();
-        POIDocument doc = te1.getDocument();
+        POIDocument doc = (POIDocument)te1.getDocument();
         doc.write(bos);
         doc.close();
         te1.close();
@@ -140,25 +140,25 @@ public class TestHxxFEncryption {
         ByteArrayOutputStream bos = new ByteArrayOutputStream();
         Biff8EncryptionKey.setCurrentUserPassword(password);
         File f = sampleDir.getFile(file);
-        POIOLE2TextExtractor te1 = (POIOLE2TextExtractor)ExtractorFactory.createExtractor(f);
+        POITextExtractor te1 = ExtractorFactory.createExtractor(f);
         // first remove encryption
         Biff8EncryptionKey.setCurrentUserPassword(null);
-        POIDocument doc = te1.getDocument();
+        POIDocument doc = (POIDocument)te1.getDocument();
         doc.write(bos);
         doc.close();
         te1.close();
         // then use default setting, which is cryptoapi
         String newPass = "newPass";
-        POIOLE2TextExtractor te2 = (POIOLE2TextExtractor)ExtractorFactory.createExtractor(new ByteArrayInputStream(bos.toByteArray()));
+        POITextExtractor te2 = ExtractorFactory.createExtractor(new ByteArrayInputStream(bos.toByteArray()));
         Biff8EncryptionKey.setCurrentUserPassword(newPass);
-        doc = te2.getDocument();
+        doc = (POIDocument)te2.getDocument();
         bos.reset();
         doc.write(bos);
         doc.close();
         te2.close();
         // and finally update cryptoapi setting
-        POIOLE2TextExtractor te3 = (POIOLE2TextExtractor)ExtractorFactory.createExtractor(new ByteArrayInputStream(bos.toByteArray()));
-        doc = te3.getDocument();
+        POITextExtractor te3 = ExtractorFactory.createExtractor(new ByteArrayInputStream(bos.toByteArray()));
+        doc = (POIDocument)te3.getDocument();
         // need to cache data (i.e. read all data) before changing the key size
         if (doc instanceof HSLFSlideShowImpl) {
             HSLFSlideShowImpl hss = (HSLFSlideShowImpl)doc;
@@ -175,8 +175,8 @@ public class TestHxxFEncryption {
         doc.close();
         te3.close();
         // check the setting
-        POIOLE2TextExtractor te4 = (POIOLE2TextExtractor)ExtractorFactory.createExtractor(new ByteArrayInputStream(bos.toByteArray()));
-        doc = te4.getDocument();
+        POITextExtractor te4 = ExtractorFactory.createExtractor(new ByteArrayInputStream(bos.toByteArray()));
+        doc = (POIDocument)te4.getDocument();
         ei = doc.getEncryptionInfo();
         assertNotNull(ei);
         assertTrue(ei.getHeader() instanceof CryptoAPIEncryptionHeader);

Modified: poi/trunk/src/ooxml/testcases/org/apache/poi/xslf/TestXSLFBugs.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/xslf/TestXSLFBugs.java?rev=1829653&r1=1829652&r2=1829653&view=diff
==============================================================================
--- poi/trunk/src/ooxml/testcases/org/apache/poi/xslf/TestXSLFBugs.java (original)
+++ poi/trunk/src/ooxml/testcases/org/apache/poi/xslf/TestXSLFBugs.java Fri Apr 20 12:52:59 2018
@@ -50,6 +50,7 @@ import org.apache.poi.openxml4j.opc.OPCP
 import org.apache.poi.openxml4j.opc.PackagePartName;
 import org.apache.poi.openxml4j.opc.PackagingURIHelper;
 import org.apache.poi.sl.draw.DrawPaint;
+import org.apache.poi.sl.extractor.SlideShowExtractor;
 import org.apache.poi.sl.usermodel.PaintStyle;
 import org.apache.poi.sl.usermodel.PaintStyle.SolidPaint;
 import org.apache.poi.sl.usermodel.PaintStyle.TexturePaint;
@@ -221,28 +222,27 @@ public class TestXSLFBugs {
      *  rID2 -> slide3.xml
      */
     @Test
-    public void bug54916() throws Exception {
-        XMLSlideShow ss = XSLFTestDataSamples.openSampleDocument("OverlappingRelations.pptx");
-        XSLFSlide slide;
-
-        // Should find 4 slides
-        assertEquals(4, ss.getSlides().size());
-
-        // Check the text, to see we got them in order
-        slide = ss.getSlides().get(0);
-        assertContains(getSlideText(slide), "POI cannot read this");
-
-        slide = ss.getSlides().get(1);
-        assertContains(getSlideText(slide), "POI can read this");
-        assertContains(getSlideText(slide), "Has a relationship to another slide");
+    public void bug54916() throws IOException {
+        try (XMLSlideShow ss = XSLFTestDataSamples.openSampleDocument("OverlappingRelations.pptx")) {
+            XSLFSlide slide;
+
+            // Should find 4 slides
+            assertEquals(4, ss.getSlides().size());
+
+            // Check the text, to see we got them in order
+            slide = ss.getSlides().get(0);
+            assertContains(getSlideText(ss, slide), "POI cannot read this");
+
+            slide = ss.getSlides().get(1);
+            assertContains(getSlideText(ss, slide), "POI can read this");
+            assertContains(getSlideText(ss, slide), "Has a relationship to another slide");
 
-        slide = ss.getSlides().get(2);
-        assertContains(getSlideText(slide), "POI can read this");
+            slide = ss.getSlides().get(2);
+            assertContains(getSlideText(ss, slide), "POI can read this");
 
-        slide = ss.getSlides().get(3);
-        assertContains(getSlideText(slide), "POI can read this");
-
-        ss.close();
+            slide = ss.getSlides().get(3);
+            assertContains(getSlideText(ss, slide), "POI can read this");
+        }
     }
 
     /**
@@ -311,8 +311,15 @@ public class TestXSLFBugs {
         ss.close();
     }
 
-    protected String getSlideText(XSLFSlide slide) {
-        return XSLFPowerPointExtractor.getText(slide, true, false, false);
+    protected String getSlideText(XMLSlideShow ppt, XSLFSlide slide) throws IOException {
+        try (SlideShowExtractor extr = new SlideShowExtractor(ppt)) {
+            // do not auto-close the slideshow
+            extr.setFilesystem(null);
+            extr.setSlidesByDefault(true);
+            extr.setNotesByDefault(false);
+            extr.setMasterByDefault(false);
+            return extr.getText(slide);
+        }
     }
 
     @Test
@@ -458,7 +465,7 @@ public class TestXSLFBugs {
 
         for (int i = 0; i < slideTexts.length; i++) {
             XSLFSlide slide = ss.getSlides().get(i);
-            assertContains(getSlideText(slide), slideTexts[i]);
+            assertContains(getSlideText(ss, slide), slideTexts[i]);
         }
     }
 



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@poi.apache.org
For additional commands, e-mail: commits-help@poi.apache.org


Mime
View raw message