poi-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From n...@apache.org
Subject svn commit: r1665933 - /poi/trunk/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java
Date Wed, 11 Mar 2015 16:30:03 GMT
Author: nick
Date: Wed Mar 11 16:30:02 2015
New Revision: 1665933

URL: http://svn.apache.org/r1665933
Log:
Fix inconsistent indents

Modified:
    poi/trunk/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java

Modified: poi/trunk/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java?rev=1665933&r1=1665932&r2=1665933&view=diff
==============================================================================
--- poi/trunk/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java (original)
+++ poi/trunk/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java Wed Mar 11 16:30:02 2015
@@ -47,261 +47,260 @@ import org.apache.poi.xwpf.extractor.XWP
  * Test that the extractor factory plays nicely
  */
 public class TestExtractorFactory extends TestCase {
+    private File txt;
 
-   private File txt;
+    private File xls;
+    private File xlsx;
+    private File xltx;
+    private File xlsEmb;
+
+    private File doc;
+    private File doc6;
+    private File doc95;
+    private File docx;
+    private File dotx;
+    private File docEmb;
+    private File docEmbOOXML;
+
+    private File ppt;
+    private File pptx;
+
+    private File msg;
+    private File msgEmb;
+    private File msgEmbMsg;
+
+    private File vsd;
+    private File vsdx;
+
+    private File pub;
+
+    private File getFileAndCheck(POIDataSamples samples, String name) {
+        File file = samples.getFile(name);
+
+        assertNotNull("Did not get a file for " + name, file);
+        assertTrue("Did not get a type file for " + name, file.isFile());
+        assertTrue("File did not exist: " + name, file.exists());
+
+        return file;
+    }
+    @Override
+    protected void setUp() throws Exception {
+        super.setUp();
+
+        POIDataSamples ssTests = POIDataSamples.getSpreadSheetInstance();
+        xls = getFileAndCheck(ssTests, "SampleSS.xls");
+        xlsx = getFileAndCheck(ssTests, "SampleSS.xlsx");
+        xltx = getFileAndCheck(ssTests, "test.xltx");
+        xlsEmb = getFileAndCheck(ssTests, "excel_with_embeded.xls");
+
+        POIDataSamples wpTests = POIDataSamples.getDocumentInstance();
+        doc = getFileAndCheck(wpTests, "SampleDoc.doc");
+        doc6 = getFileAndCheck(wpTests, "Word6.doc");
+        doc95 = getFileAndCheck(wpTests, "Word95.doc");
+        docx = getFileAndCheck(wpTests, "SampleDoc.docx");
+        dotx = getFileAndCheck(wpTests, "test.dotx");
+        docEmb = getFileAndCheck(wpTests, "word_with_embeded.doc");
+        docEmbOOXML = getFileAndCheck(wpTests, "word_with_embeded_ooxml.doc");
+
+        POIDataSamples slTests = POIDataSamples.getSlideShowInstance();
+        ppt = getFileAndCheck(slTests, "SampleShow.ppt");
+        pptx = getFileAndCheck(slTests, "SampleShow.pptx");
+        txt = getFileAndCheck(slTests, "SampleShow.txt");
+
+        POIDataSamples dgTests = POIDataSamples.getDiagramInstance();
+        vsd = getFileAndCheck(dgTests, "Test_Visio-Some_Random_Text.vsd");
+        vsdx = getFileAndCheck(dgTests, "test.vsdx");
+
+        POIDataSamples pubTests = POIDataSamples.getPublisherInstance();
+        pub = getFileAndCheck(pubTests, "Simple.pub");
+
+        POIDataSamples olTests = POIDataSamples.getHSMFInstance();
+        msg = getFileAndCheck(olTests, "quick.msg");
+        msgEmb = getFileAndCheck(olTests, "attachment_test_msg.msg");
+        msgEmbMsg = getFileAndCheck(olTests, "attachment_msg_pdf.msg");
+    }
+
+    public void testFile() throws Exception {
+        // Excel
+        POITextExtractor xlsExtractor = ExtractorFactory.createExtractor(xls);
+        assertNotNull("Had empty extractor for " + xls, xlsExtractor);
+        assertTrue("Expected instanceof ExcelExtractor, but had: " + xlsExtractor.getClass(),
+                xlsExtractor
+                instanceof ExcelExtractor
+        );
+        assertTrue(
+                xlsExtractor.getText().length() > 200
+        );
+        xlsExtractor.close();
+
+        POITextExtractor extractor = ExtractorFactory.createExtractor(xlsx);
+        assertTrue(
+                extractor
+                instanceof XSSFExcelExtractor
+        );
+        extractor.close();
+
+        extractor = ExtractorFactory.createExtractor(xlsx);
+        assertTrue(
+                extractor.getText().length() > 200
+        );
+        extractor.close();
+
+        extractor = ExtractorFactory.createExtractor(xltx);
+        assertTrue(
+                extractor
+                instanceof XSSFExcelExtractor
+        );
+        extractor.close();
+
+        extractor = ExtractorFactory.createExtractor(xltx);
+        assertTrue(
+                extractor.getText().contains("test")
+        );
+        extractor.close();
+
+
+        // Word
+        assertTrue(
+                ExtractorFactory.createExtractor(doc)
+                instanceof WordExtractor
+        );
+        assertTrue(
+                ExtractorFactory.createExtractor(doc).getText().length() > 120
+        );
+
+        assertTrue(
+                ExtractorFactory.createExtractor(doc6)
+                instanceof Word6Extractor
+        );
+        assertTrue(
+                ExtractorFactory.createExtractor(doc6).getText().length() > 20
+        );
+
+        assertTrue(
+                ExtractorFactory.createExtractor(doc95)
+                instanceof Word6Extractor
+        );
+        assertTrue(
+                ExtractorFactory.createExtractor(doc95).getText().length() > 120
+        );
+
+        extractor = ExtractorFactory.createExtractor(docx);
+        assertTrue(
+                extractor instanceof XWPFWordExtractor
+        );
+        extractor.close();
+
+        extractor = ExtractorFactory.createExtractor(docx);
+        assertTrue(
+                extractor.getText().length() > 120
+        );
+        extractor.close();
+
+        extractor = ExtractorFactory.createExtractor(dotx);
+        assertTrue(
+                extractor instanceof XWPFWordExtractor
+        );
+        extractor.close();
+
+        extractor = ExtractorFactory.createExtractor(dotx);
+        assertTrue(
+                extractor.getText().contains("Test")
+        );
+        extractor.close();
+
+        // PowerPoint
+        assertTrue(
+                ExtractorFactory.createExtractor(ppt)
+                instanceof PowerPointExtractor
+        );
+        assertTrue(
+                ExtractorFactory.createExtractor(ppt).getText().length() > 120
+        );
+
+        extractor = ExtractorFactory.createExtractor(pptx);
+        assertTrue(
+                extractor
+                instanceof XSLFPowerPointExtractor
+        );
+        extractor.close();
+
+        extractor = ExtractorFactory.createExtractor(pptx);
+        assertTrue(
+                extractor.getText().length() > 120
+        );
+        extractor.close();
+
+        // Visio - binary
+        assertTrue(
+                ExtractorFactory.createExtractor(vsd)
+                instanceof VisioTextExtractor
+        );
+        assertTrue(
+                ExtractorFactory.createExtractor(vsd).getText().length() > 50
+        );
+        // Visio - vsdx
+        try {
+            ExtractorFactory.createExtractor(vsdx);
+            fail();
+        } catch(IllegalArgumentException e) {
+            // Good
+        }
+
+        // Publisher
+        assertTrue(
+                ExtractorFactory.createExtractor(pub)
+                instanceof PublisherTextExtractor
+        );
+        assertTrue(
+                ExtractorFactory.createExtractor(pub).getText().length() > 50
+        );
+
+        // Outlook msg
+        assertTrue(
+                ExtractorFactory.createExtractor(msg)
+                instanceof OutlookTextExtactor
+        );
+        assertTrue(
+                ExtractorFactory.createExtractor(msg).getText().length() > 50
+        );
+
+        // Text
+        try {
+            ExtractorFactory.createExtractor(txt);
+            fail();
+        } catch(IllegalArgumentException e) {
+            // Good
+        }
+    }
+
+    public void testInputStream() throws Exception {
+        // Excel
+        assertTrue(
+                ExtractorFactory.createExtractor(new FileInputStream(xls))
+                instanceof ExcelExtractor
+        );
+        assertTrue(
+                ExtractorFactory.createExtractor(new FileInputStream(xls)).getText().length() > 200
+        );
+
+        assertTrue(
+                ExtractorFactory.createExtractor(new FileInputStream(xlsx))
+                instanceof XSSFExcelExtractor
+        );
+        assertTrue(
+                ExtractorFactory.createExtractor(new FileInputStream(xlsx)).getText().length() > 200
+        );
+
+        // Word
+        assertTrue(
+                ExtractorFactory.createExtractor(new FileInputStream(doc))
+                instanceof WordExtractor
+        );
+        assertTrue(
+                ExtractorFactory.createExtractor(new FileInputStream(doc)).getText().length() > 120
+        );
 
-   private File xls;
-   private File xlsx;
-   private File xltx;
-   private File xlsEmb;
-
-   private File doc;
-   private File doc6;
-   private File doc95;
-   private File docx;
-   private File dotx;
-   private File docEmb;
-   private File docEmbOOXML;
-
-   private File ppt;
-   private File pptx;
-
-   private File msg;
-   private File msgEmb;
-   private File msgEmbMsg;
-   
-   private File vsd;
-   private File vsdx;
-   
-   private File pub;
-
-   private File getFileAndCheck(POIDataSamples samples, String name) {
-       File file = samples.getFile(name);
-       
-       assertNotNull("Did not get a file for " + name, file);
-       assertTrue("Did not get a type file for " + name, file.isFile());
-       assertTrue("File did not exist: " + name, file.exists());
-       
-       return file;
-   }
-   @Override
-   protected void setUp() throws Exception {
-      super.setUp();
-
-      POIDataSamples ssTests = POIDataSamples.getSpreadSheetInstance();
-      xls = getFileAndCheck(ssTests, "SampleSS.xls");
-      xlsx = getFileAndCheck(ssTests, "SampleSS.xlsx");
-      xltx = getFileAndCheck(ssTests, "test.xltx");
-      xlsEmb = getFileAndCheck(ssTests, "excel_with_embeded.xls");
-
-      POIDataSamples wpTests = POIDataSamples.getDocumentInstance();
-      doc = getFileAndCheck(wpTests, "SampleDoc.doc");
-      doc6 = getFileAndCheck(wpTests, "Word6.doc");
-      doc95 = getFileAndCheck(wpTests, "Word95.doc");
-      docx = getFileAndCheck(wpTests, "SampleDoc.docx");
-      dotx = getFileAndCheck(wpTests, "test.dotx");
-      docEmb = getFileAndCheck(wpTests, "word_with_embeded.doc");
-      docEmbOOXML = getFileAndCheck(wpTests, "word_with_embeded_ooxml.doc");
-
-      POIDataSamples slTests = POIDataSamples.getSlideShowInstance();
-      ppt = getFileAndCheck(slTests, "SampleShow.ppt");
-      pptx = getFileAndCheck(slTests, "SampleShow.pptx");
-      txt = getFileAndCheck(slTests, "SampleShow.txt");
-
-      POIDataSamples dgTests = POIDataSamples.getDiagramInstance();
-      vsd = getFileAndCheck(dgTests, "Test_Visio-Some_Random_Text.vsd");
-      vsdx = getFileAndCheck(dgTests, "test.vsdx");
-      
-      POIDataSamples pubTests = POIDataSamples.getPublisherInstance();
-      pub = getFileAndCheck(pubTests, "Simple.pub");
-      
-      POIDataSamples olTests = POIDataSamples.getHSMFInstance();
-      msg = getFileAndCheck(olTests, "quick.msg");
-      msgEmb = getFileAndCheck(olTests, "attachment_test_msg.msg");
-      msgEmbMsg = getFileAndCheck(olTests, "attachment_msg_pdf.msg");
-   }
-
-   public void testFile() throws Exception {
-      // Excel
-      POITextExtractor xlsExtractor = ExtractorFactory.createExtractor(xls);
-      assertNotNull("Had empty extractor for " + xls, xlsExtractor);
-      assertTrue("Expected instanceof ExcelExtractor, but had: " + xlsExtractor.getClass(),
-            xlsExtractor
-            instanceof ExcelExtractor
-      );
-      assertTrue(
-            xlsExtractor.getText().length() > 200
-      );
-      xlsExtractor.close();
-
-      POITextExtractor extractor = ExtractorFactory.createExtractor(xlsx);
-      assertTrue(
-            extractor
-            instanceof XSSFExcelExtractor
-      );
-      extractor.close();
-
-      extractor = ExtractorFactory.createExtractor(xlsx);
-      assertTrue(
-            extractor.getText().length() > 200
-      );
-      extractor.close();
-
-      extractor = ExtractorFactory.createExtractor(xltx);
-      assertTrue(
-            extractor
-            instanceof XSSFExcelExtractor
-      );
-      extractor.close();
-
-      extractor = ExtractorFactory.createExtractor(xltx);
-      assertTrue(
-            extractor.getText().contains("test")
-      );
-      extractor.close();
-
-
-      // Word
-      assertTrue(
-            ExtractorFactory.createExtractor(doc)
-            instanceof WordExtractor
-      );
-      assertTrue(
-            ExtractorFactory.createExtractor(doc).getText().length() > 120
-      );
-
-      assertTrue(
-            ExtractorFactory.createExtractor(doc6)
-            instanceof Word6Extractor
-      );
-      assertTrue(
-            ExtractorFactory.createExtractor(doc6).getText().length() > 20
-      );
-      
-      assertTrue(
-            ExtractorFactory.createExtractor(doc95)
-            instanceof Word6Extractor
-      );
-      assertTrue(
-            ExtractorFactory.createExtractor(doc95).getText().length() > 120
-      );
-          
-      extractor = ExtractorFactory.createExtractor(docx);
-      assertTrue(
-            extractor instanceof XWPFWordExtractor
-      );
-      extractor.close();
-      
-      extractor = ExtractorFactory.createExtractor(docx);
-      assertTrue(
-            extractor.getText().length() > 120
-      );
-      extractor.close();
-
-      extractor = ExtractorFactory.createExtractor(dotx);
-      assertTrue(
-            extractor instanceof XWPFWordExtractor
-      );
-      extractor.close();
-      
-      extractor = ExtractorFactory.createExtractor(dotx);
-      assertTrue(
-            extractor.getText().contains("Test")
-      );
-      extractor.close();
-
-      // PowerPoint
-      assertTrue(
-            ExtractorFactory.createExtractor(ppt)
-            instanceof PowerPointExtractor
-      );
-      assertTrue(
-            ExtractorFactory.createExtractor(ppt).getText().length() > 120
-      );
-
-      extractor = ExtractorFactory.createExtractor(pptx);
-      assertTrue(
-            extractor
-            instanceof XSLFPowerPointExtractor
-      );
-      extractor.close();
-
-      extractor = ExtractorFactory.createExtractor(pptx);
-      assertTrue(
-            extractor.getText().length() > 120
-      );
-      extractor.close();
-
-      // Visio - binary
-      assertTrue(
-            ExtractorFactory.createExtractor(vsd)
-            instanceof VisioTextExtractor
-      );
-      assertTrue(
-            ExtractorFactory.createExtractor(vsd).getText().length() > 50
-      );
-      // Visio - vsdx
-      try {
-          ExtractorFactory.createExtractor(vsdx);
-          fail();
-      } catch(IllegalArgumentException e) {
-          // Good
-      }
-      
-      // Publisher
-      assertTrue(
-            ExtractorFactory.createExtractor(pub)
-            instanceof PublisherTextExtractor
-      );
-      assertTrue(
-            ExtractorFactory.createExtractor(pub).getText().length() > 50
-      );
-      
-      // Outlook msg
-      assertTrue(
-            ExtractorFactory.createExtractor(msg)
-            instanceof OutlookTextExtactor
-      );
-      assertTrue(
-            ExtractorFactory.createExtractor(msg).getText().length() > 50
-      );
-
-      // Text
-      try {
-         ExtractorFactory.createExtractor(txt);
-         fail();
-      } catch(IllegalArgumentException e) {
-         // Good
-      }
-	}
-	
-	public void testInputStream() throws Exception {
-		// Excel
-		assertTrue(
-				ExtractorFactory.createExtractor(new FileInputStream(xls))
-				instanceof ExcelExtractor
-		);
-		assertTrue(
-				ExtractorFactory.createExtractor(new FileInputStream(xls)).getText().length() > 200
-		);
-		
-		assertTrue(
-				ExtractorFactory.createExtractor(new FileInputStream(xlsx))
-				instanceof XSSFExcelExtractor
-		);
-		assertTrue(
-				ExtractorFactory.createExtractor(new FileInputStream(xlsx)).getText().length() > 200
-		);
-		
-		// Word
-		assertTrue(
-				ExtractorFactory.createExtractor(new FileInputStream(doc))
-				instanceof WordExtractor
-		);
-		assertTrue(
-				ExtractorFactory.createExtractor(new FileInputStream(doc)).getText().length() > 120
-		);
-		
         assertTrue(
                 ExtractorFactory.createExtractor(new FileInputStream(doc6))
                 instanceof Word6Extractor
@@ -309,7 +308,7 @@ public class TestExtractorFactory extend
         assertTrue(
                 ExtractorFactory.createExtractor(new FileInputStream(doc6)).getText().length() > 20
         );
-        
+
         assertTrue(
                 ExtractorFactory.createExtractor(new FileInputStream(doc95))
                 instanceof Word6Extractor
@@ -317,99 +316,99 @@ public class TestExtractorFactory extend
         assertTrue(
                 ExtractorFactory.createExtractor(new FileInputStream(doc95)).getText().length() > 120
         );
-        
-		assertTrue(
-				ExtractorFactory.createExtractor(new FileInputStream(docx))
-				instanceof XWPFWordExtractor
-		);
-		assertTrue(
-				ExtractorFactory.createExtractor(new FileInputStream(docx)).getText().length() > 120
-		);
-		
-		// PowerPoint
-		assertTrue(
-				ExtractorFactory.createExtractor(new FileInputStream(ppt))
-				instanceof PowerPointExtractor
-		);
-		assertTrue(
-				ExtractorFactory.createExtractor(new FileInputStream(ppt)).getText().length() > 120
-		);
-		
-		assertTrue(
-				ExtractorFactory.createExtractor(new FileInputStream(pptx))
-				instanceof XSLFPowerPointExtractor
-		);
-		assertTrue(
-				ExtractorFactory.createExtractor(new FileInputStream(pptx)).getText().length() > 120
-		);
-		
-		// Visio
-		assertTrue(
-				ExtractorFactory.createExtractor(new FileInputStream(vsd))
-				instanceof VisioTextExtractor
-		);
-		assertTrue(
-				ExtractorFactory.createExtractor(new FileInputStream(vsd)).getText().length() > 50
-		);
-	      // Visio - vsdx
-	      try {
-	          ExtractorFactory.createExtractor(new FileInputStream(vsdx));
-	          fail();
-	      } catch(IllegalArgumentException e) {
-	          // Good
-	      }
-		
-      // Publisher
-      assertTrue(
-            ExtractorFactory.createExtractor(new FileInputStream(pub))
-            instanceof PublisherTextExtractor
-      );
-      assertTrue(
-            ExtractorFactory.createExtractor(new FileInputStream(pub)).getText().length() > 50
-      );
-      
-		// Outlook msg
-      assertTrue(
-            ExtractorFactory.createExtractor(new FileInputStream(msg))
-            instanceof OutlookTextExtactor
-      );
-      assertTrue(
-            ExtractorFactory.createExtractor(new FileInputStream(msg)).getText().length() > 50
-      );
-		
-		// Text
-		try {
-			FileInputStream stream = new FileInputStream(txt);
-			try {
+
+        assertTrue(
+                ExtractorFactory.createExtractor(new FileInputStream(docx))
+                instanceof XWPFWordExtractor
+        );
+        assertTrue(
+                ExtractorFactory.createExtractor(new FileInputStream(docx)).getText().length() > 120
+        );
+
+        // PowerPoint
+        assertTrue(
+                ExtractorFactory.createExtractor(new FileInputStream(ppt))
+                instanceof PowerPointExtractor
+        );
+        assertTrue(
+                ExtractorFactory.createExtractor(new FileInputStream(ppt)).getText().length() > 120
+        );
+
+        assertTrue(
+                ExtractorFactory.createExtractor(new FileInputStream(pptx))
+                instanceof XSLFPowerPointExtractor
+        );
+        assertTrue(
+                ExtractorFactory.createExtractor(new FileInputStream(pptx)).getText().length() > 120
+        );
+
+        // Visio
+        assertTrue(
+                ExtractorFactory.createExtractor(new FileInputStream(vsd))
+                instanceof VisioTextExtractor
+        );
+        assertTrue(
+                ExtractorFactory.createExtractor(new FileInputStream(vsd)).getText().length() > 50
+        );
+        // Visio - vsdx
+        try {
+            ExtractorFactory.createExtractor(new FileInputStream(vsdx));
+            fail();
+        } catch(IllegalArgumentException e) {
+            // Good
+        }
+
+        // Publisher
+        assertTrue(
+                ExtractorFactory.createExtractor(new FileInputStream(pub))
+                instanceof PublisherTextExtractor
+        );
+        assertTrue(
+                ExtractorFactory.createExtractor(new FileInputStream(pub)).getText().length() > 50
+        );
+
+        // Outlook msg
+        assertTrue(
+                ExtractorFactory.createExtractor(new FileInputStream(msg))
+                instanceof OutlookTextExtactor
+        );
+        assertTrue(
+                ExtractorFactory.createExtractor(new FileInputStream(msg)).getText().length() > 50
+        );
+
+        // Text
+        try {
+            FileInputStream stream = new FileInputStream(txt);
+            try {
                 ExtractorFactory.createExtractor(stream);
-    			fail();
-			} finally {
-			    stream.close();
-			}
-		} catch(IllegalArgumentException e) {
-			// Good
-		}
-	}
-	
-	public void testPOIFS() throws Exception {
-		// Excel
-		assertTrue(
-				ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls)))
-				instanceof ExcelExtractor
-		);
-		assertTrue(
-				ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls))).getText().length() > 200
-		);
-		
-		// Word
-		assertTrue(
-				ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(doc)))
-				instanceof WordExtractor
-		);
-		assertTrue(
-				ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(doc))).getText().length() > 120
-		);
-		
+                fail();
+            } finally {
+                stream.close();
+            }
+        } catch(IllegalArgumentException e) {
+            // Good
+        }
+    }
+
+    public void testPOIFS() throws Exception {
+        // Excel
+        assertTrue(
+                ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls)))
+                instanceof ExcelExtractor
+        );
+        assertTrue(
+                ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls))).getText().length() > 200
+        );
+
+        // Word
+        assertTrue(
+                ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(doc)))
+                instanceof WordExtractor
+        );
+        assertTrue(
+                ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(doc))).getText().length() > 120
+        );
+
         assertTrue(
                 ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(doc6)))
                 instanceof Word6Extractor
@@ -417,7 +416,7 @@ public class TestExtractorFactory extend
         assertTrue(
                 ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(doc6))).getText().length() > 20
         );
-        
+
         assertTrue(
                 ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(doc95)))
                 instanceof Word6Extractor
@@ -425,297 +424,297 @@ public class TestExtractorFactory extend
         assertTrue(
                 ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(doc95))).getText().length() > 120
         );
-        
-		// PowerPoint
-		assertTrue(
-				ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(ppt)))
-				instanceof PowerPointExtractor
-		);
-		assertTrue(
-				ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(ppt))).getText().length() > 120
-		);
-		
-		// Visio
-		assertTrue(
-				ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(vsd)))
-				instanceof VisioTextExtractor
-		);
-		assertTrue(
-				ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(vsd))).getText().length() > 50
-		);
-      
-      // Publisher
-      assertTrue(
-            ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(pub)))
-            instanceof PublisherTextExtractor
-      );
-      assertTrue(
-            ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(pub))).getText().length() > 50
-      );
-		
-      // Outlook msg
-      assertTrue(
-            ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(msg)))
-            instanceof OutlookTextExtactor
-      );
-      assertTrue(
-            ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(msg))).getText().length() > 50
-      );
-      
-		// Text
-		try {
-			ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(txt)));
-			fail();
-		} catch(IOException e) {
-			// Good
-		}
-	}
-	
-	public void testPackage() throws Exception {
-		// Excel
-		POIXMLTextExtractor extractor = ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString(), PackageAccess.READ));
-        assertTrue(
-				extractor
-				instanceof XSSFExcelExtractor
-		);
+
+        // PowerPoint
+        assertTrue(
+                ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(ppt)))
+                instanceof PowerPointExtractor
+        );
+        assertTrue(
+                ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(ppt))).getText().length() > 120
+        );
+
+        // Visio
+        assertTrue(
+                ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(vsd)))
+                instanceof VisioTextExtractor
+        );
+        assertTrue(
+                ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(vsd))).getText().length() > 50
+        );
+
+        // Publisher
+        assertTrue(
+                ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(pub)))
+                instanceof PublisherTextExtractor
+        );
+        assertTrue(
+                ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(pub))).getText().length() > 50
+        );
+
+        // Outlook msg
+        assertTrue(
+                ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(msg)))
+                instanceof OutlookTextExtactor
+        );
+        assertTrue(
+                ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(msg))).getText().length() > 50
+        );
+
+        // Text
+        try {
+            ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(txt)));
+            fail();
+        } catch(IOException e) {
+            // Good
+        }
+    }
+
+    public void testPackage() throws Exception {
+        // Excel
+        POIXMLTextExtractor extractor = ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString(), PackageAccess.READ));
+        assertTrue(
+                extractor
+                instanceof XSSFExcelExtractor
+        );
         extractor.close();
-		extractor = ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString()));
+        extractor = ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString()));
         assertTrue(extractor.getText().length() > 200);
         extractor.close();
-		
-		// Word
-		extractor = ExtractorFactory.createExtractor(OPCPackage.open(docx.toString()));
-        assertTrue(
-				extractor
-				instanceof XWPFWordExtractor
-		);
-        extractor.close();
-        
-		extractor = ExtractorFactory.createExtractor(OPCPackage.open(docx.toString()));
-        assertTrue(
-				extractor.getText().length() > 120
-		);
-        extractor.close();
-		
-		// PowerPoint
-		extractor = ExtractorFactory.createExtractor(OPCPackage.open(pptx.toString()));
-        assertTrue(
-				extractor
-				instanceof XSLFPowerPointExtractor
-		);
-        extractor.close();
-
-		extractor = ExtractorFactory.createExtractor(OPCPackage.open(pptx.toString()));
-        assertTrue(
-				extractor.getText().length() > 120
-		);
-        extractor.close();
-		
-		// Text
-		try {
-			ExtractorFactory.createExtractor(OPCPackage.open(txt.toString()));
-			fail();
-		} catch(InvalidOperationException e) {
-			// Good
-		}
-	}
-	
-	public void testPreferEventBased() throws Exception {
-	   assertFalse(ExtractorFactory.getPreferEventExtractor());
-	   assertFalse(ExtractorFactory.getThreadPrefersEventExtractors());
-	   assertNull(ExtractorFactory.getAllThreadsPreferEventExtractors());
-	   
-	   ExtractorFactory.setThreadPrefersEventExtractors(true);
-	   
-      assertTrue(ExtractorFactory.getPreferEventExtractor());
-      assertTrue(ExtractorFactory.getThreadPrefersEventExtractors());
-      assertNull(ExtractorFactory.getAllThreadsPreferEventExtractors());
-      
-      ExtractorFactory.setAllThreadsPreferEventExtractors(false);
-      
-      assertFalse(ExtractorFactory.getPreferEventExtractor());
-      assertTrue(ExtractorFactory.getThreadPrefersEventExtractors());
-      assertEquals(Boolean.FALSE, ExtractorFactory.getAllThreadsPreferEventExtractors());
-      
-      ExtractorFactory.setAllThreadsPreferEventExtractors(null);
-      
-      assertTrue(ExtractorFactory.getPreferEventExtractor());
-      assertTrue(ExtractorFactory.getThreadPrefersEventExtractors());
-      assertNull(ExtractorFactory.getAllThreadsPreferEventExtractors());
-      
-      
-      // Check we get the right extractors now
-      POITextExtractor extractor = ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls)));
-      assertTrue(
-            extractor
-            instanceof EventBasedExcelExtractor
-      );
-      extractor.close();
-      extractor = ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls)));
-      assertTrue(
-            extractor.getText().length() > 200
-      );
-      extractor.close();
-      
-      extractor = ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString(), PackageAccess.READ));
-      assertTrue(extractor instanceof XSSFEventBasedExcelExtractor);
-      extractor.close();
-
-      extractor = ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString(), PackageAccess.READ));
-      assertTrue(
-            extractor.getText().length() > 200
-      );
-      extractor.close();
-      
-      
-      // Put back to normal
-      ExtractorFactory.setThreadPrefersEventExtractors(false);
-      assertFalse(ExtractorFactory.getPreferEventExtractor());
-      assertFalse(ExtractorFactory.getThreadPrefersEventExtractors());
-      assertNull(ExtractorFactory.getAllThreadsPreferEventExtractors());
-      
-      // And back
-      extractor = ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls)));
-      assertTrue(
-            extractor
-            instanceof ExcelExtractor
-      );
-      extractor.close();
-      extractor = ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls)));
-      assertTrue(
-            extractor.getText().length() > 200
-      );
-      extractor.close();
-      
-      extractor = ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString(), PackageAccess.READ));
-      assertTrue(
-            extractor
-            instanceof XSSFExcelExtractor
-      );
-      extractor.close();
-      extractor = ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString()));
-      assertTrue(
-            extractor.getText().length() > 200
-      );
-      extractor.close();
-	}
-
-   /**
-    * Test embeded docs text extraction. For now, only
-    *  does poifs embeded, but will do ooxml ones 
-    *  at some point.
-    */
-   public void testEmbeded() throws Exception {
-      POIOLE2TextExtractor ext;
-      POITextExtractor[] embeds;
-
-      // No embedings
-      ext = (POIOLE2TextExtractor)
-      ExtractorFactory.createExtractor(xls);
-      embeds = ExtractorFactory.getEmbededDocsTextExtractors(ext);
-      assertEquals(0, embeds.length);
-
-      // Excel
-      ext = (POIOLE2TextExtractor)
-      ExtractorFactory.createExtractor(xlsEmb);
-      embeds = ExtractorFactory.getEmbededDocsTextExtractors(ext);
-
-      assertEquals(6, embeds.length);
-      int numWord = 0, numXls = 0, numPpt = 0, numMsg = 0, numWordX;
-      for(int i=0; i<embeds.length; i++) {
-         assertTrue(embeds[i].getText().length() > 20);
-
-         if(embeds[i] instanceof PowerPointExtractor) numPpt++;
-         else if(embeds[i] instanceof ExcelExtractor) numXls++;
-         else if(embeds[i] instanceof WordExtractor) numWord++;
-         else if(embeds[i] instanceof OutlookTextExtactor) numMsg++;
-      }
-      assertEquals(2, numPpt);
-      assertEquals(2, numXls);
-      assertEquals(2, numWord);
-      assertEquals(0, numMsg);
-
-      // Word
-      ext = (POIOLE2TextExtractor)
-      ExtractorFactory.createExtractor(docEmb);
-      embeds = ExtractorFactory.getEmbededDocsTextExtractors(ext);
-
-      numWord = 0; numXls = 0; numPpt = 0; numMsg = 0;
-      assertEquals(4, embeds.length);
-      for(int i=0; i<embeds.length; i++) {
-         assertTrue(embeds[i].getText().length() > 20);
-         if(embeds[i] instanceof PowerPointExtractor) numPpt++;
-         else if(embeds[i] instanceof ExcelExtractor) numXls++;
-         else if(embeds[i] instanceof WordExtractor) numWord++;
-         else if(embeds[i] instanceof OutlookTextExtactor) numMsg++;
-      }
-      assertEquals(1, numPpt);
-      assertEquals(2, numXls);
-      assertEquals(1, numWord);
-      assertEquals(0, numMsg);
-      
-      // Word which contains an OOXML file
-      ext = (POIOLE2TextExtractor)
-      ExtractorFactory.createExtractor(docEmbOOXML);
-      embeds = ExtractorFactory.getEmbededDocsTextExtractors(ext);
-
-      numWord = 0; numXls = 0; numPpt = 0; numMsg = 0; numWordX = 0;
-      assertEquals(3, embeds.length);
-      for(int i=0; i<embeds.length; i++) {
-         assertTrue(embeds[i].getText().length() > 20);
-         if(embeds[i] instanceof PowerPointExtractor) numPpt++;
-         else if(embeds[i] instanceof ExcelExtractor) numXls++;
-         else if(embeds[i] instanceof WordExtractor) numWord++;
-         else if(embeds[i] instanceof OutlookTextExtactor) numMsg++;
-         else if(embeds[i] instanceof XWPFWordExtractor) numWordX++;
-      }
-      assertEquals(1, numPpt);
-      assertEquals(1, numXls);
-      assertEquals(0, numWord);
-      assertEquals(1, numWordX);
-      assertEquals(0, numMsg);
-      
-      // Outlook
-      ext = (OutlookTextExtactor)
-      ExtractorFactory.createExtractor(msgEmb);
-      embeds = ExtractorFactory.getEmbededDocsTextExtractors(ext);
-
-      numWord = 0; numXls = 0; numPpt = 0; numMsg = 0;
-      assertEquals(1, embeds.length);
-      for(int i=0; i<embeds.length; i++) {
-         assertTrue(embeds[i].getText().length() > 20);
-         if(embeds[i] instanceof PowerPointExtractor) numPpt++;
-         else if(embeds[i] instanceof ExcelExtractor) numXls++;
-         else if(embeds[i] instanceof WordExtractor) numWord++;
-         else if(embeds[i] instanceof OutlookTextExtactor) numMsg++;
-      }
-      assertEquals(0, numPpt);
-      assertEquals(0, numXls);
-      assertEquals(1, numWord);
-      assertEquals(0, numMsg);
-      
-      // Outlook with another outlook file in it
-      ext = (OutlookTextExtactor)
-      ExtractorFactory.createExtractor(msgEmbMsg);
-      embeds = ExtractorFactory.getEmbededDocsTextExtractors(ext);
-
-      numWord = 0; numXls = 0; numPpt = 0; numMsg = 0;
-      assertEquals(1, embeds.length);
-      for(int i=0; i<embeds.length; i++) {
-         assertTrue(embeds[i].getText().length() > 20);
-         if(embeds[i] instanceof PowerPointExtractor) numPpt++;
-         else if(embeds[i] instanceof ExcelExtractor) numXls++;
-         else if(embeds[i] instanceof WordExtractor) numWord++;
-         else if(embeds[i] instanceof OutlookTextExtactor) numMsg++;
-      }
-      assertEquals(0, numPpt);
-      assertEquals(0, numXls);
-      assertEquals(0, numWord);
-      assertEquals(1, numMsg);
-      
-
-      // TODO - PowerPoint
-      // TODO - Publisher
-      // TODO - Visio
-   }
+
+        // Word
+        extractor = ExtractorFactory.createExtractor(OPCPackage.open(docx.toString()));
+        assertTrue(
+                extractor
+                instanceof XWPFWordExtractor
+        );
+        extractor.close();
+
+        extractor = ExtractorFactory.createExtractor(OPCPackage.open(docx.toString()));
+        assertTrue(
+                extractor.getText().length() > 120
+        );
+        extractor.close();
+
+        // PowerPoint
+        extractor = ExtractorFactory.createExtractor(OPCPackage.open(pptx.toString()));
+        assertTrue(
+                extractor
+                instanceof XSLFPowerPointExtractor
+        );
+        extractor.close();
+
+        extractor = ExtractorFactory.createExtractor(OPCPackage.open(pptx.toString()));
+        assertTrue(
+                extractor.getText().length() > 120
+        );
+        extractor.close();
+
+        // Text
+        try {
+            ExtractorFactory.createExtractor(OPCPackage.open(txt.toString()));
+            fail();
+        } catch(InvalidOperationException e) {
+            // Good
+        }
+    }
+
+    public void testPreferEventBased() throws Exception {
+        assertFalse(ExtractorFactory.getPreferEventExtractor());
+        assertFalse(ExtractorFactory.getThreadPrefersEventExtractors());
+        assertNull(ExtractorFactory.getAllThreadsPreferEventExtractors());
+
+        ExtractorFactory.setThreadPrefersEventExtractors(true);
+
+        assertTrue(ExtractorFactory.getPreferEventExtractor());
+        assertTrue(ExtractorFactory.getThreadPrefersEventExtractors());
+        assertNull(ExtractorFactory.getAllThreadsPreferEventExtractors());
+
+        ExtractorFactory.setAllThreadsPreferEventExtractors(false);
+
+        assertFalse(ExtractorFactory.getPreferEventExtractor());
+        assertTrue(ExtractorFactory.getThreadPrefersEventExtractors());
+        assertEquals(Boolean.FALSE, ExtractorFactory.getAllThreadsPreferEventExtractors());
+
+        ExtractorFactory.setAllThreadsPreferEventExtractors(null);
+
+        assertTrue(ExtractorFactory.getPreferEventExtractor());
+        assertTrue(ExtractorFactory.getThreadPrefersEventExtractors());
+        assertNull(ExtractorFactory.getAllThreadsPreferEventExtractors());
+
+
+        // Check we get the right extractors now
+        POITextExtractor extractor = ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls)));
+        assertTrue(
+                extractor
+                instanceof EventBasedExcelExtractor
+        );
+        extractor.close();
+        extractor = ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls)));
+        assertTrue(
+                extractor.getText().length() > 200
+        );
+        extractor.close();
+
+        extractor = ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString(), PackageAccess.READ));
+        assertTrue(extractor instanceof XSSFEventBasedExcelExtractor);
+        extractor.close();
+
+        extractor = ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString(), PackageAccess.READ));
+        assertTrue(
+                extractor.getText().length() > 200
+        );
+        extractor.close();
+
+
+        // Put back to normal
+        ExtractorFactory.setThreadPrefersEventExtractors(false);
+        assertFalse(ExtractorFactory.getPreferEventExtractor());
+        assertFalse(ExtractorFactory.getThreadPrefersEventExtractors());
+        assertNull(ExtractorFactory.getAllThreadsPreferEventExtractors());
+
+        // And back
+        extractor = ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls)));
+        assertTrue(
+                extractor
+                instanceof ExcelExtractor
+        );
+        extractor.close();
+        extractor = ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls)));
+        assertTrue(
+                extractor.getText().length() > 200
+        );
+        extractor.close();
+
+        extractor = ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString(), PackageAccess.READ));
+        assertTrue(
+                extractor
+                instanceof XSSFExcelExtractor
+        );
+        extractor.close();
+        extractor = ExtractorFactory.createExtractor(OPCPackage.open(xlsx.toString()));
+        assertTrue(
+                extractor.getText().length() > 200
+        );
+        extractor.close();
+    }
+
+    /**
+     * Test embeded docs text extraction. For now, only
+     *  does poifs embeded, but will do ooxml ones 
+     *  at some point.
+     */
+    public void testEmbeded() throws Exception {
+        POIOLE2TextExtractor ext;
+        POITextExtractor[] embeds;
+
+        // No embedings
+        ext = (POIOLE2TextExtractor)
+                ExtractorFactory.createExtractor(xls);
+        embeds = ExtractorFactory.getEmbededDocsTextExtractors(ext);
+        assertEquals(0, embeds.length);
+
+        // Excel
+        ext = (POIOLE2TextExtractor)
+                ExtractorFactory.createExtractor(xlsEmb);
+        embeds = ExtractorFactory.getEmbededDocsTextExtractors(ext);
+
+        assertEquals(6, embeds.length);
+        int numWord = 0, numXls = 0, numPpt = 0, numMsg = 0, numWordX;
+        for(int i=0; i<embeds.length; i++) {
+            assertTrue(embeds[i].getText().length() > 20);
+
+            if(embeds[i] instanceof PowerPointExtractor) numPpt++;
+            else if(embeds[i] instanceof ExcelExtractor) numXls++;
+            else if(embeds[i] instanceof WordExtractor) numWord++;
+            else if(embeds[i] instanceof OutlookTextExtactor) numMsg++;
+        }
+        assertEquals(2, numPpt);
+        assertEquals(2, numXls);
+        assertEquals(2, numWord);
+        assertEquals(0, numMsg);
+
+        // Word
+        ext = (POIOLE2TextExtractor)
+                ExtractorFactory.createExtractor(docEmb);
+        embeds = ExtractorFactory.getEmbededDocsTextExtractors(ext);
+
+        numWord = 0; numXls = 0; numPpt = 0; numMsg = 0;
+        assertEquals(4, embeds.length);
+        for(int i=0; i<embeds.length; i++) {
+            assertTrue(embeds[i].getText().length() > 20);
+            if(embeds[i] instanceof PowerPointExtractor) numPpt++;
+            else if(embeds[i] instanceof ExcelExtractor) numXls++;
+            else if(embeds[i] instanceof WordExtractor) numWord++;
+            else if(embeds[i] instanceof OutlookTextExtactor) numMsg++;
+        }
+        assertEquals(1, numPpt);
+        assertEquals(2, numXls);
+        assertEquals(1, numWord);
+        assertEquals(0, numMsg);
+
+        // Word which contains an OOXML file
+        ext = (POIOLE2TextExtractor)
+                ExtractorFactory.createExtractor(docEmbOOXML);
+        embeds = ExtractorFactory.getEmbededDocsTextExtractors(ext);
+
+        numWord = 0; numXls = 0; numPpt = 0; numMsg = 0; numWordX = 0;
+        assertEquals(3, embeds.length);
+        for(int i=0; i<embeds.length; i++) {
+            assertTrue(embeds[i].getText().length() > 20);
+            if(embeds[i] instanceof PowerPointExtractor) numPpt++;
+            else if(embeds[i] instanceof ExcelExtractor) numXls++;
+            else if(embeds[i] instanceof WordExtractor) numWord++;
+            else if(embeds[i] instanceof OutlookTextExtactor) numMsg++;
+            else if(embeds[i] instanceof XWPFWordExtractor) numWordX++;
+        }
+        assertEquals(1, numPpt);
+        assertEquals(1, numXls);
+        assertEquals(0, numWord);
+        assertEquals(1, numWordX);
+        assertEquals(0, numMsg);
+
+        // Outlook
+        ext = (OutlookTextExtactor)
+                ExtractorFactory.createExtractor(msgEmb);
+        embeds = ExtractorFactory.getEmbededDocsTextExtractors(ext);
+
+        numWord = 0; numXls = 0; numPpt = 0; numMsg = 0;
+        assertEquals(1, embeds.length);
+        for(int i=0; i<embeds.length; i++) {
+            assertTrue(embeds[i].getText().length() > 20);
+            if(embeds[i] instanceof PowerPointExtractor) numPpt++;
+            else if(embeds[i] instanceof ExcelExtractor) numXls++;
+            else if(embeds[i] instanceof WordExtractor) numWord++;
+            else if(embeds[i] instanceof OutlookTextExtactor) numMsg++;
+        }
+        assertEquals(0, numPpt);
+        assertEquals(0, numXls);
+        assertEquals(1, numWord);
+        assertEquals(0, numMsg);
+
+        // Outlook with another outlook file in it
+        ext = (OutlookTextExtactor)
+                ExtractorFactory.createExtractor(msgEmbMsg);
+        embeds = ExtractorFactory.getEmbededDocsTextExtractors(ext);
+
+        numWord = 0; numXls = 0; numPpt = 0; numMsg = 0;
+        assertEquals(1, embeds.length);
+        for(int i=0; i<embeds.length; i++) {
+            assertTrue(embeds[i].getText().length() > 20);
+            if(embeds[i] instanceof PowerPointExtractor) numPpt++;
+            else if(embeds[i] instanceof ExcelExtractor) numXls++;
+            else if(embeds[i] instanceof WordExtractor) numWord++;
+            else if(embeds[i] instanceof OutlookTextExtactor) numMsg++;
+        }
+        assertEquals(0, numPpt);
+        assertEquals(0, numXls);
+        assertEquals(0, numWord);
+        assertEquals(1, numMsg);
+
+
+        // TODO - PowerPoint
+        // TODO - Publisher
+        // TODO - Visio
+    }
 }



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@poi.apache.org
For additional commands, e-mail: commits-help@poi.apache.org


Mime
View raw message