poi-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From n...@apache.org
Subject svn commit: r897887 - in /poi/trunk/src: documentation/content/xdocs/ ooxml/java/org/apache/poi/extractor/ ooxml/testcases/org/apache/poi/extractor/ scratchpad/src/org/apache/poi/hpbf/extractor/
Date Mon, 11 Jan 2010 14:55:44 GMT
Author: nick
Date: Mon Jan 11 14:55:43 2010
New Revision: 897887

URL: http://svn.apache.org/viewvc?rev=897887&view=rev
Log:
Add PublisherTextExtractor support to ExtractorFactory

Modified:
    poi/trunk/src/documentation/content/xdocs/status.xml
    poi/trunk/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java
    poi/trunk/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hpbf/extractor/PublisherTextExtractor.java

Modified: poi/trunk/src/documentation/content/xdocs/status.xml
URL: http://svn.apache.org/viewvc/poi/trunk/src/documentation/content/xdocs/status.xml?rev=897887&r1=897886&r2=897887&view=diff
==============================================================================
--- poi/trunk/src/documentation/content/xdocs/status.xml (original)
+++ poi/trunk/src/documentation/content/xdocs/status.xml Mon Jan 11 14:55:43 2010
@@ -34,6 +34,7 @@
 
     <changes>
         <release version="3.7-SNAPSHOT" date="2010-??-??">
+           <action dev="POI-DEVELOPERS" type="add">Add PublisherTextExtractor support to ExtractorFactory</action>
            <action dev="POI-DEVELOPERS" type="add">Add XSLF support for text extraction from tables</action>
            <action dev="POI-DEVELOPERS" type="add">Support attachments as embeded documents within the new OutlookTextExtractor</action>
            <action dev="POI-DEVELOPERS" type="add">Add a text extractor (OutlookTextExtractor) to HSMF for simpler extraction of text from .msg files</action>

Modified: poi/trunk/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java?rev=897887&r1=897886&r2=897887&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java Mon Jan 11 14:55:43 2010
@@ -31,6 +31,7 @@
 import org.apache.poi.POIXMLDocument;
 import org.apache.poi.POIXMLTextExtractor;
 import org.apache.poi.hdgf.extractor.VisioTextExtractor;
+import org.apache.poi.hpbf.extractor.PublisherTextExtractor;
 import org.apache.poi.hslf.extractor.PowerPointExtractor;
 import org.apache.poi.hsmf.MAPIMessage;
 import org.apache.poi.hsmf.datatypes.AttachmentChunks;
@@ -142,6 +143,9 @@
 			if(entry.getName().equals("VisioDocument")) {
 				return new VisioTextExtractor(poifsDir, fs);
 			}
+         if(entry.getName().equals("Quill")) {
+            return new PublisherTextExtractor(poifsDir, fs);
+         }
 			if(
 			      entry.getName().equals("__substg1.0_1000001E") ||
                entry.getName().equals("__substg1.0_1000001F") ||

Modified: poi/trunk/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java?rev=897887&r1=897886&r2=897887&view=diff
==============================================================================
--- poi/trunk/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java (original)
+++ poi/trunk/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java Mon Jan 11 14:55:43 2010
@@ -24,6 +24,7 @@
 import org.apache.poi.POITextExtractor;
 import org.apache.poi.POIDataSamples;
 import org.apache.poi.hdgf.extractor.VisioTextExtractor;
+import org.apache.poi.hpbf.extractor.PublisherTextExtractor;
 import org.apache.poi.hslf.extractor.PowerPointExtractor;
 import org.apache.poi.hsmf.extractor.OutlookTextExtactor;
 import org.apache.poi.hssf.extractor.ExcelExtractor;
@@ -62,6 +63,8 @@
    private File msgEmb;
    
    private File vsd;
+   
+   private File pub;
 
    protected void setUp() throws Exception {
       super.setUp();
@@ -86,6 +89,9 @@
       POIDataSamples dgTests = POIDataSamples.getDiagramInstance();
       vsd = dgTests.getFile("Test_Visio-Some_Random_Text.vsd");
       
+      POIDataSamples pubTests = POIDataSamples.getPublisherInstance();
+      pub = pubTests.getFile("Simple.pub");
+      
       POIDataSamples olTests = POIDataSamples.getHSMFInstance();
       msg = olTests.getFile("quick.msg");
       msgEmb = olTests.getFile("attachment_test_msg.msg");
@@ -169,6 +175,15 @@
             ExtractorFactory.createExtractor(vsd).getText().length() > 50
       );
       
+      // Publisher
+      assertTrue(
+            ExtractorFactory.createExtractor(pub)
+            instanceof PublisherTextExtractor
+      );
+      assertTrue(
+            ExtractorFactory.createExtractor(pub).getText().length() > 50
+      );
+      
       // Outlook msg
       assertTrue(
             ExtractorFactory.createExtractor(msg)
@@ -248,6 +263,15 @@
 				ExtractorFactory.createExtractor(new FileInputStream(vsd)).getText().length() > 50
 		);
 		
+      // Publisher
+      assertTrue(
+            ExtractorFactory.createExtractor(new FileInputStream(pub))
+            instanceof PublisherTextExtractor
+      );
+      assertTrue(
+            ExtractorFactory.createExtractor(new FileInputStream(pub)).getText().length() > 50
+      );
+      
 		// Outlook msg
       assertTrue(
             ExtractorFactory.createExtractor(new FileInputStream(msg))
@@ -302,6 +326,15 @@
 		assertTrue(
 				ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(vsd))).getText().length() > 50
 		);
+      
+      // Publisher
+      assertTrue(
+            ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(pub)))
+            instanceof PublisherTextExtractor
+      );
+      assertTrue(
+            ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(pub))).getText().length() > 50
+      );
 		
       // Outlook msg
       assertTrue(
@@ -426,6 +459,7 @@
       assertEquals(1, numWord);
 
       // TODO - PowerPoint
+      // TODO - Publisher
       // TODO - Visio
    }
 }

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hpbf/extractor/PublisherTextExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hpbf/extractor/PublisherTextExtractor.java?rev=897887&r1=897886&r2=897887&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hpbf/extractor/PublisherTextExtractor.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hpbf/extractor/PublisherTextExtractor.java Mon Jan 11 14:55:43 2010
@@ -26,6 +26,7 @@
 import org.apache.poi.hpbf.model.qcbits.QCBit;
 import org.apache.poi.hpbf.model.qcbits.QCTextBit;
 import org.apache.poi.hpbf.model.qcbits.QCPLCBit.Type12;
+import org.apache.poi.poifs.filesystem.DirectoryNode;
 import org.apache.poi.poifs.filesystem.POIFSFileSystem;
 
 /**
@@ -39,6 +40,9 @@
 		super(doc);
 		this.doc = doc;
 	}
+   public PublisherTextExtractor(DirectoryNode dir, POIFSFileSystem fs) throws IOException {
+      this(new HPBFDocument(dir, fs));
+   }
 	public PublisherTextExtractor(POIFSFileSystem fs) throws IOException {
 		this(new HPBFDocument(fs));
 	}



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@poi.apache.org
For additional commands, e-mail: commits-help@poi.apache.org


Mime
View raw message