poi-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From n...@apache.org
Subject svn commit: r897249 - in /poi/trunk/src: documentation/content/xdocs/ ooxml/java/org/apache/poi/extractor/ ooxml/testcases/org/apache/poi/extractor/ scratchpad/src/org/apache/poi/hsmf/extractor/ scratchpad/testcases/org/apache/poi/hsmf/extractor/
Date Fri, 08 Jan 2010 16:19:43 GMT
Author: nick
Date: Fri Jan  8 16:18:52 2010
New Revision: 897249

URL: http://svn.apache.org/viewvc?rev=897249&view=rev
Log:
Rename the outlook extractor to be more consistent with other extractors

Added:
    poi/trunk/src/scratchpad/src/org/apache/poi/hsmf/extractor/OutlookTextExtactor.java
    poi/trunk/src/scratchpad/testcases/org/apache/poi/hsmf/extractor/TestOutlookTextExtractor.java
Removed:
    poi/trunk/src/scratchpad/src/org/apache/poi/hsmf/extractor/HSMFTextExtactor.java
    poi/trunk/src/scratchpad/testcases/org/apache/poi/hsmf/extractor/TestHSMFTextExtractor.java
Modified:
    poi/trunk/src/documentation/content/xdocs/status.xml
    poi/trunk/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java
    poi/trunk/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java

Modified: poi/trunk/src/documentation/content/xdocs/status.xml
URL: http://svn.apache.org/viewvc/poi/trunk/src/documentation/content/xdocs/status.xml?rev=897249&r1=897248&r2=897249&view=diff
==============================================================================
--- poi/trunk/src/documentation/content/xdocs/status.xml (original)
+++ poi/trunk/src/documentation/content/xdocs/status.xml Fri Jan  8 16:18:52 2010
@@ -34,7 +34,7 @@
 
     <changes>
         <release version="3.7-SNAPSHOT" date="2010-??-??">
-           <action dev="POI-DEVELOPERS" type="fix">Add a text extractor to HSMF for simpler extraction of text from .msg files</action>
+           <action dev="POI-DEVELOPERS" type="fix">Add a text extractor (OutlookTextExtractor) to HSMF for simpler extraction of text from .msg files</action>
            <action dev="POI-DEVELOPERS" type="fix">Some improvements to HSMF parsing of .msg files</action>
            <action dev="POI-DEVELOPERS" type="fix">Initialise the link type of HSSFHyperLink, so that getType() on it works</action>
            <action dev="POI-DEVELOPERS" type="fix">48425 - improved performance of DateUtil.isCellDateFormatted()  </action>

Modified: poi/trunk/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java?rev=897249&r1=897248&r2=897249&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java Fri Jan  8 16:18:52 2010
@@ -31,7 +31,7 @@
 import org.apache.poi.POIXMLTextExtractor;
 import org.apache.poi.hdgf.extractor.VisioTextExtractor;
 import org.apache.poi.hslf.extractor.PowerPointExtractor;
-import org.apache.poi.hsmf.extractor.HSMFTextExtactor;
+import org.apache.poi.hsmf.extractor.OutlookTextExtactor;
 import org.apache.poi.hssf.extractor.ExcelExtractor;
 import org.apache.poi.hwpf.extractor.WordExtractor;
 import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
@@ -142,7 +142,7 @@
 			if(entry.getName().equals("__substg1.0_1000001E") ||
 			      entry.getName().equals("__substg1.0_0047001E") ||
 			      entry.getName().equals("__substg1.0_0037001E")) {
-			   return new HSMFTextExtactor(poifsDir, fs);
+			   return new OutlookTextExtactor(poifsDir, fs);
 			}
 		}
 		throw new IllegalArgumentException("No supported documents found in the OLE2 stream");

Modified: poi/trunk/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java?rev=897249&r1=897248&r2=897249&view=diff
==============================================================================
--- poi/trunk/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java (original)
+++ poi/trunk/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java Fri Jan  8 16:18:52 2010
@@ -25,7 +25,7 @@
 import org.apache.poi.POIDataSamples;
 import org.apache.poi.hdgf.extractor.VisioTextExtractor;
 import org.apache.poi.hslf.extractor.PowerPointExtractor;
-import org.apache.poi.hsmf.extractor.HSMFTextExtactor;
+import org.apache.poi.hsmf.extractor.OutlookTextExtactor;
 import org.apache.poi.hssf.extractor.ExcelExtractor;
 import org.apache.poi.hwpf.extractor.WordExtractor;
 import org.apache.poi.poifs.filesystem.POIFSFileSystem;
@@ -169,7 +169,7 @@
       // Outlook msg
       assertTrue(
             ExtractorFactory.createExtractor(msg)
-            instanceof HSMFTextExtactor
+            instanceof OutlookTextExtactor
       );
       assertTrue(
             ExtractorFactory.createExtractor(msg).getText().length() > 50
@@ -248,7 +248,7 @@
 		// Outlook msg
       assertTrue(
             ExtractorFactory.createExtractor(new FileInputStream(msg))
-            instanceof HSMFTextExtactor
+            instanceof OutlookTextExtactor
       );
       assertTrue(
             ExtractorFactory.createExtractor(new FileInputStream(msg)).getText().length() > 50
@@ -303,7 +303,7 @@
       // Outlook msg
       assertTrue(
             ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(msg)))
-            instanceof HSMFTextExtactor
+            instanceof OutlookTextExtactor
       );
       assertTrue(
             ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(msg))).getText().length() > 50

Added: poi/trunk/src/scratchpad/src/org/apache/poi/hsmf/extractor/OutlookTextExtactor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hsmf/extractor/OutlookTextExtactor.java?rev=897249&view=auto
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hsmf/extractor/OutlookTextExtactor.java (added)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hsmf/extractor/OutlookTextExtactor.java Fri Jan  8 16:18:52 2010
@@ -0,0 +1,81 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.hsmf.extractor;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.text.SimpleDateFormat;
+
+import org.apache.poi.POIOLE2TextExtractor;
+import org.apache.poi.hsmf.MAPIMessage;
+import org.apache.poi.hsmf.exceptions.ChunkNotFoundException;
+import org.apache.poi.poifs.filesystem.DirectoryNode;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+
+/**
+ * A text extractor for HSMF (Outlook) .msg files.
+ * Outputs in a format somewhat like a plain text email.
+ */
+public class OutlookTextExtactor extends POIOLE2TextExtractor {
+   public OutlookTextExtactor(MAPIMessage msg) {
+      super(msg);
+   }
+   public OutlookTextExtactor(DirectoryNode poifsDir, POIFSFileSystem fs) throws IOException {
+      this(new MAPIMessage(poifsDir, fs));
+   }
+   public OutlookTextExtactor(POIFSFileSystem fs) throws IOException {
+      this(new MAPIMessage(fs));
+   }
+   public OutlookTextExtactor(InputStream inp) throws IOException {
+      this(new MAPIMessage(inp));
+   }
+
+   /**
+    * Outputs something a little like a RFC822 email
+    */
+   public String getText() {
+      MAPIMessage msg = (MAPIMessage)document;
+      StringBuffer s = new StringBuffer();
+      
+      try {
+         s.append("From: " + msg.getDisplayFrom() + "\n");
+      } catch(ChunkNotFoundException e) {}
+      try {
+         s.append("To: " + msg.getDisplayTo() + "\n");
+      } catch(ChunkNotFoundException e) {}
+      try {
+         if(msg.getDisplayCC().length() > 0)
+            s.append("CC: " + msg.getDisplayCC() + "\n");
+      } catch(ChunkNotFoundException e) {}
+      try {
+         if(msg.getDisplayBCC().length() > 0)
+            s.append("BCC: " + msg.getDisplayBCC() + "\n");
+      } catch(ChunkNotFoundException e) {}
+      try {
+         SimpleDateFormat f = new SimpleDateFormat("E, d MMM yyyy HH:mm:ss");
+         s.append("Date: " + f.format(msg.getMessageDate().getTime()) + "\n");
+      } catch(ChunkNotFoundException e) {}
+      try {
+         s.append("Subject: " + msg.getSubject() + "\n");
+      } catch(ChunkNotFoundException e) {}
+      try {
+         s.append("\n" + msg.getTextBody() + "\n");
+      } catch(ChunkNotFoundException e) {}
+      
+      return s.toString();
+   }
+}

Added: poi/trunk/src/scratchpad/testcases/org/apache/poi/hsmf/extractor/TestOutlookTextExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/testcases/org/apache/poi/hsmf/extractor/TestOutlookTextExtractor.java?rev=897249&view=auto
==============================================================================
--- poi/trunk/src/scratchpad/testcases/org/apache/poi/hsmf/extractor/TestOutlookTextExtractor.java (added)
+++ poi/trunk/src/scratchpad/testcases/org/apache/poi/hsmf/extractor/TestOutlookTextExtractor.java Fri Jan  8 16:18:52 2010
@@ -0,0 +1,95 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.hsmf.extractor;
+
+import java.io.FileInputStream;
+import java.io.IOException;
+
+import junit.framework.TestCase;
+
+import org.apache.poi.POIDataSamples;
+import org.apache.poi.hsmf.MAPIMessage;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+
+/**
+ * Tests to verify that the text extractor works
+ */
+public final class TestOutlookTextExtractor extends TestCase {
+   private POIDataSamples samples;
+
+	public TestOutlookTextExtractor() throws IOException {
+        samples = POIDataSamples.getHSMFInstance();
+	}
+	
+	private void assertContains(String haystack, String needle) {
+	   if(haystack.indexOf(needle) > -1) {
+	      return;
+	   }
+	   fail("'" + needle + "' wasn't found in '" + haystack + "'");
+	}
+	
+   public void testQuick() throws Exception {
+      POIFSFileSystem simple = new POIFSFileSystem(
+            new FileInputStream(samples.getFile("quick.msg"))
+      );
+      MAPIMessage msg = new MAPIMessage(simple);
+      
+      OutlookTextExtactor ext = new OutlookTextExtactor(msg);
+      String text = ext.getText();
+      
+      assertContains(text, "From: Kevin Roast\n");
+      assertContains(text, "To: Kevin Roast\n");
+      assertEquals(-1, text.indexOf("CC:"));
+      assertEquals(-1, text.indexOf("BCC:"));
+      assertContains(text, "Subject: Test the content transformer\n");
+      assertContains(text, "Date: Thu, 14 Jun 2007 09:42:55\n");
+      assertContains(text, "The quick brown fox jumps over the lazy dog");
+   }
+   
+   public void testSimple() throws Exception {
+      MAPIMessage msg = new MAPIMessage(new POIFSFileSystem(
+            new FileInputStream(samples.getFile("simple_test_msg.msg"))
+      ));
+      
+      OutlookTextExtactor ext = new OutlookTextExtactor(msg);
+      String text = ext.getText();
+      
+      assertContains(text, "From: Travis Ferguson\n");
+      assertContains(text, "To: travis@overwrittenstack.com\n");
+      assertEquals(-1, text.indexOf("CC:"));
+      assertEquals(-1, text.indexOf("BCC:"));
+      assertContains(text, "Subject: test message\n");
+      assertEquals(-1, text.indexOf("Date:"));
+      assertContains(text, "This is a test message.");
+   }
+
+   public void testConstructors() throws Exception {
+      String inp = (new OutlookTextExtactor(new FileInputStream(
+            samples.getFile("simple_test_msg.msg")
+      )).getText());
+      String poifs = (new OutlookTextExtactor(new POIFSFileSystem(new FileInputStream(
+            samples.getFile("simple_test_msg.msg")
+      ))).getText());
+      String mapi = (new OutlookTextExtactor(new MAPIMessage(new FileInputStream(
+            samples.getFile("simple_test_msg.msg")
+      ))).getText());
+      
+      assertEquals(inp, poifs);
+      assertEquals(inp, mapi);
+   }
+}



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@poi.apache.org
For additional commands, e-mail: commits-help@poi.apache.org


Mime
View raw message