poi-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From n...@apache.org
Subject svn commit: r684276 - in /poi/branches/ooxml/src: documentation/content/xdocs/ ooxml/java/org/apache/poi/xwpf/extractor/ ooxml/java/org/apache/poi/xwpf/usermodel/ ooxml/testcases/org/apache/poi/xwpf/extractor/ ooxml/testcases/org/apache/poi/xwpf/model/
Date Sat, 09 Aug 2008 15:08:11 GMT
Author: nick
Date: Sat Aug  9 08:08:11 2008
New Revision: 684276

URL: http://svn.apache.org/viewvc?rev=684276&view=rev
Log:
Have XWPFWordExtractor extract headers and footers

Modified:
    poi/branches/ooxml/src/documentation/content/xdocs/changes.xml
    poi/branches/ooxml/src/documentation/content/xdocs/status.xml
    poi/branches/ooxml/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java
    poi/branches/ooxml/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFHeaderFooter.java
    poi/branches/ooxml/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java
    poi/branches/ooxml/src/ooxml/testcases/org/apache/poi/xwpf/model/TestXWPFHeaderFooterPolicy.java

Modified: poi/branches/ooxml/src/documentation/content/xdocs/changes.xml
URL: http://svn.apache.org/viewvc/poi/branches/ooxml/src/documentation/content/xdocs/changes.xml?rev=684276&r1=684275&r2=684276&view=diff
==============================================================================
--- poi/branches/ooxml/src/documentation/content/xdocs/changes.xml (original)
+++ poi/branches/ooxml/src/documentation/content/xdocs/changes.xml Sat Aug  9 08:08:11 2008
@@ -37,6 +37,7 @@
 
 		<!-- Don't forget to update status.xml too! -->
         <release version="3.5.1-beta2" date="2008-??-??">
+           <action dev="POI-DEVELOPERS" type="add">45539 - Improve XWPFWordExtractor to extract headers and footers</action>
            <action dev="POI-DEVELOPERS" type="fix">Improve how XWPF handles paragraph text</action>
            <action dev="POI-DEVELOPERS" type="add">Support in XWPF handles headers and footers</action>
            <action dev="POI-DEVELOPERS" type="add">45592 - Improve XWPF text extraction to include tables always, and picture text where possible</action>

Modified: poi/branches/ooxml/src/documentation/content/xdocs/status.xml
URL: http://svn.apache.org/viewvc/poi/branches/ooxml/src/documentation/content/xdocs/status.xml?rev=684276&r1=684275&r2=684276&view=diff
==============================================================================
--- poi/branches/ooxml/src/documentation/content/xdocs/status.xml (original)
+++ poi/branches/ooxml/src/documentation/content/xdocs/status.xml Sat Aug  9 08:08:11 2008
@@ -34,6 +34,7 @@
 	<!-- Don't forget to update changes.xml too! -->
     <changes>
         <release version="3.5.1-beta2" date="2008-??-??">
+           <action dev="POI-DEVELOPERS" type="add">45539 - Improve XWPFWordExtractor to extract headers and footers</action>
            <action dev="POI-DEVELOPERS" type="fix">Improve how XWPF handles paragraph text</action>
            <action dev="POI-DEVELOPERS" type="add">Support in XWPF handles headers and footers</action>
            <action dev="POI-DEVELOPERS" type="add">45592 - Improve XWPF text extraction to include tables always, and picture text where possible</action>

Modified: poi/branches/ooxml/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java
URL: http://svn.apache.org/viewvc/poi/branches/ooxml/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java?rev=684276&r1=684275&r2=684276&view=diff
==============================================================================
--- poi/branches/ooxml/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java (original)
+++ poi/branches/ooxml/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java Sat Aug  9 08:08:11 2008
@@ -23,6 +23,7 @@
 import org.apache.poi.POIXMLTextExtractor;
 import org.apache.poi.xwpf.XWPFDocument;
 import org.apache.poi.xwpf.model.XWPFCommentsDecorator;
+import org.apache.poi.xwpf.model.XWPFHeaderFooterPolicy;
 import org.apache.poi.xwpf.model.XWPFHyperlinkDecorator;
 import org.apache.poi.xwpf.model.XWPFParagraphDecorator;
 import org.apache.poi.xwpf.usermodel.XWPFParagraph;
@@ -70,21 +71,46 @@
 	
 	public String getText() {
 		StringBuffer text = new StringBuffer();
+		XWPFHeaderFooterPolicy hfPolicy = document.getHeaderFooterPolicy();
 		
-			
+		// Start out with all headers
+		// TODO - put them in where they're needed
+		if(hfPolicy.getFirstPageHeader() != null) {
+			text.append( hfPolicy.getFirstPageHeader().getText() );
+		}
+		if(hfPolicy.getEvenPageHeader() != null) {
+			text.append( hfPolicy.getEvenPageHeader().getText() );
+		}
+		if(hfPolicy.getDefaultHeader() != null) {
+			text.append( hfPolicy.getDefaultHeader().getText() );
+		}
+		
+		// First up, all our paragraph based text
 		Iterator<XWPFParagraph> i = document.getParagraphsIterator();
 		while(i.hasNext()) {
 			XWPFParagraphDecorator decorator = new XWPFCommentsDecorator(
 					new XWPFHyperlinkDecorator(i.next(), null, fetchHyperlinks));
 			text.append(decorator.getText()+"\n");
 		}
-			
+
+		// Then our table based text
 		Iterator<XWPFTable> j = document.getTablesIterator();
-		while(j.hasNext())
-		{
+		while(j.hasNext()) {
 			text.append(j.next().getText()+"\n");
 		}
 		
+		// Finish up with all the footers
+		// TODO - put them in where they're needed
+		if(hfPolicy.getFirstPageFooter() != null) {
+			text.append( hfPolicy.getFirstPageFooter().getText() );
+		}
+		if(hfPolicy.getEvenPageFooter() != null) {
+			text.append( hfPolicy.getEvenPageFooter().getText() );
+		}
+		if(hfPolicy.getDefaultFooter() != null) {
+			text.append( hfPolicy.getDefaultFooter().getText() );
+		}
+		
 		return text.toString();
 	}
 }

Modified: poi/branches/ooxml/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFHeaderFooter.java
URL: http://svn.apache.org/viewvc/poi/branches/ooxml/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFHeaderFooter.java?rev=684276&r1=684275&r2=684276&view=diff
==============================================================================
--- poi/branches/ooxml/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFHeaderFooter.java (original)
+++ poi/branches/ooxml/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFHeaderFooter.java Sat Aug  9 08:08:11 2008
@@ -81,15 +81,21 @@
 		XWPFParagraph[] paras = getParagraphs();
 		for(int i=0; i<paras.length; i++) {
 			if(! paras[i].isEmpty()) {
-				t.append(paras[i].getText());
-				t.append('\n');
+				String text = paras[i].getText();
+				if(text != null && text.length() > 0) {
+					t.append(text);
+					t.append('\n');
+				}
 			}
 		}
 		
 		XWPFTable[] tables = getTables();
 		for(int i=0; i<tables.length; i++) {
-			t.append(tables[i].getText());
-			t.append('\n');
+			String text = tables[i].getText();
+			if(text != null && text.length() > 0) {
+				t.append(text);
+				t.append('\n');
+			}
 		}
 		
 		return t.toString(); 

Modified: poi/branches/ooxml/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java
URL: http://svn.apache.org/viewvc/poi/branches/ooxml/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java?rev=684276&r1=684275&r2=684276&view=diff
==============================================================================
--- poi/branches/ooxml/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java (original)
+++ poi/branches/ooxml/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java Sat Aug  9 08:08:11 2008
@@ -37,12 +37,22 @@
 	 */
 	private XWPFDocument xmlB;
 	private File fileB;
-	
 	/**
-	 * File with hyperlinks
+	 * With a simplish header+footer
 	 */
 	private XWPFDocument xmlC;
 	private File fileC;
+	/**
+	 * With different header+footer on first/rest
+	 */
+	private XWPFDocument xmlD;
+	private File fileD;
+	
+	/**
+	 * File with hyperlinks
+	 */
+	private XWPFDocument xmlE;
+	private File fileE;
 
 	protected void setUp() throws Exception {
 		super.setUp();
@@ -57,15 +67,27 @@
 		);
 		fileC = new File(
 				System.getProperty("HWPF.testdata.path") +
+				File.separator + "ThreeColHeadFoot.docx"
+		);
+		fileD = new File(
+				System.getProperty("HWPF.testdata.path") +
+				File.separator + "DiffFirstPageHeadFoot.docx"
+		);
+		fileE = new File(
+				System.getProperty("HWPF.testdata.path") +
 				File.separator + "TestDocument.docx"
 		);
 		assertTrue(fileA.exists());
 		assertTrue(fileB.exists());
 		assertTrue(fileC.exists());
+		assertTrue(fileD.exists());
+		assertTrue(fileE.exists());
 		
 		xmlA = new XWPFDocument(POIXMLDocument.openPackage(fileA.toString()));
 		xmlB = new XWPFDocument(POIXMLDocument.openPackage(fileB.toString()));
 		xmlC = new XWPFDocument(POIXMLDocument.openPackage(fileC.toString()));
+		xmlD = new XWPFDocument(POIXMLDocument.openPackage(fileD.toString()));
+		xmlE = new XWPFDocument(POIXMLDocument.openPackage(fileE.toString()));
 	}
 
 	/**
@@ -135,7 +157,7 @@
 	
 	public void testGetWithHyperlinks() throws Exception {
 		XWPFWordExtractor extractor = 
-			new XWPFWordExtractor(xmlC);
+			new XWPFWordExtractor(xmlE);
 		extractor.getText();
 		extractor.setFetchHyperlinks(true);
 		extractor.getText();
@@ -160,4 +182,47 @@
 				extractor.getText()
 		);
 	}
+	
+	public void testHeadersFooters() throws Exception {
+		XWPFWordExtractor extractor = 
+			new XWPFWordExtractor(xmlC);
+		extractor.getText();
+		
+		assertEquals(
+				"First header column!\tMid header\tRight header!\n" +
+				"This is a sample word document. It has two pages. It has a three column heading, and a three column footer\n" +
+				"\n" +
+				"HEADING TEXT\n" + 
+				"\n" +
+				"More on page one\n" + 
+				"\n\n" + 
+				"End of page 1\n\n" +
+				"This is page two. It also has a three column heading, and a three column footer.\n" +
+				"Footer Left\tFooter Middle\tFooter Right\n",
+				extractor.getText()
+		);
+		
+		
+		// Now another file, expect multiple headers
+		//  and multiple footers
+		extractor = 
+			new XWPFWordExtractor(xmlD);
+		extractor.getText();
+		
+		assertEquals(
+				"I am the header on the first page, and I" + '\u2019' + "m nice and simple\n" +
+				"First header column!\tMid header\tRight header!\n" +
+				"This is a sample word document. It has two pages. It has a simple header and footer, which is different to all the other pages.\n" +
+				"\n" +
+				"HEADING TEXT\n" + 
+				"\n" +
+				"More on page one\n" + 
+				"\n\n" + 
+				"End of page 1\n\n" +
+				"This is page two. It also has a three column heading, and a three column footer.\n" +
+				"The footer of the first page\n" +
+				"Footer Left\tFooter Middle\tFooter Right\n",
+				extractor.getText()
+		);
+	}
 }

Modified: poi/branches/ooxml/src/ooxml/testcases/org/apache/poi/xwpf/model/TestXWPFHeaderFooterPolicy.java
URL: http://svn.apache.org/viewvc/poi/branches/ooxml/src/ooxml/testcases/org/apache/poi/xwpf/model/TestXWPFHeaderFooterPolicy.java?rev=684276&r1=684275&r2=684276&view=diff
==============================================================================
--- poi/branches/ooxml/src/ooxml/testcases/org/apache/poi/xwpf/model/TestXWPFHeaderFooterPolicy.java (original)
+++ poi/branches/ooxml/src/ooxml/testcases/org/apache/poi/xwpf/model/TestXWPFHeaderFooterPolicy.java Sat Aug  9 08:08:11 2008
@@ -182,12 +182,12 @@
 		policy = oddEven.getHeaderFooterPolicy();
 		
 		assertEquals(
-			"\n[]ODD Page Header text\n\n",
+			"[]ODD Page Header text\n\n",
 			policy.getDefaultHeader().getText()
 		);
 		assertEquals(
-				"\n[This is an Even Page, with a Header]\n\n", 
-				policy.getEvenPageHeader().getText()
+			"[This is an Even Page, with a Header]\n\n", 
+			policy.getEvenPageHeader().getText()
 		);
 	}
 }



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@poi.apache.org
For additional commands, e-mail: commits-help@poi.apache.org


Mime
View raw message