poi-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ye...@apache.org
Subject svn commit: r1135414 - in /poi/trunk/src: documentation/content/xdocs/ scratchpad/src/org/apache/poi/hwpf/extractor/
Date Tue, 14 Jun 2011 08:53:01 GMT
Author: yegor
Date: Tue Jun 14 08:53:00 2011
New Revision: 1135414

URL: http://svn.apache.org/viewvc?rev=1135414&view=rev
Log:
Bugzilla 51351: Word to XSL-FO converter

Added:
    poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/extractor/NumberFormatter.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordToFoExtractor.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordToFoUtils.java
Modified:
    poi/trunk/src/documentation/content/xdocs/status.xml

Modified: poi/trunk/src/documentation/content/xdocs/status.xml
URL: http://svn.apache.org/viewvc/poi/trunk/src/documentation/content/xdocs/status.xml?rev=1135414&r1=1135413&r2=1135414&view=diff
==============================================================================
--- poi/trunk/src/documentation/content/xdocs/status.xml (original)
+++ poi/trunk/src/documentation/content/xdocs/status.xml Tue Jun 14 08:53:00 2011
@@ -34,6 +34,7 @@
 
     <changes>
         <release version="3.8-beta4" date="2011-??-??">
+           <action dev="poi-developers" type="add">51351 - Word to XSL-FO converter</action>
            <action dev="poi-developers" type="add">50458 - Fixed missing shapeId in XSSF drawings </action>
            <action dev="poi-developers" type="add">51339 - Fixed arithmetic rounding in formula evaluation </action>
            <action dev="poi-developers" type="add">51356 - Support autoSizeColumn in SXSSF</action>

Added: poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/extractor/NumberFormatter.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/extractor/NumberFormatter.java?rev=1135414&view=auto
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/extractor/NumberFormatter.java (added)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/extractor/NumberFormatter.java Tue Jun 14 08:53:00 2011
@@ -0,0 +1,88 @@
+/*
+ *  ====================================================================
+ *    Licensed to the Apache Software Foundation (ASF) under one or more
+ *    contributor license agreements.  See the NOTICE file distributed with
+ *    this work for additional information regarding copyright ownership.
+ *    The ASF licenses this file to You under the Apache License, Version 2.0
+ *    (the "License"); you may not use this file except in compliance with
+ *    the License.  You may obtain a copy of the License at
+ *
+ *        http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *    Unless required by applicable law or agreed to in writing, software
+ *    distributed under the License is distributed on an "AS IS" BASIS,
+ *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *    See the License for the specific language governing permissions and
+ *    limitations under the License.
+ * ====================================================================
+ */
+
+package org.apache.poi.hwpf.extractor;
+
+import java.util.Locale;
+
+/**
+ * Formats a list item number as text in one of the numbering styles
+ * identified by the T_* style codes of this class.
+ *
+ * <p>
+ * Letters and roman numerals are read from fixed lookup tables. A number with
+ * no table entry, or an unrecognised style code, is rendered in decimal.
+ * </p>
+ *
+ * @author Ryan Ackley
+ */
+public final class NumberFormatter {
+
+    /** Lower-case latin letters; index 0 holds the label for number 1. */
+    private static final String[] C_LETTERS = { "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",
+            "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z" };
+
+    /** Lower-case roman numerals for 1 to 50; index 0 holds the label for number 1. */
+    private static final String[] C_ROMAN = {
+            "i", "ii", "iii", "iv", "v", "vi", "vii", "viii", "ix", "x",
+            "xi", "xii", "xiii", "xiv", "xv", "xvi", "xvii", "xviii", "xix", "xx",
+            "xxi", "xxii", "xxiii", "xxiv", "xxv", "xxvi", "xxvii", "xxviii", "xxix", "xxx",
+            "xxxi", "xxxii", "xxxiii", "xxxiv", "xxxv", "xxxvi", "xxxvii", "xxxviii", "xxxix", "xl",
+            "xli", "xlii", "xliii", "xliv", "xlv", "xlvi", "xlvii", "xlviii", "xlix", "l" };
+
+    private final static int T_ARABIC = 0;
+    private final static int T_LOWER_LETTER = 4;
+    private final static int T_LOWER_ROMAN = 2;
+    private final static int T_ORDINAL = 5;
+    private final static int T_UPPER_LETTER = 3;
+    private final static int T_UPPER_ROMAN = 1;
+
+    /**
+     * Returns the text label for a list item number.
+     *
+     * @param num one-based item number
+     * @param style numbering style code; unknown codes are rendered in decimal
+     * @return the label, or num in decimal when the style's table has no entry for it
+     */
+    public static String getNumber(int num, int style) {
+        // Locale.ENGLISH keeps "i" -> "I" whatever the default locale is (e.g. Turkish)
+        switch (style) {
+        case T_UPPER_ROMAN:
+            return lookup(C_ROMAN, num).toUpperCase(Locale.ENGLISH);
+        case T_LOWER_ROMAN:
+            return lookup(C_ROMAN, num);
+        case T_UPPER_LETTER:
+            return lookup(C_LETTERS, num).toUpperCase(Locale.ENGLISH);
+        case T_LOWER_LETTER:
+            return lookup(C_LETTERS, num);
+        case T_ARABIC:
+        case T_ORDINAL:
+        default:
+            return String.valueOf(num);
+        }
+    }
+
+    /** Returns table[num - 1], or num in decimal when that index is out of range. */
+    private static String lookup(String[] table, int num) {
+        if (num < 1 || num > table.length) {
+            return String.valueOf(num);
+        }
+        return table[num - 1];
+    }
+}

Added: poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordToFoExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordToFoExtractor.java?rev=1135414&view=auto
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordToFoExtractor.java (added)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordToFoExtractor.java Tue Jun 14 08:53:00 2011
@@ -0,0 +1,642 @@
+/*
+ *  ====================================================================
+ *    Licensed to the Apache Software Foundation (ASF) under one or more
+ *    contributor license agreements.  See the NOTICE file distributed with
+ *    this work for additional information regarding copyright ownership.
+ *    The ASF licenses this file to You under the Apache License, Version 2.0
+ *    (the "License"); you may not use this file except in compliance with
+ *    the License.  You may obtain a copy of the License at
+ *
+ *        http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *    Unless required by applicable law or agreed to in writing, software
+ *    distributed under the License is distributed on an "AS IS" BASIS,
+ *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *    See the License for the specific language governing permissions and
+ *    limitations under the License.
+ * ====================================================================
+ */
+
+package org.apache.poi.hwpf.extractor;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.transform.OutputKeys;
+import javax.xml.transform.Transformer;
+import javax.xml.transform.TransformerFactory;
+import javax.xml.transform.dom.DOMSource;
+import javax.xml.transform.stream.StreamResult;
+
+import org.apache.poi.hwpf.HWPFDocument;
+import org.apache.poi.hwpf.model.ListFormatOverride;
+import org.apache.poi.hwpf.model.ListTables;
+import org.apache.poi.hwpf.usermodel.CharacterRun;
+import org.apache.poi.hwpf.usermodel.Paragraph;
+import org.apache.poi.hwpf.usermodel.Picture;
+import org.apache.poi.hwpf.usermodel.Range;
+import org.apache.poi.hwpf.usermodel.Section;
+import org.apache.poi.hwpf.usermodel.SectionProperties;
+import org.apache.poi.hwpf.usermodel.Table;
+import org.apache.poi.hwpf.usermodel.TableCell;
+import org.apache.poi.hwpf.usermodel.TableIterator;
+import org.apache.poi.hwpf.usermodel.TableRow;
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.Text;
+
+import static org.apache.poi.hwpf.extractor.WordToFoUtils.TWIPS_PER_INCH;
+
+/**
+ * @author Sergey Vladimirov (vlsergey {at} gmail {dot} com)
+ */
+public class WordToFoExtractor {
+
+    private static final byte BEL_MARK = 7;
+
+    private static final byte FIELD_BEGIN_MARK = 19;
+
+    private static final byte FIELD_END_MARK = 21;
+
+    private static final byte FIELD_SEPARATOR_MARK = 20;
+
+    private static final String NS_XSLFO = "http://www.w3.org/1999/XSL/Format";
+
+    private static HWPFDocument loadDoc(File docFile) throws IOException {
+	final FileInputStream istream = new FileInputStream(docFile);
+	try {
+	    return new HWPFDocument(istream);
+	} finally {
+	    try {
+		istream.close();
+	    } catch (Exception exc) {
+		// no op
+	    }
+	}
+    }
+
+    static Document process(File docFile) throws Exception {
+	final HWPFDocument hwpfDocument = loadDoc(docFile);
+	WordToFoExtractor wordToFoExtractor = new WordToFoExtractor(
+		DocumentBuilderFactory.newInstance().newDocumentBuilder()
+			.newDocument());
+	wordToFoExtractor.processDocument(hwpfDocument);
+	return wordToFoExtractor.getDocument();
+    }
+
+    private final Document document;
+
+    private final Element layoutMasterSet;
+
+    private final Element root;
+
+    public WordToFoExtractor(Document document) throws Exception {
+	this.document = document;
+
+	root = document.createElementNS(NS_XSLFO, "fo:root");
+	document.appendChild(root);
+
+	layoutMasterSet = document.createElementNS(NS_XSLFO,
+		"fo:layout-master-set");
+	root.appendChild(layoutMasterSet);
+    }
+
+    protected Element addFlowToPageSequence(final Element pageSequence,
+	    String flowName) {
+	final Element flow = document.createElementNS(NS_XSLFO, "fo:flow");
+	flow.setAttribute("flow-name", flowName);
+	pageSequence.appendChild(flow);
+
+	return flow;
+    }
+
+    protected Element addListItem(Element listBlock) {
+	Element result = createListItem();
+	listBlock.appendChild(result);
+	return result;
+    }
+
+    protected Element addListItemBody(Element listItem) {
+	Element result = createListItemBody();
+	listItem.appendChild(result);
+	return result;
+    }
+
+    protected Element addListItemLabel(Element listItem, String text) {
+	Element result = createListItemLabel(text);
+	listItem.appendChild(result);
+	return result;
+    }
+
+    protected Element addPageSequence(String pageMaster) {
+	final Element pageSequence = document.createElementNS(NS_XSLFO,
+		"fo:page-sequence");
+	pageSequence.setAttribute("master-reference", pageMaster);
+	root.appendChild(pageSequence);
+	return pageSequence;
+    }
+
+    protected Element addRegionBody(Element pageMaster) {
+	final Element regionBody = document.createElementNS(NS_XSLFO,
+		"fo:region-body");
+	pageMaster.appendChild(regionBody);
+
+	return regionBody;
+    }
+
+    protected Element addSimplePageMaster(String masterName) {
+	final Element simplePageMaster = document.createElementNS(NS_XSLFO,
+		"fo:simple-page-master");
+	simplePageMaster.setAttribute("master-name", masterName);
+	layoutMasterSet.appendChild(simplePageMaster);
+
+	return simplePageMaster;
+    }
+
+    protected Element addTable(Element flow) {
+	final Element table = document.createElementNS(NS_XSLFO, "fo:table");
+	flow.appendChild(table);
+	return table;
+    }
+
+    protected Element createBlock() {
+	return document.createElementNS(NS_XSLFO, "fo:block");
+    }
+
+    protected Element createExternalGraphic(String source) {
+	Element result = document.createElementNS(NS_XSLFO,
+		"fo:external-graphic");
+	result.setAttribute("src", "url('" + source + "')");
+	return result;
+    }
+
+    protected Element createInline() {
+	return document.createElementNS(NS_XSLFO, "fo:inline");
+    }
+
+    protected Element createLeader() {
+	return document.createElementNS(NS_XSLFO, "fo:leader");
+    }
+
+    protected Element createListBlock() {
+	return document.createElementNS(NS_XSLFO, "fo:list-block");
+    }
+
+    protected Element createListItem() {
+	return document.createElementNS(NS_XSLFO, "fo:list-item");
+    }
+
+    protected Element createListItemBody() {
+	return document.createElementNS(NS_XSLFO, "fo:list-item-body");
+    }
+
+    protected Element createListItemLabel(String text) {
+	Element result = document.createElementNS(NS_XSLFO,
+		"fo:list-item-label");
+	Element block = createBlock();
+	block.appendChild(document.createTextNode(text));
+	result.appendChild(block);
+	return result;
+    }
+
+    protected String createPageMaster(SectionProperties sep, String type,
+	    int section) {
+	float height = sep.getYaPage() / TWIPS_PER_INCH;
+	float width = sep.getXaPage() / TWIPS_PER_INCH;
+	float leftMargin = sep.getDxaLeft() / TWIPS_PER_INCH;
+	float rightMargin = sep.getDxaRight() / TWIPS_PER_INCH;
+	float topMargin = sep.getDyaTop() / TWIPS_PER_INCH;
+	float bottomMargin = sep.getDyaBottom() / TWIPS_PER_INCH;
+
+	// add these to the header
+	String pageMasterName = type + "-page" + section;
+
+	Element pageMaster = addSimplePageMaster(pageMasterName);
+	pageMaster.setAttribute("page-height", height + "in");
+	pageMaster.setAttribute("page-width", width + "in");
+
+	Element regionBody = addRegionBody(pageMaster);
+	regionBody.setAttribute("margin", topMargin + "in " + rightMargin
+		+ "in " + bottomMargin + "in " + leftMargin + "in");
+
+	/*
+	 * 6.4.14 fo:region-body
+	 *
+	 * The values of the padding and border-width traits must be "0".
+	 */
+	// WordToFoUtils.setBorder(regionBody, sep.getBrcTop(), "top");
+	// WordToFoUtils.setBorder(regionBody, sep.getBrcBottom(), "bottom");
+	// WordToFoUtils.setBorder(regionBody, sep.getBrcLeft(), "left");
+	// WordToFoUtils.setBorder(regionBody, sep.getBrcRight(), "right");
+
+	if (sep.getCcolM1() > 0) {
+	    regionBody.setAttribute("column-count", "" + (sep.getCcolM1() + 1));
+	    if (sep.getFEvenlySpaced()) {
+		regionBody.setAttribute("column-gap",
+			(sep.getDxaColumns() / TWIPS_PER_INCH) + "in");
+	    } else {
+		regionBody.setAttribute("column-gap", "0.25in");
+	    }
+	}
+
+	return pageMasterName;
+    }
+
+    protected Element createTableBody() {
+	return document.createElementNS(NS_XSLFO, "fo:table-body");
+    }
+
+    protected Element createTableCell() {
+	return document.createElementNS(NS_XSLFO, "fo:table-cell");
+    }
+
+    protected Element createTableHeader() {
+	return document.createElementNS(NS_XSLFO, "fo:table-header");
+    }
+
+    protected Element createTableRow() {
+	return document.createElementNS(NS_XSLFO, "fo:table-row");
+    }
+
+    protected Text createText(String data) {
+	return document.createTextNode(data);
+    }
+
+    public Document getDocument() {
+	return document;
+    }
+
+    public void processDocument(HWPFDocument hwpfDocument) {
+	final Range range = hwpfDocument.getRange();
+
+	for (int s = 0; s < range.numSections(); s++) {
+	    processSection(hwpfDocument, range.getSection(s), s);
+	}
+    }
+
+    @SuppressWarnings("unused")
+    protected void processImage(Element currentBlock, Picture picture) {
+	// no default implementation -- skip
+    }
+
+    protected void processParagraph(HWPFDocument hwpfDocument,
+	    Element parentFopElement, int currentTableLevel,
+	    Paragraph paragraph, String bulletText) {
+	final Element block = createBlock();
+	parentFopElement.appendChild(block);
+
+	WordToFoUtils.setParagraphProperties(paragraph, block);
+
+	final int charRuns = paragraph.numCharacterRuns();
+
+	if (charRuns == 0) {
+	    return;
+	}
+
+	final String pFontName;
+	final int pFontSize;
+	final boolean pBold;
+	final boolean pItalic;
+	{
+	    CharacterRun characterRun = paragraph.getCharacterRun(0);
+	    pFontSize = characterRun.getFontSize() / 2;
+	    pFontName = characterRun.getFontName();
+	    pBold = characterRun.isBold();
+	    pItalic = characterRun.isItalic();
+	}
+	WordToFoUtils.setFontFamily(block, pFontName);
+	WordToFoUtils.setFontSize(block, pFontSize);
+	WordToFoUtils.setBold(block, pBold);
+	WordToFoUtils.setItalic(block, pItalic);
+
+	StringBuilder lineText = new StringBuilder();
+
+	if (WordToFoUtils.isNotEmpty(bulletText)) {
+	    Element inline = createInline();
+	    block.appendChild(inline);
+
+	    Text textNode = createText(bulletText);
+	    inline.appendChild(textNode);
+
+	    lineText.append(bulletText);
+	}
+
+	for (int c = 0; c < charRuns; c++) {
+	    CharacterRun characterRun = paragraph.getCharacterRun(c);
+
+	    String text = characterRun.text();
+	    if (text.getBytes().length == 0)
+		continue;
+
+	    if (text.getBytes()[0] == FIELD_BEGIN_MARK) {
+		int skipTo = tryImageWithinField(hwpfDocument, paragraph, c,
+			block);
+
+		if (skipTo != c) {
+		    c = skipTo;
+		    continue;
+		}
+		continue;
+	    }
+	    if (text.getBytes()[0] == FIELD_SEPARATOR_MARK) {
+		continue;
+	    }
+	    if (text.getBytes()[0] == FIELD_END_MARK) {
+		continue;
+	    }
+
+	    if (characterRun.isSpecialCharacter() || characterRun.isObj()
+		    || characterRun.isOle2()) {
+		continue;
+	    }
+
+	    Element inline = createInline();
+	    if (characterRun.isBold() != pBold) {
+		WordToFoUtils.setBold(inline, characterRun.isBold());
+	    }
+	    if (characterRun.isItalic() != pItalic) {
+		WordToFoUtils.setItalic(inline, characterRun.isItalic());
+	    }
+	    if (!WordToFoUtils.equals(characterRun.getFontName(), pFontName)) {
+		WordToFoUtils.setFontFamily(inline, characterRun.getFontName());
+	    }
+	    if (characterRun.getFontSize() / 2 != pFontSize) {
+		WordToFoUtils.setFontSize(inline,
+			characterRun.getFontSize() / 2);
+	    }
+	    WordToFoUtils.setCharactersProperties(characterRun, inline);
+	    block.appendChild(inline);
+
+	    if (text.endsWith("\r")
+		    || (text.charAt(text.length() - 1) == BEL_MARK && currentTableLevel != 0))
+		text = text.substring(0, text.length() - 1);
+
+	    Text textNode = createText(text);
+	    inline.appendChild(textNode);
+
+	    lineText.append(text);
+	}
+
+	if (lineText.toString().trim().length() == 0) {
+	    Element leader = createLeader();
+	    block.appendChild(leader);
+	}
+
+	return;
+    }
+
+    protected void processSection(HWPFDocument hwpfDocument, Section section,
+	    int sectionCounter) {
+	String regularPage = createPageMaster(
+		WordToFoUtils.getSectionProperties(section), "page",
+		sectionCounter);
+
+	Element pageSequence = addPageSequence(regularPage);
+	Element flow = addFlowToPageSequence(pageSequence, "xsl-region-body");
+
+	processSectionParagraphes(hwpfDocument, flow, section, 0);
+    }
+
+    protected void processSectionParagraphes(HWPFDocument hwpfDocument,
+	    Element flow, Range range, int currentTableLevel) {
+	final Map<Integer, Table> allTables = new HashMap<Integer, Table>();
+	for (TableIterator tableIterator = WordToFoUtils.newTableIterator(
+		range, currentTableLevel + 1); tableIterator.hasNext();) {
+	    Table next = tableIterator.next();
+	    allTables.put(Integer.valueOf(next.getStartOffset()), next);
+	}
+
+	final ListTables listTables = hwpfDocument.getListTables();
+	int currentListInfo = 0;
+
+	final int paragraphs = range.numParagraphs();
+	for (int p = 0; p < paragraphs; p++) {
+	    Paragraph paragraph = range.getParagraph(p);
+
+	    if (allTables.containsKey(Integer.valueOf(paragraph
+		    .getStartOffset()))) {
+		Table table = allTables.get(Integer.valueOf(paragraph
+			.getStartOffset()));
+		processTable(hwpfDocument, flow, table, currentTableLevel + 1);
+		continue;
+	    }
+
+	    if (paragraph.isInTable()
+		    && paragraph.getTableLevel() != currentTableLevel) {
+		continue;
+	    }
+
+	    if (paragraph.getIlfo() != currentListInfo) {
+		currentListInfo = paragraph.getIlfo();
+	    }
+
+	    if (currentListInfo != 0) {
+		final ListFormatOverride listFormatOverride = listTables
+			.getOverride(paragraph.getIlfo());
+
+		String label = WordToFoUtils.getBulletText(listTables,
+			paragraph, listFormatOverride.getLsid());
+
+		processParagraph(hwpfDocument, flow, currentTableLevel,
+			paragraph, label);
+	    } else {
+		processParagraph(hwpfDocument, flow, currentTableLevel,
+			paragraph, WordToFoUtils.EMPTY);
+	    }
+	}
+
+    }
+
+    /**
+     * Converts a Word table into an fo:table appended to flow. Rows flagged as
+     * table headers go to fo:table-header, all other rows to fo:table-body.
+     * Cells that only continue a horizontal or vertical merge are skipped.
+     */
+    protected void processTable(HWPFDocument hwpfDocument, Element flow,
+	    Table table, int thisTableLevel) {
+	Element tableElement = addTable(flow);
+
+	Element tableHeader = createTableHeader();
+	Element tableBody = createTableBody();
+
+	final int tableRows = table.numRows();
+
+	int maxColumns = Integer.MIN_VALUE;
+	for (int r = 0; r < tableRows; r++) {
+	    maxColumns = Math.max(maxColumns, table.getRow(r).numCells());
+	}
+
+	for (int r = 0; r < tableRows; r++) {
+	    TableRow tableRow = table.getRow(r);
+
+	    Element tableRowElement = createTableRow();
+	    WordToFoUtils.setTableRowProperties(tableRow, tableRowElement);
+
+	    final int rowCells = tableRow.numCells();
+	    for (int c = 0; c < rowCells; c++) {
+		TableCell tableCell = tableRow.getCell(c);
+
+		if (tableCell.isMerged() && !tableCell.isFirstMerged())
+		    continue;
+
+		if (tableCell.isVerticallyMerged()
+			&& !tableCell.isFirstVerticallyMerged())
+		    continue;
+
+		Element tableCellElement = createTableCell();
+		WordToFoUtils.setTableCellProperties(tableRow, tableCell,
+			tableCellElement, r == 0, r == tableRows - 1, c == 0,
+			c == rowCells - 1);
+
+		if (tableCell.isFirstMerged()) {
+		    int count = 0;
+		    for (int c1 = c; c1 < rowCells; c1++) {
+			if (!tableRow.getCell(c1).isMerged())
+			    break;
+			count++;
+		    }
+		    tableCellElement.setAttribute("number-columns-spanned", ""
+			    + count);
+		} else if (c == rowCells - 1 && c != maxColumns - 1) {
+		    tableCellElement.setAttribute("number-columns-spanned", ""
+			    + (maxColumns - c));
+		}
+
+		if (tableCell.isFirstVerticallyMerged()) {
+		    int count = 0;
+		    for (int r1 = r; r1 < tableRows; r1++) {
+			TableRow nextRow = table.getRow(r1);
+			// getCell(c) needs numCells() > c; a shorter row ends the span
+			if (nextRow.numCells() <= c)
+			    break;
+			if (!nextRow.getCell(c).isVerticallyMerged())
+			    break;
+			count++;
+		    }
+		    tableCellElement.setAttribute("number-rows-spanned", ""
+			    + count);
+		}
+
+		processSectionParagraphes(hwpfDocument, tableCellElement,
+			tableCell, thisTableLevel);
+
+		if (!tableCellElement.hasChildNodes()) {
+		    tableCellElement.appendChild(createBlock());
+		}
+
+		tableRowElement.appendChild(tableCellElement);
+	    }
+
+	    if (tableRow.isTableHeader()) {
+		tableHeader.appendChild(tableRowElement);
+	    } else {
+		tableBody.appendChild(tableRowElement);
+	    }
+	}
+
+	if (tableHeader.hasChildNodes()) {
+	    tableElement.appendChild(tableHeader);
+	}
+	if (tableBody.hasChildNodes()) {
+	    tableElement.appendChild(tableBody);
+	} else {
+	    System.err.println("Table without body");
+	}
+    }
+
+    protected int tryImageWithinField(HWPFDocument hwpfDocument,
+	    Paragraph paragraph, int beginMark, Element currentBlock) {
+	int separatorMark = -1;
+	int pictureMark = -1;
+	int endMark = -1;
+	for (int c = beginMark + 1; c < paragraph.numCharacterRuns(); c++) {
+	    CharacterRun characterRun = paragraph.getCharacterRun(c);
+
+	    String text = characterRun.text();
+	    if (text.getBytes().length == 0)
+		continue;
+
+	    if (text.getBytes()[0] == FIELD_SEPARATOR_MARK) {
+		if (separatorMark != -1) {
+		    // double;
+		    return beginMark;
+		}
+
+		separatorMark = c;
+		continue;
+	    }
+
+	    if (text.getBytes()[0] == FIELD_END_MARK) {
+		if (endMark != -1) {
+		    // double;
+		    return beginMark;
+		}
+
+		endMark = c;
+		break;
+	    }
+
+	    if (hwpfDocument.getPicturesTable().hasPicture(characterRun)) {
+		if (pictureMark != -1) {
+		    // double;
+		    return beginMark;
+		}
+
+		pictureMark = c;
+		continue;
+	    }
+	}
+
+	if (separatorMark == -1 || pictureMark == -1 || endMark == -1)
+	    return beginMark;
+
+	final CharacterRun pictureRun = paragraph.getCharacterRun(pictureMark);
+	final Picture picture = hwpfDocument.getPicturesTable().extractPicture(
+		pictureRun, true);
+	processImage(currentBlock, picture);
+
+	return endMark;
+    }
+
+
+    /**
+     * Command-line entry point: renders a Word 97-2007 .doc file as XSL-FO.
+     *
+     * <p>
+     *     Usage: WordToFoExtractor infile outfile
+     * </p>
+     * The output is written as bytes, in the encoding set on the serializer.
+     */
+    public static void main(String[] args) {
+        if (args.length < 2) {
+            System.err.println("Usage: WordToFoExtractor <inputFile.doc> <saveTo.fo>");
+            return;
+        }
+
+        System.out.println("Converting " + args[0]);
+        System.out.println("Saving output to " + args[1]);
+        try {
+            Document doc = WordToFoExtractor.process(new File(args[0]));
+
+            // a byte stream, not a Writer: a Writer would re-encode with the platform charset
+            java.io.OutputStream out = new java.io.FileOutputStream(args[1]);
+            try {
+                TransformerFactory tf = TransformerFactory.newInstance();
+                Transformer serializer = tf.newTransformer();
+                serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");  // TODO set encoding from a command argument
+                serializer.setOutputProperty(OutputKeys.INDENT, "yes");
+                serializer.transform(new DOMSource(doc), new StreamResult(out));
+            } finally {
+                out.close();
+            }
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
+    }
+
+}

Added: poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordToFoUtils.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordToFoUtils.java?rev=1135414&view=auto
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordToFoUtils.java (added)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordToFoUtils.java Tue Jun 14 08:53:00 2011
@@ -0,0 +1,443 @@
+package org.apache.poi.hwpf.extractor;
+
+import java.lang.reflect.Constructor;
+import java.lang.reflect.Field;
+
+import org.apache.poi.hwpf.model.ListLevel;
+import org.apache.poi.hwpf.model.ListTables;
+import org.apache.poi.hwpf.usermodel.BorderCode;
+import org.apache.poi.hwpf.usermodel.CharacterProperties;
+import org.apache.poi.hwpf.usermodel.CharacterRun;
+import org.apache.poi.hwpf.usermodel.Paragraph;
+import org.apache.poi.hwpf.usermodel.Range;
+import org.apache.poi.hwpf.usermodel.Section;
+import org.apache.poi.hwpf.usermodel.SectionProperties;
+import org.apache.poi.hwpf.usermodel.TableCell;
+import org.apache.poi.hwpf.usermodel.TableIterator;
+import org.apache.poi.hwpf.usermodel.TableRow;
+import org.w3c.dom.Element;
+
+public class WordToFoUtils {
+    static final String EMPTY = "";
+
+    public static final float TWIPS_PER_INCH = 1440.0f;
+
+    public static final int TWIPS_PER_PT = 20;
+
+    static boolean equals(String str1, String str2) {
+	return str1 == null ? str2 == null : str1.equals(str2);
+    }
+
+    /**
+     * Maps the border type code of a Word border to an XSL-FO border-style
+     * keyword. Several Word codes share one keyword; codes 1, 2, 5, 20 and
+     * every code that is not listed in the switch below are rendered as
+     * "solid".
+     *
+     * @param borderCode border to inspect, must not be null
+     * @return one of "solid", "double", "dotted", "dashed", "ridge", "groove"
+     * @throws IllegalArgumentException if borderCode is null
+     */
+    public static String getBorderType(BorderCode borderCode) {
+	if (borderCode == null)
+	    throw new IllegalArgumentException("borderCode is null");
+
+	switch (borderCode.getBorderType()) {
+	case 3:
+	case 10:
+	case 11:
+	case 12:
+	case 13:
+	case 14:
+	case 15:
+	case 16:
+	case 17:
+	case 18:
+	case 19:
+	case 21:
+	    return "double";
+	case 6:
+	case 9:
+	    return "dotted";
+	case 7:
+	case 8:
+	case 22:
+	case 23:
+	    return "dashed";
+	case 24:
+	    return "ridge";
+	case 25:
+	    // the border-style keyword is "groove"; "grooved" is not a valid value
+	    return "groove";
+	default:
+	    // 1, 2, 5, 20 and anything unlisted
+	    return "solid";
+	}
+    }
+
+    public static String getBorderWidth(BorderCode borderCode) {
+	int lineWidth = borderCode.getLineWidth();
+	int pt = lineWidth / 8;
+	int pte = lineWidth - pt * 8;
+
+	StringBuilder stringBuilder = new StringBuilder();
+	stringBuilder.append(pt);
+	stringBuilder.append(".");
+	stringBuilder.append(1000 / 8 * pte);
+	stringBuilder.append("pt");
+	return stringBuilder.toString();
+    }
+
+    public static String getBulletText(ListTables listTables,
+	    Paragraph paragraph, int listId) {
+	final ListLevel listLevel = listTables.getLevel(listId,
+		paragraph.getIlvl());
+
+	if (listLevel.getNumberText() == null)
+	    return EMPTY;
+
+	StringBuffer bulletBuffer = new StringBuffer();
+	char[] xst = listLevel.getNumberText().toCharArray();
+	for (char element : xst) {
+	    if (element < 9) {
+		ListLevel numLevel = listTables.getLevel(listId, element);
+
+		int num = numLevel.getStartAt();
+		bulletBuffer.append(NumberFormatter.getNumber(num,
+			listLevel.getNumberFormat()));
+
+		if (numLevel == listLevel) {
+		    numLevel.setStartAt(numLevel.getStartAt() + 1);
+		}
+
+	    } else {
+		bulletBuffer.append(element);
+	    }
+	}
+
+	byte follow = getIxchFollow(listLevel);
+	switch (follow) {
+	case 0:
+	    bulletBuffer.append("\t");
+	    break;
+	case 1:
+	    bulletBuffer.append(" ");
+	    break;
+	default:
+	    break;
+	}
+
+	return bulletBuffer.toString();
+    }
+
+    public static String getColor(int ico) {
+	switch (ico) {
+	case 1:
+	    return "black";
+	case 2:
+	    return "blue";
+	case 3:
+	    return "cyan";
+	case 4:
+	    return "green";
+	case 5:
+	    return "magenta";
+	case 6:
+	    return "red";
+	case 7:
+	    return "yellow";
+	case 8:
+	    return "white";
+	case 9:
+	    return "darkblue";
+	case 10:
+	    return "darkcyan";
+	case 11:
+	    return "darkgreen";
+	case 12:
+	    return "darkmagenta";
+	case 13:
+	    return "darkred";
+	case 14:
+	    return "darkyellow";
+	case 15:
+	    return "darkgray";
+	case 16:
+	    return "lightgray";
+	default:
+	    return "black";
+	}
+    }
+
+    public static byte getIxchFollow(ListLevel listLevel) {
+	try {
+	    Field field = ListLevel.class.getDeclaredField("_ixchFollow");
+	    field.setAccessible(true);
+	    return ((Byte) field.get(listLevel)).byteValue();
+	} catch (Exception exc) {
+	    throw new Error(exc);
+	}
+    }
+
+    public static String getListItemNumberLabel(int number, int format) {
+
+	if (format != 0)
+	    System.err.println("NYI: toListItemNumberLabel(): " + format);
+
+	return String.valueOf(number);
+    }
+
+    public static SectionProperties getSectionProperties(Section section) {
+	try {
+	    Field field = Section.class.getDeclaredField("_props");
+	    field.setAccessible(true);
+	    return (SectionProperties) field.get(section);
+	} catch (Exception exc) {
+	    throw new Error(exc);
+	}
+    }
+
+    static boolean isEmpty(String str) {
+	return str == null || str.length() == 0;
+    }
+
+    static boolean isNotEmpty(String str) {
+	return !isEmpty(str);
+    }
+
+    public static TableIterator newTableIterator(Range range, int level) {
+	try {
+	    Constructor<TableIterator> constructor = TableIterator.class
+		    .getDeclaredConstructor(Range.class, int.class);
+	    constructor.setAccessible(true);
+	    return constructor.newInstance(range, Integer.valueOf(level));
+	} catch (Exception exc) {
+	    throw new Error(exc);
+	}
+    }
+
+    public static void setBold(final Element element, final boolean bold) {
+	element.setAttribute("font-weight", bold ? "bold" : "normal");
+    }
+
+    public static void setBorder(Element element, BorderCode borderCode,
+	    String where) {
+	if (element == null)
+	    throw new IllegalArgumentException("element is null");
+
+	if (borderCode == null)
+	    return;
+
+	if (isEmpty(where)) {
+	    element.setAttribute("border-style", getBorderType(borderCode));
+	    element.setAttribute("border-color",
+		    getColor(borderCode.getColor()));
+	    element.setAttribute("border-width", getBorderWidth(borderCode));
+	} else {
+	    element.setAttribute("border-" + where + "-style",
+		    getBorderType(borderCode));
+	    element.setAttribute("border-" + where + "-color",
+		    getColor(borderCode.getColor()));
+	    element.setAttribute("border-" + where + "-width",
+		    getBorderWidth(borderCode));
+	}
+    }
+
+    /**
+     * Translates formatting flags of a character run into attributes of an
+     * inline element: border, <code>text-transform</code>,
+     * <code>background-color</code>, <code>text-shadow</code>,
+     * <code>font-variant</code>, <code>baseline-shift</code> /
+     * <code>font-size</code> and <code>text-decoration</code>.
+     *
+     * @param characterRun
+     *            run whose formatting is read
+     * @param inline
+     *            element receiving the attributes
+     */
+    public static void setCharactersProperties(final CharacterRun characterRun,
+	    final Element inline) {
+	final CharacterProperties props = characterRun.cloneProperties();
+
+	// space-separated list of decorations; stays null while there is none
+	String decorations = null;
+
+	setBorder(inline, props.getBrc(), EMPTY);
+
+	if (characterRun.isCapitalized()) {
+	    inline.setAttribute("text-transform", "uppercase");
+	}
+	if (characterRun.isHighlighted()) {
+	    final String highlight = getColor(props.getIcoHighlight());
+	    inline.setAttribute("background-color", highlight);
+	}
+	if (characterRun.isStrikeThrough()) {
+	    // always the first entry, so no separator is needed here
+	    decorations = "line-through";
+	}
+	if (characterRun.isShadowed()) {
+	    final String shadow = (characterRun.getFontSize() / 24) + "pt";
+	    inline.setAttribute("text-shadow", shadow);
+	}
+	if (characterRun.isSmallCaps()) {
+	    inline.setAttribute("font-variant", "small-caps");
+	}
+
+	// index 1 is rendered as superscript, index 2 as subscript
+	final boolean superscript = characterRun.getSubSuperScriptIndex() == 1;
+	final boolean subscript = characterRun.getSubSuperScriptIndex() == 2;
+	if (superscript || subscript) {
+	    inline.setAttribute("baseline-shift", superscript ? "super" : "sub");
+	    inline.setAttribute("font-size", "smaller");
+	}
+
+	if (characterRun.getUnderlineCode() > 0) {
+	    decorations = (decorations == null) ? "underline" : decorations
+		    + " underline";
+	}
+	if (decorations != null) {
+	    inline.setAttribute("text-decoration", decorations);
+	}
+    }
+
+    /**
+     * Writes the <code>font-family</code> attribute of an element.
+     *
+     * @param element
+     *            element receiving the attribute
+     * @param fontFamily
+     *            value stored verbatim in the attribute
+     */
+    public static void setFontFamily(final Element element,
+	    final String fontFamily) {
+	final String attributeName = "font-family";
+	element.setAttribute(attributeName, fontFamily);
+    }
+
+    /**
+     * Writes the <code>font-size</code> attribute of an element.
+     *
+     * @param element
+     *            element receiving the attribute
+     * @param fontSize
+     *            size written as its plain decimal representation; no unit
+     *            suffix is appended
+     */
+    public static void setFontSize(final Element element, final int fontSize) {
+	final String sizeText = Integer.toString(fontSize);
+	element.setAttribute("font-size", sizeText);
+    }
+
+    /**
+     * Copies indentation and spacing of a paragraph onto a block element.
+     * Each non-zero paragraph value is divided by <code>TWIPS_PER_PT</code>
+     * and written with a "pt" suffix; zero values produce no attribute.
+     *
+     * @param paragraph
+     *            paragraph whose indents and spacing are read
+     * @param block
+     *            element receiving <code>text-indent</code>,
+     *            <code>start-indent</code>, <code>end-indent</code>,
+     *            <code>space-before</code> and <code>space-after</code>
+     */
+    public static void setIndent(Paragraph paragraph, Element block) {
+	final String unit = "pt";
+
+	if (paragraph.getFirstLineIndent() != 0) {
+	    final String firstLine = (paragraph.getFirstLineIndent() / TWIPS_PER_PT)
+		    + unit;
+	    block.setAttribute("text-indent", firstLine);
+	}
+	if (paragraph.getIndentFromLeft() != 0) {
+	    final String fromLeft = (paragraph.getIndentFromLeft() / TWIPS_PER_PT)
+		    + unit;
+	    block.setAttribute("start-indent", fromLeft);
+	}
+	if (paragraph.getIndentFromRight() != 0) {
+	    final String fromRight = (paragraph.getIndentFromRight() / TWIPS_PER_PT)
+		    + unit;
+	    block.setAttribute("end-indent", fromRight);
+	}
+	if (paragraph.getSpacingBefore() != 0) {
+	    final String before = (paragraph.getSpacingBefore() / TWIPS_PER_PT)
+		    + unit;
+	    block.setAttribute("space-before", before);
+	}
+	if (paragraph.getSpacingAfter() != 0) {
+	    final String after = (paragraph.getSpacingAfter() / TWIPS_PER_PT)
+		    + unit;
+	    block.setAttribute("space-after", after);
+	}
+    }
+
+    /**
+     * Writes the <code>font-style</code> attribute of an element.
+     *
+     * @param element
+     *            element receiving the attribute
+     * @param italic
+     *            <code>true</code> writes "italic", <code>false</code> writes
+     *            "normal"
+     */
+    public static void setItalic(final Element element, final boolean italic) {
+	final String style;
+	if (italic) {
+	    style = "italic";
+	} else {
+	    style = "normal";
+	}
+	element.setAttribute("font-style", style);
+    }
+
+    /**
+     * Maps the justification code of a paragraph to a
+     * <code>text-align</code> attribute. Codes 0 and 7 give "start", 1 and 5
+     * "center", 2 and 8 "end", 3, 4 and 9 "justify", 6 "left"; any other
+     * code leaves the element untouched.
+     *
+     * @param paragraph
+     *            paragraph whose justification code is read
+     * @param element
+     *            element receiving the attribute
+     */
+    public static void setJustification(Paragraph paragraph,
+	    final Element element) {
+	final int code = paragraph.getJustification();
+	final String textAlign;
+	switch (code) {
+	case 0:
+	case 7:
+	    textAlign = "start";
+	    break;
+	case 1:
+	case 5:
+	    textAlign = "center";
+	    break;
+	case 2:
+	case 8:
+	    textAlign = "end";
+	    break;
+	case 3:
+	case 4:
+	case 9:
+	    textAlign = "justify";
+	    break;
+	case 6:
+	    textAlign = "left";
+	    break;
+	default:
+	    // unmapped code: no attribute is written
+	    return;
+	}
+	element.setAttribute("text-align", textAlign);
+    }
+
+    /**
+     * Applies paragraph-level formatting to a block element: indentation and
+     * spacing, justification, the four borders, page break, hyphenation and
+     * keep constraints. <code>linefeed-treatment</code> and
+     * <code>white-space-collapse</code> are always written.
+     *
+     * @param paragraph
+     *            paragraph whose properties are read
+     * @param block
+     *            element receiving the attributes
+     */
+    public static void setParagraphProperties(Paragraph paragraph, Element block) {
+	setIndent(paragraph, block);
+	setJustification(paragraph, block);
+
+	final BorderCode bottomBorder = paragraph.getBottomBorder();
+	setBorder(block, bottomBorder, "bottom");
+	final BorderCode leftBorder = paragraph.getLeftBorder();
+	setBorder(block, leftBorder, "left");
+	final BorderCode rightBorder = paragraph.getRightBorder();
+	setBorder(block, rightBorder, "right");
+	final BorderCode topBorder = paragraph.getTopBorder();
+	setBorder(block, topBorder, "top");
+
+	final boolean breakBefore = paragraph.pageBreakBefore();
+	if (breakBefore) {
+	    block.setAttribute("break-before", "page");
+	}
+
+	// written unconditionally, as the textual form of the flag
+	final String hyphenate = String.valueOf(paragraph.isAutoHyphenated());
+	block.setAttribute("hyphenate", hyphenate);
+
+	final boolean keepTogether = paragraph.keepOnPage();
+	if (keepTogether) {
+	    block.setAttribute("keep-together.within-page", "always");
+	}
+
+	final boolean keepWithNext = paragraph.keepWithNext();
+	if (keepWithNext) {
+	    block.setAttribute("keep-with-next.within-page", "always");
+	}
+
+	block.setAttribute("linefeed-treatment", "preserve");
+	block.setAttribute("white-space-collapse", "false");
+    }
+
+    /**
+     * Writes width, horizontal padding and the four borders of a table cell
+     * onto an element. For each side the border of the cell itself is used
+     * when present; otherwise the outer border of the row is used if the
+     * cell lies on that edge, and the inner (horizontal or vertical) border
+     * of the row if it does not.
+     *
+     * @param tableRow
+     *            row supplying the gap and the fallback borders
+     * @param tableCell
+     *            cell supplying the width and its own borders
+     * @param element
+     *            element receiving the attributes
+     * @param toppest
+     *            selects the row's top border as the fallback for the top side
+     * @param bottomest
+     *            selects the row's bottom border as the fallback for the
+     *            bottom side
+     * @param leftest
+     *            selects the row's left border as the fallback for the left
+     *            side
+     * @param rightest
+     *            selects the row's right border as the fallback for the right
+     *            side
+     */
+    public static void setTableCellProperties(TableRow tableRow,
+	    TableCell tableCell, Element element, boolean toppest,
+	    boolean bottomest, boolean leftest, boolean rightest) {
+	final String width = (tableCell.getWidth() / TWIPS_PER_INCH) + "in";
+	element.setAttribute("width", width);
+
+	// the same gap value is used as padding on both sides
+	final String paddingStart = (tableRow.getGapHalf() / TWIPS_PER_INCH)
+		+ "in";
+	element.setAttribute("padding-start", paddingStart);
+	final String paddingEnd = (tableRow.getGapHalf() / TWIPS_PER_INCH)
+		+ "in";
+	element.setAttribute("padding-end", paddingEnd);
+
+	BorderCode top = tableCell.getBrcTop();
+	if (top == null) {
+	    top = toppest ? tableRow.getTopBorder() : tableRow
+		    .getHorizontalBorder();
+	}
+
+	BorderCode bottom = tableCell.getBrcBottom();
+	if (bottom == null) {
+	    bottom = bottomest ? tableRow.getBottomBorder() : tableRow
+		    .getHorizontalBorder();
+	}
+
+	BorderCode left = tableCell.getBrcLeft();
+	if (left == null) {
+	    left = leftest ? tableRow.getLeftBorder() : tableRow
+		    .getVerticalBorder();
+	}
+
+	BorderCode right = tableCell.getBrcRight();
+	if (right == null) {
+	    right = rightest ? tableRow.getRightBorder() : tableRow
+		    .getVerticalBorder();
+	}
+
+	setBorder(element, bottom, "bottom");
+	setBorder(element, left, "left");
+	setBorder(element, right, "right");
+	setBorder(element, top, "top");
+    }
+
+    /**
+     * Writes row-level attributes onto a table-row element:
+     * <code>height</code> when the row height is positive, and
+     * <code>keep-together</code> when {@link TableRow#cantSplit()} returns
+     * <code>false</code>.
+     *
+     * @param tableRow
+     *            row whose height and split flag are read
+     * @param tableRowElement
+     *            element receiving the attributes
+     */
+    public static void setTableRowProperties(TableRow tableRow,
+	    Element tableRowElement) {
+	final boolean hasHeight = tableRow.getRowHeight() > 0;
+	if (hasHeight) {
+	    final String height = (tableRow.getRowHeight() / TWIPS_PER_INCH)
+		    + "in";
+	    tableRowElement.setAttribute("height", height);
+	}
+
+	// NOTE(review): keep-together is requested when cantSplit() is false,
+	// which reads as inverted - confirm against the semantics of
+	// TableRow.cantSplit() before changing.
+	final boolean cantSplitIsOff = !tableRow.cantSplit();
+	if (cantSplitIsOff) {
+	    tableRowElement.setAttribute("keep-together", "always");
+	}
+    }
+
+}



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@poi.apache.org
For additional commands, e-mail: commits-help@poi.apache.org


Mime
View raw message