corinthia-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From pmke...@apache.org
Subject [86/92] [abbrv] incubator-corinthia git commit: Move text package functions into DocFormats
Date Wed, 17 Dec 2014 13:29:36 GMT
Move text package functions into DocFormats

The Word and HTML test code that currently lives in dfutil (but not for
much longer) uses plain text representations of "packages", which are
collections of files e.g. like you might find in a .docx file or a
directory with a HTML file and some images. With the new directory
organisation where we keep tests in the DocFormats tree itself (so as to
not require access to public APIs from an external program), we need
this functionality in the library before we can move the test functions
in.

Currently, the actual test functions which use these plain text
representations are still in dfutil; however they will soon be moved to
the tests directory within the html and ooxml directories.


Project: http://git-wip-us.apache.org/repos/asf/incubator-corinthia/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-corinthia/commit/4701058d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-corinthia/tree/4701058d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-corinthia/diff/4701058d

Branch: refs/heads/stable
Commit: 4701058dc91b4528f55d920facf030d597e4005e
Parents: 1da91ff
Author: Peter Kelly <peter@uxproductivity.com>
Authored: Wed Dec 10 00:22:00 2014 +0700
Committer: Peter Kelly <peter@uxproductivity.com>
Committed: Wed Dec 10 00:22:00 2014 +0700

----------------------------------------------------------------------
 DocFormats/core/CMakeLists.txt                  |   6 +-
 DocFormats/core/src/lib/DFBuffer.c              |  56 ++
 DocFormats/core/src/lib/DFBuffer.h              |   3 +
 DocFormats/core/src/lib/TextPackage.c           | 163 +++++
 DocFormats/core/src/lib/TextPackage.h           |  41 ++
 DocFormats/core/tests/html/HTMLPlain.c          | 124 ++++
 DocFormats/core/tests/html/HTMLPlain.h          |  26 +
 DocFormats/filters/ooxml/CMakeLists.txt         |   2 +
 DocFormats/filters/ooxml/tests/word/WordPlain.c | 589 ++++++++++++++++
 DocFormats/filters/ooxml/tests/word/WordPlain.h |  26 +
 consumers/dfutil/src/CMakeLists.txt             |   8 +-
 consumers/dfutil/src/Commands.c                 |  54 +-
 consumers/dfutil/src/Commands.h                 |   2 -
 consumers/dfutil/src/Plain.c                    | 689 -------------------
 consumers/dfutil/src/Plain.h                    |  28 -
 consumers/dfutil/src/TestCase.c                 |   3 +-
 consumers/dfutil/src/TestFunctions.c            |   3 +-
 consumers/dfutil/src/TextPackage.c              | 163 -----
 consumers/dfutil/src/TextPackage.h              |  41 --
 consumers/dfutil/src/main.c                     |   3 +-
 20 files changed, 1046 insertions(+), 984 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-corinthia/blob/4701058d/DocFormats/core/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/DocFormats/core/CMakeLists.txt b/DocFormats/core/CMakeLists.txt
index a08c0e3..02debe0 100644
--- a/DocFormats/core/CMakeLists.txt
+++ b/DocFormats/core/CMakeLists.txt
@@ -60,6 +60,8 @@ set(GroupSrcHTML
     src/html/DFTidyWrapper.h)
 
 set(GroupTestsHTML
+    tests/html/HTMLPlain.c
+    tests/html/HTMLPlain.h
     tests/html/HTMLTests.c)
 
 set(GroupSrcLib
@@ -82,7 +84,9 @@ set(GroupSrcLib
     src/lib/DFString.h
     src/lib/DFStorage.c
     src/lib/DFZipFile.c
-    src/lib/DFZipFile.h)
+    src/lib/DFZipFile.h
+    src/lib/TextPackage.c
+    src/lib/TextPackage.h)
 
 set(GroupTestsLib
     tests/lib/LibTests.c)

http://git-wip-us.apache.org/repos/asf/incubator-corinthia/blob/4701058d/DocFormats/core/src/lib/DFBuffer.c
----------------------------------------------------------------------
diff --git a/DocFormats/core/src/lib/DFBuffer.c b/DocFormats/core/src/lib/DFBuffer.c
index 19b67c3..c23b66b 100644
--- a/DocFormats/core/src/lib/DFBuffer.c
+++ b/DocFormats/core/src/lib/DFBuffer.c
@@ -170,3 +170,59 @@ int DFWriteDataToFile(const void *data, size_t len, const char *filename, DFErro
     fclose(file);
     return 1;
 }
+
+// This file isn't really a great place for binaryToString and stringToBinary, but they needed
+// to go somewhere after being moved into the DocFormats library from dfutil. At the time of
+// writing, they're only used by the test functions - perhaps we can have a TestLib.c file
+// or similar?
+
+// Encode the bytes of a buffer as uppercase hexadecimal text, 40 bytes (80 hex digits) per line.
+//
+// input:   buffer whose `len` bytes of `data` are encoded; it is not modified.
+// returns: a strdup'd string that the caller must free(). Every line, including the last one,
+//          ends with '\n'; an empty buffer produces an empty string.
+char *binaryToString(DFBuffer *input)
+{
+    static const char hexchars[] = "0123456789ABCDEF";
+    const unsigned char *bytes = (const unsigned char *)input->data;
+    DFBuffer *charBuf = DFBufferNew();
+    for (size_t pos = 0; pos < input->len; pos++) {
+        // Newlines are written *between* groups of 40 bytes, never after the final group
+        if ((pos > 0) && (pos % 40 == 0))
+            DFBufferAppendChar(charBuf,'\n');
+        DFBufferAppendChar(charBuf,hexchars[bytes[pos] >> 4]);
+        DFBufferAppendChar(charBuf,hexchars[bytes[pos] & 0x0F]);
+    }
+    // Terminate the final line for any non-empty input. Testing (len % 40) != 0 here would leave
+    // the last line unterminated when the length is an exact multiple of 40, so that text
+    // appended afterwards (such as a following "#item" line) would run on from the hex digits.
+    if (input->len > 0)
+        DFBufferAppendChar(charBuf,'\n');
+    char *result = strdup(charBuf->data);
+
+    DFBufferRelease(charBuf);
+    return result;
+}
+
+// Decode hexadecimal text into a newly created buffer.
+//
+// Characters other than 0-9, a-f and A-F (newlines, for example) are skipped. Each pair of hex
+// digits yields one byte, first digit in the high four bits. A trailing unpaired digit is dropped.
+//
+// str:     NUL-terminated text to decode.
+// returns: a buffer obtained from DFBufferNew() holding the decoded bytes.
+DFBuffer *stringToBinary(const char *str)
+{
+    DFBuffer *outbuf = DFBufferNew();
+    unsigned char high = 0;
+    int haveHigh = 0;
+
+    for (const char *p = str; *p != '\0'; p++) {
+        unsigned char digit = 0;
+
+        if ((*p >= 'A') && (*p <= 'F'))
+            digit = 10 + (*p - 'A');
+        else if ((*p >= 'a') && (*p <= 'f'))
+            digit = 10 + (*p - 'a');
+        else if ((*p >= '0') && (*p <= '9'))
+            digit = *p - '0';
+        else
+            continue;
+
+        if (haveHigh)
+            DFBufferAppendChar(outbuf,high | digit);
+        else
+            high = digit << 4;
+        haveHigh = !haveHigh;
+    }
+
+    return outbuf;
+}

http://git-wip-us.apache.org/repos/asf/incubator-corinthia/blob/4701058d/DocFormats/core/src/lib/DFBuffer.h
----------------------------------------------------------------------
diff --git a/DocFormats/core/src/lib/DFBuffer.h b/DocFormats/core/src/lib/DFBuffer.h
index fa013d4..5103003 100644
--- a/DocFormats/core/src/lib/DFBuffer.h
+++ b/DocFormats/core/src/lib/DFBuffer.h
@@ -51,4 +51,7 @@ int DFBufferWriteToStorage(DFBuffer *buf, DFStorage *storage, const char *filena
 
 int DFWriteDataToFile(const void *data, size_t len, const char *filename, DFError **error);
 
+char *binaryToString(DFBuffer *input);
+DFBuffer *stringToBinary(const char *str);
+
 #endif

http://git-wip-us.apache.org/repos/asf/incubator-corinthia/blob/4701058d/DocFormats/core/src/lib/TextPackage.c
----------------------------------------------------------------------
diff --git a/DocFormats/core/src/lib/TextPackage.c b/DocFormats/core/src/lib/TextPackage.c
new file mode 100644
index 0000000..031bb1e
--- /dev/null
+++ b/DocFormats/core/src/lib/TextPackage.c
@@ -0,0 +1,163 @@
+// Copyright 2012-2014 UX Productivity Pty Ltd
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "TextPackage.h"
+#include "DFBuffer.h"
+#include "DFString.h"
+#include "DFFilesystem.h"
+#include "DFCommon.h"
+#include <stdlib.h>
+#include <string.h>
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+//                                                                                                //
+//                                           TextPackage                                          //
+//                                                                                                //
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// Create an empty package with a retain count of 1, an empty item table that copies its values
+// with strdup and frees them with free, and a key list consisting of just the NULL terminator.
+static TextPackage *TextPackageNew(void)
+{
+    TextPackage *pkg = (TextPackage *)calloc(1,sizeof(*pkg));
+    pkg->keys = (char **)calloc(1,sizeof(*pkg->keys));
+    pkg->items = DFHashTableNew((DFCopyFunction)strdup,free);
+    pkg->retainCount = 1;
+    return pkg;
+}
+
+// Add one reference to the package and return it, so the call can be used inline.
+// Passing NULL is allowed and yields NULL.
+TextPackage *TextPackageRetain(TextPackage *package)
+{
+    if (package == NULL)
+        return NULL;
+    package->retainCount++;
+    return package;
+}
+
+// Drop one reference to the package. When the last reference goes away, every key string, the
+// key array, the item table and the package itself are freed. Passing NULL is a no-op.
+void TextPackageRelease(TextPackage *package)
+{
+    if (package == NULL)
+        return;
+    package->retainCount--;
+    if (package->retainCount > 0)
+        return;
+
+    char **keys = package->keys;
+    for (char **key = keys; key < keys + package->nkeys; key++)
+        free(*key);
+    free(keys);
+    DFHashTableRelease(package->items);
+    free(package);
+}
+
+// Copy `input` to `output` one line at a time, expanding include directives on the way.
+//
+// A line that starts with `#include "` and ends with `"` is replaced by the (recursively
+// expanded) content of the named file. The name is resolved against `path`; includes nested
+// inside that file are resolved against the directory of the file that contains them. Every
+// other line is written to `output` followed by '\n'.
+//
+// package: not used by this function; passed through to the recursive calls.
+// returns: 1 on success; 0 as soon as an included file cannot be read, with `error` set to the
+//          included name followed by the underlying message.
+static int processIncludes(TextPackage *package, const char *input, DFBuffer *output, const char *path, DFError **error)
+{
+    const char **lines = DFStringSplit(input,"\n",0);
+    int ok = 1;
+    for (int i = 0; ok && (lines[i] != NULL); i++) {
+        const char *line = lines[i];
+        int isInclude = DFStringHasPrefix(line,"#include \"") && DFStringHasSuffix(line,"\"");
+        if (!isInclude) {
+            DFBufferFormat(output,"%s\n",line);
+            continue;
+        }
+
+        // The file name is the text between the opening quote (offset 10) and the closing quote
+        char *relPath = DFSubstring(line,10,strlen(line)-1);
+        char *absPath = DFAppendPathComponent(path,relPath);
+        char *dirName = DFPathDirName(absPath);
+        char *content = DFStringReadFromFile(absPath,error);
+        if (content == NULL) {
+            DFErrorFormat(error,"%s: %s",relPath,DFErrorMessage(error));
+            ok = 0;
+        }
+        else {
+            ok = processIncludes(package,content,output,dirName,error) ? 1 : 0;
+        }
+        free(relPath);
+        free(absPath);
+        free(dirName);
+        free(content);
+    }
+    free(lines);
+    return ok;
+}
+
+// Append `key` to the package's NULL-terminated key list and store `value` under it in the
+// item table. Neither string is retained: the key is duplicated here with strdup, and the item
+// table was created with strdup as its copy function.
+static void addPackageItem(TextPackage *package, const char *key, const char *value)
+{
+    package->keys = (char **)realloc(package->keys,(package->nkeys+2)*sizeof(char *));
+    package->keys[package->nkeys++] = strdup(key);
+    package->keys[package->nkeys] = NULL;
+    DFHashTableAdd(package->items,key,value);
+}
+
+// Parse the plain text representation of a package into `package`.
+//
+// After include expansion, the text is processed line by line:
+//   - a line not starting with '#' is content of the current item
+//   - "#item NAME" finishes the current item and starts a new one called NAME
+//   - a line starting with "##" is content with the first '#' removed (an escape for content
+//     lines that themselves begin with '#')
+//   - any other line starting with '#' is an error
+// Content that appears before the first "#item" line is stored under the empty key "".
+//
+// string:  the text to parse.
+// path:    directory against which #include directives are resolved; "" is treated as ".".
+// returns: 1 on success, 0 on failure with `error` set. On failure, items completed before the
+//          offending line remain in `package`.
+static int parsePackage(TextPackage *package, const char *string, const char *path, DFError **error)
+{
+    DFBuffer *replaced = DFBufferNew();
+    if (!strcmp(path,""))
+        path = ".";
+
+    if (!processIncludes(package,string,replaced,path,error)) {
+        DFBufferRelease(replaced);
+        return 0;
+    }
+
+    int ok = 1;
+    char *currentKey = strdup("");
+    DFBuffer *currentValue = DFBufferNew();
+    const char **lines = DFStringSplit(replaced->data,"\n",0);
+    for (int lineno = 0; lines[lineno]; lineno++) {
+        const char *line = lines[lineno];
+
+        if (!DFStringHasPrefix(line,"#")) {
+            DFBufferFormat(currentValue,"%s\n",line);
+        }
+        else if (DFStringHasPrefix(line,"#item ")) {
+            addPackageItem(package,currentKey,currentValue->data);
+            free(currentKey);
+            DFBufferRelease(currentValue);
+            currentKey = DFSubstring(line,6,strlen(line));
+            currentValue = DFBufferNew();
+        }
+        else if (DFStringHasPrefix(line,"##")) {
+            DFBufferFormat(currentValue,"%s\n",&line[1]);
+        }
+        else {
+            // Leave the loop instead of returning directly, so that the cleanup below also
+            // runs on this path rather than leaking the line array and working buffers.
+            DFErrorFormat(error,"Unknown command: %s on line %d",line,(lineno+1));
+            ok = 0;
+            break;
+        }
+    }
+
+    // The item that was still being collected when the input ended
+    if (ok)
+        addPackageItem(package,currentKey,currentValue->data);
+
+    free(lines);
+    free(currentKey);
+    DFBufferRelease(currentValue);
+    DFBufferRelease(replaced);
+    return ok;
+}
+
+// Read `filename` and parse its content as a text package. #include directives in the file are
+// resolved against the directory that contains it.
+//
+// returns: a new package that the caller releases with TextPackageRelease(), or NULL with
+//          `error` set. A read failure is reported as the file name followed by the message.
+TextPackage *TextPackageNewWithFile(const char *filename, DFError **error)
+{
+    char *text = DFStringReadFromFile(filename,error);
+    if (text != NULL) {
+        char *dir = DFPathDirName(filename);
+        TextPackage *package = TextPackageNewWithString(text,dir,error);
+        free(dir);
+        free(text);
+        return package;
+    }
+
+    DFErrorFormat(error,"%s: %s",filename,DFErrorMessage(error));
+    return NULL;
+}
+
+// Parse `string` as a text package, resolving #include directives against `path`.
+//
+// returns: a new package that the caller releases with TextPackageRelease(), or NULL with
+//          `error` set if parsing fails.
+TextPackage *TextPackageNewWithString(const char *string, const char *path, DFError **error)
+{
+    TextPackage *package = TextPackageNew();
+    if (parsePackage(package,string,path,error))
+        return package;
+
+    TextPackageRelease(package);
+    return NULL;
+}

http://git-wip-us.apache.org/repos/asf/incubator-corinthia/blob/4701058d/DocFormats/core/src/lib/TextPackage.h
----------------------------------------------------------------------
diff --git a/DocFormats/core/src/lib/TextPackage.h b/DocFormats/core/src/lib/TextPackage.h
new file mode 100644
index 0000000..077f84e
--- /dev/null
+++ b/DocFormats/core/src/lib/TextPackage.h
@@ -0,0 +1,41 @@
+// Copyright 2012-2014 UX Productivity Pty Ltd
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef dfutil_TextPackage_h
+#define dfutil_TextPackage_h
+
+#include <DocFormats/DFError.h>
+#include "DFHashTable.h"
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+//                                                                                                //
+//                                           TextPackage                                          //
+//                                                                                                //
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+typedef struct TextPackage TextPackage;
+
+struct TextPackage {      // Reference-counted collection of named text items parsed from a plain text package
+    size_t retainCount;   // Starts at 1; TextPackageRetain adds one, TextPackageRelease removes one and frees at 0
+    char **keys;          // Item names in the order they were parsed, each a separate allocation; NULL-terminated
+    size_t nkeys;         // Number of entries in keys, not counting the NULL terminator
+    DFHashTable *items;   // Maps each item name to its text content; the content before the first #item has key ""
+};
+
+TextPackage *TextPackageNewWithFile(const char *filename, DFError **error);
+TextPackage *TextPackageNewWithString(const char *string, const char *path, DFError **error);
+TextPackage *TextPackageRetain(TextPackage *package);
+void TextPackageRelease(TextPackage *package);
+
+#endif

http://git-wip-us.apache.org/repos/asf/incubator-corinthia/blob/4701058d/DocFormats/core/tests/html/HTMLPlain.c
----------------------------------------------------------------------
diff --git a/DocFormats/core/tests/html/HTMLPlain.c b/DocFormats/core/tests/html/HTMLPlain.c
new file mode 100644
index 0000000..8e00e11
--- /dev/null
+++ b/DocFormats/core/tests/html/HTMLPlain.c
@@ -0,0 +1,124 @@
+// Copyright 2012-2014 UX Productivity Pty Ltd
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "HTMLPlain.h"
+#include "TextPackage.h"
+#include "DFXML.h"
+#include "DFHashTable.h"
+#include "DFString.h"
+#include "DFFilesystem.h"
+#include "DFHTML.h"
+#include "DFCommon.h"
+#include "DFZipFile.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+// Walk the subtree rooted at `node` depth-first and, for every HTML_IMG element that has a src
+// attribute, add that attribute's value as a key of `result` (the stored value is always "").
+static void HTML_getImageSourcesRecursive(DFNode *node, DFHashTable *result)
+{
+    const char *src = (node->tag == HTML_IMG) ? DFGetAttribute(node,HTML_SRC) : NULL;
+    if (src != NULL)
+        DFHashTableAdd(result,src,"");
+
+    DFNode *child = node->first;
+    while (child != NULL) {
+        HTML_getImageSourcesRecursive(child,result);
+        child = child->next;
+    }
+}
+
+// Collect the src values of all img elements in `doc`, using a hash table as a set so each
+// value appears once. Returns the array produced by DFHashTableCopyKeys, which the caller in
+// this file walks up to a NULL entry and releases with free().
+static const char **HTML_getImageSources(DFDocument *doc)
+{
+    DFHashTable *sources = DFHashTableNew(NULL,NULL);
+    HTML_getImageSourcesRecursive(doc->docNode,sources);
+
+    const char **keys = DFHashTableCopyKeys(sources);
+    DFHashTableRelease(sources);
+    return keys;
+}
+
+// Produce the plain text package representation of a HTML document and its images.
+//
+// The output is the serialized document, followed by one item per distinct img src value in
+// case-insensitive sorted order. Each item is introduced by a "#item" line naming the image
+// (given an "images/" prefix unless the src already has one) and contains the image data, read
+// from `storage` under the unmodified src value, encoded by binaryToString.
+//
+// returns: a strdup'd string that the caller must free(), or NULL with `error` set (to the src
+//          value followed by the underlying message) if an image cannot be read.
+char *HTML_toPlain(DFDocument *doc, DFStorage *storage, DFError **error)
+{
+    int ok = 1;
+    DFBuffer *output = DFBufferNew();
+    char *docStr = DFSerializeXMLString(doc,0,0);
+    DFBufferFormat(output,"%s",docStr);
+    free(docStr);
+    const char **imageSources = HTML_getImageSources(doc);
+    DFSortStringsCaseInsensitive(imageSources);
+    for (size_t i = 0; imageSources[i]; i++) {
+        const char *src = imageSources[i];
+        if (DFStringHasPrefix(src,"images/"))
+            DFBufferFormat(output,"#item %s\n",src);
+        else
+            DFBufferFormat(output,"#item images/%s\n",src);
+        DFBuffer *imageData = DFBufferReadFromStorage(storage,src,error);
+        if (imageData == NULL) {
+            // Stop here but fall through to the shared cleanup; returning directly would leak
+            // both the output buffer and the array of image sources.
+            DFErrorFormat(error,"%s: %s",src,DFErrorMessage(error));
+            ok = 0;
+            break;
+        }
+        char *imageStr = binaryToString(imageData);
+        DFBufferFormat(output,"%s",imageStr);
+        free(imageStr);
+        DFBufferRelease(imageData);
+    }
+    free(imageSources);
+
+    char *str = ok ? strdup(output->data) : NULL;
+    DFBufferRelease(output);
+    return str;
+}
+
+// Build a HTML document from a parsed text package and write its other items to `htmlStorage`.
+//
+// The item with the empty key holds the HTML text and is parsed into the returned document.
+// Every other item is decoded with stringToBinary and written to `htmlStorage` under its key.
+//
+// returns: the parsed document, or NULL with `error` set if there is no HTML item, the HTML
+//          cannot be parsed, or an item cannot be written (reported as the key followed by the
+//          underlying message).
+static DFDocument *HTML_fromTextPackage(TextPackage *textPackage, DFStorage *htmlStorage, DFError **error)
+{
+    const char *html = DFHashTableLookup(textPackage->items,"");
+    if (html == NULL) {
+        DFErrorFormat(error,"No HTML data");
+        return NULL;
+    }
+
+    DFDocument *doc = DFParseHTMLString(html,0,error);
+    if (doc == NULL)
+        return NULL;
+
+    for (size_t ki = 0; ki < textPackage->nkeys; ki++) {
+        const char *key = textPackage->keys[ki];
+        if (key[0] == '\0')
+            continue; // the HTML itself, handled above
+
+        DFBuffer *data = stringToBinary(DFHashTableLookup(textPackage->items,key));
+        int written = DFBufferWriteToStorage(data,htmlStorage,key,error);
+        if (!written)
+            DFErrorFormat(error,"%s: %s",key,DFErrorMessage(error));
+        DFBufferRelease(data);
+
+        if (!written) {
+            DFDocumentRelease(doc);
+            return NULL;
+        }
+    }
+
+    return doc;
+}
+
+// Parse the plain text package in `plain` (with #include directives resolved against `path`),
+// write its non-HTML items to `htmlStorage`, and return the HTML document it contains.
+//
+// returns: the document, or NULL with `error` set if the package cannot be parsed or converted.
+DFDocument *HTML_fromPlain(const char *plain, const char *path, DFStorage *htmlStorage, DFError **error)
+{
+    DFDocument *doc = NULL;
+    TextPackage *package = TextPackageNewWithString(plain,path,error);
+    if (package != NULL) {
+        doc = HTML_fromTextPackage(package,htmlStorage,error);
+        TextPackageRelease(package);
+    }
+    return doc;
+}

http://git-wip-us.apache.org/repos/asf/incubator-corinthia/blob/4701058d/DocFormats/core/tests/html/HTMLPlain.h
----------------------------------------------------------------------
diff --git a/DocFormats/core/tests/html/HTMLPlain.h b/DocFormats/core/tests/html/HTMLPlain.h
new file mode 100644
index 0000000..4d4fb74
--- /dev/null
+++ b/DocFormats/core/tests/html/HTMLPlain.h
@@ -0,0 +1,26 @@
+// Copyright 2012-2014 UX Productivity Pty Ltd
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef DocFormats_HTMLPlain_h
+#define DocFormats_HTMLPlain_h
+
+#include <DocFormats/DFError.h>
+#include <DocFormats/DFStorage.h>
+#include "DFHashTable.h"
+#include "DFDOM.h"
+
+char *HTML_toPlain(DFDocument *doc, DFStorage *storage, DFError **error);
+DFDocument *HTML_fromPlain(const char *plain, const char *path, DFStorage *htmlStorage, DFError **error);
+
+#endif

http://git-wip-us.apache.org/repos/asf/incubator-corinthia/blob/4701058d/DocFormats/filters/ooxml/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/DocFormats/filters/ooxml/CMakeLists.txt b/DocFormats/filters/ooxml/CMakeLists.txt
index 37dc8b1..d5fcfce 100644
--- a/DocFormats/filters/ooxml/CMakeLists.txt
+++ b/DocFormats/filters/ooxml/CMakeLists.txt
@@ -95,6 +95,8 @@ set(GroupOOXMLWordLenses
     src/word/lenses/WordTable.c)
 
 set(GroupOOXMLWordTests
+    tests/word/WordPlain.c
+    tests/word/WordPlain.h
     tests/word/WordTests.c)
 
 add_library(ooxml OBJECT

http://git-wip-us.apache.org/repos/asf/incubator-corinthia/blob/4701058d/DocFormats/filters/ooxml/tests/word/WordPlain.c
----------------------------------------------------------------------
diff --git a/DocFormats/filters/ooxml/tests/word/WordPlain.c b/DocFormats/filters/ooxml/tests/word/WordPlain.c
new file mode 100644
index 0000000..b9a2f0c
--- /dev/null
+++ b/DocFormats/filters/ooxml/tests/word/WordPlain.c
@@ -0,0 +1,589 @@
+// Copyright 2012-2014 UX Productivity Pty Ltd
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "WordPlain.h"
+#include "TextPackage.h"
+#include "OPC.h"
+#include "WordConverter.h"
+#include "DFXML.h"
+#include "DFHashTable.h"
+#include "DFString.h"
+#include "DFFilesystem.h"
+#include "DFHTML.h"
+#include "DFCommon.h"
+#include "DFZipFile.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+// Append `doc` to `result` as a package item named `filename`, after stripping whitespace from
+// the document (which is modified in place). Does nothing when `doc` is NULL.
+static void addStrippedSerializedDoc(DFBuffer *result, DFDocument *doc, const char *filename)
+{
+    if (doc == NULL)
+        return;
+
+    DFStripWhitespace(doc->docNode);
+    char *xml = DFSerializeXMLString(doc,0,1);
+    DFBufferFormat(result,"#item %s\n",filename);
+    DFBufferFormat(result,"%s",xml);
+    free(xml);
+}
+
+// Append `doc` to `result` as a package item named `filename`, serialized exactly as it stands.
+// Does nothing when `doc` is NULL.
+static void addSerializedDoc(DFBuffer *result, DFDocument *doc, const char *filename)
+{
+    if (doc == NULL)
+        return;
+
+    char *xml = DFSerializeXMLString(doc,0,1);
+    DFBufferFormat(result,"#item %s\n",filename);
+    DFBufferFormat(result,"%s",xml);
+    free(xml);
+}
+
+// Append binary `data` to `result` as a package item named `filename`, encoded as text by
+// binaryToString. Does nothing when `data` is NULL.
+static void addSerializedBinary(DFBuffer *result, DFBuffer *data, const char *filename)
+{
+    if (data == NULL)
+        return;
+
+    char *hex = binaryToString(data);
+    DFBufferFormat(result,"#item %s\n",filename);
+    DFBufferFormat(result,"%s",hex);
+    free(hex);
+}
+
+// Find the main document part of a package by reading /_rels/.rels from `storage` and
+// returning the target of the first relationship whose type is the officeDocument type.
+//
+// returns: a strdup'd copy of the target that the caller must free(), or NULL. `error` is set
+//          only when /_rels/.rels cannot be parsed; when the file parses but contains no
+//          matching relationship, NULL is returned without touching `error`.
+static char *findDocumentPath(DFStorage *storage, DFError **error)
+{
+    char *found = NULL;
+    DFDocument *relsDoc = DFParseXMLStorage(storage,"/_rels/.rels",error);
+
+    if (relsDoc == NULL) {
+        DFErrorFormat(error,"_rels/.rels: %s",DFErrorMessage(error));
+    }
+    else {
+        DFNode *child = relsDoc->root->first;
+        while (child != NULL) {
+            if (child->tag == REL_RELATIONSHIP) {
+                const char *type = DFGetAttribute(child,NULL_Type);
+                const char *target = DFGetAttribute(child,NULL_TARGET);
+                if ((type != NULL) && (target != NULL) &&
+                    !strcmp(type,"http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument")) {
+                    found = strdup(target);
+                    break;
+                }
+            }
+            child = child->next;
+        }
+    }
+
+    DFDocumentRelease(relsDoc);
+    return found;
+}
+
+// Build the path of the relationships file that belongs to `documentPath`, in the form
+// "<directory>/_rels/<file name>.rels". The result comes from DFFormatString; the caller in
+// this file releases it with free().
+static char *computeDocumentRelsPath(const char *documentPath)
+{
+    char *dir = DFPathDirName(documentPath);
+    char *base = DFPathBaseName(documentPath);
+    char *relsPath = DFFormatString("%s/_rels/%s.rels",dir,base);
+    free(base);
+    free(dir);
+    return relsPath;
+}
+
+// Record every relationship of `relsDoc` that has both a type and a target in `rels`, keyed by
+// type with the target as the value. Does nothing when `relsDoc` is NULL. `error` is not used.
+static void parseDocumentRels(DFDocument *relsDoc, DFHashTable *rels, DFError **error)
+{
+    if (relsDoc == NULL)
+        return;
+
+    DFNode *child = relsDoc->root->first;
+    while (child != NULL) {
+        if (child->tag == REL_RELATIONSHIP) {
+            const char *type = DFGetAttribute(child,NULL_Type);
+            const char *target = DFGetAttribute(child,NULL_TARGET);
+            if ((type != NULL) && (target != NULL))
+                DFHashTableAdd(rels,type,target);
+        }
+        child = child->next;
+    }
+}
+
+// Append the part that the document refers to through relationship type `relName` to `output`
+// as an item called `filename`.
+//
+// The part's path is looked up in `documentRels`; if there is no such relationship, nothing is
+// added and the call succeeds. A part whose root element has no children is parsed but not
+// added. When a part is added, `relName` is also entered into `includeTypes`. `parts` is not
+// used.
+//
+// returns: 1 on success, 0 with `error` set (path followed by the underlying message) if the
+//          part cannot be parsed.
+static int addRelatedDoc(DFHashTable *parts, DFHashTable *documentRels, const char *relName, const char *filename,
+                         DFBuffer *output, DFHashTable *includeTypes, DFStorage *storage, DFError **error)
+{
+    const char *relPath = DFHashTableLookup(documentRels,relName);
+    if (relPath == NULL)
+        return 1;
+
+    DFDocument *doc = DFParseXMLStorage(storage,relPath,error);
+    if (doc != NULL) {
+        int hasContent = (doc->root->first != NULL);
+        if (hasContent) {
+            addStrippedSerializedDoc(output,doc,filename);
+            DFHashTableAdd(includeTypes,relName,"");
+        }
+        DFDocumentRelease(doc);
+        return 1;
+    }
+
+    DFErrorFormat(error,"%s: %s",relPath,DFErrorMessage(error));
+    return 0;
+}
+
+// Write the selected parts of a Word package to `output` in plain text package form.
+//
+// parts:        names of the parts to include, as keys of a hash table. NULL selects the
+//               default set: document, styles, numbering, footnotes and endnotes. The settings
+//               and theme parts are only included when `parts` is non-NULL and names them.
+// documentPath: path of the main document part within `storage`.
+// relsDoc:      parsed relationships file of the main document, or NULL if there is none. It
+//               is modified: relationships whose type is not among those included are removed
+//               before it is serialized.
+// documentRels: relationship type -> target map built from `relsDoc`.
+//
+// After the XML parts, document.xml.rels is written if the document has hyperlink or image
+// relationships or "documentRels" was requested, and finally every entry of `storage` whose
+// extension is png or jpg is written as a binary item.
+//
+// returns: 1 on success, 0 on failure with `error` set.
+static int processParts(DFHashTable *parts, const char *documentPath, DFDocument *relsDoc,
+                        DFHashTable *documentRels,
+                        DFBuffer *output, DFStorage *storage, DFError **error)
+{
+    int ok = 0;
+    DFHashTable *includeTypes = DFHashTableNew((DFCopyFunction)strdup,free);
+    DFHashTableAdd(includeTypes,WORDREL_HYPERLINK,"");
+    DFHashTableAdd(includeTypes,WORDREL_IMAGE,"");
+
+    if ((parts == NULL) || (DFHashTableLookup(parts,"document") != NULL)) {
+        DFDocument *doc = DFParseXMLStorage(storage,documentPath,error);
+        if (doc == NULL)
+            goto end;
+        addStrippedSerializedDoc(output,doc,"document.xml");
+        DFDocumentRelease(doc);
+    }
+
+    if ((parts == NULL) || (DFHashTableLookup(parts,"styles") != NULL)) {
+        if (!addRelatedDoc(parts,documentRels,WORDREL_STYLES,"styles.xml",output,includeTypes,storage,error))
+            goto end;
+    }
+    if ((parts == NULL) || (DFHashTableLookup(parts,"numbering") != NULL)) {
+        if (!addRelatedDoc(parts,documentRels,WORDREL_NUMBERING,"numbering.xml",output,includeTypes,storage,error))
+            goto end;
+    }
+    if ((parts == NULL) || (DFHashTableLookup(parts,"footnotes") != NULL)) {
+        if (!addRelatedDoc(parts,documentRels,WORDREL_FOOTNOTES,"footnotes.xml",output,includeTypes,storage,error))
+            goto end;
+    }
+    if ((parts == NULL) || (DFHashTableLookup(parts,"endnotes") != NULL)) {
+        if (!addRelatedDoc(parts,documentRels,WORDREL_ENDNOTES,"endnotes.xml",output,includeTypes,storage,error))
+            goto end;
+    }
+    // Unlike the parts above, these two are not part of the default set
+    if ((parts != NULL) && (DFHashTableLookup(parts,"settings") != NULL)) {
+        if (!addRelatedDoc(parts,documentRels,WORDREL_SETTINGS,"settings.xml",output,includeTypes,storage,error))
+            goto end;
+    }
+    if ((parts != NULL) && (DFHashTableLookup(parts,"theme") != NULL)) {
+        if (!addRelatedDoc(parts,documentRels,WORDREL_THEME,"theme.xml",output,includeTypes,storage,error))
+            goto end;
+    }
+
+    if ((DFHashTableLookup(documentRels,WORDREL_HYPERLINK) != NULL) ||
+        (DFHashTableLookup(documentRels,WORDREL_IMAGE) != NULL) ||
+        ((parts != NULL) && (DFHashTableLookup(parts,"documentRels") != NULL))) {
+        if (relsDoc == NULL) {
+            DFErrorFormat(error,"document.xml.rels does not exist");
+            goto end;
+        }
+        // Drop relationships to parts that were not written above. The next pointer is saved
+        // first because the current node may be removed from the tree.
+        DFNode *next;
+        for (DFNode *child = relsDoc->root->first; child != NULL; child = next) {
+            next = child->next;
+            if (child->tag != REL_RELATIONSHIP)
+                continue;
+            const char *type = DFGetAttribute(child,NULL_Type);
+            if ((type != NULL) && (DFHashTableLookup(includeTypes,type) == NULL)) {
+                DFRemoveNode(child);
+            }
+        }
+        addSerializedDoc(output,relsDoc,"document.xml.rels");
+    }
+
+    const char **entries = DFStorageList(storage,NULL);
+    if (entries != NULL) { // FIXME: Should really report an error if this is not the case
+        for (int i = 0; entries[i]; i++) {
+            const char *filename = entries[i];
+            char *extension = DFPathExtension(filename);
+            if (DFStringEqualsCI(extension,"png") || DFStringEqualsCI(extension,"jpg")) {
+                // Make the name absolute. This has to test the *prefix*: a name with a png or
+                // jpg extension never ends in '/', so a suffix test would always add a slash,
+                // even to a name that already starts with one.
+                char *absFilename;
+                if (!DFStringHasPrefix(filename,"/"))
+                    absFilename = DFFormatString("/%s",filename);
+                else
+                    absFilename = strdup(filename);
+                // An unreadable image is skipped silently: addSerializedBinary ignores NULL
+                DFBuffer *data = DFBufferReadFromStorage(storage,absFilename,NULL);
+                addSerializedBinary(output,data,absFilename);
+                DFBufferRelease(data);
+                free(absFilename);
+            }
+            free(extension);
+        }
+    }
+    free(entries);
+
+    ok = 1;
+
+end:
+    // Released here rather than before the label so that the failure paths, which jump
+    // straight to `end`, do not leak the table.
+    DFHashTableRelease(includeTypes);
+    return ok;
+}
+
+// Produce the plain text package representation of the Word package held in `storage`.
+//
+// The main document part is located through /_rels/.rels, its relationships file (if one
+// exists) is parsed, and the selected parts are then written by processParts.
+//
+// parts:   names of the parts to include, or NULL for the default set (see processParts).
+// returns: a strdup'd string that the caller must free(), or NULL with `error` set.
+static char *Word_toPlainFromDir(DFStorage *storage, DFHashTable *parts, DFError **error)
+{
+    DFHashTable *rels = DFHashTableNew((DFCopyFunction)strdup,(DFFreeFunction)free);
+    DFBuffer *output = DFBufferNew();
+    DFDocument *relsDoc = NULL;
+    char *relsPath = NULL;
+    char *plain = NULL;
+
+    char *documentPath = findDocumentPath(storage,error);
+    if (documentPath == NULL) {
+        DFErrorFormat(error,"findDocumentPath: %s",DFErrorMessage(error));
+        goto end;
+    }
+
+    // A missing relationships file is fine; one that exists but cannot be parsed is an error
+    relsPath = computeDocumentRelsPath(documentPath);
+    if (DFStorageExists(storage,relsPath)) {
+        relsDoc = DFParseXMLStorage(storage,relsPath,error);
+        if (relsDoc == NULL) {
+            DFErrorFormat(error,"%s: %s",relsPath,DFErrorMessage(error));
+            goto end;
+        }
+    }
+
+    parseDocumentRels(relsDoc,rels,error);
+
+    if (processParts(parts,documentPath,relsDoc,rels,output,storage,error))
+        plain = strdup(output->data);
+
+end:
+    free(relsPath);
+    free(documentPath);
+    DFHashTableRelease(rels);
+    DFDocumentRelease(relsDoc);
+    DFBufferRelease(output);
+    return plain;
+}
+
+// Return the plain text package representation of the Word package in `rawStorage`, limited to
+// the parts named in `parts` (or the default set when `parts` is NULL).
+//
+// This function never returns NULL because of a conversion failure: if the conversion fails,
+// the returned string is the error message followed by a newline.
+char *Word_toPlain(DFStorage *rawStorage, DFHashTable *parts)
+{
+    DFError *error = NULL;
+    char *plain = Word_toPlainFromDir(rawStorage,parts,&error);
+    if (plain != NULL)
+        return plain;
+
+    char *message = DFFormatString("%s\n",DFErrorMessage(&error));
+    DFErrorRelease(error);
+    return message;
+}
+
+static int saveXMLDocument(DFStorage *storage, const char *filename, DFDocument *doc, NamespaceID defaultNS, DFError **error)
+{
+    char *parentPath = DFPathDirName(filename);
+    int ok = 0;
+
+    if (!DFSerializeXMLStorage(doc,defaultNS,0,storage,filename,error)) {
+        DFErrorFormat(error,"serialize %s: %s",filename,DFErrorMessage(error));
+        goto end;
+    }
+
+    ok = 1;
+
+end:
+    free(parentPath);
+    return ok;
+}
+
+static int saveStrippedXMLText(DFStorage *storage, const char *filename,
+                               const char *input, NamespaceID defaultNS, DFError **error)
+{
+    DFDocument *doc = DFParseXMLString(input,error);
+    if (doc == NULL)
+        return 0;
+    DFStripWhitespace(doc->docNode);
+    int ok = saveXMLDocument(storage,filename,doc,defaultNS,error);
+    DFDocumentRelease(doc);
+    return ok;
+}
+
+typedef struct PartInfo {
+    const char *filename;
+    const char *path;
+    const char *rel;
+    const char *type;
+} PartInfo;
+
+static int saveContentTypes(DFStorage *storage, DFHashTable *ctDefaults, DFHashTable *ctOverrides, DFError **error)
+{
+    DFDocument *doc = DFDocumentNewWithRoot(CT_TYPES);
+
+    const char **keys = DFHashTableCopyKeys(ctDefaults);
+    DFSortStringsCaseInsensitive(keys);
+    for (int i = 0; keys[i]; i++) {
+        const char *extension = keys[i];
+        const char *contentType = DFHashTableLookup(ctDefaults,extension);
+        DFNode *deflt = DFCreateChildElement(doc->root,CT_DEFAULT);
+        DFSetAttribute(deflt,NULL_EXTENSION,extension);
+        DFSetAttribute(deflt,NULL_CONTENTTYPE,contentType);
+    }
+    free(keys);
+    keys = DFHashTableCopyKeys(ctOverrides);
+    DFSortStringsCaseInsensitive(keys);
+    for (int i = 0; keys[i]; i++) {
+        const char *partName = keys[i];
+        const char *contentType = DFHashTableLookup(ctOverrides,partName);
+        DFNode *override = DFCreateChildElement(doc->root,CT_OVERRIDE);
+        DFSetAttribute(override,NULL_PARTNAME,partName);
+        DFSetAttribute(override,NULL_CONTENTTYPE,contentType);
+    }
+    free(keys);
+
+    int ok = saveXMLDocument(storage,"[Content_Types].xml",doc,NAMESPACE_CT,error);
+    DFDocumentRelease(doc);
+    return ok;
+}
+
+static int saveDocRels(DFStorage *storage,
+                       DFHashTable *docRelURIs,
+                       DFHashTable *docRelTypes,
+                       DFHashTable *docRelModes,
+                       DFError **error)
+{
+    if (DFHashTableCount(docRelURIs) == 0)
+        return 1;;
+
+    DFDocument *doc = DFDocumentNewWithRoot(REL_RELATIONSHIPS);
+
+    const char **sortedIds = DFHashTableCopyKeys(docRelURIs);
+    DFSortStringsCaseInsensitive(sortedIds);
+    for (int i = 0; sortedIds[i]; i++) {
+        const char *rId = sortedIds[i];
+        const char *URI = DFHashTableLookup(docRelURIs,rId);
+        const char *type = DFHashTableLookup(docRelTypes,rId);
+        const char *mode = DFHashTableLookup(docRelModes,rId); // may be NULL
+        DFNode *child = DFCreateChildElement(doc->root,REL_RELATIONSHIP);
+        DFSetAttribute(child,NULL_Id,rId);
+        DFSetAttribute(child,NULL_Type,type);
+        DFSetAttribute(child,NULL_TARGET,URI);
+        DFSetAttribute(child,NULL_TARGETMODE,mode);
+    }
+    free(sortedIds);
+
+    int ok = saveXMLDocument(storage,"/word/_rels/document.xml.rels",doc,NAMESPACE_REL,error);
+    DFDocumentRelease(doc);
+    return ok;
+}
+
+static int saveRootRels(DFStorage *storage, DFError **error)
+{
+    DFDocument *doc = DFDocumentNewWithRoot(REL_RELATIONSHIPS);
+    DFNode *rel = DFCreateChildElement(doc->root,REL_RELATIONSHIP);
+    DFSetAttribute(rel,NULL_Id,"rId1");
+    DFSetAttribute(rel,NULL_Type,"http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument");
+    DFSetAttribute(rel,NULL_TARGET,"/word/document.xml");
+    int ok = saveXMLDocument(storage,"/_rels/.rels",doc,NAMESPACE_REL,error);
+    DFDocumentRelease(doc);
+    return ok;
+}
+
+static int Word_fromStorage(TextPackage *tp, DFStorage *storage, DFError **error)
+{
+    PartInfo parts[7] = {
+        { "numbering.xml", "/word/numbering.xml", WORDREL_NUMBERING, WORDTYPE_NUMBERING },
+        { "styles.xml", "/word/styles.xml", WORDREL_STYLES, WORDTYPE_STYLES },
+        { "settings.xml", "/word/settings.xml", WORDREL_SETTINGS, WORDTYPE_SETTINGS },
+        { "theme.xml", "/word/theme.xml", WORDREL_THEME, WORDTYPE_THEME },
+        { "footnotes.xml", "/word/footnotes.xml", WORDREL_FOOTNOTES, WORDTYPE_FOOTNOTES },
+        { "endnotes.xml", "/word/endnotes.xml", WORDREL_ENDNOTES, WORDTYPE_ENDNOTES },
+        { NULL, NULL, NULL, NULL },
+    };
+
+    int ok = 0;
+
+    const char *documentStr = DFHashTableLookup(tp->items,"document.xml");
+    const char **allFilenames = NULL;
+    DFHashTable *ctDefaults = DFHashTableNew((DFCopyFunction)strdup,(DFFreeFunction)free);
+    DFHashTable *ctOverrides = DFHashTableNew((DFCopyFunction)strdup,(DFFreeFunction)free);
+    DFHashTable *docRelURIs = DFHashTableNew((DFCopyFunction)strdup,(DFFreeFunction)free);
+    DFHashTable *docRelTypes = DFHashTableNew((DFCopyFunction)strdup,(DFFreeFunction)free);
+    DFHashTable *docRelModes = DFHashTableNew((DFCopyFunction)strdup,(DFFreeFunction)free);
+
+
+    if (documentStr == NULL) {
+        DFErrorFormat(error,"No document.xml");
+        goto end;
+    }
+
+    DFHashTableAdd(ctDefaults,"rels","application/vnd.openxmlformats-package.relationships+xml");
+    DFHashTableAdd(ctOverrides,"/word/document.xml",
+                   "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml");
+
+
+    if (documentStr != NULL) {
+        if (!saveStrippedXMLText(storage,"/word/document.xml",documentStr,NAMESPACE_NULL,error))
+            goto end;
+    }
+
+    int rIdNext = 1;
+    for (int i = 0; parts[i].filename; i++) {
+        const char *content = DFHashTableLookup(tp->items,parts[i].filename);
+        if (content == NULL)
+            continue;
+
+        if (!saveStrippedXMLText(storage,parts[i].path,content,NAMESPACE_NULL,error))
+            goto end;
+
+        char rIdStr[100];
+        snprintf(rIdStr,100,"rId%d",rIdNext++);
+        DFHashTableAdd(docRelURIs,rIdStr,parts[i].path);
+        DFHashTableAdd(docRelTypes,rIdStr,parts[i].rel);
+        DFHashTableAdd(ctOverrides,parts[i].path,parts[i].type);
+    }
+
+    allFilenames = DFHashTableCopyKeys(tp->items);
+    for (int i = 0; allFilenames[i]; i++) {
+        const char *curFilename = allFilenames[i];
+        char *ext = DFPathExtension(curFilename);
+
+        int isImage = 0;
+
+        if (DFStringEqualsCI(ext,"png")) {
+            DFHashTableAdd(ctDefaults,"png","image/png");
+            isImage = 1;
+        }
+
+        if (DFStringEqualsCI(ext,"jpg")) {
+            DFHashTableAdd(ctDefaults,"jpg","image/png");
+            isImage = 1;
+        }
+
+        if (DFStringEqualsCI(ext,"jpeg")) {
+            DFHashTableAdd(ctDefaults,"jpeg","image/png");
+            isImage = 1;
+        }
+
+        free(ext);
+
+        if (isImage) {
+            const char *str = DFHashTableLookup(tp->items,curFilename);
+            char *parentRel = DFPathDirName(curFilename);
+            int fileok = 1;
+
+            DFBuffer *data = stringToBinary(str);
+            if (!DFBufferWriteToStorage(data,storage,curFilename,error)) {
+                DFErrorFormat(error,"%s: %s",curFilename,DFErrorMessage(error));
+                fileok = 0;
+            }
+
+            DFBufferRelease(data);
+            free(parentRel);
+
+            if (!fileok)
+                goto end;
+        }
+    }
+
+    if (!saveContentTypes(storage,ctDefaults,ctOverrides,error)) {
+        DFErrorFormat(error,"saveContentTypes: %s",DFErrorMessage(error));
+        goto end;
+    }
+
+    const char *relsStr = DFHashTableLookup(tp->items,"document.xml.rels");
+    if (relsStr != NULL) {
+        DFDocument *doc = DFParseXMLString(relsStr,error);
+        if (doc == NULL)
+            goto end;
+
+        DFHashTableRelease(docRelURIs);
+        DFHashTableRelease(docRelTypes);
+        DFHashTableRelease(docRelModes);
+        docRelURIs = DFHashTableNew((DFCopyFunction)strdup,(DFFreeFunction)free);
+        docRelTypes = DFHashTableNew((DFCopyFunction)strdup,(DFFreeFunction)free);
+        docRelModes = DFHashTableNew((DFCopyFunction)strdup,(DFFreeFunction)free);
+
+        for (DFNode *child = doc->root->first; child != NULL; child = child->next) {
+            if (child->tag == REL_RELATIONSHIP) {
+                const char *rId = DFGetAttribute(child,NULL_Id);
+                const char *type = DFGetAttribute(child,NULL_Type);
+                const char *target = DFGetAttribute(child,NULL_TARGET);
+                const char *mode = DFGetAttribute(child,NULL_TARGETMODE);
+
+                if ((rId != NULL) && (type != NULL) && (target != NULL)) {
+                    DFHashTableAdd(docRelURIs,rId,target);
+                    DFHashTableAdd(docRelTypes,rId,type);
+                    if (mode != NULL)
+                        DFHashTableAdd(docRelModes,rId,mode);
+                }
+            }
+        }
+
+        DFDocumentRelease(doc);
+    }
+
+    if (!saveDocRels(storage,docRelURIs,docRelTypes,docRelModes,error)) {
+        DFErrorFormat(error,"saveDocRels: %s",DFErrorMessage(error));
+        goto end;
+    }
+
+    if (!saveRootRels(storage,error)) {
+        DFErrorFormat(error,"saveRootRels: %s",DFErrorMessage(error));
+        goto end;
+    }
+
+    ok = 1;
+
+end:
+    DFHashTableRelease(ctDefaults);
+    DFHashTableRelease(ctOverrides);
+    DFHashTableRelease(docRelURIs);
+    DFHashTableRelease(docRelTypes);
+    DFHashTableRelease(docRelModes);
+    free(allFilenames);
+    return ok;
+}
+
+DFStorage *Word_fromPlain(const char *plain, const char *plainPath, DFError **error)
+{
+    int ok = 0;
+    DFStorage *concreteStorage = NULL;
+    TextPackage *textPackage = NULL;
+
+    textPackage = TextPackageNewWithString(plain,plainPath,error);
+    if (textPackage == NULL)
+        goto end;
+
+    concreteStorage = DFStorageNewMemory(DFFileFormatDocx);
+
+    if (!Word_fromStorage(textPackage,concreteStorage,error)) {
+        DFErrorFormat(error,"Word_fromStorage: %s",DFErrorMessage(error));
+        printf("%s\n",DFErrorMessage(error));
+        goto end;
+    }
+    
+    if (!DFStorageSave(concreteStorage,error))
+        goto end;
+    
+    ok = 1;
+    
+end:
+    TextPackageRelease(textPackage);
+    if (ok) {
+        return concreteStorage;
+    }
+    else {
+        DFStorageRelease(concreteStorage);
+        return NULL;
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-corinthia/blob/4701058d/DocFormats/filters/ooxml/tests/word/WordPlain.h
----------------------------------------------------------------------
diff --git a/DocFormats/filters/ooxml/tests/word/WordPlain.h b/DocFormats/filters/ooxml/tests/word/WordPlain.h
new file mode 100644
index 0000000..41aed2d
--- /dev/null
+++ b/DocFormats/filters/ooxml/tests/word/WordPlain.h
@@ -0,0 +1,26 @@
+// Copyright 2012-2014 UX Productivity Pty Ltd
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef DocFormats_WordPlain_h
+#define DocFormats_WordPlain_h
+
+#include <DocFormats/DFError.h>
+#include <DocFormats/DFStorage.h>
+#include "DFHashTable.h"
+#include "DFDOM.h"
+
+char *Word_toPlain(DFStorage *rawStorage, DFHashTable *parts);
+DFStorage *Word_fromPlain(const char *plain, const char *plainPath, DFError **error);
+
+#endif

http://git-wip-us.apache.org/repos/asf/incubator-corinthia/blob/4701058d/consumers/dfutil/src/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/consumers/dfutil/src/CMakeLists.txt b/consumers/dfutil/src/CMakeLists.txt
index 2fad267..c6d19e9 100644
--- a/consumers/dfutil/src/CMakeLists.txt
+++ b/consumers/dfutil/src/CMakeLists.txt
@@ -8,8 +8,6 @@ set(SOURCES
     DFChanges.c
     DFChanges.h
     FunctionTests.h
-    Plain.c
-    Plain.h
     StringTests.h
     Test.c
     Test.h
@@ -17,8 +15,6 @@ set(SOURCES
     TestCase.h
     TestFunctions.c
     TestFunctions.h
-    TextPackage.c
-    TextPackage.h
     main.c
 )
 
@@ -32,13 +28,15 @@ include_directories(../../../DocFormats/core/src/css)
 include_directories(../../../DocFormats/core/src/html)
 include_directories(../../../DocFormats/core/src/lib)
 include_directories(../../../DocFormats/core/src/names)
+include_directories(../../../DocFormats/core/src/xml)
+include_directories(../../../DocFormats/core/tests/html)
 include_directories(../../../DocFormats/filters/latex/src)
 include_directories(../../../DocFormats/filters/odf/src)
 include_directories(../../../DocFormats/filters/ooxml/src/common)
 include_directories(../../../DocFormats/filters/ooxml/src/word)
 include_directories(../../../DocFormats/filters/ooxml/src/word/formatting)
 include_directories(../../../DocFormats/filters/ooxml/src/word/lenses)
-include_directories(../../../DocFormats/core/src/xml)
+include_directories(../../../DocFormats/filters/ooxml/tests/word)
 include_directories(/usr/include/libxml2)
 link_directories(${LIB_DIRS})
 

http://git-wip-us.apache.org/repos/asf/incubator-corinthia/blob/4701058d/consumers/dfutil/src/Commands.c
----------------------------------------------------------------------
diff --git a/consumers/dfutil/src/Commands.c b/consumers/dfutil/src/Commands.c
index 6c9e04c..07d84dd 100644
--- a/consumers/dfutil/src/Commands.c
+++ b/consumers/dfutil/src/Commands.c
@@ -15,7 +15,8 @@
 #include "Commands.h"
 #include "BDTTest.h"
 #include "Test.h"
-#include "Plain.h"
+#include "WordPlain.h"
+#include "HTMLPlain.h"
 #include "TextPackage.h"
 #include "StringTests.h"
 #include "DFChanges.h"
@@ -479,54 +480,3 @@ char *createTempDir(DFError **error)
     return ctemplate;
 #endif
 }
-
-char *binaryToString(DFBuffer *input)
-{
-    const char *hexchars = "0123456789ABCDEF";
-    DFBuffer *charBuf = DFBufferNew();
-    for (size_t pos = 0; pos < input->len; pos++) {
-        if ((pos > 0) && (pos % 40 == 0))
-            DFBufferAppendChar(charBuf,'\n');
-        unsigned char hi = ((unsigned char *)input->data)[pos] >> 4;
-        unsigned char lo = ((unsigned char *)input->data)[pos] & 0x0F;
-        DFBufferAppendChar(charBuf,hexchars[hi]);
-        DFBufferAppendChar(charBuf,hexchars[lo]);
-    }
-    if ((input->len % 40) != 0)
-        DFBufferAppendChar(charBuf,'\n');
-    char *result = strdup(charBuf->data);
-
-    DFBufferRelease(charBuf);
-    return result;
-}
-
-DFBuffer *stringToBinary(const char *str)
-{
-    size_t length = strlen(str);
-    DFBuffer *outbuf = DFBufferNew();
-
-    int wantHi = 1;
-    unsigned char hi = 0;
-
-    for (size_t inpos = 0; inpos < length; inpos++) {
-        char c = str[inpos];
-        unsigned char nibble = 0;
-
-        if ((c >= '0') && (c <= '9'))
-            nibble = c - '0';
-        else if ((c >= 'a') && (c <= 'f'))
-            nibble = 10 + (c - 'a');
-        else if ((c >= 'A') && (c <= 'F'))
-            nibble = 10 + (c - 'A');
-        else
-            continue;
-
-        if (wantHi)
-            hi = nibble << 4;
-        else
-            DFBufferAppendChar(outbuf,hi | nibble);
-        wantHi = !wantHi;
-    }
-
-    return outbuf;
-}

http://git-wip-us.apache.org/repos/asf/incubator-corinthia/blob/4701058d/consumers/dfutil/src/Commands.h
----------------------------------------------------------------------
diff --git a/consumers/dfutil/src/Commands.h b/consumers/dfutil/src/Commands.h
index 8933f08..e46cdde 100644
--- a/consumers/dfutil/src/Commands.h
+++ b/consumers/dfutil/src/Commands.h
@@ -35,7 +35,5 @@ int escapeCSSIdent(const char *filename, DFError **error);
 int unescapeCSSIdent(const char *filename, DFError **error);
 
 char *createTempDir(DFError **error);
-char *binaryToString(DFBuffer *input);
-DFBuffer *stringToBinary(const char *str);
 
 #endif

http://git-wip-us.apache.org/repos/asf/incubator-corinthia/blob/4701058d/consumers/dfutil/src/Plain.c
----------------------------------------------------------------------
diff --git a/consumers/dfutil/src/Plain.c b/consumers/dfutil/src/Plain.c
deleted file mode 100644
index 777148d..0000000
--- a/consumers/dfutil/src/Plain.c
+++ /dev/null
@@ -1,689 +0,0 @@
-// Copyright 2012-2014 UX Productivity Pty Ltd
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "Plain.h"
-#include "TextPackage.h"
-#include "Plain.h"
-#include "Commands.h"
-#include "OPC.h"
-#include "WordConverter.h"
-#include "DFXML.h"
-#include "DFHashTable.h"
-#include "DFString.h"
-#include "DFFilesystem.h"
-#include "DFHTML.h"
-#include "DFCommon.h"
-#include "DFZipFile.h"
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-static void addStrippedSerializedDoc(DFBuffer *result, DFDocument *doc, const char *filename)
-{
-    if (doc != NULL) {
-        DFStripWhitespace(doc->docNode);
-        char *str = DFSerializeXMLString(doc,0,1);
-        DFBufferFormat(result,"#item %s\n",filename);
-        DFBufferFormat(result,"%s",str);
-        free(str);
-    }
-}
-
-static void addSerializedDoc(DFBuffer *result, DFDocument *doc, const char *filename)
-{
-    if (doc != NULL) {
-        char *str = DFSerializeXMLString(doc,0,1);
-        DFBufferFormat(result,"#item %s\n",filename);
-        DFBufferFormat(result,"%s",str);
-        free(str);
-    }
-}
-
-static void addSerializedBinary(DFBuffer *result, DFBuffer *data, const char *filename)
-{
-    if (data != NULL) {
-        char *str = binaryToString(data);
-        DFBufferFormat(result,"#item %s\n",filename);
-        DFBufferFormat(result,"%s",str);
-        free(str);
-    }
-}
-
-static char *findDocumentPath(DFStorage *storage, DFError **error)
-{
-    int ok = 0;
-    DFDocument *relsDoc = NULL;
-    char *result = NULL;
-
-    relsDoc = DFParseXMLStorage(storage,"/_rels/.rels",error);
-    if (relsDoc == NULL) {
-        DFErrorFormat(error,"_rels/.rels: %s",DFErrorMessage(error));
-        goto end;
-    }
-
-    for (DFNode *child = relsDoc->root->first; child != NULL; child = child->next) {
-        if (child->tag != REL_RELATIONSHIP)
-            continue;
-
-        const char *type = DFGetAttribute(child,NULL_Type);
-        const char *target = DFGetAttribute(child,NULL_TARGET);
-        if ((type == NULL) || (target == NULL))
-            continue;
-
-        if (strcmp(type,"http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument"))
-            continue;
-
-        result = strdup(target);
-        ok = 1;
-        break;
-    }
-
-end:
-    DFDocumentRelease(relsDoc);
-    if (ok)
-        return result;
-    free(result);
-    return NULL;
-}
-
-static char *computeDocumentRelsPath(const char *documentPath)
-{
-    char *documentParent = DFPathDirName(documentPath);
-    char *documentFilename = DFPathBaseName(documentPath);
-    char *documentRelsPath = DFFormatString("%s/_rels/%s.rels",documentParent,documentFilename);
-    free(documentParent);
-    free(documentFilename);
-    return documentRelsPath;
-}
-
-static void parseDocumentRels(DFDocument *relsDoc, DFHashTable *rels, DFError **error)
-{
-    if (relsDoc == NULL)
-        return;
-    for (DFNode *child = relsDoc->root->first; child != NULL; child = child->next) {
-        if (child->tag != REL_RELATIONSHIP)
-            continue;
-        const char *type = DFGetAttribute(child,NULL_Type);
-        const char *target = DFGetAttribute(child,NULL_TARGET);
-        if ((type == NULL) || (target == NULL))
-            continue;
-
-        DFHashTableAdd(rels,type,target);
-    }
-}
-
-static int addRelatedDoc(DFHashTable *parts, DFHashTable *documentRels, const char *relName, const char *filename,
-                         DFBuffer *output, DFHashTable *includeTypes, DFStorage *storage, DFError **error)
-{
-    const char *relPath = DFHashTableLookup(documentRels,relName);
-    if (relPath == NULL)
-        return 1;;
-
-    DFDocument *doc = DFParseXMLStorage(storage,relPath,error);
-    if (doc == NULL) {
-        DFErrorFormat(error,"%s: %s",relPath,DFErrorMessage(error));
-        return 0;
-    }
-
-    if (doc->root->first != NULL) {
-        addStrippedSerializedDoc(output,doc,filename);
-        DFHashTableAdd(includeTypes,relName,"");
-    }
-
-    DFDocumentRelease(doc);
-    return 1;
-}
-
-static int processParts(DFHashTable *parts, const char *documentPath, DFDocument *relsDoc,
-                        DFHashTable *documentRels,
-                        DFBuffer *output, DFStorage *storage, DFError **error)
-{
-    int ok = 0;
-    DFHashTable *includeTypes = DFHashTableNew((DFCopyFunction)strdup,free);
-    DFHashTableAdd(includeTypes,WORDREL_HYPERLINK,"");
-    DFHashTableAdd(includeTypes,WORDREL_IMAGE,"");
-
-    if ((parts == NULL) || (DFHashTableLookup(parts,"document") != NULL)) {
-        DFDocument *doc = DFParseXMLStorage(storage,documentPath,error);
-        if (doc == NULL)
-            goto end;
-        addStrippedSerializedDoc(output,doc,"document.xml");
-        DFDocumentRelease(doc);
-    }
-
-    if ((parts == NULL) || (DFHashTableLookup(parts,"styles") != NULL)) {
-        if (!addRelatedDoc(parts,documentRels,WORDREL_STYLES,"styles.xml",output,includeTypes,storage,error))
-            goto end;
-    }
-    if ((parts == NULL) || (DFHashTableLookup(parts,"numbering") != NULL)) {
-        if (!addRelatedDoc(parts,documentRels,WORDREL_NUMBERING,"numbering.xml",output,includeTypes,storage,error))
-            goto end;
-    }
-    if ((parts == NULL) || (DFHashTableLookup(parts,"footnotes") != NULL)) {
-        if (!addRelatedDoc(parts,documentRels,WORDREL_FOOTNOTES,"footnotes.xml",output,includeTypes,storage,error))
-            goto end;
-    }
-    if ((parts == NULL) || (DFHashTableLookup(parts,"endnotes") != NULL)) {
-        if (!addRelatedDoc(parts,documentRels,WORDREL_ENDNOTES,"endnotes.xml",output,includeTypes,storage,error))
-            goto end;
-    }
-    if ((parts != NULL) && (DFHashTableLookup(parts,"settings") != NULL)) {
-        if (!addRelatedDoc(parts,documentRels,WORDREL_SETTINGS,"settings.xml",output,includeTypes,storage,error))
-            goto end;
-    }
-    if ((parts != NULL) && (DFHashTableLookup(parts,"theme") != NULL)) {
-        if (!addRelatedDoc(parts,documentRels,WORDREL_THEME,"theme.xml",output,includeTypes,storage,error))
-            goto end;
-    }
-
-    if ((DFHashTableLookup(documentRels,WORDREL_HYPERLINK) != NULL) ||
-        (DFHashTableLookup(documentRels,WORDREL_IMAGE) != NULL) ||
-        ((parts != NULL) && (DFHashTableLookup(parts,"documentRels") != NULL))) {
-        if (relsDoc == NULL) {
-            DFErrorFormat(error,"document.xml.rels does not exist");
-            goto end;
-        }
-        DFNode *next;
-        for (DFNode *child = relsDoc->root->first; child != NULL; child = next) {
-            next = child->next;
-            if (child->tag != REL_RELATIONSHIP)
-                continue;
-            const char *type = DFGetAttribute(child,NULL_Type);
-            if ((type != NULL) && (DFHashTableLookup(includeTypes,type) == NULL)) {
-                DFRemoveNode(child);
-            }
-        }
-        addSerializedDoc(output,relsDoc,"document.xml.rels");
-    }
-
-    const char **entries = DFStorageList(storage,NULL);
-    if (entries != NULL) { // FIXME: Should really report an error if this is not the case
-        for (int i = 0; entries[i]; i++) {
-            const char *filename = entries[i];
-            char *extension = DFPathExtension(filename);
-            if (DFStringEqualsCI(extension,"png") || DFStringEqualsCI(extension,"jpg")) {
-                char *absFilename;
-                if (!DFStringHasSuffix(filename,"/"))
-                    absFilename = DFFormatString("/%s",filename);
-                else
-                    absFilename = strdup(filename);
-                DFBuffer *data = DFBufferReadFromStorage(storage,absFilename,NULL);
-                addSerializedBinary(output,data,absFilename);
-                DFBufferRelease(data);
-                free(absFilename);
-            }
-            free(extension);
-        }
-    }
-    free(entries);
-    DFHashTableRelease(includeTypes);
-
-    ok = 1;
-
-end:
-    return ok;
-}
-
-static char *Word_toPlainFromDir(DFStorage *storage, DFHashTable *parts, DFError **error)
-{
-    char *documentPath = NULL;
-    DFHashTable *rels = DFHashTableNew((DFCopyFunction)strdup,(DFFreeFunction)free);
-    DFBuffer *output = DFBufferNew();
-    char *relsPathRel = NULL;
-    DFDocument *relsDoc = NULL;
-    int ok = 0;
-
-
-    documentPath = findDocumentPath(storage,error);
-    if (documentPath == NULL) {
-        DFErrorFormat(error,"findDocumentPath: %s",DFErrorMessage(error));
-        goto end;
-    }
-
-    relsPathRel = computeDocumentRelsPath(documentPath);
-    if (DFStorageExists(storage,relsPathRel) && ((relsDoc = DFParseXMLStorage(storage,relsPathRel,error)) == NULL)) {
-        DFErrorFormat(error,"%s: %s",relsPathRel,DFErrorMessage(error));
-        goto end;
-    }
-
-    parseDocumentRels(relsDoc,rels,error);
-
-    if (!processParts(parts,documentPath,relsDoc,rels,output,storage,error))
-        goto end;
-
-    ok = 1;
-
-end:
-    free(relsPathRel);
-    free(documentPath);
-    DFHashTableRelease(rels);
-    DFDocumentRelease(relsDoc);
-    if (!ok) {
-        DFBufferRelease(output);
-        return NULL;
-    }
-    else {
-        char *result = strdup(output->data);
-        DFBufferRelease(output);
-        return result;
-    }
-}
-
-char *Word_toPlain(DFStorage *rawStorage, DFHashTable *parts)
-{
-    DFError *error = NULL;
-    char *result = Word_toPlainFromDir(rawStorage,parts,&error);
-    if (result == NULL) {
-        result = DFFormatString("%s\n",DFErrorMessage(&error));
-        DFErrorRelease(error);
-    }
-    return result;
-}
-
-static int saveXMLDocument(DFStorage *storage, const char *filename, DFDocument *doc, NamespaceID defaultNS, DFError **error)
-{
-    char *parentPath = DFPathDirName(filename);
-    int ok = 0;
-
-    if (!DFSerializeXMLStorage(doc,defaultNS,0,storage,filename,error)) {
-        DFErrorFormat(error,"serialize %s: %s",filename,DFErrorMessage(error));
-        goto end;
-    }
-
-    ok = 1;
-
-end:
-    free(parentPath);
-    return ok;
-}
-
-static int saveStrippedXMLText(DFStorage *storage, const char *filename,
-                               const char *input, NamespaceID defaultNS, DFError **error)
-{
-    DFDocument *doc = DFParseXMLString(input,error);
-    if (doc == NULL)
-        return 0;
-    DFStripWhitespace(doc->docNode);
-    int ok = saveXMLDocument(storage,filename,doc,defaultNS,error);
-    DFDocumentRelease(doc);
-    return ok;
-}
-
-typedef struct PartInfo {
-    const char *filename;
-    const char *path;
-    const char *rel;
-    const char *type;
-} PartInfo;
-
-static int saveContentTypes(DFStorage *storage, DFHashTable *ctDefaults, DFHashTable *ctOverrides, DFError **error)
-{
-    DFDocument *doc = DFDocumentNewWithRoot(CT_TYPES);
-
-    const char **keys = DFHashTableCopyKeys(ctDefaults);
-    DFSortStringsCaseInsensitive(keys);
-    for (int i = 0; keys[i]; i++) {
-        const char *extension = keys[i];
-        const char *contentType = DFHashTableLookup(ctDefaults,extension);
-        DFNode *deflt = DFCreateChildElement(doc->root,CT_DEFAULT);
-        DFSetAttribute(deflt,NULL_EXTENSION,extension);
-        DFSetAttribute(deflt,NULL_CONTENTTYPE,contentType);
-    }
-    free(keys);
-    keys = DFHashTableCopyKeys(ctOverrides);
-    DFSortStringsCaseInsensitive(keys);
-    for (int i = 0; keys[i]; i++) {
-        const char *partName = keys[i];
-        const char *contentType = DFHashTableLookup(ctOverrides,partName);
-        DFNode *override = DFCreateChildElement(doc->root,CT_OVERRIDE);
-        DFSetAttribute(override,NULL_PARTNAME,partName);
-        DFSetAttribute(override,NULL_CONTENTTYPE,contentType);
-    }
-    free(keys);
-
-    int ok = saveXMLDocument(storage,"[Content_Types].xml",doc,NAMESPACE_CT,error);
-    DFDocumentRelease(doc);
-    return ok;
-}
-
-static int saveDocRels(DFStorage *storage,
-                       DFHashTable *docRelURIs,
-                       DFHashTable *docRelTypes,
-                       DFHashTable *docRelModes,
-                       DFError **error)
-{
-    if (DFHashTableCount(docRelURIs) == 0)
-        return 1;;
-
-    DFDocument *doc = DFDocumentNewWithRoot(REL_RELATIONSHIPS);
-
-    const char **sortedIds = DFHashTableCopyKeys(docRelURIs);
-    DFSortStringsCaseInsensitive(sortedIds);
-    for (int i = 0; sortedIds[i]; i++) {
-        const char *rId = sortedIds[i];
-        const char *URI = DFHashTableLookup(docRelURIs,rId);
-        const char *type = DFHashTableLookup(docRelTypes,rId);
-        const char *mode = DFHashTableLookup(docRelModes,rId); // may be NULL
-        DFNode *child = DFCreateChildElement(doc->root,REL_RELATIONSHIP);
-        DFSetAttribute(child,NULL_Id,rId);
-        DFSetAttribute(child,NULL_Type,type);
-        DFSetAttribute(child,NULL_TARGET,URI);
-        DFSetAttribute(child,NULL_TARGETMODE,mode);
-    }
-    free(sortedIds);
-
-    int ok = saveXMLDocument(storage,"/word/_rels/document.xml.rels",doc,NAMESPACE_REL,error);
-    DFDocumentRelease(doc);
-    return ok;
-}
-
-static int saveRootRels(DFStorage *storage, DFError **error)
-{
-    DFDocument *doc = DFDocumentNewWithRoot(REL_RELATIONSHIPS);
-    DFNode *rel = DFCreateChildElement(doc->root,REL_RELATIONSHIP);
-    DFSetAttribute(rel,NULL_Id,"rId1");
-    DFSetAttribute(rel,NULL_Type,"http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument");
-    DFSetAttribute(rel,NULL_TARGET,"/word/document.xml");
-    int ok = saveXMLDocument(storage,"/_rels/.rels",doc,NAMESPACE_REL,error);
-    DFDocumentRelease(doc);
-    return ok;
-}
-
-static int Word_fromStorage(TextPackage *tp, DFStorage *storage, DFError **error)
-{
-    PartInfo parts[7] = {
-        { "numbering.xml", "/word/numbering.xml", WORDREL_NUMBERING, WORDTYPE_NUMBERING },
-        { "styles.xml", "/word/styles.xml", WORDREL_STYLES, WORDTYPE_STYLES },
-        { "settings.xml", "/word/settings.xml", WORDREL_SETTINGS, WORDTYPE_SETTINGS },
-        { "theme.xml", "/word/theme.xml", WORDREL_THEME, WORDTYPE_THEME },
-        { "footnotes.xml", "/word/footnotes.xml", WORDREL_FOOTNOTES, WORDTYPE_FOOTNOTES },
-        { "endnotes.xml", "/word/endnotes.xml", WORDREL_ENDNOTES, WORDTYPE_ENDNOTES },
-        { NULL, NULL, NULL, NULL },
-    };
-
-    int ok = 0;
-
-    const char *documentStr = DFHashTableLookup(tp->items,"document.xml");
-    const char **allFilenames = NULL;
-    DFHashTable *ctDefaults = DFHashTableNew((DFCopyFunction)strdup,(DFFreeFunction)free);
-    DFHashTable *ctOverrides = DFHashTableNew((DFCopyFunction)strdup,(DFFreeFunction)free);
-    DFHashTable *docRelURIs = DFHashTableNew((DFCopyFunction)strdup,(DFFreeFunction)free);
-    DFHashTable *docRelTypes = DFHashTableNew((DFCopyFunction)strdup,(DFFreeFunction)free);
-    DFHashTable *docRelModes = DFHashTableNew((DFCopyFunction)strdup,(DFFreeFunction)free);
-
-
-    if (documentStr == NULL) {
-        DFErrorFormat(error,"No document.xml");
-        goto end;
-    }
-
-    DFHashTableAdd(ctDefaults,"rels","application/vnd.openxmlformats-package.relationships+xml");
-    DFHashTableAdd(ctOverrides,"/word/document.xml",
-                   "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml");
-
-
-    if (documentStr != NULL) {
-        if (!saveStrippedXMLText(storage,"/word/document.xml",documentStr,NAMESPACE_NULL,error))
-            goto end;
-    }
-
-    int rIdNext = 1;
-    for (int i = 0; parts[i].filename; i++) {
-        const char *content = DFHashTableLookup(tp->items,parts[i].filename);
-        if (content == NULL)
-            continue;
-
-        if (!saveStrippedXMLText(storage,parts[i].path,content,NAMESPACE_NULL,error))
-            goto end;
-
-        char rIdStr[100];
-        snprintf(rIdStr,100,"rId%d",rIdNext++);
-        DFHashTableAdd(docRelURIs,rIdStr,parts[i].path);
-        DFHashTableAdd(docRelTypes,rIdStr,parts[i].rel);
-        DFHashTableAdd(ctOverrides,parts[i].path,parts[i].type);
-    }
-
-    allFilenames = DFHashTableCopyKeys(tp->items);
-    for (int i = 0; allFilenames[i]; i++) {
-        const char *curFilename = allFilenames[i];
-        char *ext = DFPathExtension(curFilename);
-
-        int isImage = 0;
-
-        if (DFStringEqualsCI(ext,"png")) {
-            DFHashTableAdd(ctDefaults,"png","image/png");
-            isImage = 1;
-        }
-
-        if (DFStringEqualsCI(ext,"jpg")) {
-            DFHashTableAdd(ctDefaults,"jpg","image/png");
-            isImage = 1;
-        }
-
-        if (DFStringEqualsCI(ext,"jpeg")) {
-            DFHashTableAdd(ctDefaults,"jpeg","image/png");
-            isImage = 1;
-        }
-
-        free(ext);
-
-        if (isImage) {
-            const char *str = DFHashTableLookup(tp->items,curFilename);
-            char *parentRel = DFPathDirName(curFilename);
-            int fileok = 1;
-
-            DFBuffer *data = stringToBinary(str);
-            if (!DFBufferWriteToStorage(data,storage,curFilename,error)) {
-                DFErrorFormat(error,"%s: %s",curFilename,DFErrorMessage(error));
-                fileok = 0;
-            }
-
-            DFBufferRelease(data);
-            free(parentRel);
-
-            if (!fileok)
-                goto end;
-        }
-    }
-
-    if (!saveContentTypes(storage,ctDefaults,ctOverrides,error)) {
-        DFErrorFormat(error,"saveContentTypes: %s",DFErrorMessage(error));
-        goto end;
-    }
-
-    const char *relsStr = DFHashTableLookup(tp->items,"document.xml.rels");
-    if (relsStr != NULL) {
-        DFDocument *doc = DFParseXMLString(relsStr,error);
-        if (doc == NULL)
-            goto end;
-
-        DFHashTableRelease(docRelURIs);
-        DFHashTableRelease(docRelTypes);
-        DFHashTableRelease(docRelModes);
-        docRelURIs = DFHashTableNew((DFCopyFunction)strdup,(DFFreeFunction)free);
-        docRelTypes = DFHashTableNew((DFCopyFunction)strdup,(DFFreeFunction)free);
-        docRelModes = DFHashTableNew((DFCopyFunction)strdup,(DFFreeFunction)free);
-
-        for (DFNode *child = doc->root->first; child != NULL; child = child->next) {
-            if (child->tag == REL_RELATIONSHIP) {
-                const char *rId = DFGetAttribute(child,NULL_Id);
-                const char *type = DFGetAttribute(child,NULL_Type);
-                const char *target = DFGetAttribute(child,NULL_TARGET);
-                const char *mode = DFGetAttribute(child,NULL_TARGETMODE);
-
-                if ((rId != NULL) && (type != NULL) && (target != NULL)) {
-                    DFHashTableAdd(docRelURIs,rId,target);
-                    DFHashTableAdd(docRelTypes,rId,type);
-                    if (mode != NULL)
-                        DFHashTableAdd(docRelModes,rId,mode);
-                }
-            }
-        }
-
-        DFDocumentRelease(doc);
-    }
-
-    if (!saveDocRels(storage,docRelURIs,docRelTypes,docRelModes,error)) {
-        DFErrorFormat(error,"saveDocRels: %s",DFErrorMessage(error));
-        goto end;
-    }
-
-    if (!saveRootRels(storage,error)) {
-        DFErrorFormat(error,"saveRootRels: %s",DFErrorMessage(error));
-        goto end;
-    }
-
-    ok = 1;
-
-end:
-    DFHashTableRelease(ctDefaults);
-    DFHashTableRelease(ctOverrides);
-    DFHashTableRelease(docRelURIs);
-    DFHashTableRelease(docRelTypes);
-    DFHashTableRelease(docRelModes);
-    free(allFilenames);
-    return ok;
-}
-
-DFStorage *Word_fromPlain(const char *plain, const char *plainPath, DFError **error)
-{
-    int ok = 0;
-    DFStorage *concreteStorage = NULL;
-    TextPackage *textPackage = NULL;
-
-    textPackage = TextPackageNewWithString(plain,plainPath,error);
-    if (textPackage == NULL)
-        goto end;
-
-    concreteStorage = DFStorageNewMemory(DFFileFormatDocx);
-
-    if (!Word_fromStorage(textPackage,concreteStorage,error)) {
-        DFErrorFormat(error,"Word_fromStorage: %s",DFErrorMessage(error));
-        printf("%s\n",DFErrorMessage(error));
-        goto end;
-    }
-
-    if (!DFStorageSave(concreteStorage,error))
-        goto end;
-
-    ok = 1;
-
-end:
-    TextPackageRelease(textPackage);
-    if (ok) {
-        return concreteStorage;
-    }
-    else {
-        DFStorageRelease(concreteStorage);
-        return NULL;
-    }
-}
-
-static void HTML_getImageSourcesRecursive(DFNode *node, DFHashTable *result)
-{
-    if (node->tag == HTML_IMG) {
-        const char *src = DFGetAttribute(node,HTML_SRC);
-        if (src != NULL)
-            DFHashTableAdd(result,src,"");
-    }
-
-    for (DFNode *child = node->first; child != NULL; child = child->next)
-        HTML_getImageSourcesRecursive(child,result);
-}
-
-static const char **HTML_getImageSources(DFDocument *doc)
-{
-    DFHashTable *set = DFHashTableNew(NULL,NULL);
-    HTML_getImageSourcesRecursive(doc->docNode,set);
-    const char **result = DFHashTableCopyKeys(set);
-    DFHashTableRelease(set);
-    return result;
-}
-
-char *HTML_toPlain(DFDocument *doc, DFStorage *storage, DFError **error)
-{
-    DFBuffer *output = DFBufferNew();
-    char *docStr = DFSerializeXMLString(doc,0,0);
-    DFBufferFormat(output,"%s",docStr);
-    free(docStr);
-    const char **imageSources = HTML_getImageSources(doc);
-    DFSortStringsCaseInsensitive(imageSources);
-    for (size_t i = 0; imageSources[i]; i++) {
-        const char *src = imageSources[i];
-        if (DFStringHasPrefix(src,"images/"))
-            DFBufferFormat(output,"#item %s\n",src);
-        else
-            DFBufferFormat(output,"#item images/%s\n",src);
-        DFBuffer *imageData = DFBufferReadFromStorage(storage,src,error);
-        if (imageData == NULL) {
-            DFErrorFormat(error,"%s: %s",src,DFErrorMessage(error));
-            return NULL;
-        }
-        char *imageStr = binaryToString(imageData);
-        DFBufferFormat(output,"%s",imageStr);
-        free(imageStr);
-        DFBufferRelease(imageData);
-    }
-    free(imageSources);
-
-    char *str = strdup(output->data);
-    DFBufferRelease(output);
-    return str;
-}
-
-static DFDocument *HTML_fromTextPackage(TextPackage *textPackage, DFStorage *htmlStorage, DFError **error)
-{
-    const char *html = DFHashTableLookup(textPackage->items,"");
-    if (html == NULL) {
-        DFErrorFormat(error,"No HTML data");
-        return NULL;
-    }
-
-    DFDocument *doc = DFParseHTMLString(html,0,error);
-    if (doc == NULL)
-        return NULL;
-
-    for (size_t ki = 0; ki < textPackage->nkeys; ki++) {
-        const char *key = textPackage->keys[ki];
-        if (strlen(key) == 0)
-            continue;
-
-        int ok = 1;
-
-        const char *str = DFHashTableLookup(textPackage->items,key);
-        DFBuffer *data = stringToBinary(str);
-        if (!DFBufferWriteToStorage(data,htmlStorage,key,error)) {
-            DFErrorFormat(error,"%s: %s",key,DFErrorMessage(error));
-            DFDocumentRelease(doc);
-            ok = 0;
-        }
-
-        DFBufferRelease(data);
-
-        if (!ok)
-            return NULL;
-    }
-
-    return doc;
-}
-
-DFDocument *HTML_fromPlain(const char *plain, const char *path, DFStorage *htmlStorage, DFError **error)
-{
-    TextPackage *textPackage = TextPackageNewWithString(plain,path,error);
-    if (textPackage == NULL)
-        return NULL;;
-    DFDocument *result = HTML_fromTextPackage(textPackage,htmlStorage,error);
-    TextPackageRelease(textPackage);
-    return result;
-}

http://git-wip-us.apache.org/repos/asf/incubator-corinthia/blob/4701058d/consumers/dfutil/src/Plain.h
----------------------------------------------------------------------
diff --git a/consumers/dfutil/src/Plain.h b/consumers/dfutil/src/Plain.h
deleted file mode 100644
index 96ad5bb..0000000
--- a/consumers/dfutil/src/Plain.h
+++ /dev/null
@@ -1,28 +0,0 @@
-// Copyright 2012-2014 UX Productivity Pty Ltd
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef dfutil_Plain_h
-#define dfutil_Plain_h
-
-#include <DocFormats/DFError.h>
-#include "DFHashTable.h"
-#include "DFDOM.h"
-#include <DocFormats/DFStorage.h>
-
-char *Word_toPlain(DFStorage *rawStorage, DFHashTable *parts);
-DFStorage *Word_fromPlain(const char *plain, const char *plainPath, DFError **error);
-char *HTML_toPlain(DFDocument *doc, DFStorage *storage, DFError **error);
-DFDocument *HTML_fromPlain(const char *plain, const char *path, DFStorage *htmlStorage, DFError **error);
-
-#endif

http://git-wip-us.apache.org/repos/asf/incubator-corinthia/blob/4701058d/consumers/dfutil/src/TestCase.c
----------------------------------------------------------------------
diff --git a/consumers/dfutil/src/TestCase.c b/consumers/dfutil/src/TestCase.c
index be2794d..b2b6e2d 100644
--- a/consumers/dfutil/src/TestCase.c
+++ b/consumers/dfutil/src/TestCase.c
@@ -14,7 +14,8 @@
 
 #include "TestCase.h"
 #include "TestFunctions.h"
-#include "Plain.h"
+#include "WordPlain.h"
+#include "HTMLPlain.h"
 #include "DFCommon.h"
 #include "DFString.h"
 #include "DFFilesystem.h"

http://git-wip-us.apache.org/repos/asf/incubator-corinthia/blob/4701058d/consumers/dfutil/src/TestFunctions.c
----------------------------------------------------------------------
diff --git a/consumers/dfutil/src/TestFunctions.c b/consumers/dfutil/src/TestFunctions.c
index 7c7a3f8..be540ec 100644
--- a/consumers/dfutil/src/TestFunctions.c
+++ b/consumers/dfutil/src/TestFunctions.c
@@ -15,7 +15,8 @@
 #include "TestFunctions.h"
 #include "DFBDT.h"
 #include "BDTTest.h"
-#include "Plain.h"
+#include "WordPlain.h"
+#include "HTMLPlain.h"
 #include "Commands.h"
 #include "DFChanges.h"
 #include "WordConverter.h"

http://git-wip-us.apache.org/repos/asf/incubator-corinthia/blob/4701058d/consumers/dfutil/src/TextPackage.c
----------------------------------------------------------------------
diff --git a/consumers/dfutil/src/TextPackage.c b/consumers/dfutil/src/TextPackage.c
deleted file mode 100644
index 031bb1e..0000000
--- a/consumers/dfutil/src/TextPackage.c
+++ /dev/null
@@ -1,163 +0,0 @@
-// Copyright 2012-2014 UX Productivity Pty Ltd
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "TextPackage.h"
-#include "DFBuffer.h"
-#include "DFString.h"
-#include "DFFilesystem.h"
-#include "DFCommon.h"
-#include <stdlib.h>
-#include <string.h>
-
-////////////////////////////////////////////////////////////////////////////////////////////////////
-//                                                                                                //
-//                                           TextPackage                                          //
-//                                                                                                //
-////////////////////////////////////////////////////////////////////////////////////////////////////
-
-static TextPackage *TextPackageNew(void)
-{
-    TextPackage *package = (TextPackage *)calloc(1,sizeof(TextPackage));
-    package->retainCount = 1;
-    package->items = DFHashTableNew((DFCopyFunction)strdup,free);
-    package->keys = (char **)calloc(1,sizeof(char *));
-    return package;
-}
-
-TextPackage *TextPackageRetain(TextPackage *package)
-{
-    if (package != NULL)
-        package->retainCount++;
-    return package;
-}
-
-void TextPackageRelease(TextPackage *package)
-{
-    if ((package == NULL) || (--package->retainCount > 0))
-        return;
-
-    for (size_t i = 0; i < package->nkeys; i++)
-        free(package->keys[i]);
-    free(package->keys);
-    DFHashTableRelease(package->items);
-    free(package);
-}
-
-static int processIncludes(TextPackage *package, const char *input, DFBuffer *output, const char *path, DFError **error)
-{
-    int ok = 1;
-    const char **lines = DFStringSplit(input,"\n",0);
-    for (int lineno = 0; lines[lineno] && ok; lineno++) {
-        const char *line = lines[lineno];
-        if (DFStringHasPrefix(line,"#include \"") && DFStringHasSuffix(line,"\"")) {
-            char *inclRelPath = DFSubstring(line,10,strlen(line)-1);
-            char *inclAbsPath = DFAppendPathComponent(path,inclRelPath);
-            char *inclDirName = DFPathDirName(inclAbsPath);
-            char *inclContent = DFStringReadFromFile(inclAbsPath,error);
-            if (inclContent == NULL) {
-                DFErrorFormat(error,"%s: %s",inclRelPath,DFErrorMessage(error));
-                ok = 0;
-            }
-            else if (!processIncludes(package,inclContent,output,inclDirName,error)) {
-                ok = 0;
-            }
-            free(inclRelPath);
-            free(inclAbsPath);
-            free(inclDirName);
-            free(inclContent);
-        }
-        else {
-            DFBufferFormat(output,"%s\n",line);
-        }
-    }
-    free(lines);
-    return ok;
-}
-
-static int parsePackage(TextPackage *package, const char *string, const char *path, DFError **error)
-{
-    DFBuffer *replaced = DFBufferNew();
-    if (!strcmp(path,""))
-        path = ".";
-
-    if (!processIncludes(package,string,replaced,path,error)) {
-        DFBufferRelease(replaced);
-        return 0;
-    }
-
-
-    char *currentKey = strdup("");
-    DFBuffer *currentValue = DFBufferNew();
-    const char **lines = DFStringSplit(replaced->data,"\n",0);
-    for (int lineno = 0; lines[lineno]; lineno++) {
-        const char *line = lines[lineno];
-
-        if (!DFStringHasPrefix(line,"#")) {
-            DFBufferFormat(currentValue,"%s\n",line);
-        }
-        else if (DFStringHasPrefix(line,"#item ")) {
-            package->keys = (char **)realloc(package->keys,(package->nkeys+2)*sizeof(char *));
-            package->keys[package->nkeys++] = strdup(currentKey);
-            package->keys[package->nkeys] = NULL;
-            DFHashTableAdd(package->items,currentKey,currentValue->data);
-            free(currentKey);
-            DFBufferRelease(currentValue);
-            currentKey = DFSubstring(line,6,strlen(line));
-            currentValue = DFBufferNew();
-        }
-        else if (DFStringHasPrefix(line,"##")) {
-            DFBufferFormat(currentValue,"%s\n",&line[1]);
-        }
-        else {
-            DFErrorFormat(error,"Unknown command: %s on line %d",line,(lineno+1));
-            return 0;
-        }
-    }
-    package->keys = (char **)realloc(package->keys,(package->nkeys+2)*sizeof(char *));
-    package->keys[package->nkeys++] = strdup(currentKey);
-    package->keys[package->nkeys] = NULL;
-    DFHashTableAdd(package->items,currentKey,currentValue->data);
-
-    free(lines);
-    free(currentKey);
-    DFBufferRelease(currentValue);
-    DFBufferRelease(replaced);
-    return 1;
-}
-
-TextPackage *TextPackageNewWithFile(const char *filename, DFError **error)
-{
-    char *contents = DFStringReadFromFile(filename,error);
-    if (contents == NULL) {
-        DFErrorFormat(error,"%s: %s",filename,DFErrorMessage(error));
-        return NULL;
-    }
-
-    char *path = DFPathDirName(filename);
-    TextPackage *result = TextPackageNewWithString(contents,path,error);
-    free(path);
-    free(contents);
-    return result;
-}
-
-TextPackage *TextPackageNewWithString(const char *string, const char *path, DFError **error)
-{
-    TextPackage *package = TextPackageNew();
-    if (!parsePackage(package,string,path,error)) {
-        TextPackageRelease(package);
-        return NULL;
-    }
-    else
-        return package;
-}

http://git-wip-us.apache.org/repos/asf/incubator-corinthia/blob/4701058d/consumers/dfutil/src/TextPackage.h
----------------------------------------------------------------------
diff --git a/consumers/dfutil/src/TextPackage.h b/consumers/dfutil/src/TextPackage.h
deleted file mode 100644
index 077f84e..0000000
--- a/consumers/dfutil/src/TextPackage.h
+++ /dev/null
@@ -1,41 +0,0 @@
-// Copyright 2012-2014 UX Productivity Pty Ltd
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef dfutil_TextPackage_h
-#define dfutil_TextPackage_h
-
-#include <DocFormats/DFError.h>
-#include "DFHashTable.h"
-
-////////////////////////////////////////////////////////////////////////////////////////////////////
-//                                                                                                //
-//                                           TextPackage                                          //
-//                                                                                                //
-////////////////////////////////////////////////////////////////////////////////////////////////////
-
-typedef struct TextPackage TextPackage;
-
-struct TextPackage {
-    size_t retainCount;
-    char **keys;
-    size_t nkeys;
-    DFHashTable *items;
-};
-
-TextPackage *TextPackageNewWithFile(const char *filename, DFError **error);
-TextPackage *TextPackageNewWithString(const char *string, const char *path, DFError **error);
-TextPackage *TextPackageRetain(TextPackage *package);
-void TextPackageRelease(TextPackage *package);
-
-#endif

http://git-wip-us.apache.org/repos/asf/incubator-corinthia/blob/4701058d/consumers/dfutil/src/main.c
----------------------------------------------------------------------
diff --git a/consumers/dfutil/src/main.c b/consumers/dfutil/src/main.c
index 8f9db82..6a0ac69 100644
--- a/consumers/dfutil/src/main.c
+++ b/consumers/dfutil/src/main.c
@@ -15,7 +15,8 @@
 #include "Commands.h"
 #include "BDTTest.h"
 #include "Test.h"
-#include "Plain.h"
+#include "WordPlain.h"
+#include "HTMLPlain.h"
 #include "FunctionTests.h"
 #include "StringTests.h"
 #include "DFZipFile.h"


Mime
View raw message