pdfbox-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From til...@apache.org
Subject svn commit: r1635064 - /pdfbox/trunk/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java
Date Wed, 29 Oct 2014 07:41:55 GMT
Author: tilman
Date: Wed Oct 29 07:41:54 2014
New Revision: 1635064

URL: http://svn.apache.org/r1635064
Log:
PDFBOX-2417, PDFBOX-2418: find namespaces that are not at top level to avoid "Schema is not set in this document" error, as suggested by Ralf Hauser; use isEmpty()

Modified:
    pdfbox/trunk/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java

Modified: pdfbox/trunk/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java?rev=1635064&r1=1635063&r2=1635064&view=diff
==============================================================================
--- pdfbox/trunk/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java (original)
+++ pdfbox/trunk/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java Wed Oct 29 07:41:54 2014
@@ -1,877 +1,879 @@
-/*****************************************************************************
- * 
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- * http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- * 
- ****************************************************************************/
-
-package org.apache.xmpbox.xml;
-
-import java.io.ByteArrayInputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Stack;
-import java.util.StringTokenizer;
-
-import javax.xml.XMLConstants;
-import javax.xml.namespace.QName;
-import javax.xml.parsers.DocumentBuilder;
-import javax.xml.parsers.DocumentBuilderFactory;
-import javax.xml.parsers.ParserConfigurationException;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.xmpbox.XMPMetadata;
-import org.apache.xmpbox.XmpConstants;
-import org.apache.xmpbox.schema.XMPSchema;
-import org.apache.xmpbox.schema.XmpSchemaException;
-import org.apache.xmpbox.type.AbstractField;
-import org.apache.xmpbox.type.AbstractSimpleProperty;
-import org.apache.xmpbox.type.AbstractStructuredType;
-import org.apache.xmpbox.type.ArrayProperty;
-import org.apache.xmpbox.type.Attribute;
-import org.apache.xmpbox.type.BadFieldValueException;
-import org.apache.xmpbox.type.Cardinality;
-import org.apache.xmpbox.type.ComplexPropertyContainer;
-import org.apache.xmpbox.type.PropertiesDescription;
-import org.apache.xmpbox.type.PropertyType;
-import org.apache.xmpbox.type.TypeMapping;
-import org.apache.xmpbox.type.Types;
-import org.apache.xmpbox.xml.XmpParsingException.ErrorType;
-import org.w3c.dom.Attr;
-import org.w3c.dom.Comment;
-import org.w3c.dom.Document;
-import org.w3c.dom.Element;
-import org.w3c.dom.NamedNodeMap;
-import org.w3c.dom.Node;
-import org.w3c.dom.NodeList;
-import org.w3c.dom.ProcessingInstruction;
-import org.w3c.dom.Text;
-import org.xml.sax.SAXException;
-
-public class DomXmpParser
-{
-
-    /**
-     * Log instance.
-     */
-    private static final Log LOG = LogFactory.getLog(DomXmpParser.class);
-
-
-    private DocumentBuilder dBuilder;
-
-    private NamespaceFinder nsFinder;
-
-    private boolean strictParsing = true;
-
-    public DomXmpParser() throws XmpParsingException
-    {
-        try
-        {
-            DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
-            dbFactory.setNamespaceAware(true);
-            dBuilder = dbFactory.newDocumentBuilder();
-            nsFinder = new NamespaceFinder();
-        }
-        catch (ParserConfigurationException e)
-        {
-            throw new XmpParsingException(ErrorType.Configuration, "Failed to initilalize", e);
-        }
-
-    }
-
-    public boolean isStrictParsing()
-    {
-        return strictParsing;
-    }
-
-    public void setStrictParsing(boolean strictParsing)
-    {
-        this.strictParsing = strictParsing;
-    }
-
-    public XMPMetadata parse(byte[] xmp) throws XmpParsingException
-    {
-        ByteArrayInputStream input = new ByteArrayInputStream(xmp);
-        return parse(input);
-    }
-
-    public XMPMetadata parse(InputStream input) throws XmpParsingException
-    {
-        Document document = null;
-        try
-        {
-            dBuilder.setErrorHandler(null); // prevents validation messages polluting the console
-            document = dBuilder.parse(input);
-        }
-        catch (SAXException e)
-        {
-            throw new XmpParsingException(ErrorType.Undefined, "Failed to parse", e);
-        }
-        catch (IOException e)
-        {
-            throw new XmpParsingException(ErrorType.Undefined, "Failed to parse", e);
-        }
-        // document.normalizeDocument();
-        XMPMetadata xmp = null;
-
-        // Start reading
-        removeComments(document);
-        Node node = document.getFirstChild();
-
-        // expect xpacket processing instruction
-        if (!(node instanceof ProcessingInstruction))
-        {
-            throw new XmpParsingException(ErrorType.XpacketBadStart, "xmp should start with a processing instruction");
-        }
-        else
-        {
-            xmp = parseInitialXpacket((ProcessingInstruction) node);
-            node = node.getNextSibling();
-        }
-        // forget other processing instruction
-        while (node instanceof ProcessingInstruction)
-        {
-            node = node.getNextSibling();
-        }
-        // expect root element
-        Element root = null;
-        if (!(node instanceof Element))
-        {
-            throw new XmpParsingException(ErrorType.NoRootElement, "xmp should contain a root element");
-        }
-        else
-        {
-            // use this element as root
-            root = (Element) node;
-            node = node.getNextSibling();
-        }
-        // expect xpacket end
-        if (!(node instanceof ProcessingInstruction))
-        {
-            throw new XmpParsingException(ErrorType.XpacketBadEnd, "xmp should end with a processing instruction");
-        }
-        else
-        {
-            parseEndPacket(xmp, (ProcessingInstruction) node);
-            node = node.getNextSibling();
-        }
-        // should be null
-        if (node != null)
-        {
-            throw new XmpParsingException(ErrorType.XpacketBadEnd,
-                    "xmp should end after xpacket end processing instruction");
-        }
-        // xpacket is OK and the is no more nodes
-        // Now, parse the content of root
-        Element rdfRdf = findDescriptionsParent(root);
-        List<Element> descriptions = DomHelper.getElementChildren(rdfRdf);
-        List<Element> dataDescriptions = new ArrayList<Element>(descriptions.size());
-        for (Element description : descriptions)
-        {
-            Element first = DomHelper.getFirstChildElement(description);
-            if (first != null && "pdfaExtension".equals(first.getPrefix()))
-            {
-                PdfaExtensionHelper.validateNaming(xmp, description);
-                parseDescriptionRoot(xmp, description);
-            }
-            else
-            {
-                dataDescriptions.add(description);
-            }
-        }
-        // find schema description
-        PdfaExtensionHelper.populateSchemaMapping(xmp);
-        // parse data description
-        for (Element description : dataDescriptions)
-        {
-            parseDescriptionRoot(xmp, description);
-        }
-
-        return xmp;
-    }
-
-    private void parseDescriptionRoot(XMPMetadata xmp, Element description) throws XmpParsingException
-    {
-        nsFinder.push(description);
-        TypeMapping tm = xmp.getTypeMapping();
-        try
-        {
-            List<Element> properties = DomHelper.getElementChildren(description);
-            // parse attributes as properties
-            NamedNodeMap nnm = description.getAttributes();
-            for (int i = 0; i < nnm.getLength(); i++)
-            {
-                Attr attr = (Attr) nnm.item(i);
-                if (XMLConstants.XMLNS_ATTRIBUTE.equals(attr.getPrefix()))
-                {
-                    // do nothing
-                }
-                else if (XmpConstants.DEFAULT_RDF_PREFIX.equals(attr.getPrefix())
-                        && XmpConstants.ABOUT_NAME.equals(attr.getLocalName()))
-                {
-                    // do nothing
-                }
-                else if (attr.getPrefix() == null && XmpConstants.ABOUT_NAME.equals(attr.getLocalName()))
-                {
-                    // do nothing
-                }
-                else
-                {
-                    String namespace = attr.getNamespaceURI();
-                    XMPSchema schema = xmp.getSchema(namespace);
-                    if (schema == null && tm.getSchemaFactory(namespace) != null)
-                    {
-                        schema = tm.getSchemaFactory(namespace).createXMPSchema(xmp, attr.getPrefix());
-                        loadAttributes(schema, description);
-                    }
-                    // Only process when a schema was successfully found
-                    if( schema != null )
-                    {
-                        ComplexPropertyContainer container = schema.getContainer();
-                        PropertyType type = checkPropertyDefinition(xmp,
-                                new QName(attr.getNamespaceURI(), attr.getLocalName()));
-                        
-                        //Default to text if no type is found
-                        if( type == null) 
-                        {
-                            type = TypeMapping.createPropertyType(Types.Text, Cardinality.Simple);
-                        }
-                                           
-                        try
-                        {
-                            AbstractSimpleProperty sp = tm.instanciateSimpleProperty(namespace, schema.getPrefix(),
-                                    attr.getLocalName(), attr.getValue(), type.type());
-                            container.addProperty(sp);
-                        }
-                        catch( IllegalArgumentException exception)
-                        {
-                            //Swallow, and continue adding additional properties
-                            LOG.warn("Unable to add property: "+ attr.getLocalName() + " value: "+attr.getValue(),exception);
-                        }
-                    }
-                }
-            }
-            // parse children elements as properties
-            for (Element property : properties)
-            {
-                String namespace = property.getNamespaceURI();
-                PropertyType type = checkPropertyDefinition(xmp, DomHelper.getQName(property));
-                // create the container
-                if (!tm.isDefinedSchema(namespace))
-                {
-                    throw new XmpParsingException(ErrorType.NoSchema,
-                            "This namespace is not a schema or a structured type : " + namespace);
-                }
-                XMPSchema schema = xmp.getSchema(namespace);
-                if (schema == null)
-                {
-                    schema = tm.getSchemaFactory(namespace).createXMPSchema(xmp, property.getPrefix());
-                    loadAttributes(schema, description);
-                }
-                ComplexPropertyContainer container = schema.getContainer();
-                // create property
-                createProperty(xmp, property, type, container);
-            }
-        }
-        catch (XmpSchemaException e)
-        {
-            throw new XmpParsingException(ErrorType.Undefined, "Parsing failed", e);
-        }
-        finally
-        {
-            nsFinder.pop();
-        }
-    }
-
-    private void createProperty(XMPMetadata xmp, Element property, PropertyType type, ComplexPropertyContainer container)
-            throws XmpParsingException
-    {
-        String prefix = property.getPrefix();
-        String name = property.getLocalName();
-        String namespace = property.getNamespaceURI();
-        // create property
-        nsFinder.push(property);
-        try
-        {
-            if (type == null)
-            {
-                if (strictParsing)
-                {
-                    throw new XmpParsingException(ErrorType.InvalidType, "No type defined for {" + namespace + "}"
-                            + name);
-                }
-                else
-                {
-                    // use it as string
-                    manageSimpleType(xmp, property, Types.Text, container);
-                }
-            }
-            else if (type.type() == Types.LangAlt)
-            {
-                manageLangAlt(xmp, property, container);
-            }
-            else if (type.card().isArray())
-            {
-                manageArray(xmp, property, type, container);
-            }
-            else if (type.type().isSimple())
-            {
-                manageSimpleType(xmp, property, type.type(), container);
-            }
-            else if (type.type().isStructured())
-            {
-                if (DomHelper.isParseTypeResource(property))
-                {
-                    AbstractStructuredType ast = parseLiDescription(xmp, DomHelper.getQName(property), property);
-                    ast.setPrefix(prefix);
-                    container.addProperty(ast);
-                }
-                else
-                {
-                    Element inner = DomHelper.getFirstChildElement(property);
-                    if (inner != null)
-                    {
-                        AbstractStructuredType ast = parseLiDescription(xmp, DomHelper.getQName(property), inner);
-                        ast.setPrefix(prefix);
-                        container.addProperty(ast);
-                    }
-                }
-            }
-            else if (type.type() == Types.DefinedType)
-            {
-                if (DomHelper.isParseTypeResource(property))
-                {
-                    AbstractStructuredType ast = parseLiDescription(xmp, DomHelper.getQName(property), property);
-                    ast.setPrefix(prefix);
-                    container.addProperty(ast);
-                }
-                else
-                {
-                    Element inner = DomHelper.getFirstChildElement(property);
-                    if (inner == null)
-                    {
-                        throw new XmpParsingException(ErrorType.Format, "property should contain child element : "
-                                + property);
-                    }
-                    AbstractStructuredType ast = parseLiDescription(xmp, DomHelper.getQName(property), inner);
-                    ast.setPrefix(prefix);
-                    container.addProperty(ast);
-                }
-            }
-        }
-        finally
-        {
-            nsFinder.pop();
-        }
-
-    }
-
-    private void manageSimpleType(XMPMetadata xmp, Element property, Types type, ComplexPropertyContainer container)
-            throws XmpParsingException
-    {
-        TypeMapping tm = xmp.getTypeMapping();
-        String prefix = property.getPrefix();
-        String name = property.getLocalName();
-        String namespace = property.getNamespaceURI();
-        AbstractSimpleProperty sp = tm.instanciateSimpleProperty(namespace, prefix, name, property.getTextContent(),
-                type);
-        loadAttributes(sp, property);
-        container.addProperty(sp);
-    }
-
-    private void manageArray(XMPMetadata xmp, Element property, PropertyType type, ComplexPropertyContainer container)
-            throws XmpParsingException
-    {
-        TypeMapping tm = xmp.getTypeMapping();
-        String prefix = property.getPrefix();
-        String name = property.getLocalName();
-        String namespace = property.getNamespaceURI();
-        Element bagOrSeq = DomHelper.getUniqueElementChild(property);
-        // ensure this is the good type of array
-        if (bagOrSeq == null)
-        {
-            // not an array
-            String whatFound = "nothing";
-            if (property.getFirstChild() != null)
-            {
-                whatFound = property.getFirstChild().getClass().getName();
-            }
-            throw new XmpParsingException(ErrorType.Format, "Invalid array definition, expecting " + type.card()
-                    + " and found "
-                    + whatFound
-                    + " [prefix=" + prefix + "; name=" + name + "]");
-        }
-        if (!bagOrSeq.getLocalName().equals(type.card().name()))
-        {
-            // not the good array type
-            throw new XmpParsingException(ErrorType.Format, "Invalid array type, expecting " + type.card()
-                    + " and found " + bagOrSeq.getLocalName() + " [prefix="+prefix+"; name="+name+"]");
-        }
-        ArrayProperty array = tm.createArrayProperty(namespace, prefix, name, type.card());
-        container.addProperty(array);
-        List<Element> lis = DomHelper.getElementChildren(bagOrSeq);
-
-        for (Element element : lis)
-        {
-            QName propertyQName = DomHelper.getQName(property);
-            AbstractField ast = parseLiElement(xmp, propertyQName, element);
-            if (ast != null)
-            {
-                array.addProperty(ast);
-            }
-        }
-    }
-
-    private void manageLangAlt(XMPMetadata xmp, Element property, ComplexPropertyContainer container)
-            throws XmpParsingException
-    {
-        manageArray(xmp, property, TypeMapping.createPropertyType(Types.LangAlt, Cardinality.Alt), container);
-    }
-
-    private void parseDescriptionInner(XMPMetadata xmp, Element description, ComplexPropertyContainer parentContainer)
-            throws XmpParsingException
-    {
-        nsFinder.push(description);
-        TypeMapping tm = xmp.getTypeMapping();
-        try
-        {
-            List<Element> properties = DomHelper.getElementChildren(description);
-            for (Element property : properties)
-            {
-                String name = property.getLocalName();
-                PropertyType dtype = checkPropertyDefinition(xmp, DomHelper.getQName(property));
-                PropertyType ptype = tm.getStructuredPropMapping(dtype.type()).getPropertyType(name);
-                // create property
-                createProperty(xmp, property, ptype, parentContainer);
-            }
-        }
-        finally
-        {
-            nsFinder.pop();
-        }
-    }
-
-    private AbstractField parseLiElement(XMPMetadata xmp, QName descriptor, Element liElement)
-            throws XmpParsingException
-    {
-        if (DomHelper.isParseTypeResource(liElement))
-        {
-            return parseLiDescription(xmp, descriptor, liElement);
-        }
-        // will find rdf:Description
-        Element liChild = DomHelper.getUniqueElementChild(liElement);
-        if (liChild != null)
-        {
-            return parseLiDescription(xmp, descriptor, liChild);
-        }
-        else
-        {
-            // no child, so consider as simple text
-            String text = liElement.getTextContent();
-            TypeMapping tm = xmp.getTypeMapping();
-            AbstractSimpleProperty sp = tm.instanciateSimpleProperty(descriptor.getNamespaceURI(),
-                    descriptor.getPrefix(), descriptor.getLocalPart(), text, Types.Text);
-            loadAttributes(sp, liElement);
-            return sp;
-        }
-    }
-
-    private void loadAttributes(AbstractField sp, Element element)
-    {
-        NamedNodeMap nnm = element.getAttributes();
-        for (int i = 0; i < nnm.getLength(); i++)
-        {
-            Attr attr = (Attr) nnm.item(i);
-            if (XMLConstants.XMLNS_ATTRIBUTE.equals(attr.getPrefix()))
-            {
-                // do nothing
-            }
-            else if (XmpConstants.DEFAULT_RDF_PREFIX.equals(attr.getPrefix())
-                    && XmpConstants.ABOUT_NAME.equals(attr.getLocalName()))
-            {
-                // set about
-                if (sp instanceof XMPSchema)
-                {
-                    ((XMPSchema) sp).setAboutAsSimple(attr.getValue());
-                }
-            }
-            else
-            {
-                Attribute attribute = new Attribute(XMLConstants.XML_NS_URI, attr.getLocalName(), attr.getValue());
-                sp.setAttribute(attribute);
-            }
-        }
-    }
-
-    private AbstractStructuredType parseLiDescription(XMPMetadata xmp, QName descriptor, Element liElement)
-            throws XmpParsingException
-    {
-        TypeMapping tm = xmp.getTypeMapping();
-        List<Element> elements = DomHelper.getElementChildren(liElement);
-        if (elements.size() == 0)
-        {
-            // The list is empty
-            return null;
-        }
-        // Instantiate abstract structured type with hint from first element
-        Element first = elements.get(0);
-        PropertyType ctype = checkPropertyDefinition(xmp, DomHelper.getQName(first));
-        Types tt = ctype.type();
-        AbstractStructuredType ast = instanciateStructured(tm, tt, descriptor.getLocalPart(), first.getNamespaceURI());
-
-        ast.setNamespace(descriptor.getNamespaceURI());
-        ast.setPrefix(descriptor.getPrefix());
-
-        PropertiesDescription pm;
-        if (tt.isStructured())
-        {
-            pm = tm.getStructuredPropMapping(tt);
-        }
-        else
-        {
-            pm = tm.getDefinedDescriptionByNamespace(first.getNamespaceURI());
-        }
-        for (Element element : elements)
-        {
-            String prefix = element.getPrefix();
-            String name = element.getLocalName();
-            String namespace = element.getNamespaceURI();
-            PropertyType type = pm.getPropertyType(name);
-            if (type == null)
-            {
-                // not defined
-                throw new XmpParsingException(ErrorType.NoType, "Type '" + name + "' not defined in "
-                        + element.getNamespaceURI());
-            }
-            else if (type.card().isArray())
-            {
-                ArrayProperty array = tm.createArrayProperty(namespace, prefix, name, type.card());
-                ast.getContainer().addProperty(array);
-                Element bagOrSeq = DomHelper.getUniqueElementChild(element);
-                List<Element> lis = DomHelper.getElementChildren(bagOrSeq);
-                for (Element element2 : lis)
-                {
-                    AbstractField ast2 = parseLiElement(xmp, descriptor, element2);
-                    if (ast2 != null)
-                    {
-                        array.addProperty(ast2);
-                    }
-                }
-            }
-            else if (type.type().isSimple())
-            {
-                AbstractSimpleProperty sp = tm.instanciateSimpleProperty(namespace, prefix, name,
-                        element.getTextContent(), type.type());
-                loadAttributes(sp, element);
-                ast.getContainer().addProperty(sp);
-            }
-            else if (type.type().isStructured())
-            {
-                // create a new structured type
-                AbstractStructuredType inner = instanciateStructured(tm, type.type(), name, null);
-                inner.setNamespace(namespace);
-                inner.setPrefix(prefix);
-                ast.getContainer().addProperty(inner);
-                ComplexPropertyContainer cpc = inner.getContainer();
-                if (DomHelper.isParseTypeResource(element))
-                {
-                    parseDescriptionInner(xmp, element, cpc);
-                }
-                else
-                {
-                    Element descElement = DomHelper.getFirstChildElement(element);
-                    if (descElement != null)
-                    {
-                        parseDescriptionInner(xmp, descElement, cpc);
-                    }
-                }
-            }
-            else
-            {
-                throw new XmpParsingException(ErrorType.NoType, "Unidentified element to parse " + element + " (type="
-                        + type + ")");
-            }
-
-        }
-        return ast;
-    }
-
-    private XMPMetadata parseInitialXpacket(ProcessingInstruction pi) throws XmpParsingException
-    {
-        if (!"xpacket".equals(pi.getNodeName()))
-        {
-            throw new XmpParsingException(ErrorType.XpacketBadStart, "Bad processing instruction name : "
-                    + pi.getNodeName());
-        }
-        String data = pi.getData();
-        StringTokenizer tokens = new StringTokenizer(data, " ");
-        String id = null;
-        String begin = null;
-        String bytes = null;
-        String encoding = null;
-        while (tokens.hasMoreTokens())
-        {
-            String token = tokens.nextToken();
-            if (!token.endsWith("\"") && !token.endsWith("\'"))
-            {
-                throw new XmpParsingException(ErrorType.XpacketBadStart, "Cannot understand PI data part : '" + token
-                        + "'");
-            }
-            String quote = token.substring(token.length() - 1);
-            int pos = token.indexOf("=" + quote);
-            if (pos <= 0)
-            {
-                throw new XmpParsingException(ErrorType.XpacketBadStart, "Cannot understand PI data part : '" + token
-                        + "'");
-            }
-            String name = token.substring(0, pos);
-            String value = token.substring(pos + 2, token.length() - 1);
-            if ("id".equals(name))
-            {
-                id = value;
-            }
-            else if ("begin".equals(name))
-            {
-                begin = value;
-            }
-            else if ("bytes".equals(name))
-            {
-                bytes = value;
-            }
-            else if ("encoding".equals(name))
-            {
-                encoding = value;
-            }
-            else
-            {
-                throw new XmpParsingException(ErrorType.XpacketBadStart, "Unknown attribute in xpacket PI : '" + token
-                        + "'");
-            }
-        }
-        return XMPMetadata.createXMPMetadata(begin, id, bytes, encoding);
-    }
-
-    private void parseEndPacket(XMPMetadata metadata, ProcessingInstruction pi) throws XmpParsingException
-    {
-        String xpackData = pi.getData();
-        // end attribute must be present and placed in first
-        // xmp spec says Other unrecognized attributes can follow, but
-        // should be ignored
-        if (xpackData.startsWith("end="))
-        {
-            char end = xpackData.charAt(5);
-            // check value (5 for end='X')
-            if (end != 'r' && end != 'w')
-            {
-                throw new XmpParsingException(ErrorType.XpacketBadEnd,
-                        "Excepted xpacket 'end' attribute with value 'r' or 'w' ");
-            }
-            else
-            {
-                metadata.setEndXPacket(Character.toString(end));
-            }
-        }
-        else
-        {
-            // should find end='r/w'
-            throw new XmpParsingException(ErrorType.XpacketBadEnd,
-                    "Excepted xpacket 'end' attribute (must be present and placed in first)");
-        }
-    }
-
-    private Element findDescriptionsParent(Element root) throws XmpParsingException
-    {
-        // always <x:xmpmeta xmlns:x="adobe:ns:meta/">
-        expectNaming(root, "adobe:ns:meta/", "x", "xmpmeta");
-        // should only have one child
-        NodeList nl = root.getChildNodes();
-        if (nl.getLength() == 0)
-        {
-            // empty description
-            throw new XmpParsingException(ErrorType.Format, "No rdf description found in xmp");
-        }
-        else if (nl.getLength() > 1)
-        {
-            // only expect one element
-            throw new XmpParsingException(ErrorType.Format, "More than one element found in x:xmpmeta");
-        }
-        else if (!(root.getFirstChild() instanceof Element))
-        {
-            // should be an element
-            throw new XmpParsingException(ErrorType.Format, "x:xmpmeta does not contains rdf:RDF element");
-        } // else let's parse
-        Element rdfRdf = (Element) root.getFirstChild();
-        // always <rdf:RDF
-        // xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
-        expectNaming(rdfRdf, XmpConstants.RDF_NAMESPACE, XmpConstants.DEFAULT_RDF_PREFIX,
-                XmpConstants.DEFAULT_RDF_LOCAL_NAME);
-        // return description parent
-        return rdfRdf;
-    }
-
-    private void expectNaming(Element element, String ns, String prefix, String ln) throws XmpParsingException
-    {
-        if ((ns != null) && !(ns.equals(element.getNamespaceURI())))
-        {
-            throw new XmpParsingException(ErrorType.Format, "Expecting namespace '" + ns + "' and found '"
-                    + element.getNamespaceURI() + "'");
-        }
-        else if ((prefix != null) && !(prefix.equals(element.getPrefix())))
-        {
-            throw new XmpParsingException(ErrorType.Format, "Expecting prefix '" + prefix + "' and found '"
-                    + element.getPrefix() + "'");
-        }
-        else if ((ln != null) && !(ln.equals(element.getLocalName())))
-        {
-            throw new XmpParsingException(ErrorType.Format, "Expecting local name '" + ln + "' and found '"
-                    + element.getLocalName() + "'");
-        } // else OK
-    }
-
-    /**
-     * Remove all the comments node in the parent element of the parameter
-     * 
-     * @param root
-     *            the first node of an element or document to clear
-     */
-    private void removeComments(Node root)
-    {
-        if (root.getChildNodes().getLength()<=1) {
-            // There is only one node so we do not remove it
-            return;
-        }
-        NodeList nl = root.getChildNodes();
-        for (int i=0; i < nl.getLength() ; i ++) {
-            Node node = nl.item(i);
-            if (node instanceof Comment)
-            {
-                // remove the comment
-                root.removeChild(node);
-            }
-            else if (node instanceof Text)
-            {
-                if (node.getTextContent().trim().length() == 0)
-                {
-                        root.removeChild(node);
-                }
-            }
-            else if (node instanceof Element)
-            {
-                // clean child
-                removeComments(node);
-            } // else do nothing
-        }
-    }
-
-    private AbstractStructuredType instanciateStructured(TypeMapping tm, Types type, String name,
-            String structuredNamespace) throws XmpParsingException
-    {
-        try
-        {
-            if (type.isStructured())
-            {
-                return tm.instanciateStructuredType(type, name);
-            }
-            else if (type.isDefined())
-            {
-                return tm.instanciateDefinedType(name, structuredNamespace);
-            }
-            else
-            {
-                throw new XmpParsingException(ErrorType.InvalidType, "Type not structured : " + type);
-            }
-        }
-        catch (BadFieldValueException e)
-        {
-            throw new XmpParsingException(ErrorType.InvalidType, "Parsing failed", e);
-        }
-    }
-
-    private PropertyType checkPropertyDefinition(XMPMetadata xmp, QName prop) throws XmpParsingException
-    {
-        TypeMapping tm = xmp.getTypeMapping();
-        // test if namespace is set in xml
-        if (!nsFinder.containsNamespace(prop.getNamespaceURI()))
-        {
-            throw new XmpParsingException(ErrorType.NoSchema, "Schema is not set in this document : "
-                    + prop.getNamespaceURI());
-        }
-        // test if namespace is defined
-        String nsuri = prop.getNamespaceURI();
-        if (!tm.isDefinedNamespace(nsuri))
-        {
-            throw new XmpParsingException(ErrorType.NoSchema, "Cannot find a definition for the namespace "
-                    + prop.getNamespaceURI());
-        }
-        try
-        {
-            return tm.getSpecifiedPropertyType(prop);
-        }
-        catch (BadFieldValueException e)
-        {
-            throw new XmpParsingException(ErrorType.InvalidType, "Failed to retreive property definition", e);
-        }
-    }
-
-    protected class NamespaceFinder
-    {
-
-        private Stack<Map<String, String>> stack = new Stack<Map<String, String>>();
-
-        protected void push(Element description)
-        {
-            NamedNodeMap nnm = description.getAttributes();
-            Map<String, String> map = new HashMap<String, String>(nnm.getLength());
-            for (int j = 0; j < nnm.getLength(); j++)
-            {
-                Attr no = (Attr) nnm.item(j);
-                // if ns definition add it
-                if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(no.getNamespaceURI()))
-                {
-                    map.put(no.getLocalName(), no.getValue());
-                }
-            }
-            stack.push(map);
-        }
-
-        protected Map<String, String> pop()
-        {
-            return stack.pop();
-        }
-
-        protected boolean containsNamespace(String namespace)
-        {
-            for (int i = stack.size() - 1; i >= 0; i--)
-            {
-                Map<String, String> map = stack.get(i);
-                if (map.containsValue(namespace))
-                {
-                    return true;
-                }
-            }
-            // else namespace not found
-            return false;
-        }
-
-    }
-
-}
+/*****************************************************************************
+ * 
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ * 
+ ****************************************************************************/
+
+package org.apache.xmpbox.xml;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Stack;
+import java.util.StringTokenizer;
+
+import javax.xml.XMLConstants;
+import javax.xml.namespace.QName;
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.xmpbox.XMPMetadata;
+import org.apache.xmpbox.XmpConstants;
+import org.apache.xmpbox.schema.XMPSchema;
+import org.apache.xmpbox.schema.XmpSchemaException;
+import org.apache.xmpbox.type.AbstractField;
+import org.apache.xmpbox.type.AbstractSimpleProperty;
+import org.apache.xmpbox.type.AbstractStructuredType;
+import org.apache.xmpbox.type.ArrayProperty;
+import org.apache.xmpbox.type.Attribute;
+import org.apache.xmpbox.type.BadFieldValueException;
+import org.apache.xmpbox.type.Cardinality;
+import org.apache.xmpbox.type.ComplexPropertyContainer;
+import org.apache.xmpbox.type.PropertiesDescription;
+import org.apache.xmpbox.type.PropertyType;
+import org.apache.xmpbox.type.TypeMapping;
+import org.apache.xmpbox.type.Types;
+import org.apache.xmpbox.xml.XmpParsingException.ErrorType;
+import org.w3c.dom.Attr;
+import org.w3c.dom.Comment;
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.NamedNodeMap;
+import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
+import org.w3c.dom.ProcessingInstruction;
+import org.w3c.dom.Text;
+import org.xml.sax.SAXException;
+
+public class DomXmpParser
+{
+
+    /**
+     * Log instance.
+     */
+    private static final Log LOG = LogFactory.getLog(DomXmpParser.class);
+
+
+    private DocumentBuilder dBuilder;
+
+    private NamespaceFinder nsFinder;
+
+    private boolean strictParsing = true;
+
+    public DomXmpParser() throws XmpParsingException
+    {
+        try
+        {
+            DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
+            dbFactory.setNamespaceAware(true);
+            dBuilder = dbFactory.newDocumentBuilder();
+            nsFinder = new NamespaceFinder();
+        }
+        catch (ParserConfigurationException e)
+        {
+            throw new XmpParsingException(ErrorType.Configuration, "Failed to initilalize", e);
+        }
+
+    }
+
+    public boolean isStrictParsing()
+    {
+        return strictParsing;
+    }
+
+    public void setStrictParsing(boolean strictParsing)
+    {
+        this.strictParsing = strictParsing;
+    }
+
+    public XMPMetadata parse(byte[] xmp) throws XmpParsingException
+    {
+        ByteArrayInputStream input = new ByteArrayInputStream(xmp);
+        return parse(input);
+    }
+
+    public XMPMetadata parse(InputStream input) throws XmpParsingException
+    {
+        Document document = null;
+        try
+        {
+            dBuilder.setErrorHandler(null); // prevents validation messages polluting the console
+            document = dBuilder.parse(input);
+        }
+        catch (SAXException e)
+        {
+            throw new XmpParsingException(ErrorType.Undefined, "Failed to parse", e);
+        }
+        catch (IOException e)
+        {
+            throw new XmpParsingException(ErrorType.Undefined, "Failed to parse", e);
+        }
+        // document.normalizeDocument();
+        XMPMetadata xmp = null;
+
+        // Start reading
+        removeComments(document);
+        Node node = document.getFirstChild();
+
+        // expect xpacket processing instruction
+        if (!(node instanceof ProcessingInstruction))
+        {
+            throw new XmpParsingException(ErrorType.XpacketBadStart, "xmp should start with a processing instruction");
+        }
+        else
+        {
+            xmp = parseInitialXpacket((ProcessingInstruction) node);
+            node = node.getNextSibling();
+        }
+        // forget other processing instruction
+        while (node instanceof ProcessingInstruction)
+        {
+            node = node.getNextSibling();
+        }
+        // expect root element
+        Element root = null;
+        if (!(node instanceof Element))
+        {
+            throw new XmpParsingException(ErrorType.NoRootElement, "xmp should contain a root element");
+        }
+        else
+        {
+            // use this element as root
+            root = (Element) node;
+            node = node.getNextSibling();
+        }
+        // expect xpacket end
+        if (!(node instanceof ProcessingInstruction))
+        {
+            throw new XmpParsingException(ErrorType.XpacketBadEnd, "xmp should end with a processing instruction");
+        }
+        else
+        {
+            parseEndPacket(xmp, (ProcessingInstruction) node);
+            node = node.getNextSibling();
+        }
+        // should be null
+        if (node != null)
+        {
+            throw new XmpParsingException(ErrorType.XpacketBadEnd,
+                    "xmp should end after xpacket end processing instruction");
+        }
+        // xpacket is OK and the is no more nodes
+        // Now, parse the content of root
+        Element rdfRdf = findDescriptionsParent(root);
+        List<Element> descriptions = DomHelper.getElementChildren(rdfRdf);
+        List<Element> dataDescriptions = new ArrayList<Element>(descriptions.size());
+        for (Element description : descriptions)
+        {
+            Element first = DomHelper.getFirstChildElement(description);
+            if (first != null && "pdfaExtension".equals(first.getPrefix()))
+            {
+                PdfaExtensionHelper.validateNaming(xmp, description);
+                parseDescriptionRoot(xmp, description);
+            }
+            else
+            {
+                dataDescriptions.add(description);
+            }
+        }
+        // find schema description
+        PdfaExtensionHelper.populateSchemaMapping(xmp);
+        // parse data description
+        for (Element description : dataDescriptions)
+        {
+            parseDescriptionRoot(xmp, description);
+        }
+
+        return xmp;
+    }
+
+    private void parseDescriptionRoot(XMPMetadata xmp, Element description) throws XmpParsingException
+    {
+        nsFinder.push(description);
+        TypeMapping tm = xmp.getTypeMapping();
+        try
+        {
+            List<Element> properties = DomHelper.getElementChildren(description);
+            // parse attributes as properties
+            NamedNodeMap nnm = description.getAttributes();
+            for (int i = 0; i < nnm.getLength(); i++)
+            {
+                Attr attr = (Attr) nnm.item(i);
+                if (XMLConstants.XMLNS_ATTRIBUTE.equals(attr.getPrefix()))
+                {
+                    // do nothing
+                }
+                else if (XmpConstants.DEFAULT_RDF_PREFIX.equals(attr.getPrefix())
+                        && XmpConstants.ABOUT_NAME.equals(attr.getLocalName()))
+                {
+                    // do nothing
+                }
+                else if (attr.getPrefix() == null && XmpConstants.ABOUT_NAME.equals(attr.getLocalName()))
+                {
+                    // do nothing
+                }
+                else
+                {
+                    String namespace = attr.getNamespaceURI();
+                    XMPSchema schema = xmp.getSchema(namespace);
+                    if (schema == null && tm.getSchemaFactory(namespace) != null)
+                    {
+                        schema = tm.getSchemaFactory(namespace).createXMPSchema(xmp, attr.getPrefix());
+                        loadAttributes(schema, description);
+                    }
+                    // Only process when a schema was successfully found
+                    if( schema != null )
+                    {
+                        ComplexPropertyContainer container = schema.getContainer();
+                        PropertyType type = checkPropertyDefinition(xmp,
+                                new QName(attr.getNamespaceURI(), attr.getLocalName()));
+                        
+                        //Default to text if no type is found
+                        if( type == null) 
+                        {
+                            type = TypeMapping.createPropertyType(Types.Text, Cardinality.Simple);
+                        }
+                                           
+                        try
+                        {
+                            AbstractSimpleProperty sp = tm.instanciateSimpleProperty(namespace, schema.getPrefix(),
+                                    attr.getLocalName(), attr.getValue(), type.type());
+                            container.addProperty(sp);
+                        }
+                        catch( IllegalArgumentException exception)
+                        {
+                            //Swallow, and continue adding additional properties
+                            LOG.warn("Unable to add property: "+ attr.getLocalName() + " value: "+attr.getValue(),exception);
+                        }
+                    }
+                }
+            }
+            // parse children elements as properties
+            for (Element property : properties)
+            {
+                String namespace = property.getNamespaceURI();
+                PropertyType type = checkPropertyDefinition(xmp, DomHelper.getQName(property));
+                // create the container
+                if (!tm.isDefinedSchema(namespace))
+                {
+                    throw new XmpParsingException(ErrorType.NoSchema,
+                            "This namespace is not a schema or a structured type : " + namespace);
+                }
+                XMPSchema schema = xmp.getSchema(namespace);
+                if (schema == null)
+                {
+                    schema = tm.getSchemaFactory(namespace).createXMPSchema(xmp, property.getPrefix());
+                    loadAttributes(schema, description);
+                }
+                ComplexPropertyContainer container = schema.getContainer();
+                // create property
+                createProperty(xmp, property, type, container);
+            }
+        }
+        catch (XmpSchemaException e)
+        {
+            throw new XmpParsingException(ErrorType.Undefined, "Parsing failed", e);
+        }
+        finally
+        {
+            nsFinder.pop();
+        }
+    }
+
+    private void createProperty(XMPMetadata xmp, Element property, PropertyType type, ComplexPropertyContainer container)
+            throws XmpParsingException
+    {
+        String prefix = property.getPrefix();
+        String name = property.getLocalName();
+        String namespace = property.getNamespaceURI();
+        // create property
+        nsFinder.push(property);
+        try
+        {
+            if (type == null)
+            {
+                if (strictParsing)
+                {
+                    throw new XmpParsingException(ErrorType.InvalidType, "No type defined for {" + namespace + "}"
+                            + name);
+                }
+                else
+                {
+                    // use it as string
+                    manageSimpleType(xmp, property, Types.Text, container);
+                }
+            }
+            else if (type.type() == Types.LangAlt)
+            {
+                manageLangAlt(xmp, property, container);
+            }
+            else if (type.card().isArray())
+            {
+                manageArray(xmp, property, type, container);
+            }
+            else if (type.type().isSimple())
+            {
+                manageSimpleType(xmp, property, type.type(), container);
+            }
+            else if (type.type().isStructured())
+            {
+                if (DomHelper.isParseTypeResource(property))
+                {
+                    AbstractStructuredType ast = parseLiDescription(xmp, DomHelper.getQName(property), property);
+                    ast.setPrefix(prefix);
+                    container.addProperty(ast);
+                }
+                else
+                {
+                    Element inner = DomHelper.getFirstChildElement(property);
+                    if (inner != null)
+                    {
+                        nsFinder.push(inner);
+                        AbstractStructuredType ast = parseLiDescription(xmp, DomHelper.getQName(property), inner);
+                        ast.setPrefix(prefix);
+                        container.addProperty(ast);
+                    }
+                }
+            }
+            else if (type.type() == Types.DefinedType)
+            {
+                if (DomHelper.isParseTypeResource(property))
+                {
+                    AbstractStructuredType ast = parseLiDescription(xmp, DomHelper.getQName(property), property);
+                    ast.setPrefix(prefix);
+                    container.addProperty(ast);
+                }
+                else
+                {
+                    Element inner = DomHelper.getFirstChildElement(property);
+                    if (inner == null)
+                    {
+                        throw new XmpParsingException(ErrorType.Format, "property should contain child element : "
+                                + property);
+                    }
+                    AbstractStructuredType ast = parseLiDescription(xmp, DomHelper.getQName(property), inner);
+                    ast.setPrefix(prefix);
+                    container.addProperty(ast);
+                }
+            }
+        }
+        finally
+        {
+            nsFinder.pop();
+        }
+
+    }
+
+    private void manageSimpleType(XMPMetadata xmp, Element property, Types type, ComplexPropertyContainer container)
+            throws XmpParsingException
+    {
+        TypeMapping tm = xmp.getTypeMapping();
+        String prefix = property.getPrefix();
+        String name = property.getLocalName();
+        String namespace = property.getNamespaceURI();
+        AbstractSimpleProperty sp = tm.instanciateSimpleProperty(namespace, prefix, name, property.getTextContent(),
+                type);
+        loadAttributes(sp, property);
+        container.addProperty(sp);
+    }
+
+    private void manageArray(XMPMetadata xmp, Element property, PropertyType type, ComplexPropertyContainer container)
+            throws XmpParsingException
+    {
+        TypeMapping tm = xmp.getTypeMapping();
+        String prefix = property.getPrefix();
+        String name = property.getLocalName();
+        String namespace = property.getNamespaceURI();
+        Element bagOrSeq = DomHelper.getUniqueElementChild(property);
+        // ensure this is the good type of array
+        if (bagOrSeq == null)
+        {
+            // not an array
+            String whatFound = "nothing";
+            if (property.getFirstChild() != null)
+            {
+                whatFound = property.getFirstChild().getClass().getName();
+            }
+            throw new XmpParsingException(ErrorType.Format, "Invalid array definition, expecting " + type.card()
+                    + " and found "
+                    + whatFound
+                    + " [prefix=" + prefix + "; name=" + name + "]");
+        }
+        if (!bagOrSeq.getLocalName().equals(type.card().name()))
+        {
+            // not the good array type
+            throw new XmpParsingException(ErrorType.Format, "Invalid array type, expecting " + type.card()
+                    + " and found " + bagOrSeq.getLocalName() + " [prefix="+prefix+"; name="+name+"]");
+        }
+        ArrayProperty array = tm.createArrayProperty(namespace, prefix, name, type.card());
+        container.addProperty(array);
+        List<Element> lis = DomHelper.getElementChildren(bagOrSeq);
+
+        for (Element element : lis)
+        {
+            QName propertyQName = DomHelper.getQName(property);
+            AbstractField ast = parseLiElement(xmp, propertyQName, element);
+            if (ast != null)
+            {
+                array.addProperty(ast);
+            }
+        }
+    }
+
+    private void manageLangAlt(XMPMetadata xmp, Element property, ComplexPropertyContainer container)
+            throws XmpParsingException
+    {
+        manageArray(xmp, property, TypeMapping.createPropertyType(Types.LangAlt, Cardinality.Alt), container);
+    }
+
+    private void parseDescriptionInner(XMPMetadata xmp, Element description, ComplexPropertyContainer parentContainer)
+            throws XmpParsingException
+    {
+        nsFinder.push(description);
+        TypeMapping tm = xmp.getTypeMapping();
+        try
+        {
+            List<Element> properties = DomHelper.getElementChildren(description);
+            for (Element property : properties)
+            {
+                String name = property.getLocalName();
+                PropertyType dtype = checkPropertyDefinition(xmp, DomHelper.getQName(property));
+                PropertyType ptype = tm.getStructuredPropMapping(dtype.type()).getPropertyType(name);
+                // create property
+                createProperty(xmp, property, ptype, parentContainer);
+            }
+        }
+        finally
+        {
+            nsFinder.pop();
+        }
+    }
+
+    private AbstractField parseLiElement(XMPMetadata xmp, QName descriptor, Element liElement)
+            throws XmpParsingException
+    {
+        if (DomHelper.isParseTypeResource(liElement))
+        {
+            return parseLiDescription(xmp, descriptor, liElement);
+        }
+        // will find rdf:Description
+        Element liChild = DomHelper.getUniqueElementChild(liElement);
+        if (liChild != null)
+        {
+            nsFinder.push(liChild);
+            return parseLiDescription(xmp, descriptor, liChild);
+        }
+        else
+        {
+            // no child, so consider as simple text
+            String text = liElement.getTextContent();
+            TypeMapping tm = xmp.getTypeMapping();
+            AbstractSimpleProperty sp = tm.instanciateSimpleProperty(descriptor.getNamespaceURI(),
+                    descriptor.getPrefix(), descriptor.getLocalPart(), text, Types.Text);
+            loadAttributes(sp, liElement);
+            return sp;
+        }
+    }
+
+    private void loadAttributes(AbstractField sp, Element element)
+    {
+        NamedNodeMap nnm = element.getAttributes();
+        for (int i = 0; i < nnm.getLength(); i++)
+        {
+            Attr attr = (Attr) nnm.item(i);
+            if (XMLConstants.XMLNS_ATTRIBUTE.equals(attr.getPrefix()))
+            {
+                // do nothing
+            }
+            else if (XmpConstants.DEFAULT_RDF_PREFIX.equals(attr.getPrefix())
+                    && XmpConstants.ABOUT_NAME.equals(attr.getLocalName()))
+            {
+                // set about
+                if (sp instanceof XMPSchema)
+                {
+                    ((XMPSchema) sp).setAboutAsSimple(attr.getValue());
+                }
+            }
+            else
+            {
+                Attribute attribute = new Attribute(XMLConstants.XML_NS_URI, attr.getLocalName(), attr.getValue());
+                sp.setAttribute(attribute);
+            }
+        }
+    }
+
+    private AbstractStructuredType parseLiDescription(XMPMetadata xmp, QName descriptor, Element liElement)
+            throws XmpParsingException
+    {
+        TypeMapping tm = xmp.getTypeMapping();
+        List<Element> elements = DomHelper.getElementChildren(liElement);
+        if (elements.isEmpty())
+        {
+            // The list is empty
+            return null;
+        }
+        // Instantiate abstract structured type with hint from first element
+        Element first = elements.get(0);
+        PropertyType ctype = checkPropertyDefinition(xmp, DomHelper.getQName(first));
+        Types tt = ctype.type();
+        AbstractStructuredType ast = instanciateStructured(tm, tt, descriptor.getLocalPart(), first.getNamespaceURI());
+
+        ast.setNamespace(descriptor.getNamespaceURI());
+        ast.setPrefix(descriptor.getPrefix());
+
+        PropertiesDescription pm;
+        if (tt.isStructured())
+        {
+            pm = tm.getStructuredPropMapping(tt);
+        }
+        else
+        {
+            pm = tm.getDefinedDescriptionByNamespace(first.getNamespaceURI());
+        }
+        for (Element element : elements)
+        {
+            String prefix = element.getPrefix();
+            String name = element.getLocalName();
+            String namespace = element.getNamespaceURI();
+            PropertyType type = pm.getPropertyType(name);
+            if (type == null)
+            {
+                // not defined
+                throw new XmpParsingException(ErrorType.NoType, "Type '" + name + "' not defined in "
+                        + element.getNamespaceURI());
+            }
+            else if (type.card().isArray())
+            {
+                ArrayProperty array = tm.createArrayProperty(namespace, prefix, name, type.card());
+                ast.getContainer().addProperty(array);
+                Element bagOrSeq = DomHelper.getUniqueElementChild(element);
+                List<Element> lis = DomHelper.getElementChildren(bagOrSeq);
+                for (Element element2 : lis)
+                {
+                    AbstractField ast2 = parseLiElement(xmp, descriptor, element2);
+                    if (ast2 != null)
+                    {
+                        array.addProperty(ast2);
+                    }
+                }
+            }
+            else if (type.type().isSimple())
+            {
+                AbstractSimpleProperty sp = tm.instanciateSimpleProperty(namespace, prefix, name,
+                        element.getTextContent(), type.type());
+                loadAttributes(sp, element);
+                ast.getContainer().addProperty(sp);
+            }
+            else if (type.type().isStructured())
+            {
+                // create a new structured type
+                AbstractStructuredType inner = instanciateStructured(tm, type.type(), name, null);
+                inner.setNamespace(namespace);
+                inner.setPrefix(prefix);
+                ast.getContainer().addProperty(inner);
+                ComplexPropertyContainer cpc = inner.getContainer();
+                if (DomHelper.isParseTypeResource(element))
+                {
+                    parseDescriptionInner(xmp, element, cpc);
+                }
+                else
+                {
+                    Element descElement = DomHelper.getFirstChildElement(element);
+                    if (descElement != null)
+                    {
+                        parseDescriptionInner(xmp, descElement, cpc);
+                    }
+                }
+            }
+            else
+            {
+                throw new XmpParsingException(ErrorType.NoType, "Unidentified element to parse " + element + " (type="
+                        + type + ")");
+            }
+
+        }
+        return ast;
+    }
+
+    private XMPMetadata parseInitialXpacket(ProcessingInstruction pi) throws XmpParsingException
+    {
+        if (!"xpacket".equals(pi.getNodeName()))
+        {
+            throw new XmpParsingException(ErrorType.XpacketBadStart, "Bad processing instruction name : "
+                    + pi.getNodeName());
+        }
+        String data = pi.getData();
+        StringTokenizer tokens = new StringTokenizer(data, " ");
+        String id = null;
+        String begin = null;
+        String bytes = null;
+        String encoding = null;
+        while (tokens.hasMoreTokens())
+        {
+            String token = tokens.nextToken();
+            if (!token.endsWith("\"") && !token.endsWith("\'"))
+            {
+                throw new XmpParsingException(ErrorType.XpacketBadStart, "Cannot understand PI data part : '" + token
+                        + "'");
+            }
+            String quote = token.substring(token.length() - 1);
+            int pos = token.indexOf("=" + quote);
+            if (pos <= 0)
+            {
+                throw new XmpParsingException(ErrorType.XpacketBadStart, "Cannot understand PI data part : '" + token
+                        + "'");
+            }
+            String name = token.substring(0, pos);
+            String value = token.substring(pos + 2, token.length() - 1);
+            if ("id".equals(name))
+            {
+                id = value;
+            }
+            else if ("begin".equals(name))
+            {
+                begin = value;
+            }
+            else if ("bytes".equals(name))
+            {
+                bytes = value;
+            }
+            else if ("encoding".equals(name))
+            {
+                encoding = value;
+            }
+            else
+            {
+                throw new XmpParsingException(ErrorType.XpacketBadStart, "Unknown attribute in xpacket PI : '" + token
+                        + "'");
+            }
+        }
+        return XMPMetadata.createXMPMetadata(begin, id, bytes, encoding);
+    }
+
+    private void parseEndPacket(XMPMetadata metadata, ProcessingInstruction pi) throws XmpParsingException
+    {
+        String xpackData = pi.getData();
+        // end attribute must be present and placed in first
+        // xmp spec says Other unrecognized attributes can follow, but
+        // should be ignored
+        if (xpackData.startsWith("end="))
+        {
+            char end = xpackData.charAt(5);
+            // check value (5 for end='X')
+            if (end != 'r' && end != 'w')
+            {
+                throw new XmpParsingException(ErrorType.XpacketBadEnd,
+                        "Excepted xpacket 'end' attribute with value 'r' or 'w' ");
+            }
+            else
+            {
+                metadata.setEndXPacket(Character.toString(end));
+            }
+        }
+        else
+        {
+            // should find end='r/w'
+            throw new XmpParsingException(ErrorType.XpacketBadEnd,
+                    "Excepted xpacket 'end' attribute (must be present and placed in first)");
+        }
+    }
+
+    private Element findDescriptionsParent(Element root) throws XmpParsingException
+    {
+        // always <x:xmpmeta xmlns:x="adobe:ns:meta/">
+        expectNaming(root, "adobe:ns:meta/", "x", "xmpmeta");
+        // should only have one child
+        NodeList nl = root.getChildNodes();
+        if (nl.getLength() == 0)
+        {
+            // empty description
+            throw new XmpParsingException(ErrorType.Format, "No rdf description found in xmp");
+        }
+        else if (nl.getLength() > 1)
+        {
+            // only expect one element
+            throw new XmpParsingException(ErrorType.Format, "More than one element found in x:xmpmeta");
+        }
+        else if (!(root.getFirstChild() instanceof Element))
+        {
+            // should be an element
+            throw new XmpParsingException(ErrorType.Format, "x:xmpmeta does not contains rdf:RDF element");
+        } // else let's parse
+        Element rdfRdf = (Element) root.getFirstChild();
+        // always <rdf:RDF
+        // xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
+        expectNaming(rdfRdf, XmpConstants.RDF_NAMESPACE, XmpConstants.DEFAULT_RDF_PREFIX,
+                XmpConstants.DEFAULT_RDF_LOCAL_NAME);
+        // return description parent
+        return rdfRdf;
+    }
+
+    private void expectNaming(Element element, String ns, String prefix, String ln) throws XmpParsingException
+    {
+        if ((ns != null) && !(ns.equals(element.getNamespaceURI())))
+        {
+            throw new XmpParsingException(ErrorType.Format, "Expecting namespace '" + ns + "' and found '"
+                    + element.getNamespaceURI() + "'");
+        }
+        else if ((prefix != null) && !(prefix.equals(element.getPrefix())))
+        {
+            throw new XmpParsingException(ErrorType.Format, "Expecting prefix '" + prefix + "' and found '"
+                    + element.getPrefix() + "'");
+        }
+        else if ((ln != null) && !(ln.equals(element.getLocalName())))
+        {
+            throw new XmpParsingException(ErrorType.Format, "Expecting local name '" + ln + "' and found '"
+                    + element.getLocalName() + "'");
+        } // else OK
+    }
+
+    /**
+     * Remove all the comments node in the parent element of the parameter
+     * 
+     * @param root
+     *            the first node of an element or document to clear
+     */
+    private void removeComments(Node root)
+    {
+        if (root.getChildNodes().getLength()<=1) {
+            // There is only one node so we do not remove it
+            return;
+        }
+        NodeList nl = root.getChildNodes();
+        for (int i=0; i < nl.getLength() ; i ++) {
+            Node node = nl.item(i);
+            if (node instanceof Comment)
+            {
+                // remove the comment
+                root.removeChild(node);
+            }
+            else if (node instanceof Text)
+            {
+                if (node.getTextContent().trim().length() == 0)
+                {
+                        root.removeChild(node);
+                }
+            }
+            else if (node instanceof Element)
+            {
+                // clean child
+                removeComments(node);
+            } // else do nothing
+        }
+    }
+
+    private AbstractStructuredType instanciateStructured(TypeMapping tm, Types type, String name,
+            String structuredNamespace) throws XmpParsingException
+    {
+        try
+        {
+            if (type.isStructured())
+            {
+                return tm.instanciateStructuredType(type, name);
+            }
+            else if (type.isDefined())
+            {
+                return tm.instanciateDefinedType(name, structuredNamespace);
+            }
+            else
+            {
+                throw new XmpParsingException(ErrorType.InvalidType, "Type not structured : " + type);
+            }
+        }
+        catch (BadFieldValueException e)
+        {
+            throw new XmpParsingException(ErrorType.InvalidType, "Parsing failed", e);
+        }
+    }
+
+    private PropertyType checkPropertyDefinition(XMPMetadata xmp, QName prop) throws XmpParsingException
+    {
+        TypeMapping tm = xmp.getTypeMapping();
+        // test if namespace is set in xml
+        if (!nsFinder.containsNamespace(prop.getNamespaceURI()))
+        {
+            throw new XmpParsingException(ErrorType.NoSchema, "Schema is not set in this document : "
+                    + prop.getNamespaceURI());
+        }
+        // test if namespace is defined
+        String nsuri = prop.getNamespaceURI();
+        if (!tm.isDefinedNamespace(nsuri))
+        {
+            throw new XmpParsingException(ErrorType.NoSchema, "Cannot find a definition for the namespace "
+                    + prop.getNamespaceURI());
+        }
+        try
+        {
+            return tm.getSpecifiedPropertyType(prop);
+        }
+        catch (BadFieldValueException e)
+        {
+            throw new XmpParsingException(ErrorType.InvalidType, "Failed to retreive property definition", e);
+        }
+    }
+
+    protected class NamespaceFinder
+    {
+
+        private Stack<Map<String, String>> stack = new Stack<Map<String, String>>();
+
+        protected void push(Element description)
+        {
+            NamedNodeMap nnm = description.getAttributes();
+            Map<String, String> map = new HashMap<String, String>(nnm.getLength());
+            for (int j = 0; j < nnm.getLength(); j++)
+            {
+                Attr no = (Attr) nnm.item(j);
+                // if ns definition add it
+                if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(no.getNamespaceURI()))
+                {
+                    map.put(no.getLocalName(), no.getValue());
+                }
+            }
+            stack.push(map);
+        }
+
+        protected Map<String, String> pop()
+        {
+            return stack.pop();
+        }
+
+        protected boolean containsNamespace(String namespace)
+        {
+            for (int i = stack.size() - 1; i >= 0; i--)
+            {
+                Map<String, String> map = stack.get(i);
+                if (map.containsValue(namespace))
+                {
+                    return true;
+                }
+            }
+            // else namespace not found
+            return false;
+        }
+
+    }
+
+}



Mime
View raw message