tika-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ju...@apache.org
Subject svn commit: r638656 - in /incubator/tika/trunk: CHANGES.txt src/main/java/org/apache/tika/sax/XHTMLContentHandler.java
Date Wed, 19 Mar 2008 00:17:25 GMT
Author: jukka
Date: Tue Mar 18 17:17:25 2008
New Revision: 638656

URL: http://svn.apache.org/viewvc?rev=638656&view=rev
Log:
TIKA-131: Lazy XHTML prefix generation

Modified:
    incubator/tika/trunk/CHANGES.txt
    incubator/tika/trunk/src/main/java/org/apache/tika/sax/XHTMLContentHandler.java

Modified: incubator/tika/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/CHANGES.txt?rev=638656&r1=638655&r2=638656&view=diff
==============================================================================
--- incubator/tika/trunk/CHANGES.txt (original)
+++ incubator/tika/trunk/CHANGES.txt Tue Mar 18 17:17:25 2008
@@ -29,6 +29,9 @@
 12. TIKA-130 - self-or-descendant axis does not match self in streaming XPath
                (Jukka Zitting)
 
+13. TIKA-131 - Lazy XHTML prefix generation (Jukka Zitting)
+
+
 Release 0.1-incubating - 12/27/2007
 
 1. TIKA-5 - Port Metadata Framework from Nutch (mattmann)

Modified: incubator/tika/trunk/src/main/java/org/apache/tika/sax/XHTMLContentHandler.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/sax/XHTMLContentHandler.java?rev=638656&r1=638655&r2=638656&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/sax/XHTMLContentHandler.java (original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/sax/XHTMLContentHandler.java Tue Mar 18 17:17:25 2008
@@ -17,6 +17,7 @@
 package org.apache.tika.sax;
 
 import org.apache.tika.metadata.Metadata;
+import org.xml.sax.Attributes;
 import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
 import org.xml.sax.helpers.AttributesImpl;
@@ -38,14 +39,29 @@
      */
     private final Metadata metadata;
 
+    /**
+     * Flag to indicate whether the document element has been started.
+     */
+    private boolean started = false;
+
     public XHTMLContentHandler(ContentHandler handler, Metadata metadata) {
         super(handler);
         this.metadata = metadata;
     }
 
     /**
-     * Starts an XHTML document by setting up the namespace mappings and
-     * writing following header:
+     * Starts an XHTML document by setting up the namespace mappings.
+     * The standard XHTML prefix is generated lazily when the first
+     * element is started.
+     */
+    @Override
+    public void startDocument() throws SAXException {
+        super.startDocument();
+        startPrefixMapping("", XHTML);
+    }
+
+    /**
+     * Generates the following XHTML prefix when called for the first time:
      * <pre>
      * &lt;html&gt;
      *   &lt;head&gt;
@@ -54,19 +70,20 @@
      *   &lt;body&gt;
      * </pre>
      */
-    public void startDocument() throws SAXException {
-        super.startDocument();
-        startPrefixMapping("", XHTML);
-        startElement("html");
-        startElement("head");
-        startElement("title");
-        String title = metadata.get(Metadata.TITLE);
-        if (title != null && title.length() > 0) {
-            characters(title);
+    private void lazyStartDocument() throws SAXException {
+        if (!started) {
+            started = true;
+            startElement("html");
+            startElement("head");
+            startElement("title");
+            String title = metadata.get(Metadata.TITLE);
+            if (title != null && title.length() > 0) {
+                characters(title);
+            }
+            endElement("title");
+            endElement("head");
+            startElement("body");
         }
-        endElement("title");
-        endElement("head");
-        startElement("body");
     }
 
     /**
@@ -77,11 +94,21 @@
      * &lt;/html&gt;
      * </pre>
      */
+    @Override
     public void endDocument() throws SAXException {
+        lazyStartDocument();
         endElement("body");
         endElement("html");
         endPrefixMapping("");
         super.endDocument();
+    }
+
+    @Override
+    public void startElement(
+            String uri, String local, String name, Attributes attributes)
+            throws SAXException {
+        lazyStartDocument();
+        super.startElement(uri, local, name, attributes);
     }
 
     public void startElement(String name) throws SAXException {



Mime
View raw message