tika-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ju...@apache.org
Subject svn commit: r1211027 - /tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageExtractor.java
Date Tue, 06 Dec 2011 17:06:33 GMT
Author: jukka
Date: Tue Dec  6 17:06:32 2011
New Revision: 1211027

URL: http://svn.apache.org/viewvc?rev=1211027&view=rev
Log:
TIKA-800: mark/reset not supported from POIFSContainerDetector

Since we don't close() the TikaInputStream, we need to explicitly manage any temporary resources
associated with it.

Modified:
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageExtractor.java

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageExtractor.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageExtractor.java?rev=1211027&r1=1211026&r2=1211027&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageExtractor.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageExtractor.java Tue Dec  6 17:06:32 2011
@@ -33,6 +33,7 @@ import org.apache.tika.exception.TikaExc
 import org.apache.tika.extractor.EmbeddedDocumentExtractor;
 import org.apache.tika.extractor.ParsingEmbeddedDocumentExtractor;
 import org.apache.tika.io.CloseShieldInputStream;
+import org.apache.tika.io.TemporaryResources;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.parser.ParseContext;
@@ -150,9 +151,10 @@ class PackageExtractor {
      * @param xhtml content handler
      * @throws IOException if an IO error occurs
      * @throws SAXException if a SAX error occurs
+     * @throws TikaException if another error occurs
      */
     public void unpack(ArchiveInputStream archive, XHTMLContentHandler xhtml)
-            throws IOException, SAXException {
+            throws IOException, SAXException, TikaException {
         try {
             ArchiveEntry entry = archive.getNextEntry();
             while (entry != null) {
@@ -167,8 +169,13 @@ class PackageExtractor {
                         if (extractor.shouldParseEmbedded(entrydata)) {
                             // For detectors to work, we need a mark/reset supporting
                             //  InputStream, which ArchiveInputStream isn't, so wrap
-                            TikaInputStream stream = TikaInputStream.get(archive);
-                            extractor.parseEmbedded(stream, xhtml, entrydata, true);
+                            TemporaryResources tmp = new TemporaryResources();
+                            try {
+                                TikaInputStream stream = TikaInputStream.get(archive, tmp);
+                                extractor.parseEmbedded(stream, xhtml, entrydata, true);
+                            } finally {
+                                tmp.dispose();
+                            }
                         }
                     } else if (name != null && name.length() > 0) {
                         xhtml.element("p", name);



Mime
View raw message