tika-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From n...@apache.org
Subject svn commit: r1210736 - /tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageExtractor.java
Date Tue, 06 Dec 2011 01:13:23 GMT
Author: nick
Date: Tue Dec  6 01:13:23 2011
New Revision: 1210736

URL: http://svn.apache.org/viewvc?rev=1210736&view=rev
Log:
TIKA-800 Wrap the ArchiveInputStream in PackageExtractor so that it can be used with Detectors

Modified:
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageExtractor.java

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageExtractor.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageExtractor.java?rev=1210736&r1=1210735&r2=1210736&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageExtractor.java	(original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageExtractor.java	Tue Dec  6 01:13:23 2011
@@ -33,6 +33,7 @@ import org.apache.tika.exception.TikaExc
 import org.apache.tika.extractor.EmbeddedDocumentExtractor;
 import org.apache.tika.extractor.ParsingEmbeddedDocumentExtractor;
 import org.apache.tika.io.CloseShieldInputStream;
+import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.sax.XHTMLContentHandler;
@@ -164,7 +165,10 @@ class PackageExtractor {
                             entrydata.set(Metadata.RESOURCE_NAME_KEY, name);
                         }
                         if (extractor.shouldParseEmbedded(entrydata)) {
-                            extractor.parseEmbedded(archive, xhtml, entrydata, true);
+                            // For detectors to work, we need a mark/reset supporting
+                            //  InputStream, which ArchiveInputStream isn't, so wrap
+                            TikaInputStream stream = TikaInputStream.get(archive);
+                            extractor.parseEmbedded(stream, xhtml, entrydata, true);
                         }
                     } else if (name != null && name.length() > 0) {
                         xhtml.element("p", name);



Mime
View raw message