jackrabbit-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ju...@apache.org
Subject svn commit: r664549 - /jackrabbit/sandbox/jackrabbit-tika/src/main/java/org/apache/jackrabbit/tika/TikaTextExctractor.java
Date Sun, 08 Jun 2008 20:23:31 GMT
Author: jukka
Date: Sun Jun  8 13:23:31 2008
New Revision: 664549

URL: http://svn.apache.org/viewvc?rev=664549&view=rev
Log:
jackrabbit-tika: Use the recent ParsingReader from Tika trunk

Modified:
    jackrabbit/sandbox/jackrabbit-tika/src/main/java/org/apache/jackrabbit/tika/TikaTextExctractor.java

Modified: jackrabbit/sandbox/jackrabbit-tika/src/main/java/org/apache/jackrabbit/tika/TikaTextExctractor.java
URL: http://svn.apache.org/viewvc/jackrabbit/sandbox/jackrabbit-tika/src/main/java/org/apache/jackrabbit/tika/TikaTextExctractor.java?rev=664549&r1=664548&r2=664549&view=diff
==============================================================================
--- jackrabbit/sandbox/jackrabbit-tika/src/main/java/org/apache/jackrabbit/tika/TikaTextExctractor.java	(original)
+++ jackrabbit/sandbox/jackrabbit-tika/src/main/java/org/apache/jackrabbit/tika/TikaTextExctractor.java	Sun Jun  8 13:23:31 2008
@@ -19,16 +19,12 @@
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.Reader;
-import java.io.StringReader;
 import java.util.Set;
 
 import org.apache.jackrabbit.extractor.TextExtractor;
-import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.parser.AutoDetectParser;
-import org.apache.tika.sax.BodyContentHandler;
-import org.xml.sax.ContentHandler;
-import org.xml.sax.SAXException;
+import org.apache.tika.parser.ParsingReader;
 
 public class TikaTextExctractor implements TextExtractor {
 
@@ -41,20 +37,11 @@
 
     public Reader extractText(InputStream stream, String type, String encoding)
             throws IOException {
-        try {
-            ContentHandler handler = new BodyContentHandler();
-            Metadata metadata = new Metadata();
-            if (type != null && type.trim().length() > 0) {
-                metadata.set(Metadata.CONTENT_TYPE, type.trim());
-            }
-            parser.parse(stream, handler, metadata);
-            return new StringReader(handler.toString());
-        } catch (SAXException e) {
-            // Should never happen
-            return new StringReader("");
-        } catch (TikaException e) {
-            return new StringReader("");
+        Metadata metadata = new Metadata();
+        if (type != null && type.trim().length() > 0) {
+            metadata.set(Metadata.CONTENT_TYPE, type.trim());
         }
+        return new ParsingReader(parser, stream, metadata);
     }
 
 }



Mime
View raw message