cxf-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From serg...@apache.org
Subject cxf git commit: Adding ContentHandler back to TikaContent
Date Thu, 10 Nov 2016 14:18:00 GMT
Repository: cxf
Updated Branches:
  refs/heads/3.1.x-fixes 1ac741ef5 -> fbe8db21d


Adding ContentHandler back to TikaContent


Project: http://git-wip-us.apache.org/repos/asf/cxf/repo
Commit: http://git-wip-us.apache.org/repos/asf/cxf/commit/fbe8db21
Tree: http://git-wip-us.apache.org/repos/asf/cxf/tree/fbe8db21
Diff: http://git-wip-us.apache.org/repos/asf/cxf/diff/fbe8db21

Branch: refs/heads/3.1.x-fixes
Commit: fbe8db21d300177b37d5f9b0aa3a4e1c99bde857
Parents: 1ac741e
Author: Sergey Beryozkin <sberyozkin@gmail.com>
Authored: Thu Nov 10 14:15:57 2016 +0000
Committer: Sergey Beryozkin <sberyozkin@gmail.com>
Committed: Thu Nov 10 14:17:44 2016 +0000

----------------------------------------------------------------------
 .../jaxrs/ext/search/tika/TikaContentExtractor.java    | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cxf/blob/fbe8db21/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/tika/TikaContentExtractor.java
----------------------------------------------------------------------
diff --git a/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/tika/TikaContentExtractor.java b/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/tika/TikaContentExtractor.java
index e4d1918..d69da2d 100644
--- a/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/tika/TikaContentExtractor.java
+++ b/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/tika/TikaContentExtractor.java
@@ -162,7 +162,7 @@ public class TikaContentExtractor {
      * @return the extracted content and metadata or null if extraction is not possible 
      *         or was unsuccessful
      */
-    public TikaContent extract(final InputStream in, final ContentHandler handler, 
+    public TikaContent extract(final InputStream in, ContentHandler handler, 
                                javax.ws.rs.core.MediaType mtHint, ParseContext context) {
   
         if (in == null) {
             return null;
@@ -215,12 +215,13 @@ public class TikaContentExtractor {
                 // extraction process. If we get an exception with a null handler then a given parser is still 
                 // not ready to accept null handlers so lets retry with IgnoreContentHandler.
                 if (handler == null) {
-                    parser.parse(in, new IgnoreContentHandler(), metadata, context);
+                    handler = new IgnoreContentHandler();
+                    parser.parse(in, handler, metadata, context);
                 } else {
                     throw ex;
                 }
             }
-            return new TikaContent(handler == null ? null : handler.toString(), metadata, mediaType);
+            return new TikaContent(handler, metadata, mediaType);
         } catch (final IOException ex) {
             LOG.log(Level.WARNING, "Unable to extract media type from input stream", ex);
         } catch (final SAXException ex) {
@@ -269,10 +270,10 @@ public class TikaContentExtractor {
      */
     public static class TikaContent implements Serializable {
         private static final long serialVersionUID = -1240120543378490963L;
-        private String content;
+        private ContentHandler content;
         private Metadata metadata;
         private MediaType mediaType;
-        public TikaContent(String content, Metadata metadata, MediaType mediaType) {
+        public TikaContent(ContentHandler content, Metadata metadata, MediaType mediaType) {
             this.content = content;
             this.metadata = metadata;
             this.mediaType = mediaType;
@@ -283,7 +284,7 @@ public class TikaContentExtractor {
          *         to parse the content  
          */
         public String getContent() {
-            return content;
+            return content instanceof ToTextContentHandler ? content.toString() : null;
         }
         /**
          * Return the metadata


Mime
View raw message