cxf-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From serg...@apache.org
Subject cxf git commit: Updating TikaContentExtractor to avoid auto-detecting for single Parser by default, making TikaContent serializable
Date Thu, 08 Sep 2016 12:26:03 GMT
Repository: cxf
Updated Branches:
  refs/heads/master e9a8fb39b -> d68d8d87e


Updating TikaContentExtractor to avoid auto-detecting for single Parser by default, making TikaContent serializable


Project: http://git-wip-us.apache.org/repos/asf/cxf/repo
Commit: http://git-wip-us.apache.org/repos/asf/cxf/commit/d68d8d87
Tree: http://git-wip-us.apache.org/repos/asf/cxf/tree/d68d8d87
Diff: http://git-wip-us.apache.org/repos/asf/cxf/diff/d68d8d87

Branch: refs/heads/master
Commit: d68d8d87e6ca3c82456ce28c0275be4d693b8932
Parents: e9a8fb3
Author: Sergey Beryozkin <sberyozkin@gmail.com>
Authored: Thu Sep 8 13:25:42 2016 +0100
Committer: Sergey Beryozkin <sberyozkin@gmail.com>
Committed: Thu Sep 8 13:25:42 2016 +0100

----------------------------------------------------------------------
 .../ext/search/tika/TikaContentExtractor.java    | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cxf/blob/d68d8d87/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/tika/TikaContentExtractor.java
----------------------------------------------------------------------
diff --git a/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/tika/TikaContentExtractor.java b/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/tika/TikaContentExtractor.java
index b46d7ce..1d2d30a 100644
--- a/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/tika/TikaContentExtractor.java
+++ b/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/tika/TikaContentExtractor.java
@@ -20,6 +20,7 @@ package org.apache.cxf.jaxrs.ext.search.tika;
 
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.Serializable;
 import java.util.Collections;
 import java.util.List;
 import java.util.logging.Level;
@@ -50,7 +51,7 @@ public class TikaContentExtractor {
      * @param parser parser instance
      */
     public TikaContentExtractor(final Parser parser) {
-        this(parser, true);
+        this(parser, false);
     }
     
     /**
@@ -157,7 +158,7 @@ public class TikaContentExtractor {
             MediaType mediaType = null;
             Parser parser = null;
             for (Parser p : parsers) {
-                if (detector != null) {
+                if (detector != null && in.markSupported()) {
                     mediaType = detector.detect(in, metadata);
                     if (mediaType != null && p.getSupportedTypes(context).contains(mediaType)) {
                         parser = p;
@@ -165,6 +166,7 @@ public class TikaContentExtractor {
                     }
                 } else {
                     parser = p;
+                    break;
                 }
             }
             if (parser == null) {
@@ -186,7 +188,7 @@ public class TikaContentExtractor {
                     throw ex;
                 }
             }
-            return new TikaContent(handler, metadata, mediaType);
+            return new TikaContent(handler == null ? null : handler.toString(), metadata, mediaType);
         } catch (final IOException ex) {
             LOG.log(Level.WARNING, "Unable to extract media type from input stream", ex);
         } catch (final SAXException ex) {
@@ -206,12 +208,13 @@ public class TikaContentExtractor {
     /**
      * Extracted content, metadata and media type container
      */
-    public static class TikaContent {
-        private ContentHandler contentHandler;
+    public static class TikaContent implements Serializable {
+        private static final long serialVersionUID = -1240120543378490963L;
+        private String content;
         private Metadata metadata;
         private MediaType mediaType;
-        public TikaContent(ContentHandler contentHandler, Metadata metadata, MediaType mediaType) {
-            this.contentHandler = contentHandler;
+        public TikaContent(String content, Metadata metadata, MediaType mediaType) {
+            this.content = content;
             this.metadata = metadata;
             this.mediaType = mediaType;
         }
@@ -221,7 +224,7 @@ public class TikaContentExtractor {
          *         to parse the content  
          */
         public String getContent() {
-            return contentHandler == null ? null : contentHandler.toString();
+            return content;
         }
         /**
          * Return the metadata


Mime
View raw message