Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id DC73E200BBB for ; Thu, 10 Nov 2016 15:16:26 +0100 (CET) Received: by cust-asf.ponee.io (Postfix) id DAD6F160B01; Thu, 10 Nov 2016 14:16:26 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 2D983160AF6 for ; Thu, 10 Nov 2016 15:16:26 +0100 (CET) Received: (qmail 53417 invoked by uid 500); 10 Nov 2016 14:16:25 -0000 Mailing-List: contact commits-help@cxf.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@cxf.apache.org Delivered-To: mailing list commits@cxf.apache.org Received: (qmail 53408 invoked by uid 99); 10 Nov 2016 14:16:25 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 10 Nov 2016 14:16:25 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id EC28AE00E5; Thu, 10 Nov 2016 14:16:24 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: sergeyb@apache.org To: commits@cxf.apache.org Message-Id: <3ac899219ced4f4699c9382ef9e0a202@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: cxf git commit: Adding ContentHandler back to TikaContent Date: Thu, 10 Nov 2016 14:16:24 +0000 (UTC) archived-at: Thu, 10 Nov 2016 14:16:27 -0000 Repository: cxf Updated Branches: refs/heads/master 20ec9b68d -> d94cb4384 Adding ContentHandler back to TikaContent Project: http://git-wip-us.apache.org/repos/asf/cxf/repo Commit: http://git-wip-us.apache.org/repos/asf/cxf/commit/d94cb438 Tree: http://git-wip-us.apache.org/repos/asf/cxf/tree/d94cb438 Diff: http://git-wip-us.apache.org/repos/asf/cxf/diff/d94cb438 Branch: refs/heads/master Commit: d94cb4384654108428de8ff8c4a7cbd17c772936 Parents: 20ec9b6 Author: Sergey Beryozkin Authored: Thu Nov 10 14:15:57 2016 +0000 Committer: Sergey Beryozkin Committed: Thu Nov 10 14:15:57 2016 +0000 ---------------------------------------------------------------------- .../jaxrs/ext/search/tika/TikaContentExtractor.java | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/cxf/blob/d94cb438/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/tika/TikaContentExtractor.java ---------------------------------------------------------------------- diff --git a/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/tika/TikaContentExtractor.java b/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/tika/TikaContentExtractor.java index e4d1918..d69da2d 100644 --- a/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/tika/TikaContentExtractor.java +++ b/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/tika/TikaContentExtractor.java @@ -162,7 +162,7 @@ public class TikaContentExtractor { * @return the extracted content and metadata or null if extraction is not possible * or was unsuccessful */ - public TikaContent extract(final InputStream in, final ContentHandler handler, + public TikaContent extract(final InputStream in, ContentHandler handler, javax.ws.rs.core.MediaType mtHint, ParseContext context) { if (in == null) { return null; @@ -215,12 +215,13 @@ public class TikaContentExtractor { // extraction process. If we get an exception with a null handler then a given parser is still // not ready to accept null handlers so lets retry with IgnoreContentHandler. if (handler == null) { - parser.parse(in, new IgnoreContentHandler(), metadata, context); + handler = new IgnoreContentHandler(); + parser.parse(in, handler, metadata, context); } else { throw ex; } } - return new TikaContent(handler == null ? null : handler.toString(), metadata, mediaType); + return new TikaContent(handler, metadata, mediaType); } catch (final IOException ex) { LOG.log(Level.WARNING, "Unable to extract media type from input stream", ex); } catch (final SAXException ex) { @@ -269,10 +270,10 @@ public class TikaContentExtractor { */ public static class TikaContent implements Serializable { private static final long serialVersionUID = -1240120543378490963L; - private String content; + private ContentHandler content; private Metadata metadata; private MediaType mediaType; - public TikaContent(String content, Metadata metadata, MediaType mediaType) { + public TikaContent(ContentHandler content, Metadata metadata, MediaType mediaType) { this.content = content; this.metadata = metadata; this.mediaType = mediaType; @@ -283,7 +284,7 @@ public class TikaContentExtractor { * to parse the content */ public String getContent() { - return content; + return content instanceof ToTextContentHandler ? content.toString() : null; } /** * Return the metadata