camel-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From davscl...@apache.org
Subject [2/6] camel git commit: CAMEL-10740 - Initial work for camel-tika component.
Date Sun, 29 Jan 2017 16:54:32 GMT
CAMEL-10740 - Initial work for camel-tika component.

Project: http://git-wip-us.apache.org/repos/asf/camel/repo
Commit: http://git-wip-us.apache.org/repos/asf/camel/commit/17c83bab
Tree: http://git-wip-us.apache.org/repos/asf/camel/tree/17c83bab
Diff: http://git-wip-us.apache.org/repos/asf/camel/diff/17c83bab

Branch: refs/heads/master
Commit: 17c83bab629b8c949919c8e1bbff6ba81c34fbba
Parents: 9c44f6a
Author: Bob Paulin <bob@bobpaulin.com>
Authored: Tue Jan 24 17:42:10 2017 -0600
Committer: Claus Ibsen <davsclaus@apache.org>
Committed: Sun Jan 29 17:06:27 2017 +0100

----------------------------------------------------------------------
 apache-camel/pom.xml                            |   4 +
 .../src/main/descriptors/common-bin.xml         |   1 +
 components/camel-tika/pom.xml                   |  94 +++++++++
 .../src/main/docs/tika-component.adoc           |  96 +++++++++
 .../camel/component/tika/TikaComponent.java     |  45 ++++
 .../camel/component/tika/TikaConfiguration.java | 100 +++++++++
 .../camel/component/tika/TikaEndpoint.java      |  61 ++++++
 .../camel/component/tika/TikaOperation.java     |  21 ++
 .../component/tika/TikaParseOutputFormat.java   |  21 ++
 .../camel/component/tika/TikaProducer.java      | 168 +++++++++++++++
 .../src/main/resources/META-INF/LICENSE.txt     | 203 +++++++++++++++++++
 .../src/main/resources/META-INF/NOTICE.txt      |  11 +
 .../services/org/apache/camel/component/tika    |  17 ++
 .../camel/component/tika/TikaDetectTest.java    |  85 ++++++++
 .../camel/component/tika/TikaEmptyConfig.java   |  33 +++
 .../camel/component/tika/TikaParseTest.java     | 141 +++++++++++++
 .../camel-tika/src/test/resources/test.doc      | Bin 0 -> 9216 bytes
 .../camel-tika/src/test/resources/testGIF.gif   | Bin 0 -> 8495 bytes
 .../src/test/resources/tika-empty.xml           |  22 ++
 components/pom.xml                              |   1 +
 components/readme.adoc                          |   3 +
 docs/user-manual/en/SUMMARY.md                  |   1 +
 parent/pom.xml                                  |   6 +
 .../features/src/main/resources/features.xml    |   6 +
 24 files changed, 1140 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/camel/blob/17c83bab/apache-camel/pom.xml
----------------------------------------------------------------------
diff --git a/apache-camel/pom.xml b/apache-camel/pom.xml
index eb80a32..74cfca9 100644
--- a/apache-camel/pom.xml
+++ b/apache-camel/pom.xml
@@ -966,6 +966,10 @@
     </dependency>
     <dependency>
       <groupId>org.apache.camel</groupId>
+      <artifactId>camel-tika</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.camel</groupId>
       <artifactId>camel-twitter</artifactId>
     </dependency>
     <dependency>

http://git-wip-us.apache.org/repos/asf/camel/blob/17c83bab/apache-camel/src/main/descriptors/common-bin.xml
----------------------------------------------------------------------
diff --git a/apache-camel/src/main/descriptors/common-bin.xml b/apache-camel/src/main/descriptors/common-bin.xml
index f487e42..534cc39 100644
--- a/apache-camel/src/main/descriptors/common-bin.xml
+++ b/apache-camel/src/main/descriptors/common-bin.xml
@@ -250,6 +250,7 @@
         <include>org.apache.camel:camel-test-karaf</include>
         <include>org.apache.camel:camel-test-spring</include>
         <include>org.apache.camel:camel-testng</include>
+        <include>org.apache.camel:camel-tika</include>
         <include>org.apache.camel:camel-twitter</include>
         <include>org.apache.camel:camel-undertow</include>
         <include>org.apache.camel:camel-univocity-parsers</include>

http://git-wip-us.apache.org/repos/asf/camel/blob/17c83bab/components/camel-tika/pom.xml
----------------------------------------------------------------------
diff --git a/components/camel-tika/pom.xml b/components/camel-tika/pom.xml
new file mode 100644
index 0000000..86f0131
--- /dev/null
+++ b/components/camel-tika/pom.xml
@@ -0,0 +1,94 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+
+    <modelVersion>4.0.0</modelVersion>
+
+    <parent>
+        <groupId>org.apache.camel</groupId>
+        <artifactId>components</artifactId>
+        <version>2.19.0-SNAPSHOT</version>
+    </parent>
+
+    <artifactId>camel-tika</artifactId>
+    <packaging>jar</packaging>
+    <name>Camel :: Tika</name>
+    <description>This component integrates with Apache Tika to extract content and metadata from thousands of file types.</description>
+
+    <properties>
+        <camel.osgi.export.pkg>org.apache.camel.component.tika.*</camel.osgi.export.pkg>
+        <camel.osgi.export.service>org.apache.camel.spi.ComponentResolver;component=tika</camel.osgi.export.service>
+    </properties>
+
+    <dependencies>
+
+        <dependency>
+            <groupId>org.apache.camel</groupId>
+            <artifactId>camel-core</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.tika</groupId>
+            <artifactId>tika-core</artifactId>
+            <version>${tika-version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.tika</groupId>
+            <artifactId>tika-parsers</artifactId>
+            <version>${tika-version}</version>
+        </dependency>
+        <!-- test dependencies -->
+        <dependency>
+            <groupId>org.apache.camel</groupId>
+            <artifactId>camel-test-spring</artifactId>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.logging.log4j</groupId>
+            <artifactId>log4j-api</artifactId>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.logging.log4j</groupId>
+            <artifactId>log4j-core</artifactId>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.logging.log4j</groupId>
+            <artifactId>log4j-slf4j-impl</artifactId>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>commons-io</groupId>
+            <artifactId>commons-io</artifactId>
+            <version>${commons-io-version}</version>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.hamcrest</groupId>
+            <artifactId>java-hamcrest</artifactId>
+            <version>${hamcrest-version}</version>
+            <scope>test</scope>
+        </dependency>
+    </dependencies>
+
+</project>

http://git-wip-us.apache.org/repos/asf/camel/blob/17c83bab/components/camel-tika/src/main/docs/tika-component.adoc
----------------------------------------------------------------------
diff --git a/components/camel-tika/src/main/docs/tika-component.adoc b/components/camel-tika/src/main/docs/tika-component.adoc
new file mode 100644
index 0000000..7049a59
--- /dev/null
+++ b/components/camel-tika/src/main/docs/tika-component.adoc
@@ -0,0 +1,96 @@
+## Tika Component
+### TIKA
+
+**Available as of Camel 2.19.0**
+
+The *Tika* component provides the ability to detect and parse documents with
+Apache Tika. This component uses
+https://tika.apache.org/[Apache Tika] as the underlying library to work
+with documents.
+
+In order to use the Tika component, Maven users will need to add the
+following dependency to their `pom.xml`:
+
+*pom.xml*
+
+[source,xml]
+------------------------------------------------------------
+<dependency>
+    <groupId>org.apache.camel</groupId>
+    <artifactId>camel-tika</artifactId>
+    <version>x.x.x</version>
+    <!-- use the same version as your Camel core version -->
+</dependency>
+------------------------------------------------------------
+
+### URI format
+
+The TIKA component only supports producer endpoints.
+
+[source,java]
+-----------------------
+tika:operation[?options]
+-----------------------
+
+### Options
+
+// component options: START
+The Tika component has no options.
+// component options: END
+
+
+
+// endpoint options: START
+The Tika component supports 5 endpoint options which are listed below:
+
+{% raw %}
+[width="100%",cols="2,1,1m,1m,5",options="header"]
+|=======================================================================
+| Name | Group | Default | Java Type | Description
+| operation | producer |  | TikaOperation | *Required* Tika Operation. parse or detect
+| tikaConfig | producer |  | TikaConfig | Tika Config
+| tikaConfigUri | producer |  | String | Tika Config Uri
+| tikaParseOutputFormat | producer | xml | TikaParseOutputFormat | Tika Output Format. Supported output formats are xml html text textMain
+| synchronous | advanced | false | boolean | Sets whether synchronous processing should be strictly used or Camel is allowed to use asynchronous processing (if supported).
+|=======================================================================
+{% endraw %}
+// endpoint options: END
+
+
+### Headers
+[width="100%",cols="10%,90%",options="header",]
+|=======================================================================
+|Header |Description
+|TikaXXXX | Any Tika Metadata Header is converted to a Camel Header with Prefix Tika
+|=======================================================================
+
+### To Detect a file's MIME Type
+
+The file should be placed in the Body.
+
+[source,java]
+-------------------------------
+from("direct:start")
+        .to("tika:detect");
+-------------------------------
+
+### To Parse a File
+
+The file should be placed in the Body.
+
+[source,java]
+-------------------------------
+from("direct:start")
+        .to("tika:parse");
+-------------------------------
+
+### See Also
+
+* link:configuring-camel.html[Configuring Camel]
+* link:component.html[Component]
+* link:endpoint.html[Endpoint]
+* link:getting-started.html[Getting Started]
+
+-
+
+-

http://git-wip-us.apache.org/repos/asf/camel/blob/17c83bab/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaComponent.java
----------------------------------------------------------------------
diff --git a/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaComponent.java b/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaComponent.java
new file mode 100644
index 0000000..cb81e3c
--- /dev/null
+++ b/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaComponent.java
@@ -0,0 +1,45 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.camel.component.tika;
+
+import java.net.URI;
+import java.util.Map;
+
+import org.apache.camel.Endpoint;
+import org.apache.camel.impl.UriEndpointComponent;
+import org.apache.tika.config.TikaConfig;
+
+public class TikaComponent extends UriEndpointComponent {
+    private static final String TIKA_CONFIG = "tikaConfig";
+
+    public TikaComponent() {
+        super(TikaEndpoint.class);
+    }
+
+    /** Creates a TikaEndpoint whose operation is the host part of the given endpoint URI. */
+    @Override
+    protected Endpoint createEndpoint(String uri, String remaining, Map<String, Object> parameters) throws Exception {
+        TikaConfiguration settings = new TikaConfiguration();
+        setProperties(settings, parameters);
+        TikaConfig referenced = resolveAndRemoveReferenceParameter(parameters, TIKA_CONFIG, TikaConfig.class);
+        if (referenced != null) {
+            settings.setTikaConfig(referenced);
+        }
+        settings.setOperation(new URI(uri).getHost());
+        return new TikaEndpoint(uri, this, settings);
+    }
+}

http://git-wip-us.apache.org/repos/asf/camel/blob/17c83bab/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaConfiguration.java
----------------------------------------------------------------------
diff --git a/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaConfiguration.java b/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaConfiguration.java
new file mode 100644
index 0000000..051ad2a
--- /dev/null
+++ b/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaConfiguration.java
@@ -0,0 +1,100 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.camel.component.tika;
+
+import java.io.IOException;
+
+import org.xml.sax.SAXException;
+
+import org.apache.camel.spi.Metadata;
+import org.apache.camel.spi.UriParam;
+import org.apache.camel.spi.UriParams;
+import org.apache.camel.spi.UriPath;
+import org.apache.tika.config.TikaConfig;
+import org.apache.tika.exception.TikaException;
+
+
+@UriParams
+public class TikaConfiguration {
+
+    @UriPath(description = "Operation type")
+    @Metadata(required = "true")
+    private TikaOperation operation;
+    @UriParam(defaultValue = "xml")
+    private TikaParseOutputFormat tikaParseOutputFormat = TikaParseOutputFormat.xml;
+    @UriParam(description = "Tika Config")
+    private TikaConfig tikaConfig = TikaConfig.getDefaultConfig();
+    @UriParam(description = "Tika Config Url")
+    private String tikaConfigUri;
+
+    public TikaOperation getOperation() {
+        return operation;
+    }
+
+    /**
+     * Tika operation to perform.
+     * Supported operations are parse and detect.
+     * This is the required URI path option.
+     */
+    public void setOperation(TikaOperation operation) {
+        this.operation = operation;
+    }
+
+    public void setOperation(String operation) {
+        this.operation = TikaOperation.valueOf(operation);
+    }
+
+    public TikaParseOutputFormat getTikaParseOutputFormat() {
+        return tikaParseOutputFormat;
+    }
+
+    /**
+     * Tika output format used when parsing.
+     * Supported output formats are xml, html, text and textMain.
+     * The default is xml.
+     */
+    public void setTikaParseOutputFormat(TikaParseOutputFormat tikaParseOutputFormat) {
+        this.tikaParseOutputFormat = tikaParseOutputFormat;
+    }
+
+    public TikaConfig getTikaConfig() {
+        return tikaConfig;
+    }
+
+    /**
+     * Tika configuration to use.
+     * Replaces the initial value, which is TikaConfig.getDefaultConfig().
+     * A later call to setTikaConfigUri overwrites it again.
+     */
+    public void setTikaConfig(TikaConfig tikaConfig) {
+        this.tikaConfig = tikaConfig;
+    }
+
+    public String getTikaConfigUri() {
+        return tikaConfigUri;
+    }
+
+    /**
+     * Tika config URI.
+     * Besides storing the value, this setter replaces the current TikaConfig
+     * with a new TikaConfig created from the given string.
+     */
+    public void setTikaConfigUri(String tikaConfigUri) throws TikaException, IOException, SAXException {
+        this.tikaConfigUri = tikaConfigUri;
+        this.tikaConfig = new TikaConfig(tikaConfigUri);
+    }
+}

http://git-wip-us.apache.org/repos/asf/camel/blob/17c83bab/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaEndpoint.java
----------------------------------------------------------------------
diff --git a/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaEndpoint.java b/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaEndpoint.java
new file mode 100644
index 0000000..cb8fbdd
--- /dev/null
+++ b/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaEndpoint.java
@@ -0,0 +1,61 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.camel.component.tika;
+
+import org.apache.camel.Component;
+import org.apache.camel.Consumer;
+import org.apache.camel.Processor;
+import org.apache.camel.Producer;
+import org.apache.camel.impl.DefaultEndpoint;
+import org.apache.camel.spi.UriEndpoint;
+import org.apache.camel.spi.UriParam;
+
+/** Producer-only endpoint for the tika scheme that hands its TikaConfiguration to TikaProducer. */
+@UriEndpoint(scheme = "tika", title = "Tika", syntax = "tika:operation", producerOnly = true, label = "tika")
+public class TikaEndpoint extends DefaultEndpoint {
+
+    @UriParam
+    private TikaConfiguration tikaConfiguration;
+
+    public TikaEndpoint(String endpointUri, Component component, TikaConfiguration tikaConfiguration) {
+        super(endpointUri, component);
+        this.tikaConfiguration = tikaConfiguration;
+    }
+
+    @Override
+    public Producer createProducer() throws Exception {
+        return new TikaProducer(this);
+    }
+
+    @Override
+    public Consumer createConsumer(Processor processor) throws Exception {
+        throw new UnsupportedOperationException("Consumer is not supported for Tika component: " + getEndpointUri());
+    }
+
+    @Override
+    public boolean isSingleton() {
+        return true;
+    }
+
+    public TikaConfiguration getTikaConfiguration() {
+        return tikaConfiguration;
+    }
+
+    public void setTikaConfiguration(TikaConfiguration tikaConfiguration) {
+        this.tikaConfiguration = tikaConfiguration;
+    }
+}

http://git-wip-us.apache.org/repos/asf/camel/blob/17c83bab/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaOperation.java
----------------------------------------------------------------------
diff --git a/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaOperation.java b/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaOperation.java
new file mode 100644
index 0000000..912387d
--- /dev/null
+++ b/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaOperation.java
@@ -0,0 +1,21 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.camel.component.tika;
+
+public enum TikaOperation {
+    parse, detect; // the two operations TikaProducer.process dispatches on
+}

http://git-wip-us.apache.org/repos/asf/camel/blob/17c83bab/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaParseOutputFormat.java
----------------------------------------------------------------------
diff --git a/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaParseOutputFormat.java b/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaParseOutputFormat.java
new file mode 100644
index 0000000..82005be
--- /dev/null
+++ b/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaParseOutputFormat.java
@@ -0,0 +1,21 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.camel.component.tika;
+
+public enum TikaParseOutputFormat {
+    xml, html, text, textMain; // each value selects a content handler in TikaProducer.getContentHandler
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/camel/blob/17c83bab/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaProducer.java
----------------------------------------------------------------------
diff --git a/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaProducer.java b/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaProducer.java
new file mode 100644
index 0000000..1e0d9ca
--- /dev/null
+++ b/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaProducer.java
@@ -0,0 +1,168 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.camel.component.tika;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.io.UnsupportedEncodingException;
+import java.io.Writer;
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
+import java.util.Locale;
+
+import javax.xml.transform.OutputKeys;
+import javax.xml.transform.TransformerConfigurationException;
+import javax.xml.transform.sax.SAXTransformerFactory;
+import javax.xml.transform.sax.TransformerHandler;
+import javax.xml.transform.stream.StreamResult;
+
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+import org.apache.camel.Exchange;
+import org.apache.camel.impl.DefaultProducer;
+import org.apache.tika.config.TikaConfig;
+import org.apache.tika.detect.Detector;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.parser.html.BoilerpipeContentHandler;
+import org.apache.tika.sax.BodyContentHandler;
+import org.apache.tika.sax.ExpandedTitleContentHandler;
+
+public class TikaProducer extends DefaultProducer {
+
+    private final TikaConfiguration tikaConfiguration;
+
+    private final Parser parser;
+
+    private final Detector detector;
+
+    public TikaProducer(TikaEndpoint endpoint) {
+        super(endpoint);
+        this.tikaConfiguration = endpoint.getTikaConfiguration();
+        // the parser and the detector are both built from the configured TikaConfig
+        TikaConfig tikaConfig = this.tikaConfiguration.getTikaConfig();
+        this.parser = new AutoDetectParser(tikaConfig);
+        this.detector = tikaConfig.getDetector();
+    }
+
+    /**
+     * Runs the configured operation against the IN message and stores its result as the OUT body.
+     */
+    @Override
+    public void process(Exchange exchange) throws Exception {
+        Object outcome;
+        switch (this.tikaConfiguration.getOperation()) {
+        case detect:
+            outcome = doDetect(exchange);
+            break;
+        case parse:
+            outcome = doParse(exchange);
+            break;
+        default:
+            throw new IllegalArgumentException(String.format("Unknown operation %s", tikaConfiguration.getOperation()));
+        }
+        // copy the IN headers (Tika ones included) and attachments to OUT, then set the result
+        exchange.getOut().setHeaders(exchange.getIn().getHeaders());
+        exchange.getOut().setAttachments(exchange.getIn().getAttachments());
+        exchange.getOut().setBody(outcome);
+    }
+
+    /** Detects the media type of the IN body and returns it in its string form. */
+    private Object doDetect(Exchange exchange) throws IOException {
+        InputStream body = exchange.getIn().getBody(InputStream.class);
+        Metadata detected = new Metadata();
+        MediaType mediaType = this.detector.detect(body, detected);
+        convertMetadataToHeaders(detected, exchange);
+        return mediaType.toString();
+    }
+
+    /** Parses the IN body and returns the stream that the content handler was created over. */
+    private Object doParse(Exchange exchange)
+            throws TikaException, IOException, SAXException, TransformerConfigurationException {
+        InputStream body = exchange.getIn().getBody(InputStream.class);
+        OutputStream sink = new ByteArrayOutputStream();
+        ContentHandler handler = getContentHandler(this.tikaConfiguration, sink);
+        ParseContext parseContext = new ParseContext();
+        parseContext.set(Parser.class, this.parser);
+        Metadata extracted = new Metadata();
+        this.parser.parse(body, handler, extracted, parseContext);
+        convertMetadataToHeaders(extracted, exchange);
+        return sink;
+    }
+
+    /** Copies each metadata entry onto the IN message as a header named "Tika" + entry name. */
+    private void convertMetadataToHeaders(Metadata metadata, Exchange exchange) {
+        if (metadata == null) {
+            return;
+        }
+        for (String name : metadata.names()) {
+            exchange.getIn().setHeader("Tika" + name, metadata.get(name));
+        }
+    }
+
+    /**
+     * Creates the content handler that writes to the stream in the configured parse output format.
+     */
+    private ContentHandler getContentHandler(TikaConfiguration configuration, OutputStream outputStream)
+            throws TransformerConfigurationException, UnsupportedEncodingException {
+        String encoding = Charset.defaultCharset().name();
+        switch (configuration.getTikaParseOutputFormat()) {
+        case xml:
+            return getTransformerHandler(outputStream, "xml", encoding, true);
+        case text:
+            return new BodyContentHandler(outputStream);
+        case textMain:
+            return new BoilerpipeContentHandler(getOutputWriter(outputStream, encoding));
+        case html:
+            return new ExpandedTitleContentHandler(getTransformerHandler(outputStream, "html", encoding, true));
+        default:
+            throw new IllegalArgumentException(
+                    String.format("Unknown format %s", tikaConfiguration.getTikaParseOutputFormat()));
+        }
+    }
+
+    /** Builds a transformer handler that writes to the stream with the given method and encoding. */
+    private TransformerHandler getTransformerHandler(OutputStream output, String method, String encoding,
+            boolean prettyPrint) throws TransformerConfigurationException {
+        SAXTransformerFactory saxFactory = (SAXTransformerFactory) SAXTransformerFactory.newInstance();
+        TransformerHandler transformerHandler = saxFactory.newTransformerHandler();
+        transformerHandler.getTransformer().setOutputProperty(OutputKeys.METHOD, method);
+        transformerHandler.getTransformer().setOutputProperty(OutputKeys.INDENT, prettyPrint ? "yes" : "no");
+        if (encoding != null) {
+            transformerHandler.getTransformer().setOutputProperty(OutputKeys.ENCODING, encoding);
+        }
+        transformerHandler.setResult(new StreamResult(output));
+        return transformerHandler;
+    }
+
+    /** Wraps the stream in a writer: the given encoding, else UTF-8 on Mac OS X, else the default. */
+    private Writer getOutputWriter(OutputStream output, String encoding) throws UnsupportedEncodingException {
+        if (encoding != null) {
+            return new OutputStreamWriter(output, encoding);
+        }
+        boolean macOsX = System.getProperty("os.name").toLowerCase(Locale.ROOT).startsWith("mac os x");
+        return new OutputStreamWriter(output, macOsX ? StandardCharsets.UTF_8 : Charset.defaultCharset());
+    }
+}

http://git-wip-us.apache.org/repos/asf/camel/blob/17c83bab/components/camel-tika/src/main/resources/META-INF/LICENSE.txt
----------------------------------------------------------------------
diff --git a/components/camel-tika/src/main/resources/META-INF/LICENSE.txt b/components/camel-tika/src/main/resources/META-INF/LICENSE.txt
new file mode 100644
index 0000000..43e91eb
--- /dev/null
+++ b/components/camel-tika/src/main/resources/META-INF/LICENSE.txt
@@ -0,0 +1,203 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+

http://git-wip-us.apache.org/repos/asf/camel/blob/17c83bab/components/camel-tika/src/main/resources/META-INF/NOTICE.txt
----------------------------------------------------------------------
diff --git a/components/camel-tika/src/main/resources/META-INF/NOTICE.txt b/components/camel-tika/src/main/resources/META-INF/NOTICE.txt
new file mode 100644
index 0000000..455e6a5
--- /dev/null
+++ b/components/camel-tika/src/main/resources/META-INF/NOTICE.txt
@@ -0,0 +1,11 @@
+   =========================================================================
+   ==  NOTICE file corresponding to the section 4 d of                    ==
+   ==  the Apache License, Version 2.0,                                   ==
+   ==  in this case for the Apache Camel distribution.                    ==
+   =========================================================================
+
+   This product includes software developed by
+   The Apache Software Foundation (http://www.apache.org/).
+
+   Please read the different LICENSE files present in the licenses directory of
+   this distribution.

http://git-wip-us.apache.org/repos/asf/camel/blob/17c83bab/components/camel-tika/src/main/resources/META-INF/services/org/apache/camel/component/tika
----------------------------------------------------------------------
diff --git a/components/camel-tika/src/main/resources/META-INF/services/org/apache/camel/component/tika b/components/camel-tika/src/main/resources/META-INF/services/org/apache/camel/component/tika
new file mode 100644
index 0000000..1a07241
--- /dev/null
+++ b/components/camel-tika/src/main/resources/META-INF/services/org/apache/camel/component/tika
@@ -0,0 +1,17 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+class=org.apache.camel.component.tika.TikaComponent

http://git-wip-us.apache.org/repos/asf/camel/blob/17c83bab/components/camel-tika/src/test/java/org/apache/camel/component/tika/TikaDetectTest.java
----------------------------------------------------------------------
diff --git a/components/camel-tika/src/test/java/org/apache/camel/component/tika/TikaDetectTest.java b/components/camel-tika/src/test/java/org/apache/camel/component/tika/TikaDetectTest.java
new file mode 100644
index 0000000..7ca1ed1
--- /dev/null
+++ b/components/camel-tika/src/test/java/org/apache/camel/component/tika/TikaDetectTest.java
@@ -0,0 +1,85 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.camel.component.tika;
+
+import java.io.File;
+
+import org.apache.camel.EndpointInject;
+import org.apache.camel.Exchange;
+import org.apache.camel.Predicate;
+import org.apache.camel.builder.RouteBuilder;
+import org.apache.camel.component.mock.MockEndpoint;
+import org.apache.camel.test.junit4.CamelTestSupport;
+
+import org.junit.Test;
+
+import static org.hamcrest.Matchers.containsString;
+import static org.hamcrest.Matchers.instanceOf;
+
+/**
+ * Sends test files through a <code>tika:detect</code> route and checks the media type string
+ * that arrives in the message body at <code>mock:result</code>.
+ */
+public class TikaDetectTest extends CamelTestSupport {
+
+    @EndpointInject(uri = "mock:result")
+    protected MockEndpoint resultEndpoint;
+
+    @Test
+    public void testDocumentDetect() throws Exception {
+        assertDetectedType("src/test/resources/test.doc", "application/x-tika-msoffice");
+    }
+
+    @Test
+    public void testImageDetect() throws Exception {
+        assertDetectedType("src/test/resources/testGIF.gif", "image/gif");
+    }
+
+    /**
+     * Sends the given file to <code>direct:start</code> and asserts that exactly one message
+     * reaches the mock endpoint with a String body containing the expected media type.
+     *
+     * @param resourcePath path of the file to send
+     * @param expectedType text the resulting body must contain
+     * @throws Exception if sending fails or the mock expectations are not satisfied
+     */
+    private void assertDetectedType(String resourcePath, final String expectedType) throws Exception {
+        // Register the expectations before sending, so the mock is configured
+        // by the time the exchange arrives rather than after the fact.
+        resultEndpoint.setExpectedMessageCount(1);
+        resultEndpoint.expectedMessagesMatches(new Predicate() {
+            @Override
+            public boolean matches(Exchange exchange) {
+                Object body = exchange.getIn().getBody(String.class);
+                // instanceOf also rejects a null body (conversion to String failed).
+                assertThat(body, instanceOf(String.class));
+                assertThat((String) body, containsString(expectedType));
+                return true;
+            }
+        });
+
+        template.sendBody("direct:start", new File(resourcePath));
+
+        resultEndpoint.assertIsSatisfied();
+    }
+
+    @Override
+    protected RouteBuilder createRouteBuilder() throws Exception {
+        return new RouteBuilder() {
+            @Override
+            public void configure() throws Exception {
+                from("direct:start").to("tika:detect").to("mock:result");
+            }
+        };
+    }
+}

http://git-wip-us.apache.org/repos/asf/camel/blob/17c83bab/components/camel-tika/src/test/java/org/apache/camel/component/tika/TikaEmptyConfig.java
----------------------------------------------------------------------
diff --git a/components/camel-tika/src/test/java/org/apache/camel/component/tika/TikaEmptyConfig.java b/components/camel-tika/src/test/java/org/apache/camel/component/tika/TikaEmptyConfig.java
new file mode 100644
index 0000000..c30968f
--- /dev/null
+++ b/components/camel-tika/src/test/java/org/apache/camel/component/tika/TikaEmptyConfig.java
@@ -0,0 +1,33 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.camel.component.tika;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.xml.sax.SAXException;
+
+import org.apache.tika.config.TikaConfig;
+import org.apache.tika.exception.TikaException;
+
+/**
+ * A {@link TikaConfig} that is always loaded from the <code>tika-empty.xml</code> test resource.
+ */
+public class TikaEmptyConfig extends TikaConfig {
+
+    /** Location of the configuration file, relative to the working directory. */
+    private static final String CONFIG_PATH = "src/test/resources/tika-empty.xml";
+
+    /**
+     * Loads the configuration from {@link #CONFIG_PATH}.
+     *
+     * @throws TikaException declared by the {@link TikaConfig} constructor
+     * @throws IOException   declared by the {@link TikaConfig} constructor
+     * @throws SAXException  declared by the {@link TikaConfig} constructor
+     */
+    public TikaEmptyConfig() throws TikaException, IOException, SAXException {
+        super(new File(CONFIG_PATH));
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/camel/blob/17c83bab/components/camel-tika/src/test/java/org/apache/camel/component/tika/TikaParseTest.java
----------------------------------------------------------------------
diff --git a/components/camel-tika/src/test/java/org/apache/camel/component/tika/TikaParseTest.java b/components/camel-tika/src/test/java/org/apache/camel/component/tika/TikaParseTest.java
new file mode 100644
index 0000000..dc6d97e
--- /dev/null
+++ b/components/camel-tika/src/test/java/org/apache/camel/component/tika/TikaParseTest.java
@@ -0,0 +1,141 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.camel.component.tika;
+
+import java.io.File;
+import java.util.Map;
+
+import org.apache.camel.EndpointInject;
+import org.apache.camel.Exchange;
+import org.apache.camel.Predicate;
+import org.apache.camel.builder.RouteBuilder;
+import org.apache.camel.component.mock.MockEndpoint;
+import org.apache.camel.impl.JndiRegistry;
+import org.apache.camel.test.junit4.CamelTestSupport;
+import org.junit.Test;
+import static org.hamcrest.Matchers.*;
+
+public class TikaParseTest extends CamelTestSupport {
+
+    @EndpointInject(uri = "mock:result")
+    protected MockEndpoint resultEndpoint;
+
+    @Test
+    public void testDocumentParse() throws Exception {
+
+        File document = new File("src/test/resources/test.doc");
+        template.sendBody("direct:start", document);
+
+        resultEndpoint.setExpectedMessageCount(1);
+
+        resultEndpoint.expectedMessagesMatches(new Predicate() {
+            @Override
+            public boolean matches(Exchange exchange) {
+                Object body = exchange.getIn().getBody(String.class);
+                Map<String, Object> headerMap = exchange.getIn().getHeaders();
+                assertThat(body, instanceOf(String.class));
+                assertThat((String) body, containsString("test"));
+                assertThat(headerMap.get("TikaContent-Type"), equalTo("application/msword"));
+                return true;
+            }
+        });
+        resultEndpoint.assertIsSatisfied();
+    }
+
+    @Test
+    public void testImageParse() throws Exception {
+        File document = new File("src/test/resources/testGIF.gif");
+        template.sendBody("direct:start", document);
+
+        resultEndpoint.setExpectedMessageCount(1);
+
+        resultEndpoint.expectedMessagesMatches(new Predicate() {
+            @Override
+            public boolean matches(Exchange exchange) {
+                Object body = exchange.getIn().getBody(String.class);
+                Map<String, Object> headerMap = exchange.getIn().getHeaders();
+                assertThat(body, instanceOf(String.class));
+                assertThat((String) body, containsString("<body/>"));
+                assertThat(headerMap.get("TikaContent-Type"), equalTo("image/gif"));
+                return true;
+            }
+        });
+        resultEndpoint.assertIsSatisfied();
+    }
+
+    @Test
+    public void testEmptyConfigDocumentParse() throws Exception {
+        File document = new File("src/test/resources/test.doc");
+        template.sendBody("direct:start3", document);
+
+        resultEndpoint.setExpectedMessageCount(1);
+
+        resultEndpoint.expectedMessagesMatches(new Predicate() {
+            @Override
+            public boolean matches(Exchange exchange) {
+                Object body = exchange.getIn().getBody(String.class);
+                Map<String, Object> headerMap = exchange.getIn().getHeaders();
+                assertThat(body, instanceOf(String.class));
+                assertThat((String) body, containsString("<body/>"));
+                assertThat(headerMap.get("TikaContent-Type"), equalTo("application/msword"));
+                return true;
+            }
+        });
+        resultEndpoint.assertIsSatisfied();
+    }
+
+    @Test
+    public void testRegistryConfigDocumentParse() throws Exception {
+        File document = new File("src/test/resources/test.doc");
+        template.sendBody("direct:start3", document);
+
+        resultEndpoint.setExpectedMessageCount(1);
+
+        resultEndpoint.expectedMessagesMatches(new Predicate() {
+            @Override
+            public boolean matches(Exchange exchange) {
+                Object body = exchange.getIn().getBody(String.class);
+                Map<String, Object> headerMap = exchange.getIn().getHeaders();
+                assertThat(body, instanceOf(String.class));
+                assertThat((String) body, containsString("<body/>"));
+                assertThat(headerMap.get("TikaContent-Type"), equalTo("application/msword"));
+                return true;
+            }
+        });
+        resultEndpoint.assertIsSatisfied();
+    }
+
+    @Override
+    protected RouteBuilder createRouteBuilder() throws Exception {
+        return new RouteBuilder() {
+            @Override
+            public void configure() throws Exception {
+                from("direct:start").to("tika:parse").to("mock:result");
+                from("direct:start2").to("tika:parse?tikaConfigUri=src/test/resources/tika-empty.xml")
+                        .to("mock:result");
+                from("direct:start3").to("tika:parse?tikaConfig=#testConfig").to("mock:result");
+            }
+        };
+    }
+
+    @Override
+    protected JndiRegistry createRegistry() throws Exception {
+        JndiRegistry reg = super.createRegistry();
+        reg.bind("testConfig", new TikaEmptyConfig());
+        return reg;
+    }
+}

http://git-wip-us.apache.org/repos/asf/camel/blob/17c83bab/components/camel-tika/src/test/resources/test.doc
----------------------------------------------------------------------
diff --git a/components/camel-tika/src/test/resources/test.doc b/components/camel-tika/src/test/resources/test.doc
new file mode 100644
index 0000000..93198c8
Binary files /dev/null and b/components/camel-tika/src/test/resources/test.doc differ

http://git-wip-us.apache.org/repos/asf/camel/blob/17c83bab/components/camel-tika/src/test/resources/testGIF.gif
----------------------------------------------------------------------
diff --git a/components/camel-tika/src/test/resources/testGIF.gif b/components/camel-tika/src/test/resources/testGIF.gif
new file mode 100644
index 0000000..e09e641
Binary files /dev/null and b/components/camel-tika/src/test/resources/testGIF.gif differ

http://git-wip-us.apache.org/repos/asf/camel/blob/17c83bab/components/camel-tika/src/test/resources/tika-empty.xml
----------------------------------------------------------------------
diff --git a/components/camel-tika/src/test/resources/tika-empty.xml b/components/camel-tika/src/test/resources/tika-empty.xml
new file mode 100644
index 0000000..56504c4
--- /dev/null
+++ b/components/camel-tika/src/test/resources/tika-empty.xml
@@ -0,0 +1,22 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<properties>
+  <parsers>
+    <parser class="org.apache.tika.parser.EmptyParser"/>
+  </parsers>
+</properties>

http://git-wip-us.apache.org/repos/asf/camel/blob/17c83bab/components/pom.xml
----------------------------------------------------------------------
diff --git a/components/pom.xml b/components/pom.xml
index 4276240..90b092e 100644
--- a/components/pom.xml
+++ b/components/pom.xml
@@ -265,6 +265,7 @@
     <module>camel-tagsoup</module>
     <module>camel-tarfile</module>
     <module>camel-telegram</module>
+    <module>camel-tika</module>
     <module>camel-twitter</module>
     <module>camel-undertow</module>
     <module>camel-univocity-parsers</module>

http://git-wip-us.apache.org/repos/asf/camel/blob/17c83bab/components/readme.adoc
----------------------------------------------------------------------
diff --git a/components/readme.adoc b/components/readme.adoc
index 190f8ea..fccd05c 100644
--- a/components/readme.adoc
+++ b/components/readme.adoc
@@ -543,6 +543,9 @@ Components
 | link:camel-telegram/src/main/docs/telegram-component.adoc[Telegram] (camel-telegram) +
 `telegram:type/authorizationToken` | The telegram component provides access to the Telegram Bot API.
 
+| link:camel-tika/src/main/docs/tika-component.adoc[Tika] (camel-tika) +
+`tika:operation` | This component integrates with Apache Tika to extract content and metadata from thousands of file types.
+
 | link:camel-twitter/src/main/docs/twitter-component.adoc[Twitter] (camel-twitter) +
 `twitter:kind` | This component integrates with Twitter to send tweets or search for tweets and more.
 

http://git-wip-us.apache.org/repos/asf/camel/blob/17c83bab/docs/user-manual/en/SUMMARY.md
----------------------------------------------------------------------
diff --git a/docs/user-manual/en/SUMMARY.md b/docs/user-manual/en/SUMMARY.md
index b7dd18c..d93d575 100644
--- a/docs/user-manual/en/SUMMARY.md
+++ b/docs/user-manual/en/SUMMARY.md
@@ -303,6 +303,7 @@
 	* [Stream](stream-component.adoc)
 	* [String Template](string-template-component.adoc)
 	* [Telegram](telegram-component.adoc)
+	* [Tika](tika-component.adoc)
 	* [Twitter](twitter-component.adoc)
 	* [Undertow](undertow-component.adoc)
 	* [Velocity](velocity-component.adoc)

http://git-wip-us.apache.org/repos/asf/camel/blob/17c83bab/parent/pom.xml
----------------------------------------------------------------------
diff --git a/parent/pom.xml b/parent/pom.xml
index a3692a7..0d9fe43 100644
--- a/parent/pom.xml
+++ b/parent/pom.xml
@@ -614,6 +614,7 @@
     <tagsoup-version>1.2.1</tagsoup-version>
     <testng-version>6.8.21</testng-version>
     <tinybundles-version>2.1.1</tinybundles-version>
+    <tika-version>1.14</tika-version>
     <twitter4j-bundle-version>4.0.6_1</twitter4j-bundle-version>
     <twitter4j-version>4.0.6</twitter4j-version>
     <undertow-version>1.4.8.Final</undertow-version>
@@ -1896,6 +1897,11 @@
       </dependency>
       <dependency>
         <groupId>org.apache.camel</groupId>
+        <artifactId>camel-tika</artifactId>
+        <version>${project.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.camel</groupId>
         <artifactId>camel-twitter</artifactId>
         <version>${project.version}</version>
       </dependency>

http://git-wip-us.apache.org/repos/asf/camel/blob/17c83bab/platforms/karaf/features/src/main/resources/features.xml
----------------------------------------------------------------------
diff --git a/platforms/karaf/features/src/main/resources/features.xml b/platforms/karaf/features/src/main/resources/features.xml
index 0993d46..20d52f5 100644
--- a/platforms/karaf/features/src/main/resources/features.xml
+++ b/platforms/karaf/features/src/main/resources/features.xml
@@ -1881,6 +1881,12 @@
     <bundle>mvn:org.apache.camel/camel-test/${project.version}</bundle>
     <bundle>mvn:org.apache.camel/camel-test-spring/${project.version}</bundle>
   </feature>
+  <feature name='camel-tika' version='${project.version}' resolver='(obr)' start-level='50'>
+    <bundle dependency='true'>mvn:org.apache.tika/tika-core/${tika-version}</bundle>
+    <bundle dependency='true'>mvn:org.apache.tika/tika-bundle/${tika-version}</bundle>
+    <feature version='${project.version}'>camel-core</feature>
+    <bundle>mvn:org.apache.camel/camel-tika/${project.version}</bundle>
+  </feature>
   <feature name='camel-twitter' version='${project.version}' resolver='(obr)' start-level='50'>
     <feature version='${project.version}'>camel-core</feature>
     <bundle dependency='true'>mvn:org.apache.servicemix.bundles/org.apache.servicemix.bundles.twitter4j/${twitter4j-bundle-version}</bundle>


Mime
View raw message