cxf-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From serg...@apache.org
Subject git commit: [CXF-5549] Adding a constructor accepting list of parsers to Lucene extractor too
Date Fri, 27 Jun 2014 15:58:40 GMT
Repository: cxf
Updated Branches:
  refs/heads/master 785c0bd70 -> 0253be4a7


[CXF-5549] Adding a constructor accepting list of parsers to Lucene extractor too


Project: http://git-wip-us.apache.org/repos/asf/cxf/repo
Commit: http://git-wip-us.apache.org/repos/asf/cxf/commit/0253be4a
Tree: http://git-wip-us.apache.org/repos/asf/cxf/tree/0253be4a
Diff: http://git-wip-us.apache.org/repos/asf/cxf/diff/0253be4a

Branch: refs/heads/master
Commit: 0253be4a776598fae2d5952fa2f7fa36aac6deeb
Parents: 785c0bd
Author: Sergey Beryozkin <sberyozkin@talend.com>
Authored: Fri Jun 27 16:58:13 2014 +0100
Committer: Sergey Beryozkin <sberyozkin@talend.com>
Committed: Fri Jun 27 16:58:13 2014 +0100

----------------------------------------------------------------------
 .../ext/search/tika/TikaLuceneContentExtractor.java | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cxf/blob/0253be4a/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/tika/TikaLuceneContentExtractor.java
----------------------------------------------------------------------
diff --git a/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/tika/TikaLuceneContentExtractor.java b/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/tika/TikaLuceneContentExtractor.java
index 28eaa35..567463b 100644
--- a/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/tika/TikaLuceneContentExtractor.java
+++ b/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/tika/TikaLuceneContentExtractor.java
@@ -20,6 +20,7 @@ package org.apache.cxf.jaxrs.ext.search.tika;
 
 import java.io.InputStream;
 import java.util.Date;
+import java.util.List;
 
 import org.apache.cxf.jaxrs.ext.search.tika.TikaContentExtractor.TikaContent;
 import org.apache.lucene.document.Document;
@@ -88,6 +89,21 @@ public class TikaLuceneContentExtractor {
     }
     
     /**
+     * Create new Tika-based content extractor using the provided list of parser
+     * instances. The parsers are passed to a new TikaContentExtractor and the given
+     * metadata is stored as the default document metadata of this extractor.
+     * NOTE(review): no validateMediaType flag is taken here; confirm which media type
+     * validation behaviour TikaContentExtractor(parsers) applies.
+     * @param parsers parser instances handed to the underlying TikaContentExtractor
+     * @param documentMetadata stored as the default document metadata
+     */
+    public TikaLuceneContentExtractor(final List<Parser> parsers, 
+                                      final LuceneDocumentMetadata documentMetadata) {
+        this.extractor = new TikaContentExtractor(parsers);
+        this.defaultDocumentMetadata = documentMetadata;
+    }
+    
+    /**
      * Extract the content and metadata from the input stream. Depending on media type validation,
      * the detector could be run against input stream in order to ensure that parser supports this
      * type of content. 


Mime
View raw message