jackrabbit-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ju...@apache.org
Subject svn commit: r778621 - in /jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query: TextFilter.java lucene/JackrabbitTextExtractor.java lucene/TextExtractorFilter.java lucene/TextFilterExtractor.java lucene/TextPlainTextFilter.java
Date Tue, 26 May 2009 08:37:55 GMT
Author: jukka
Date: Tue May 26 08:37:54 2009
New Revision: 778621

URL: http://svn.apache.org/viewvc?rev=778621&view=rev
Log:
JCR-1878: Use Apache Tika for text extraction

Drop the deprecated TextFilter classes

Removed:
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/TextFilter.java
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/TextExtractorFilter.java
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/TextFilterExtractor.java
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/TextPlainTextFilter.java
Modified:
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/JackrabbitTextExtractor.java

Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/JackrabbitTextExtractor.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/JackrabbitTextExtractor.java?rev=778621&r1=778620&r2=778621&view=diff
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/JackrabbitTextExtractor.java	(original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/JackrabbitTextExtractor.java	Tue May 26 08:37:54 2009
@@ -19,15 +19,11 @@
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.Reader;
-import java.util.ArrayList;
 import java.util.Arrays;
-import java.util.Collection;
 import java.util.HashSet;
-import java.util.Iterator;
 import java.util.Set;
 import java.util.StringTokenizer;
 
-import org.apache.jackrabbit.core.query.TextFilter;
 import org.apache.jackrabbit.extractor.CompositeTextExtractor;
 import org.apache.jackrabbit.extractor.DelegatingTextExtractor;
 import org.apache.jackrabbit.extractor.EmptyTextExtractor;
@@ -40,8 +36,8 @@
  * implements the following functionality:
  * <ul>
  *   <li>
- *     Parses the configured {@link TextExtractor} and {@link TextFilter}
- *     class names and instantiates the configured classes.
+ *     Parses the configured {@link TextExtractor} class names and
+ *     instantiates the configured classes.
  *   </li>
  *   <li>
  *     Acts as the delegate extractor for any configured
@@ -77,10 +73,10 @@
      * Set of content types that are known to be supported by the
      * composite extractor.
      */
-    private final Set types = new HashSet();
+    private final Set<String> types = new HashSet<String>();
 
     /**
-     * Composite extractor used to for all text extration tasks. Contains
+     * Composite extractor used to for all text extraction tasks. Contains
      * all the {@link TextExtractor} instances for directly supported content
      * types, the {@link TextFilterExtractor} adapters for backwards
      * compatibility with configured {@link TextFilter} instances that have
@@ -91,13 +87,6 @@
         new CompositeTextExtractor();
 
     /**
-     * Configured {@link TextFilter} instances. Used for backwards
-     * compatibility with existing configuration files and {@link TextFilter}
-     * implementations.
-     */
-    private final Collection filters = new ArrayList();
-
-    /**
      * Creates a Jackrabbit text extractor containing the configured component
      * classes.
      *
@@ -117,8 +106,6 @@
                 }
                 if (object instanceof TextExtractor) {
                     extractor.addTextExtractor((TextExtractor) object);
-                } else if (object instanceof TextFilter) {
-                    filters.add(object);
                 } else {
                     logger.warn("Unknown text extractor class: {}", name);
                 }
@@ -173,18 +160,6 @@
     public Reader extractText(InputStream stream, String type, String encoding)
             throws IOException {
         logger.debug("extractText(stream, {}, {})", type, encoding);
-        if (!types.contains(type)) {
-            Iterator iterator = filters.iterator();
-            while (iterator.hasNext()) {
-                TextFilter filter = (TextFilter) iterator.next();
-                if (filter.canFilter(type)) {
-                    types.add(type);
-                    extractor.addTextExtractor(
-                            new TextFilterExtractor(type, filter));
-                    break;
-                }
-            }
-        }
 
         if (!types.contains(type)) {
             logger.debug("Full text indexing of {} is not supported", type);



Mime
View raw message