jackrabbit-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mreut...@apache.org
Subject svn commit: r330282 - in /incubator/jackrabbit/trunk: contrib/textfilters/ src/java/META-INF/services/ src/java/org/apache/jackrabbit/core/query/ src/java/org/apache/jackrabbit/core/query/lucene/
Date Wed, 02 Nov 2005 16:38:56 GMT
Author: mreutegg
Date: Wed Nov  2 08:38:48 2005
New Revision: 330282

URL: http://svn.apache.org/viewcvs?rev=330282&view=rev
Log:
JCR-265: Bug with textfilters and classloaders

Removed:
    incubator/jackrabbit/trunk/src/java/META-INF/services/org.apache.jackrabbit.core.query.TextFilterService
    incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/query/TextFilterService.java
Modified:
    incubator/jackrabbit/trunk/contrib/textfilters/README.txt
    incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/query/TextFilter.java
    incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/query/lucene/NodeIndexer.java
    incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java

Modified: incubator/jackrabbit/trunk/contrib/textfilters/README.txt
URL: http://svn.apache.org/viewcvs/incubator/jackrabbit/trunk/contrib/textfilters/README.txt?rev=330282&r1=330281&r2=330282&view=diff
==============================================================================
--- incubator/jackrabbit/trunk/contrib/textfilters/README.txt (original)
+++ incubator/jackrabbit/trunk/contrib/textfilters/README.txt Wed Nov  2 08:38:48 2005
@@ -11,10 +11,19 @@
 
 How to register in jackrabbit?
 Build the jar file and place it in the Jackrabbit 
-classpath. The filters will be automatically loaded 
-on startup.
+classpath together with the dependencies of these text
+filters.
+Configure them in the SearchIndex element of the workspace.xml
+
+Sample:
+
+...
+  <SearchIndex class="org.apache.jackrabbit.core.query.lucene.SearchIndex">
+    <param name="path" value="${wsp.home}/index" />
+    <param name="textFilterClasses" value="org.apache.jackrabbit.core.query.MsExcelTextFilter,org.apache.jackrabbit.core.query.MsPowerPointTextFilter,org.apache.jackrabbit.core.query.MsWordTextFilter,org.apache.jackrabbit.core.query.PdfTextFilter" />
+  </SearchIndex>
+...
 
 For further information, see the javadocs for:
 org.apache.jackrabbit.core.query.TextFilter
-org.apache.jackrabbit.core.query.TextFilterService
 

Modified: incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/query/TextFilter.java
URL: http://svn.apache.org/viewcvs/incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/query/TextFilter.java?rev=330282&r1=330281&r2=330282&view=diff
==============================================================================
--- incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/query/TextFilter.java (original)
+++ incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/query/TextFilter.java Wed Nov  2 08:38:48 2005
@@ -24,8 +24,11 @@
 /**
  * Defines an interface for extracting text out of binary properties according
  * to their mime-type.
- *
- * @see TextFilterService
+ * </p>
+ * {@link TextFilter} implementations are asked if they can handle a certain
+ * mime type ({@link #canFilter(String)}) and if one of them returns
+ * <code>true</code> the text representation is created with
+ * {@link #doFilter(PropertyState, String)}
  */
 public interface TextFilter {
 

Modified: incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/query/lucene/NodeIndexer.java
URL: http://svn.apache.org/viewcvs/incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/query/lucene/NodeIndexer.java?rev=330282&r1=330281&r2=330282&view=diff
==============================================================================
--- incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/query/lucene/NodeIndexer.java (original)
+++ incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/query/lucene/NodeIndexer.java Wed Nov  2 08:38:48 2005
@@ -18,7 +18,7 @@
 
 import org.apache.jackrabbit.core.NodeId;
 import org.apache.jackrabbit.core.PropertyId;
-import org.apache.jackrabbit.core.query.TextFilterService;
+import org.apache.jackrabbit.core.query.TextFilter;
 import org.apache.jackrabbit.core.state.ItemStateException;
 import org.apache.jackrabbit.core.state.ItemStateManager;
 import org.apache.jackrabbit.core.state.NoSuchItemStateException;
@@ -40,6 +40,8 @@
 import java.util.Iterator;
 import java.util.Map;
 import java.util.Set;
+import java.util.List;
+import java.util.Collections;
 
 /**
  * Creates a lucene <code>Document</code> object from a {@link javax.jcr.Node}.
@@ -68,18 +70,26 @@
     protected final NamespaceMappings mappings;
 
     /**
+     * List of text filters in use.
+     */
+    protected final List textFilters;
+
+    /**
      * Creates a new node indexer.
      *
      * @param node          the node state to index.
      * @param stateProvider the persistent item state manager to retrieve properties.
      * @param mappings      internal namespace mappings.
+     * @param textFilters   List of {@link org.apache.jackrabbit.core.query.TextFilter}s.
      */
     protected NodeIndexer(NodeState node,
                           ItemStateManager stateProvider,
-                          NamespaceMappings mappings) {
+                          NamespaceMappings mappings,
+                          List textFilters) {
         this.node = node;
         this.stateProvider = stateProvider;
         this.mappings = mappings;
+        this.textFilters = textFilters;
     }
 
     /**
@@ -88,15 +98,18 @@
      * @param node          the node state to index.
      * @param stateProvider the state provider to retrieve property values.
      * @param mappings      internal namespace mappings.
+     * @param textFilters   list of text filters to use for indexing binary
+     *                      properties.
      * @return the lucene Document.
      * @throws RepositoryException if an error occurs while reading property
      *                             values from the <code>ItemStateProvider</code>.
      */
     public static Document createDocument(NodeState node,
                                           ItemStateManager stateProvider,
-                                          NamespaceMappings mappings)
+                                          NamespaceMappings mappings,
+                                          List textFilters)
             throws RepositoryException {
-        NodeIndexer indexer = new NodeIndexer(node, stateProvider, mappings);
+        NodeIndexer indexer = new NodeIndexer(node, stateProvider, mappings, textFilters);
         return indexer.createDoc();
     }
 
@@ -240,8 +253,7 @@
      * <p/>
      * This implementation checks if this {@link #node} is of type nt:resource
      * and if that is the case, tries to extract text from the data atom using
-     * {@link TextFilterService}add a {@link FieldNames#FULLTEXT} field
-     * .
+     * the {@link #textFilters}.
      *
      * @param doc           The document to which to add the field
      * @param fieldName     The name of the field to add
@@ -268,9 +280,17 @@
                     encodingProp.getValues()[0].internalValue().toString();
                 }
 
-                Map fields = TextFilterService.extractText(dataProp,
-                        mimeTypeProp.getValues()[0].internalValue().toString(),
-                        encoding);
+                String mimeType = mimeTypeProp.getValues()[0].internalValue().toString();
+                Map fields = Collections.EMPTY_MAP;
+                for (Iterator it = textFilters.iterator(); it.hasNext(); ) {
+                    TextFilter filter = (TextFilter) it.next();
+                    // use the first filter that can handle the mimeType
+                    if (filter.canFilter(mimeType)) {
+                        fields = filter.doFilter(dataProp, encoding);
+                        break;
+                    }
+                }
+
                 for (Iterator it = fields.keySet().iterator(); it.hasNext();) {
                     String field = (String) it.next();
                     Reader r = (Reader) fields.get(field);

Modified: incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java
URL: http://svn.apache.org/viewcvs/incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java?rev=330282&r1=330281&r2=330282&view=diff
==============================================================================
--- incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java (original)
+++ incubator/jackrabbit/trunk/src/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java Wed Nov  2 08:38:48 2005
@@ -21,6 +21,7 @@
 import org.apache.jackrabbit.core.query.AbstractQueryHandler;
 import org.apache.jackrabbit.core.query.ExecutableQuery;
 import org.apache.jackrabbit.core.query.QueryHandlerContext;
+import org.apache.jackrabbit.core.query.TextFilter;
 import org.apache.jackrabbit.core.state.NodeState;
 import org.apache.jackrabbit.name.NoPrefixDeclaredException;
 import org.apache.jackrabbit.name.QName;
@@ -43,6 +44,9 @@
 import java.io.File;
 import java.util.Iterator;
 import java.util.List;
+import java.util.StringTokenizer;
+import java.util.ArrayList;
+import java.util.Collections;
 
 /**
  * Implements a {@link org.apache.jackrabbit.core.query.QueryHandler} using
@@ -69,6 +73,11 @@
     public static final int DEFAULT_MERGE_FACTOR = 10;
 
     /**
+     * Default text filters.
+     */
+    public static final String DEFAULT_TEXT_FILTERS = TextPlainTextFilter.class.getName();
+
+    /**
      * The actual index
      */
     private MultiIndex index;
@@ -79,6 +88,11 @@
     private Analyzer analyzer;
 
     /**
+     * List of {@link org.apache.jackrabbit.core.query.TextFilter} instances.
+     */
+    private List textFilters;
+
+    /**
      * The location of the search index.
      * <p/>
      * Note: This is a <b>mandatory</b> parameter!
@@ -149,6 +163,7 @@
      */
     public SearchIndex() {
         this.analyzer = new StandardAnalyzer(new String[]{});
+        setTextFilterClasses(DEFAULT_TEXT_FILTERS);
     }
 
     /**
@@ -333,6 +348,16 @@
     }
 
     /**
+     * Returns an unmodifiable list of {@link TextFilter} configured for
+     * this search index.
+     *
+     * @return unmodifiable list of text filters.
+     */
+    List getTextFilters() {
+        return textFilters;
+    }
+
+    /**
      * Returns the namespace mappings for the internal representation.
      * @return the namespace mappings for the internal representation.
      */
@@ -353,7 +378,7 @@
     protected Document createDocument(NodeState node, NamespaceMappings nsMappings)
             throws RepositoryException {
         return NodeIndexer.createDocument(node, getContext().getItemStateManager(),
-                nsMappings);
+                nsMappings, textFilters);
     }
 
     //--------------------------< properties >----------------------------------
@@ -542,5 +567,50 @@
 
     public int getCacheSize() {
         return cacheSize;
+    }
+
+    /**
+     * Sets a new set of text filter classes that are in use for indexing
+     * binary properties. The <code>filterClasses</code> must be a comma
+     * separated <code>String</code> of fully qualified class names implementing
+     * {@link org.apache.jackrabbit.core.query.TextFilter}. Each class must
+     * provide a default constructor.
+     * </p>
+     * Filter class names that cannot be resolved are skipped and a warn message
+     * is logged.
+     *
+     * @param filterClasses comma separated list of filter class names
+     */
+    public void setTextFilterClasses(String filterClasses) {
+        List filters = new ArrayList();
+        StringTokenizer tokenizer = new StringTokenizer(filterClasses, ", \t\n\r\f");
+        while (tokenizer.hasMoreTokens()) {
+            String className = tokenizer.nextToken();
+            try {
+                Class filterClass = Class.forName(className);
+                TextFilter filter = (TextFilter) filterClass.newInstance();
+                filters.add(filter);
+            } catch (Exception e) {
+                log.warn("Invalid TextFilter class: " + className, e);
+            }
+        }
+        textFilters = Collections.unmodifiableList(filters);
+    }
+
+    /**
+     * Returns the fully qualified class names of the text filter instances
+     * currently in use. The names are comma separated.
+     *
+     * @return class names of the text filters in use.
+     */
+    public String getTextFilterClasses() {
+        StringBuffer names = new StringBuffer();
+        String delim = "";
+        for (Iterator it = textFilters.iterator(); it.hasNext(); ) {
+            names.append(delim);
+            names.append(it.next().getClass().getName());
+            delim = ",";
+        }
+        return names.toString();
     }
 }



Mime
View raw message