jackrabbit-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From thomasg <thomasgascoi...@hotmail.com>
Subject Re: Restricting xpath query to document text
Date Mon, 20 Nov 2006 10:27:12 GMT

Sure, there's probably an obvious error or omission. This is the code:

package com.axxia.dms.indexing.jackrabbit;

import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;

import javax.jcr.RepositoryException;

import org.apache.jackrabbit.core.PropertyId;
import org.apache.jackrabbit.core.query.TextFilter;
import org.apache.jackrabbit.core.query.lucene.FieldNames;
import org.apache.jackrabbit.core.query.lucene.NamespaceMappings;
import org.apache.jackrabbit.core.query.lucene.NodeIndexer;
import org.apache.jackrabbit.core.state.ItemStateException;
import org.apache.jackrabbit.core.state.ItemStateManager;
import org.apache.jackrabbit.core.state.NodeState;
import org.apache.jackrabbit.core.state.PropertyState;
import org.apache.jackrabbit.name.QName;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;

import com.axxia.dms.indexing.util.IndexingUtil;

public class AxxiaJackrabbitNodeIndexer extends NodeIndexer
{
    /**
     * Creates a new node indexer.
     *
     * @param node          the node state to index.
     * @param stateProvider the persistent item state manager to retrieve
properties.
     * @param mappings      internal namespace mappings.
     * @param textFilters   List of {@link
org.apache.jackrabbit.core.query.TextFilter}s.
     */
    protected AxxiaJackrabbitNodeIndexer(NodeState node,
                          ItemStateManager stateProvider,
                          NamespaceMappings mappings,
                          List textFilters) {
        super(node, stateProvider, mappings, textFilters);
    }

    
    
    /**
     * Creates a lucene Document from a node.
     *
     * @param node          the node state to index.
     * @param stateProvider the state provider to retrieve property values.
     * @param mappings      internal namespace mappings.
     * @param textFilters   list of text filters to use for indexing binary
     *                      properties.
     * @return the lucene Document.
     * @throws RepositoryException if an error occurs while reading property
     *                             values from the
<code>ItemStateProvider</code>.
     */
    public static Document createDocument(NodeState node,
                                          ItemStateManager stateProvider,
                                          NamespaceMappings mappings,
                                          List textFilters)
            throws RepositoryException {
    	AxxiaJackrabbitNodeIndexer indexer = new
AxxiaJackrabbitNodeIndexer(node, stateProvider, mappings, textFilters);
        return indexer.createDoc();
    }    
    
    /**
     * Adds the binary value to the document as the named field.
     * <p/>
     * This implementation checks if this {@link #node} is of type
nt:resource
     * and if that is the case, tries to extract text from the data atom
using
     * the {@link #textFilters}.
     *
     * @param doc           The document to which to add the field
     * @param fieldName     The name of the field to add
     * @param internalValue The value for the field to add to the document.
     */
    protected void addBinaryValue(Document doc, String fieldName, Object
internalValue) {
    	
        // 'check' if node is of type nt:resource
        try {
            String jcrData = mappings.getPrefix(QName.NS_JCR_URI) + ":data";
            if (!jcrData.equals(fieldName)) {
                // don't know how to index
                return;
            }
            //NB node variabel is of type NodeState
            if (node.hasPropertyName(QName.JCR_MIMETYPE)) {
                PropertyState dataProp = (PropertyState)
stateProvider.getItemState(
                        new PropertyId(node.getNodeId(), QName.JCR_DATA));
                PropertyState mimeTypeProp =
                        (PropertyState) stateProvider.getItemState(
                                new PropertyId(node.getNodeId(),
QName.JCR_MIMETYPE));

                // jcr:encoding is not mandatory
                String encoding = null;
                if (node.hasPropertyName(QName.JCR_ENCODING)) {
                    PropertyState encodingProp =
                            (PropertyState) stateProvider.getItemState(
                                    new PropertyId(node.getNodeId(),
QName.JCR_ENCODING));
                    encoding =
encodingProp.getValues()[0].internalValue().toString();
                }

                
                

                String mimeType =
mimeTypeProp.getValues()[0].internalValue().toString();
                Map fields = Collections.EMPTY_MAP;
                for (Iterator it = textFilters.iterator(); it.hasNext();) {
                    TextFilter filter = (TextFilter) it.next();
                    // use the first filter that can handle the mimeType
                    if (filter.canFilter(mimeType)) {
                        fields = filter.doFilter(dataProp, encoding);
                        break;
                    }
                }

                for (Iterator it = fields.keySet().iterator();
it.hasNext();) {
                    String field = (String) it.next();
                    Reader r = (Reader) fields.get(field);
                    doc.add(Field.Text(field, r));
                }
                
            	//After obtaining the  map of fields returned by
            	//the text filter look for the Reader that was returned with
the key
            	//FieldNames.FULLTEXT. you then have to spool the reader into a
string
            	//value and call addStringValue(). 
                Reader fullTextReader = (Reader)
fields.get(FieldNames.FULLTEXT);
                if (fullTextReader != null)
                {
                	try
                	{                    
                	    String text = readerToString(fullTextReader);
                	    addStringValue(doc, fieldName, text.toLowerCase());
                	}
                	catch (IOException e)
                	{
                		//TODO Logging etc
                		e.printStackTrace();
                	}
                }
            }
        } catch (ItemStateException e) {
        	//TODO
            //log.warn("Exception while indexing binary property: " +
e.toString());
            //log.debug("Dump: ", e);
        } catch (RepositoryException e) {
        	//TODO
            //log.warn("Exception while indexing binary property: " +
e.toString());
            //log.debug("Dump: ", e);
        }
    }
    
   /**
    * Spools a reader object to string representation.
    * @param reader The reader to convert to a string.
    * @return String representation of the reader
    * @throws IOException
    */
    private String readerToString(Reader reader) throws IOException 
    {
	    int charValue = 0;
	    StringBuilder sb = new StringBuilder(2024);
	    
	    while ((charValue = reader.read()) != -1) {
	    	sb.append((char)charValue);
	    }
	    String result = sb.toString();
	    return result;
    }      
    
}
-- 
View this message in context: http://www.nabble.com/Restricting-xpath-query-to-document-text-tf1512215.html#a7440129
Sent from the Jackrabbit - Dev mailing list archive at Nabble.com.


Mime
View raw message