jackrabbit-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Marcel Reutegger <marcel.reuteg...@day.com>
Subject Re: Restricting xpath query to document text
Date Tue, 21 Nov 2006 08:34:43 GMT
The class looks ok, except that you shouldn't lower-case the text retrieved from 
the resource, or is there a specific reason why this is done?

Jackrabbit 1.2 will support the functions fn:lower-case() and fn:upper-case(), 
so there is no need to lower-case the text when it is indexed.

The query you mentioned will return nt:resource nodes with content that starts 
with 'comp':

//element(*, axxia:resource)[(jcr:like(@jcr:data, 'comp%'))]

Are you sure that the first word in the document starts with 'comp'?

regards
  marcel

thomasg wrote:
> Sure, there's probably an obvious error / omission. This is the code:
> 
> package com.axxia.dms.indexing.jackrabbit;
> 
> import java.io.IOException;
> import java.io.Reader;
> import java.io.StringReader;
> import java.util.Collections;
> import java.util.Iterator;
> import java.util.List;
> import java.util.Locale;
> import java.util.Map;
> 
> import javax.jcr.RepositoryException;
> 
> import org.apache.jackrabbit.core.PropertyId;
> import org.apache.jackrabbit.core.query.TextFilter;
> import org.apache.jackrabbit.core.query.lucene.FieldNames;
> import org.apache.jackrabbit.core.query.lucene.NamespaceMappings;
> import org.apache.jackrabbit.core.query.lucene.NodeIndexer;
> import org.apache.jackrabbit.core.state.ItemStateException;
> import org.apache.jackrabbit.core.state.ItemStateManager;
> import org.apache.jackrabbit.core.state.NodeState;
> import org.apache.jackrabbit.core.state.PropertyState;
> import org.apache.jackrabbit.name.QName;
> import org.apache.lucene.document.Document;
> import org.apache.lucene.document.Field;
> 
> import com.axxia.dms.indexing.util.IndexingUtil;
> 
> /**
>  * Node indexer that, besides adding the fields produced by the text
>  * filters, also indexes the extracted full text of a binary jcr:data
>  * property as a lower-cased string value under the property's own field
>  * name.
>  */
> public class AxxiaJackrabbitNodeIndexer extends NodeIndexer
> {
>     /** Size of the buffer used when spooling a reader into a string. */
>     private static final int SPOOL_BUFFER_SIZE = 2048;
> 
>     /**
>      * Creates a new node indexer.
>      *
>      * @param node          the node state to index.
>      * @param stateProvider the persistent item state manager to retrieve
>      *                      properties.
>      * @param mappings      internal namespace mappings.
>      * @param textFilters   List of
>      *                      {@link org.apache.jackrabbit.core.query.TextFilter}s.
>      */
>     protected AxxiaJackrabbitNodeIndexer(NodeState node,
>                           ItemStateManager stateProvider,
>                           NamespaceMappings mappings,
>                           List textFilters) {
>         super(node, stateProvider, mappings, textFilters);
>     }
> 
>     /**
>      * Creates a lucene Document from a node.
>      *
>      * @param node          the node state to index.
>      * @param stateProvider the state provider to retrieve property values.
>      * @param mappings      internal namespace mappings.
>      * @param textFilters   list of text filters to use for indexing binary
>      *                      properties.
>      * @return the lucene Document.
>      * @throws RepositoryException if an error occurs while reading property
>      *                             values from the
>      *                             <code>ItemStateProvider</code>.
>      */
>     public static Document createDocument(NodeState node,
>                                           ItemStateManager stateProvider,
>                                           NamespaceMappings mappings,
>                                           List textFilters)
>             throws RepositoryException {
>         AxxiaJackrabbitNodeIndexer indexer =
>                 new AxxiaJackrabbitNodeIndexer(node, stateProvider, mappings, textFilters);
>         return indexer.createDoc();
>     }
> 
>     /**
>      * Adds the binary value to the document as the named field.
>      * <p/>
>      * This implementation only handles the jcr:data field of a node that
>      * has a jcr:mimeType property. It runs the first text filter that
>      * accepts the mime type and adds every reader the filter returns as a
>      * text field. The reader returned under {@link FieldNames#FULLTEXT} is
>      * spooled into a string first, so the same text can be indexed both as
>      * the full text field and, lower-cased, as a string value under
>      * <code>fieldName</code>.
>      * <p/>
>      * Errors while reading item states or filtering are swallowed, so a
>      * failure here leaves the binary value unindexed instead of aborting
>      * the indexing of the node.
>      *
>      * @param doc           The document to which to add the field
>      * @param fieldName     The name of the field to add
>      * @param internalValue The value for the field to add to the document
>      *                      (not used; the data is re-read from the item
>      *                      state manager).
>      */
>     protected void addBinaryValue(Document doc, String fieldName,
>                                   Object internalValue) {
>         // 'check' if node is of type nt:resource
>         try {
>             String jcrData = mappings.getPrefix(QName.NS_JCR_URI) + ":data";
>             if (!jcrData.equals(fieldName)) {
>                 // don't know how to index
>                 return;
>             }
>             // NB: the node variable is of type NodeState
>             if (!node.hasPropertyName(QName.JCR_MIMETYPE)) {
>                 return;
>             }
> 
>             PropertyState dataProp = (PropertyState) stateProvider.getItemState(
>                     new PropertyId(node.getNodeId(), QName.JCR_DATA));
>             PropertyState mimeTypeProp = (PropertyState) stateProvider.getItemState(
>                     new PropertyId(node.getNodeId(), QName.JCR_MIMETYPE));
> 
>             // jcr:encoding is not mandatory
>             String encoding = null;
>             if (node.hasPropertyName(QName.JCR_ENCODING)) {
>                 PropertyState encodingProp = (PropertyState) stateProvider.getItemState(
>                         new PropertyId(node.getNodeId(), QName.JCR_ENCODING));
>                 encoding = encodingProp.getValues()[0].internalValue().toString();
>             }
> 
>             String mimeType = mimeTypeProp.getValues()[0].internalValue().toString();
>             Map fields = Collections.EMPTY_MAP;
>             for (Iterator it = textFilters.iterator(); it.hasNext();) {
>                 TextFilter filter = (TextFilter) it.next();
>                 // use the first filter that can handle the mimeType
>                 if (filter.canFilter(mimeType)) {
>                     fields = filter.doFilter(dataProp, encoding);
>                     break;
>                 }
>             }
> 
>             for (Iterator it = fields.entrySet().iterator(); it.hasNext();) {
>                 Map.Entry entry = (Map.Entry) it.next();
>                 String field = (String) entry.getKey();
>                 Reader r = (Reader) entry.getValue();
>                 if (FieldNames.FULLTEXT.equals(field)) {
>                     // A reader can only be consumed once: handing it to the
>                     // document and then spooling it would leave the full
>                     // text field with nothing left to read at index time.
>                     addFullText(doc, fieldName, r);
>                 } else {
>                     doc.add(Field.Text(field, r));
>                 }
>             }
>         } catch (ItemStateException e) {
>             // TODO log "Exception while indexing binary property" once a
>             // logger is available; indexing the binary value is best-effort
>         } catch (RepositoryException e) {
>             // TODO log "Exception while indexing binary property" once a
>             // logger is available; indexing the binary value is best-effort
>         }
>     }
> 
>     /**
>      * Spools the full text reader and indexes its content twice: as the
>      * {@link FieldNames#FULLTEXT} text field and, lower-cased, as a string
>      * value under <code>fieldName</code>.
>      *
>      * @param doc            The document to add the fields to
>      * @param fieldName      The name of the string field to add
>      * @param fullTextReader The reader returned by the text filter for the
>      *                       full text; nothing is added when it is
>      *                       <code>null</code>.
>      */
>     private void addFullText(Document doc, String fieldName,
>                              Reader fullTextReader) {
>         if (fullTextReader == null) {
>             return;
>         }
>         try {
>             String text = readerToString(fullTextReader);
>             doc.add(Field.Text(FieldNames.FULLTEXT, new StringReader(text)));
>             // NOTE(review): lower-casing is kept so existing queries keep
>             // matching; the locale is fixed so the indexed form does not
>             // depend on the platform default locale.
>             addStringValue(doc, fieldName, text.toLowerCase(Locale.ENGLISH));
>         } catch (IOException e) {
>             // TODO replace with proper logging once a logger is available
>             e.printStackTrace();
>         }
>     }
> 
>     /**
>      * Spools a reader object to its string representation and closes it.
>      *
>      * @param reader The reader to convert to a string.
>      * @return String with all characters read from the reader
>      * @throws IOException if reading from or closing the reader fails
>      */
>     private String readerToString(Reader reader) throws IOException
>     {
>         StringBuilder sb = new StringBuilder(SPOOL_BUFFER_SIZE);
>         char[] buffer = new char[SPOOL_BUFFER_SIZE];
>         try {
>             int read;
>             while ((read = reader.read(buffer)) != -1) {
>                 sb.append(buffer, 0, read);
>             }
>         } finally {
>             reader.close();
>         }
>         return sb.toString();
>     }
> 
> }


Mime
View raw message