Return-Path: Delivered-To: apmail-jackrabbit-commits-archive@www.apache.org Received: (qmail 98144 invoked from network); 11 Sep 2008 07:52:41 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.2) by minotaur.apache.org with SMTP; 11 Sep 2008 07:52:41 -0000 Received: (qmail 62975 invoked by uid 500); 11 Sep 2008 07:52:38 -0000 Delivered-To: apmail-jackrabbit-commits-archive@jackrabbit.apache.org Received: (qmail 62935 invoked by uid 500); 11 Sep 2008 07:52:38 -0000 Mailing-List: contact commits-help@jackrabbit.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@jackrabbit.apache.org Delivered-To: mailing list commits@jackrabbit.apache.org Received: (qmail 62926 invoked by uid 99); 11 Sep 2008 07:52:38 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 11 Sep 2008 00:52:38 -0700 X-ASF-Spam-Status: No, hits=-2000.0 required=10.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 11 Sep 2008 07:51:39 +0000 Received: by eris.apache.org (Postfix, from userid 65534) id 8C99F238896D; Thu, 11 Sep 2008 00:51:41 -0700 (PDT) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r694164 - in /jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene: AbstractExcerpt.java AbstractIndex.java LazyTextExtractorField.java NodeIndexer.java SearchIndex.java Util.java Date: Thu, 11 Sep 2008 07:51:41 -0000 To: commits@jackrabbit.apache.org From: mreutegg@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20080911075141.8C99F238896D@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: mreutegg Date: Thu Sep 11 00:51:40 2008 New Revision: 694164 URL: http://svn.apache.org/viewvc?rev=694164&view=rev Log: JCR-1730: Background text extraction not possible when supportHighlighting is set true Added: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/LazyTextExtractorField.java (with props) Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractExcerpt.java jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractIndex.java jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/NodeIndexer.java jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/Util.java Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractExcerpt.java URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractExcerpt.java?rev=694164&r1=694163&r2=694164&view=diff ============================================================================== --- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractExcerpt.java (original) +++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractExcerpt.java Thu Sep 11 00:51:40 2008 @@ -26,7 +26,7 @@ import org.apache.lucene.index.TermPositionVector; import org.apache.lucene.index.TermVectorOffsetInfo; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; +import org.apache.lucene.document.Fieldable; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Token; import org.apache.jackrabbit.core.NodeId; @@ -98,7 +98,7 @@ } finally { tDocs.close(); } - Field[] fields = doc.getFields(FieldNames.FULLTEXT); + Fieldable[] fields = doc.getFieldables(FieldNames.FULLTEXT); if (fields == null) { log.debug("Fulltext field not stored, using {}", SimpleExcerptProvider.class.getName()); Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractIndex.java URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractIndex.java?rev=694164&r1=694163&r2=694164&view=diff ============================================================================== --- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractIndex.java (original) +++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/AbstractIndex.java Thu Sep 11 00:51:40 2008 @@ -23,6 +23,7 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.Fieldable; import org.apache.lucene.search.Similarity; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -417,8 +418,8 @@ Document copy = new Document(); Iterator fields = doc.getFields().iterator(); while (fields.hasNext()) { - Field f = (Field) fields.next(); - Field field = null; + Fieldable f = (Fieldable) fields.next(); + Fieldable field = null; Field.TermVector tv = getTermVectorParameter(f); Field.Store stored = getStoreParameter(f); Field.Index indexed = getIndexParameter(f); @@ -510,7 +511,7 @@ * @param f a lucene field. * @return the index parameter on f. */ - private Field.Index getIndexParameter(Field f) { + private Field.Index getIndexParameter(Fieldable f) { if (!f.isIndexed()) { return Field.Index.NO; } else if (f.isTokenized()) { @@ -526,7 +527,7 @@ * @param f a lucene field. * @return the store parameter on f. */ - private Field.Store getStoreParameter(Field f) { + private Field.Store getStoreParameter(Fieldable f) { if (f.isCompressed()) { return Field.Store.COMPRESS; } else if (f.isStored()) { @@ -542,7 +543,7 @@ * @param f a lucene field. * @return the term vector parameter on f. */ - private Field.TermVector getTermVectorParameter(Field f) { + private Field.TermVector getTermVectorParameter(Fieldable f) { if (f.isStorePositionWithTermVector() && f.isStoreOffsetWithTermVector()) { return Field.TermVector.WITH_POSITIONS_OFFSETS; } else if (f.isStorePositionWithTermVector()) { Added: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/LazyTextExtractorField.java URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/LazyTextExtractorField.java?rev=694164&view=auto ============================================================================== --- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/LazyTextExtractorField.java (added) +++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/LazyTextExtractorField.java Thu Sep 11 00:51:40 2008 @@ -0,0 +1,145 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.jackrabbit.core.query.lucene; + +import org.apache.lucene.document.AbstractField; +import org.apache.lucene.document.Field; +import org.apache.lucene.analysis.TokenStream; +import org.apache.commons.io.IOUtils; +import org.slf4j.LoggerFactory; +import org.slf4j.Logger; + +import java.io.Reader; +import java.io.IOException; + +/** + * LazyTextExtractorField implements a Lucene field with a String + * value that is lazily initialized from a given {@link Reader}. In addition + * this class provides a method to find out whether the purpose of the reader + * is to extract text and whether the extraction process is already finished. + * + * @see #isExtractorFinished() + */ +public class LazyTextExtractorField extends AbstractField { + + /** + * The serial version UID. + */ + private static final long serialVersionUID = -2707986404659820071L; + + /** + * The logger instance for this class. + */ + private static final Logger log = LoggerFactory.getLogger(LazyTextExtractorField.class); + + /** + * The reader from where to read the text extract. + */ + private final Reader reader; + + /** + * The extract as obtained lazily from {@link #reader}. + */ + private String extract; + + /** + * Creates a new LazyTextExtractorField with the given + * name. + * + * @param name the name of the field. + * @param reader the reader where to obtain the string from. + * @param store when set true the string value is stored in the + * index. + * @param withOffsets when set true a term vector with offsets + * is written into the index. + */ + public LazyTextExtractorField(String name, + Reader reader, + boolean store, + boolean withOffsets) { + super(name, + store ? Field.Store.YES : Field.Store.NO, + Field.Index.TOKENIZED, + withOffsets ? Field.TermVector.WITH_OFFSETS : Field.TermVector.NO); + this.reader = reader; + } + + /** + * @return the string value of this field. + */ + public String stringValue() { + if (extract == null) { + StringBuffer textExtract = new StringBuffer(); + char[] buffer = new char[1024]; + int len; + try { + while ((len = reader.read(buffer)) > -1) { + textExtract.append(buffer, 0, len); + } + } catch (IOException e) { + log.warn("Exception reading value for field: " + + e.getMessage()); + log.debug("Dump:", e); + } finally { + IOUtils.closeQuietly(reader); + } + extract = textExtract.toString(); + } + return extract; + } + + /** + * @return always null. + */ + public Reader readerValue() { + return null; + } + + /** + * @return always null. + */ + public byte[] binaryValue() { + return null; + } + + /** + * @return always null. + */ + public TokenStream tokenStreamValue() { + return null; + } + + /** + * @return true if the underlying reader is ready to provide + * extracted text. + */ + public boolean isExtractorFinished() { + if (reader instanceof TextExtractorReader) { + return ((TextExtractorReader) reader).isExtractorFinished(); + } + return true; + } + + /** + * Disposes this field and closes the underlying reader. + * + * @throws IOException if an error occurs while closing the reader. + */ + public void dispose() throws IOException { + reader.close(); + } +} Propchange: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/LazyTextExtractorField.java ------------------------------------------------------------------------------ svn:eol-style = native Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/NodeIndexer.java URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/NodeIndexer.java?rev=694164&r1=694163&r2=694164&view=diff ============================================================================== --- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/NodeIndexer.java (original) +++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/NodeIndexer.java Thu Sep 11 00:51:40 2008 @@ -16,7 +16,6 @@ */ package org.apache.jackrabbit.core.query.lucene; -import org.apache.commons.io.IOUtils; import org.apache.jackrabbit.core.PropertyId; import org.apache.jackrabbit.core.NodeId; import org.apache.jackrabbit.core.state.ItemStateException; @@ -37,6 +36,7 @@ import org.slf4j.LoggerFactory; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.Fieldable; import javax.jcr.NamespaceException; import javax.jcr.PropertyType; @@ -44,7 +44,6 @@ import java.io.InputStream; import java.io.Reader; -import java.io.IOException; import java.util.Calendar; import java.util.Iterator; import java.util.Set; @@ -249,7 +248,7 @@ // now add fields that are not used in excerpt (must go at the end) for (Iterator it = doNotUseInExcerpt.iterator(); it.hasNext(); ) { - doc.add((Field) it.next()); + doc.add((Fieldable) it.next()); } return doc; } @@ -761,26 +760,11 @@ * @param value the reader value. * @return a lucene field. */ - protected Field createFulltextField(Reader value) { + protected Fieldable createFulltextField(Reader value) { if (supportHighlighting) { - // need to create a string value - StringBuffer textExtract = new StringBuffer(); - char[] buffer = new char[1024]; - int len; - try { - while ((len = value.read(buffer)) > -1) { - textExtract.append(buffer, 0, len); - } - } catch (IOException e) { - log.warn("Exception reading value for fulltext field: " - + e.getMessage()); - log.debug("Dump:", e); - } finally { - IOUtils.closeQuietly(value); - } - return createFulltextField(textExtract.toString(), true, true); + return new LazyTextExtractorField(FieldNames.FULLTEXT, value, true, true); } else { - return new Field(FieldNames.FULLTEXT, value); + return new LazyTextExtractorField(FieldNames.FULLTEXT, value, false, false); } } Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java?rev=694164&r1=694163&r2=694164&view=diff ============================================================================== --- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java (original) +++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java Thu Sep 11 00:51:40 2008 @@ -55,6 +55,7 @@ import org.apache.lucene.search.Similarity; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.Fieldable; import org.apache.commons.collections.iterators.AbstractIteratorDecorator; import org.xml.sax.SAXException; import org.w3c.dom.Element; @@ -1111,7 +1112,7 @@ getNamespaceMappings(), index.getIndexFormatVersion()); // transfer fields to doc if there are any - Field[] fulltextFields = aDoc.getFields(FieldNames.FULLTEXT); + Fieldable[] fulltextFields = aDoc.getFieldables(FieldNames.FULLTEXT); if (fulltextFields != null) { for (int k = 0; k < fulltextFields.length; k++) { doc.add(fulltextFields[k]); Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/Util.java URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/Util.java?rev=694164&r1=694163&r2=694164&view=diff ============================================================================== --- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/Util.java (original) +++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/Util.java Thu Sep 11 00:51:40 2008 @@ -17,14 +17,14 @@ package org.apache.jackrabbit.core.query.lucene; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; +import org.apache.lucene.document.Fieldable; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; import org.apache.lucene.index.Term; import org.slf4j.LoggerFactory; import org.slf4j.Logger; -import java.util.Enumeration; +import java.util.Iterator; import java.io.IOException; /** @@ -44,15 +44,17 @@ * @param old the document to dispose. */ public static void disposeDocument(Document old) { - Enumeration e = old.fields(); - while (e.hasMoreElements()) { - Field f = (Field) e.nextElement(); - if (f.readerValue() != null) { - try { + for (Iterator it = old.getFields().iterator(); it.hasNext(); ) { + Fieldable f = (Fieldable) it.next(); + try { + if (f.readerValue() != null) { f.readerValue().close(); - } catch (IOException ex) { - log.warn("Exception while disposing index document: " + ex); + } else if (f instanceof LazyTextExtractorField) { + LazyTextExtractorField field = (LazyTextExtractorField) f; + field.dispose(); } + } catch (IOException ex) { + log.warn("Exception while disposing index document: " + ex); } } } @@ -66,12 +68,11 @@ * otherwise. */ public static boolean isDocumentReady(Document doc) { - Enumeration fields = doc.fields(); - while (fields.hasMoreElements()) { - Field f = (Field) fields.nextElement(); - if (f.readerValue() instanceof TextExtractorReader) { - TextExtractorReader r = (TextExtractorReader) f.readerValue(); - if (!r.isExtractorFinished()) { + for (Iterator it = doc.getFields().iterator(); it.hasNext(); ) { + Fieldable f = (Fieldable) it.next(); + if (f instanceof LazyTextExtractorField) { + LazyTextExtractorField field = (LazyTextExtractorField) f; + if (!field.isExtractorFinished()) { return false; } }