A title

Return-Path: Delivered-To: apmail-xml-forrest-dev-archive@www.apache.org Received: (qmail 64087 invoked from network); 12 Sep 2003 19:07:33 -0000 Received: from daedalus.apache.org (HELO mail.apache.org) (208.185.179.12) by minotaur-2.apache.org with SMTP; 12 Sep 2003 19:07:33 -0000 Received: (qmail 96163 invoked by uid 500); 12 Sep 2003 19:07:23 -0000 Delivered-To: apmail-xml-forrest-dev-archive@xml.apache.org Received: (qmail 96134 invoked by uid 500); 12 Sep 2003 19:07:22 -0000 Mailing-List: contact forrest-cvs-help@xml.apache.org; run by ezmlm Precedence: bulk list-help: list-unsubscribe: list-post: Reply-To: forrest-dev@xml.apache.org Delivered-To: mailing list forrest-cvs@xml.apache.org Received: (qmail 96111 invoked from network); 12 Sep 2003 19:07:22 -0000 Received: from unknown (HELO minotaur.apache.org) (209.237.227.194) by daedalus.apache.org with SMTP; 12 Sep 2003 19:07:22 -0000 Received: (qmail 64077 invoked by uid 1699); 12 Sep 2003 19:07:31 -0000 Date: 12 Sep 2003 19:07:31 -0000 Message-ID: <20030912190731.64076.qmail@minotaur.apache.org> From: cheche@apache.org To: xml-forrest-cvs@apache.org Subject: cvs commit: xml-forrest/src/scratchpad/src/java/org/apache/forrest/search ForrestDocument.java ForrestDocumentSAXParser.java ForrestIndexer.java ForrestSearchRenderer.java ForrestSearchServlet.java ForrestSearcher.java X-Spam-Rating: daedalus.apache.org 1.6.2 0/1000/N X-Spam-Rating: minotaur-2.apache.org 1.6.2 0/1000/N cheche 2003/09/12 12:07:31 Modified: . 
status.xml src/resources/conf web.xml src/resources/forrest-shbat forrest.build.xml src/resources/fresh-site/src/documentation skinconf.xml src/resources/schema/relaxng skinconf.rnc src/resources/skins/common/xslt/html document2html.xsl src/resources/skins/forrest-site/xslt/html site2xhtml.xsl Added: src/scratchpad/src/java/org/apache/forrest/search ForrestDocument.java ForrestDocumentSAXParser.java ForrestIndexer.java ForrestSearchRenderer.java ForrestSearchServlet.java ForrestSearcher.java Log: Added first version of Lucene integrated within Forrest. PR: FOR-9 Submitted by: Ramón Prades rprades@porcelanosa.com Revision Changes Path 1.210 +14 -2 xml-forrest/status.xml Index: status.xml =================================================================== RCS file: /home/cvs/xml-forrest/status.xml,v retrieving revision 1.209 retrieving revision 1.210 diff -u -r1.209 -r1.210 --- status.xml 12 Sep 2003 15:53:20 -0000 1.209 +++ status.xml 12 Sep 2003 19:07:30 -0000 1.210 @@ -24,6 +24,18 @@ + + + Integrate Lucene on Forrest. For more info + + + adding @label support for notes and warning. + + + Added XSP support. + + New and old resolver.jars in the classpath produces reflection error. @@ -90,7 +102,7 @@ attribute from entries in site.xml or book.xml + due-to="Ramón Prades" due-to-email="rprades@porcelanosa.com" > The generated Tables of Contents can be configured to show a certain number of section levels. Default is two levels. Level 0 means show no ToC at all. See the fresh-site skinconf.xml for example usage. 
1.7 +21 -1 xml-forrest/src/resources/conf/web.xml Index: web.xml =================================================================== RCS file: /home/cvs/xml-forrest/src/resources/conf/web.xml,v retrieving revision 1.6 retrieving revision 1.7 diff -u -r1.6 -r1.7 --- web.xml 22 Jun 2003 12:31:47 -0000 1.6 +++ web.xml 12 Sep 2003 19:07:30 -0000 1.7 @@ -305,8 +305,28 @@ 1 + + + SearchServlet + org.apache.forrest.search.ForrestSearchServlet + + project-skin + @skin@ + + + + - + + + SearchServlet + /search.cmd + + @@ -443,7 +444,7 @@ - + @@ -453,21 +454,21 @@ - - - @@ -475,7 +476,7 @@ - @@ -484,26 +485,26 @@ - - - + - + @@ -512,12 +513,12 @@ - + @@ -650,7 +651,7 @@ | please ensure they keep in synch. --> - @@ -660,13 +661,13 @@ - - @@ -690,7 +691,24 @@ - + + + + + + + + + + + + @@ -711,7 +729,8 @@ - + @@ -733,6 +752,11 @@ + + + + + --------------------------------- Webapp generated in ${project.webapp} @@ -1020,7 +1044,7 @@ This translates to: copy all docs that are not in the Forrest context directory, or that are, but are different. --> - + 1.16 +4 -1 xml-forrest/src/resources/fresh-site/src/documentation/skinconf.xml Index: skinconf.xml =================================================================== RCS file: /home/cvs/xml-forrest/src/resources/fresh-site/src/documentation/skinconf.xml,v retrieving revision 1.15 retrieving revision 1.16 diff -u -r1.15 -r1.16 --- skinconf.xml 7 Sep 2003 12:30:36 -0000 1.15 +++ skinconf.xml 12 Sep 2003 19:07:30 -0000 1.16 @@ -9,7 +9,7 @@ - @@ -18,6 +18,7 @@ + @@ -55,6 +56,8 @@ ]> + + false

false

1.7 +2 -1 xml-forrest/src/resources/schema/relaxng/skinconf.rnc Index: skinconf.rnc =================================================================== RCS file: /home/cvs/xml-forrest/src/resources/schema/relaxng/skinconf.rnc,v retrieving revision 1.6 retrieving revision 1.7 diff -u -r1.6 -r1.7 --- skinconf.rnc 2 Sep 2003 13:13:21 -0000 1.6 +++ skinconf.rnc 12 Sep 2003 19:07:30 -0000 1.7 @@ -6,6 +6,7 @@ skinconfig = element skinconfig { ( + element disable-lucene {xsd:boolean}?, # Disable lucene search feature. 'true' or 'false' element disable-search {xsd:boolean}?, # Disable search feature. 'true' or 'false' element disable-print-link{xsd:boolean}?, # Disable print link feature. 'true' or 'false' element disable-pdf-link{xsd:boolean}?, # Disable print link feature. 'true' or 'false' @@ -23,7 +24,7 @@ element group-logo {text}?, # Eg images/group-logo.gif element host-url {xsd:anyURI}?, # Eg Sourceforge URL element host-logo {text}?, - element year {text}?, # Used in Copyright + element year {text}?, # Used in Copyright element vendor {text}?, # Used in Copyright trail?, # 'Breadcrumbs' trail in skins that support it element toc {(attribute level {text})}?, # toc generation 1.33 +13 -8 xml-forrest/src/resources/skins/common/xslt/html/document2html.xsl Index: document2html.xsl =================================================================== RCS file: /home/cvs/xml-forrest/src/resources/skins/common/xslt/html/document2html.xsl,v retrieving revision 1.32 retrieving revision 1.33 diff -u -r1.32 -r1.33 --- document2html.xsl 12 Sep 2003 10:25:55 -0000 1.32 +++ document2html.xsl 12 Sep 2003 19:07:31 -0000 1.33 @@ -39,7 +39,8 @@ 2 - + + @@ -129,22 +130,26 @@ - - + + + PDF

PDF - + + - - + + + xml

xml - + + 1.23 +62 -12 xml-forrest/src/resources/skins/forrest-site/xslt/html/site2xhtml.xsl Index: site2xhtml.xsl =================================================================== RCS file: /home/cvs/xml-forrest/src/resources/skins/forrest-site/xslt/html/site2xhtml.xsl,v retrieving revision 1.22 retrieving revision 1.23 diff -u -r1.22 -r1.23 --- site2xhtml.xsl 2 Sep 2003 13:13:21 -0000 1.22 +++ site2xhtml.xsl 12 Sep 2003 19:07:31 -0000 1.23 @@ -83,10 +83,59 @@ ================= end Project Logo ================== ================= start Search ================== + + + + + true + false + + + + + false + true + false + + + - + + + + + +

@@ -98,15 +147,16 @@

- -
- - the site - - - +
+ + the

site +

+ + + 1.1 xml-forrest/src/scratchpad/src/java/org/apache/forrest/search/ForrestDocument.java Index: ForrestDocument.java =================================================================== /* * The Apache Software License, Version 1.1 * * * Copyright (c) 2001, 2002 The Apache Software Foundation. All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * 3. The end-user documentation included with the redistribution, * if any, must include the following acknowledgment: * "This product includes software developed by the * Apache Software Foundation (http://www.apache.org/)." * Alternately, this acknowledgment may appear in the software itself, * if and wherever such third-party acknowledgments normally appear. * * 4. The names "Apache Forrest" and "Apache Software Foundation" must * not be used to endorse or promote products derived from this * software without prior written permission. For written * permission, please contact apache@apache.org. * * 5. Products derived from this software may not be called "Apache", * nor may "Apache" appear in their name, without prior written * permission of the Apache Software Foundation. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * ==================================================================== * * This software consists of voluntary contributions made by many * individuals on behalf of the Apache Software Foundation and was * originally based on software copyright (c) 1999, International * Business Machines, Inc., http://www.apache.org. For more * information on the Apache Software Foundation, please see * . */ package org.apache.forrest.search; import java.io.File; import java.util.HashMap; import org.apache.lucene.document.*; /** * Utility class to make Lucene Documents from Forrest Documents * @author Ramon Prades [RPR] * @version $Id: ForrestDocument.java,v 1.1 2003/09/12 19:07:31 cheche Exp $ */ public class ForrestDocument { /** * Makes the Lucene document asking the parser to extract * the relevant information. 
*/ public static Document document(File file) { // Instantiate a parser for this file Document doc = null; ForrestDocumentSAXParser parser = new ForrestDocumentSAXParser(); try { HashMap results = parser.parseDocument(file); doc = processInfo(file, results); } catch (Exception ex) { // Not a forrest doc } return doc; } /** * Process the results returned from the parser and creates the * Lucene document */ private static Document processInfo(File file, HashMap results) { Document doc = new Document(); // Get info String docTitle = (String) getFromResults("title", results); String docSummary = (String) getFromResults("abstract", results); String docAuthor = (String) getFromResults("author", results); String docContents = (String) getFromResults("body", results); // Index and store title and summary doc.add(Field.Text("title", docTitle)); doc.add(Field.Text("summary", docSummary)); doc.add(Field.Text("author", docAuthor)); // Index but don't store contents doc.add(Field.UnStored("contents", docTitle + " " + docSummary + " " + docContents)); return doc; } // document /* * Utility method to extract a key from a hashmap */ private static Object getFromResults(String key, HashMap results) { if (results.containsKey(key)) { return results.get(key); } else { return ""; } } // getFromResults } } // Class ForrestDocument 1.1 xml-forrest/src/scratchpad/src/java/org/apache/forrest/search/ForrestDocumentSAXParser.java Index: ForrestDocumentSAXParser.java =================================================================== /* * The Apache Software License, Version 1.1 * * * Copyright (c) 2001, 2002 The Apache Software Foundation. All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * 3. The end-user documentation included with the redistribution, * if any, must include the following acknowledgment: * "This product includes software developed by the * Apache Software Foundation (http://www.apache.org/)." * Alternately, this acknowledgment may appear in the software itself, * if and wherever such third-party acknowledgments normally appear. * * 4. The names "Apache Forrest" and "Apache Software Foundation" must * not be used to endorse or promote products derived from this * software without prior written permission. For written * permission, please contact apache@apache.org. * * 5. Products derived from this software may not be called "Apache", * nor may "Apache" appear in their name, without prior written * permission of the Apache Software Foundation. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* ==================================================================== * * This software consists of voluntary contributions made by many * individuals on behalf of the Apache Software Foundation and was * originally based on software copyright (c) 1999, International * Business Machines, Inc., http://www.apache.org. For more * information on the Apache Software Foundation, please see * . */ package org.apache.forrest.search; import org.apache.xerces.parsers.SAXParser; import java.io.*; import org.xml.sax.*; import org.xml.sax.helpers.*; import org.xml.sax.ext.LexicalHandler; import java.util.Vector; import java.util.HashMap; /** *

Parses a Forrest Document and extracts the information to use when * generating Lucene indexes.

The parser scans the document searching for a number of tags. When a match * is found, it buffers all the text contained in the full subtree. When the parser * is buffering text, it ignores all tags and just keeps the text.

As an example consider the following document:


   *    * <document>
   * <header>
   * <title>The title</title>
   * <abstract>An example</abstract>
   * </header>
   * <body>
   * <section>
   * <title>The Section</title>
   * <p>Some text with <strong>embedded</strong> tags</p>
   * </section>
   * </body>
   * </document>
   * 
   *

If the parser is applied to body the result will be * "The Section Some text with embedded tags". This permits the parser to generate * fields with the full content of the body, so it can be indexed and searched later.

If the parser now checks for title and body the * results will be "The title" for title and the same as above for body. * This demonstrates that the parser ignores the title inside the * body, since while it is buffering body it * ignores all the tags. This feature is useful to capture information inside * the header.

This is all that is needed to pass the information to Lucene, and by using this * algorithm the class stays quite simple.

* *

(Hope my English is not too bad ;-)

* * @author Ramon Prades [RPR] * @version $Id: ForrestDocumentSAXParser.java,v 1.1 2003/09/12 19:07:31 cheche Exp $ */ public class ForrestDocumentSAXParser extends DefaultHandler { // Parser configuration constants static final String DEFAULT_PARSER_NAME = "org.apache.xerces.parsers.SAXParser"; static final String VALIDATION_FEATURE_ID = "http://xml.org/sax/features/validation"; static final String EXTERNAL_DTD_FEATURE_ID = "http://apache.org/xml/features/nonvalidating/load-external-dtd"; // List with the tags to capture static final String[] FORREST_HEADER_INDEXERS = {"title","abstract","body"}; static String docAuthors = ""; // Control variables XMLReader parser = null; HashMap results = null; String currentElement = ""; StringBuffer textBuffer = new StringBuffer(); Vector tags = null; boolean buffering = false; boolean isForrest = false; /** * Constructor. Initiliazes the parser. */ public ForrestDocumentSAXParser() { super(); // Load the list of interesting tags in a vector for later processing tags = new Vector(); for (int i=0; i0) { separator = ";"; } docAuthors += separator + attributes.getValue("name"); } else if (tags.contains(localName)) { currentElement = localName; buffering = true; } } } // startElement /** * End of element detected. If the closing element is the one the parser is * bufferig, store the text, otherwise don't do anything */ public void endElement(String uri, String localName, String qName) { if (buffering) { if (localName.equals(currentElement)) { buffering = false; results.put(currentElement, textBuffer.toString()); textBuffer.setLength(0); // reset buffer } else { // add an extra space to avoid the following case: // //

// A title //

A paragraph

// // Unless an extra space is added the result would be: "A titleA paragraph" textBuffer.append(' '); } } } // endElement /** * Buffer the parsed character when "doCapture" tells so. */ public void characters(char[] cbuf, int start, int len) { if (buffering) { textBuffer.append(cbuf, start, len); } } // characters } // ForrestDocumentSAXParser 1.1 xml-forrest/src/scratchpad/src/java/org/apache/forrest/search/ForrestIndexer.java Index: ForrestIndexer.java =================================================================== /* * The Apache Software License, Version 1.1 * * * Copyright (c) 2001, 2002 The Apache Software Foundation. All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * 3. The end-user documentation included with the redistribution, * if any, must include the following acknowledgment: * "This product includes software developed by the * Apache Software Foundation (http://www.apache.org/)." * Alternately, this acknowledgment may appear in the software itself, * if and wherever such third-party acknowledgments normally appear. * * 4. The names "Apache Forrest" and "Apache Software Foundation" must * not be used to endorse or promote products derived from this * software without prior written permission. For written * permission, please contact apache@apache.org. * * 5. Products derived from this software may not be called "Apache", * nor may "Apache" appear in their name, without prior written * permission of the Apache Software Foundation. 
* * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * ==================================================================== * * This software consists of voluntary contributions made by many * individuals on behalf of the Apache Software Foundation and was * originally based on software copyright (c) 1999, International * Business Machines, Inc., http://www.apache.org. For more * information on the Apache Software Foundation, please see * . */ package org.apache.forrest.search; import java.io.*; import java.net.*; import java.util.*; import java.lang.StringBuffer; import org.apache.lucene.analysis.standard.*; import org.apache.lucene.document.*; import org.apache.lucene.index.*; import org.apache.lucene.util.Arrays; /** *

Indexes all xml forrest documents below a given directory.

Parameters:

* -index index_directory Directory where * the index is to be created *
* root_directory forrest 'xdocs' directory *

Current Limitations/todo

This version indexes Forrest XML documents only. Would be nice if PDF and * HTML could be added.
FAQ and TODO aren't indexed. Add support for that.
A full index is created every time. Add some form of incremental indexing.
Could be a good idea to create a list of "reserved" filenames (i.e. book.xml * or status.xml) and force the indexer to skip them.

] "; // Some vars private static IndexReader reader; // Existing index private static IndexWriter writer; // New index being built private static String rootPath = ""; /** * Main method. See parametres at class javadoc. */ public static void main(String[] argv) { try { String index = ""; boolean create = true; File root = null; if (argv.length == 0) { System.err.println("Usage: " + USAGE); return; } // Get parametres from args for (int i = 0; i < argv.length; i++) { if (argv[i].equals("-index")) { // parse -index option index = argv[++i]; } else if (i != argv.length - 1) { System.err.println("Usage: " + USAGE); return; } else { root = new File(argv[i]); } } // Debugging // index = "C:/dev/uimlsite/build/webapp/index"; // root = new File("C:/dev/uimlsite/src/documentation/content/xdocs"); // Print banner System.out.println(DIVIDER); System.out.println(BANNER); System.out.println(COPYRIGHT); System.out.println(DIVIDER); System.out.println(""); rootPath = root.getPath().trim(); System.out.println("Source Directory: " + rootPath); System.out.println("Index Directory: " + index); System.out.println(""); Date start = new Date(); writer = new IndexWriter(index, new StandardAnalyzer(), create); writer.maxFieldLength = 1000000; indexDocs(root); // add new docs System.out.print("Index created! - Total milliseconds "); System.out.println(new Date().getTime() - start.getTime()); System.out.println(""); System.out.println("Optimizing index..."); writer.optimize(); writer.close(); System.out.print("Index optimized! 
- Total milliseconds "); System.out.println(new Date().getTime() - start.getTime()); } catch (Exception e) { System.err.println(" Exception in " + e.getClass() + "\n with message: " + e.getMessage()); e.printStackTrace(); } } // main /* * Create the index */ private static void indexDocs(File file) { if (file.isDirectory()) { // if a directory String[] files = file.list(); // list its files Arrays.sort(files); // sort the files for (int i = 0; i < files.length; i++) { // recursively index them indexDocs(new File(file, files[i])); } } else if (file.getPath().endsWith(".xml")) { // index .txt files String filePath = getRelativePath(file.getPath(), rootPath); System.out.print("Indexing ... " + filePath); Document doc = ForrestDocument.document(file); if (doc == null) { System.out.println(" [Ignored]"); } else { try { // Add last modified and path doc.add(Field.Keyword("modified", new Long(file.lastModified()).toString())); doc.add(Field.Keyword("path", filePath)); writer.addDocument(doc); // add docs unconditionally } catch (IOException ex) { System.out.println(" [Error: " + ex.getMessage() + "]"); } System.out.println(" [Done]"); } } } // indexDocs /* * Utility method to calculate the relative path of a file */ private static String getRelativePath(String filePath, String rootPath) { return filePath.substring(rootPath.length()+1); } // getRelativePath } // Class ForrestLuceneIndexer 1.1 xml-forrest/src/scratchpad/src/java/org/apache/forrest/search/ForrestSearchRenderer.java Index: ForrestSearchRenderer.java =================================================================== /* * The Apache Software License, Version 1.1 * * * Copyright (c) 2001, 2002 The Apache Software Foundation. All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * 3. The end-user documentation included with the redistribution, * if any, must include the following acknowledgment: * "This product includes software developed by the * Apache Software Foundation (http://www.apache.org/)." * Alternately, this acknowledgment may appear in the software itself, * if and wherever such third-party acknowledgments normally appear. * * 4. The names "Apache Forrest" and "Apache Software Foundation" must * not be used to endorse or promote products derived from this * software without prior written permission. For written * permission, please contact apache@apache.org. * * 5. Products derived from this software may not be called "Apache", * nor may "Apache" appear in their name, without prior written * permission of the Apache Software Foundation. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* ==================================================================== * * This software consists of voluntary contributions made by many * individuals on behalf of the Apache Software Foundation and was * originally based on software copyright (c) 1999, International * Business Machines, Inc., http://www.apache.org. For more * information on the Apache Software Foundation, please see * . */ package org.apache.forrest.search; import org.w3c.dom.*; import javax.xml.transform.*; import javax.xml.transform.dom.*; import java.io.*; import org.apache.xerces.dom.*; import javax.xml.transform.sax.*; import javax.xml.transform.stream.StreamSource; import org.xml.sax.XMLReader; import org.xml.sax.helpers.XMLReaderFactory; /** *

Title:

Description:

Company:

* @author not attributable * @version 1.0 */ public class ForrestSearchRenderer { Transformer transformer = null; Transformer transformer2 = null; private String skinconf = ""; private static final String doc2html = "document2html.xsl"; private static final String site2xhtml = "site2xhtml.xsl"; public ForrestSearchRenderer(String rootPath, String skin) { String fullPath = rootPath + "/skins/" + skin + "/xslt/html/"; // Instantiate a TransformerFactory. TransformerFactory tFactory = TransformerFactory.newInstance(); try { skinconf = rootPath + "/skinconf.xml"; transformer = tFactory.newTransformer (new javax.xml.transform.stream.StreamSource(fullPath + doc2html)); transformer.setParameter("config-file", skinconf); transformer.setParameter("notoc", "true"); transformer.setParameter("dynamic-page", "true"); transformer2 = tFactory.newTransformer (new javax.xml.transform.stream.StreamSource(fullPath + site2xhtml)); transformer2.setParameter("config-file", skinconf); } catch (TransformerConfigurationException ex) { System.err.println("Transformer Config exception"); } } // Constructor public String render(Document dom) { String page = null; try { Document doc = new DocumentImpl(); Element root = doc.createElement("site"); DOMResult domResult = new DOMResult(root); transformer.transform(new DOMSource(dom.getDocumentElement()), domResult); OutputStream result = new ByteArrayOutputStream(); javax.xml.transform.stream.StreamResult theResult = new javax.xml.transform.stream.StreamResult(result); transformer2.transform(new DOMSource(domResult.getNode()), theResult); page = result.toString(); } catch (TransformerException ex) { ex.printStackTrace(); } return page; } // render } 1.1 xml-forrest/src/scratchpad/src/java/org/apache/forrest/search/ForrestSearchServlet.java Index: ForrestSearchServlet.java =================================================================== /* * The Apache Software License, Version 1.1 * * * Copyright (c) 2001, 2002 The Apache Software Foundation. 
All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * 3. The end-user documentation included with the redistribution, * if any, must include the following acknowledgment: * "This product includes software developed by the * Apache Software Foundation (http://www.apache.org/)." * Alternately, this acknowledgment may appear in the software itself, * if and wherever such third-party acknowledgments normally appear. * * 4. The names "Apache Forrest" and "Apache Software Foundation" must * not be used to endorse or promote products derived from this * software without prior written permission. For written * permission, please contact apache@apache.org. * * 5. Products derived from this software may not be called "Apache", * nor may "Apache" appear in their name, without prior written * permission of the Apache Software Foundation. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * ==================================================================== * * This software consists of voluntary contributions made by many * individuals on behalf of the Apache Software Foundation and was * originally based on software copyright (c) 1999, International * Business Machines, Inc., http://www.apache.org. For more * information on the Apache Software Foundation, please see * . */ package org.apache.forrest.search; import javax.servlet.*; import javax.servlet.http.*; import java.io.*; import java.util.*; import org.apache.xalan.transformer.*; import org.w3c.dom.*; import javax.xml.transform.*; import javax.xml.transform.dom.*; import javax.xml.transform.stream.*; import javax.xml.transform.*; import java.net.*; /** *

This servlet processes all search requests inside a Forrest site.

* @author Ramon Prades [RPR] * @version $Id: ForrestSearchServlet.java,v 1.1 2003/09/12 19:07:31 cheche Exp $ */ public class ForrestSearchServlet extends HttpServlet { private static final String CONTENT_TYPE = "text/html"; private ForrestSearcher searcher = null; private static ForrestSearchRenderer renderer = null; private String servletPath = ""; private String indexDir = ""; // Full path to lucene index directory private String skin = ""; // Skin configured private String searchPage = "/search.html"; private static StringBuffer cache = null; /** * Prepares the servlet * @throws ServletException */ public void init() throws ServletException { servletPath = this.getServletContext().getRealPath(""); // FIXME: indexDir is hardcoded indexDir = servletPath + "/lucene-index"; searcher = new ForrestSearcher(); String skin = this.getInitParameter("project-skin"); renderer = new ForrestSearchRenderer(servletPath, skin); } // init /** * Process the HTTP Get request */ public void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { response.setContentType("text/html"); PrintWriter out = response.getWriter(); // Query string should be in parametre "query". // A valid forrest document is returned. String query = request.getParameter("query"); // Render the resulting document. Ideally the document // should be passed to Cocoon, but for the time being // use the renderer Document doc = searcher.search(indexDir, query); String page = renderer.render(doc); out.print(page); } //Clean up resources public void destroy() { } } // ForrestSearchServlet 1.1 xml-forrest/src/scratchpad/src/java/org/apache/forrest/search/ForrestSearcher.java Index: ForrestSearcher.java =================================================================== /* * The Apache Software License, Version 1.1 * * * Copyright (c) 2001, 2002 The Apache Software Foundation. All rights * reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * 3. The end-user documentation included with the redistribution, * if any, must include the following acknowledgment: * "This product includes software developed by the * Apache Software Foundation (http://www.apache.org/)." * Alternately, this acknowledgment may appear in the software itself, * if and wherever such third-party acknowledgments normally appear. * * 4. The names "Apache Forrest" and "Apache Software Foundation" must * not be used to endorse or promote products derived from this * software without prior written permission. For written * permission, please contact apache@apache.org. * * 5. Products derived from this software may not be called "Apache", * nor may "Apache" appear in their name, without prior written * permission of the Apache Software Foundation. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * ==================================================================== * * This software consists of voluntary contributions made by many * individuals on behalf of the Apache Software Foundation and was * originally based on software copyright (c) 1999, International * Business Machines, Inc., http://www.apache.org. For more * information on the Apache Software Foundation, please see * . */ package org.apache.forrest.search; import java.io.IOException; import java.io.StringReader; import java.util.*; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.queryParser.*; import org.apache.lucene.search.*; import org.apache.lucene.index.Term; import org.apache.xerces.dom.DocumentImpl; import org.apache.xerces.dom.DocumentTypeImpl; import org.w3c.dom.*; /** *

Searches the index for a given query string.

* @author Ramon Prades [RPR]
* @version $Id: ForrestSearcher.java,v 1.1 2003/09/12 19:07:31 cheche Exp $
*/
public class ForrestSearcher {

  /** Source-document extension stored in the index "path" field. */
  private static final String XML_SUFFIX = ".xml";

  /** Extension of the rendered page that result links must point to. */
  private static final String HTML_SUFFIX = ".html";

  public ForrestSearcher() {
  }

  /**
   * Searches "queryString" in "indexDir" and returns a Forrest Document (v1.2)
   * with the list of matches.
   *
   * <p>Failures never escape as exceptions: an empty query, an unparsable
   * query, an index that cannot be opened and a failing search each produce
   * a document whose body holds a single explanatory paragraph. The problem
   * is additionally reported on System.err.</p>
   *
   * @param indexDir Directory with the Lucene index
   * @param queryString String to search; may be null or empty
   * @return Forrest document, never null
   */
  public Document search(String indexDir, String queryString) {
    // Create a Forrest document with the results
    DOMImplementation domImpl = new org.apache.xerces.dom.DOMImplementationImpl();
    // NOTE(review): the public id names DTD V1.1 while the system id is
    // document-v12.dtd -- confirm which one is intended before changing it.
    DocumentType docType = domImpl.createDocumentType("document",
        "-//APACHE//DTD Documentation V1.1//EN", "document-v12.dtd");
    Document doc = domImpl.createDocument("", "document", docType);
    Element rootNode = doc.getDocumentElement();

    Element headerNode = doc.createElement("header");
    headerNode.appendChild(this.makeElement(doc, "title", "Search Results"));
    rootNode.appendChild(headerNode);

    Element bodyNode = doc.createElement("body");
    rootNode.appendChild(bodyNode);

    // Nothing to search for: answer before touching the index at all.
    if (queryString == null || queryString.length() == 0) {
      bodyNode.appendChild(makeElement(doc, "p", "Please enter a valid query"));
      return doc;
    }

    Query query;
    try {
      query = QueryParser.parse(queryString, "contents", new StandardAnalyzer());
    } catch (ParseException ex) {
      System.err.println("QueryParser error!");
      ex.printStackTrace();
      bodyNode.appendChild(queryParagraph(doc, "Invalid query: ", queryString));
      return doc;
    }

    IndexSearcher searcher;
    try {
      searcher = new IndexSearcher(indexDir);
    } catch (IOException ex) {
      System.err.println("Error: Index dir not found!");
      ex.printStackTrace();
      bodyNode.appendChild(makeElement(doc, "p", "The search index is not available"));
      return doc;
    }

    try {
      Hits hits;
      try {
        hits = searcher.search(query);
      } catch (IOException ex) {
        System.err.println("Error in search");
        ex.printStackTrace();
        bodyNode.appendChild(makeElement(doc, "p", "An error occurred while searching"));
        return doc;
      }
      appendResults(doc, bodyNode, hits, queryString);
    } finally {
      // The hits are read through the searcher, so it may only be released
      // once the whole result list has been built.
      try {
        searcher.close();
      } catch (IOException ex) {
        System.err.println("Error closing index searcher (" + ex.getMessage() + ")");
      }
    }
    return doc;
  } // search

  /*
   * Appends the summary paragraph and, when there are matches, the list
   * with one item per hit to the body of the results document.
   */
  private void appendResults(Document doc, Element bodyNode, Hits hits, String queryString) {
    int count = hits.length();

    String txt;
    if (count == 0) {
      txt = "No documents found matching: ";
    } else if (count == 1) {
      txt = count + " document found matching: ";
    } else {
      txt = count + " documents found matching: ";
    }
    bodyNode.appendChild(queryParagraph(doc, txt, queryString));
    if (count == 0) {
      return;
    }

    Element listNode = doc.createElement("ul");
    bodyNode.appendChild(listNode);

    // One formatter for the whole list instead of one per hit.
    java.text.DateFormat formatter = new java.text.SimpleDateFormat();
    for (int i = 0; i < count; i++) {
      try {
        org.apache.lucene.document.Document hit = hits.doc(i);
        if (hit.get("path") == null) {
          // Without a path there is nothing to link to; skip this hit
          // instead of failing the whole search with a NullPointerException.
          System.err.println("Skipping hit " + i + ": no path stored in the index");
          continue;
        }
        // The item is attached only after it has been built completely, so
        // a failing hit leaves no partial entry behind.
        listNode.appendChild(buildListItem(doc, hit, hits.score(i), formatter));
      } catch (DOMException ex2) {
        System.err.println("DOM Error building results document (" + ex2.getMessage() + ")");
      } catch (IOException ex2) {
        System.err.println("IO Error building results document (" + ex2.getMessage() + ")");
      } catch (NumberFormatException ex2) {
        System.err.println("NUMBERFORMAT Error building results document (" + ex2.getMessage() + ")");
      }
    } // for
  }

  /*
   * Builds the list item describing one hit: linked title, score, optional
   * summary, and a last line with url, optional author and modification
   * date. Throws NumberFormatException when the stored "modified" value is
   * missing or not a number.
   */
  private Element buildListItem(Document doc, org.apache.lucene.document.Document hit,
                                float score, java.text.DateFormat formatter) {
    String path = toHtmlPath(hit.get("path"));
    String title = hit.get("title");
    if (title == null) {
      // Fall back to the path so the link never ends up with null text.
      title = path;
    }
    String summary = hit.get("summary");
    String authors = hit.get("author");
    // Parsed before any node is created so a bad value rejects the hit early.
    String strModified = formatter.format(new Date(Long.parseLong(hit.get("modified"))));

    Element listItem = doc.createElement("li");

    Element strongNode = doc.createElement("strong");
    listItem.appendChild(strongNode);
    Element linkNode = doc.createElement("link");
    linkNode.setAttribute("href", path);
    linkNode.appendChild(doc.createTextNode(title));
    strongNode.appendChild(linkNode);

    listItem.appendChild(doc.createTextNode(" [" + score + "]"));
    listItem.appendChild(doc.createElement("br"));

    if (summary != null && summary.length() > 0) {
      listItem.appendChild(doc.createTextNode(summary));
      listItem.appendChild(doc.createElement("br"));
    }

    Element lastLine = doc.createElement("em");
    listItem.appendChild(lastLine);
    lastLine.appendChild(doc.createTextNode("url: " + path));
    if (authors != null && authors.length() > 0) {
      lastLine.appendChild(doc.createTextNode(" - author: " + authors));
    }
    lastLine.appendChild(doc.createTextNode(" - last modified: " + strModified));

    listItem.appendChild(doc.createElement("br"));
    listItem.appendChild(doc.createElement("br"));
    return listItem;
  }

  /*
   * Maps the indexed source path to the rendered page. Only a trailing
   * ".xml" is rewritten; the previous regex-based replaceAll(".xml", ...)
   * let "." match any character and rewrote every occurrence in the path.
   */
  private static String toHtmlPath(String path) {
    if (path.endsWith(XML_SUFFIX)) {
      return path.substring(0, path.length() - XML_SUFFIX.length()) + HTML_SUFFIX;
    }
    return path;
  }

  /*
   * Builds a paragraph made of a plain-text message followed by the query
   * string inside an "em" element.
   */
  private Element queryParagraph(Document doc, String message, String queryString) {
    Element pNode = doc.createElement("p");
    pNode.appendChild(doc.createTextNode(message));
    pNode.appendChild(makeElement(doc, "em", queryString));
    return pNode;
  }

  /*
   * Utility method to construct a DOM element with no attributes and
   * one text child
   */
  private Element makeElement(Document doc, String name, String text) {
    Element e = doc.createElement(name);
    e.appendChild(doc.createTextNode(text));
    return e;
  }
} // ForrestSearcher