lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mharw...@apache.org
Subject svn commit: r500053 - in /lucene/java/trunk/contrib/xml-query-parser/src: java/org/apache/lucene/xmlparser/ test/org/apache/lucene/xmlparser/
Date Thu, 25 Jan 2007 23:31:05 GMT
Author: mharwood
Date: Thu Jan 25 15:31:02 2007
New Revision: 500053

URL: http://svn.apache.org/viewvc?view=rev&rev=500053
Log:
Added QueryTemplateManager.java to aid construction of XML queries from form input by using
XSL templates. A Junit test provides examples of use. This approach offers a convenient way
of externalizing and changing how user input is turned into Lucene queries. Database applications
often adopt similar practices by externalizing SQL in template files that can be easily changed/optimized
by a DBA.  

Added:
    lucene/java/trunk/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/QueryTemplateManager.java
    lucene/java/trunk/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/TestQueryTemplateManager.java
    lucene/java/trunk/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/albumBooleanQuery.xsl
    lucene/java/trunk/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/albumFilteredQuery.xsl
    lucene/java/trunk/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/albumLuceneClassicQuery.xsl

Added: lucene/java/trunk/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/QueryTemplateManager.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/QueryTemplateManager.java?view=auto&rev=500053
==============================================================================
--- lucene/java/trunk/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/QueryTemplateManager.java	(added)
+++ lucene/java/trunk/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/QueryTemplateManager.java	Thu Jan 25 15:31:02 2007
@@ -0,0 +1,102 @@
+package org.apache.lucene.xmlparser;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Enumeration;
+import java.util.Properties;
+
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.transform.Result;
+import javax.xml.transform.Source;
+import javax.xml.transform.Transformer;
+import javax.xml.transform.TransformerException;
+import javax.xml.transform.TransformerFactory;
+import javax.xml.transform.dom.DOMResult;
+import javax.xml.transform.dom.DOMSource;
+import javax.xml.transform.stream.StreamResult;
+
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.xml.sax.SAXException;
+
+/**
+ * Provides utilities for turning query form input (such as from a web page or Swing gui) into 
+ * Lucene XML queries by using XSL templates.  This approach offers a convenient way of externalizing
+ * and changing how user input is turned into Lucene queries. 
+ * Database applications often adopt similar practices by externalizing SQL in template files that can
+ * be easily changed/optimized by a DBA.  
+ * @author Mark Harwood
+ */
+public class QueryTemplateManager
+{
+	static DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance ();
+	static TransformerFactory tFactory = TransformerFactory.newInstance();
+	
+	public static String getQueryAsXmlString(Properties formProperties, String templateName) throws SAXException, IOException, ParserConfigurationException, TransformerException 
+	{
+		return getQueryAsXmlString(formProperties, 
+				getDOMSource(QueryTemplateManager.class.getResourceAsStream(templateName)));
+	}
+	
+	public static String getQueryAsXmlString(Properties formProperties, Source xslDs) throws SAXException, IOException, ParserConfigurationException, TransformerException
+	{
+  		ByteArrayOutputStream baos=new ByteArrayOutputStream();
+  		StreamResult result=new StreamResult(baos);
+  		transformCriteria(formProperties,xslDs,result);
+  		return baos.toString();
+	}
+	
+	public static Document getQueryAsDOM(Properties formProperties, String templateName) throws SAXException, IOException, ParserConfigurationException, TransformerException
+	{
+		return getQueryAsDOM(formProperties, getDOMSource(QueryTemplateManager.class.getResourceAsStream(templateName)));
+	}
+	public static Document getQueryAsDOM(Properties formProperties, InputStream xslIs) throws SAXException, IOException, ParserConfigurationException, TransformerException
+	{
+		return getQueryAsDOM(formProperties, getDOMSource(xslIs));
+	}
+	
+	
+	public static Document getQueryAsDOM(Properties formProperties, Source xslDs) throws SAXException, IOException, ParserConfigurationException, TransformerException
+	{
+  		DOMResult result=new DOMResult();
+  		transformCriteria(formProperties,xslDs,result);
+  		return (Document)result.getNode();
+	}
+	
+	public static void transformCriteria(Properties formProperties, Source xslDs, Result result) throws SAXException, IOException, ParserConfigurationException, TransformerException
+	{
+        dbf.setNamespaceAware(true);	    
+		
+		Transformer transformer = tFactory.newTransformer(xslDs);
+	    //Create an XML document representing the search index document.
+		DocumentBuilder db = dbf.newDocumentBuilder ();
+		org.w3c.dom.Document doc = db.newDocument ();
+		Element root = doc.createElement ("Document");
+		doc.appendChild (root);
+		
+		Enumeration keysEnum = formProperties.keys();
+		while(keysEnum.hasMoreElements())
+		{
+		    String propName=(String) keysEnum.nextElement();
+		    String value=formProperties.getProperty(propName);
+    		if((value!=null)&&(value.length()>0))
+    		{
+    		    DOMUtils.insertChild(root,propName,value);    			
+    		}
+		}		
+		//Use XSLT to transform into an XML query string using the queryTemplate
+		DOMSource xml=new DOMSource(doc);
+		transformer.transform(xml,result);		
+	}
+	
+	public static DOMSource getDOMSource(InputStream xslIs) throws ParserConfigurationException, SAXException, IOException 
+	{
+        dbf.setNamespaceAware(true);	    
+		DocumentBuilder builder = dbf.newDocumentBuilder();
+		org.w3c.dom.Document xslDoc = builder.parse(xslIs);
+		return new DOMSource(xslDoc);		
+	}
+}

Added: lucene/java/trunk/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/TestQueryTemplateManager.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/TestQueryTemplateManager.java?view=auto&rev=500053
==============================================================================
--- lucene/java/trunk/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/TestQueryTemplateManager.java	(added)
+++ lucene/java/trunk/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/TestQueryTemplateManager.java	Thu Jan 25 15:31:02 2007
@@ -0,0 +1,163 @@
+package org.apache.lucene.xmlparser;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.HashMap;
+import java.util.Properties;
+import java.util.StringTokenizer;
+
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.transform.Source;
+import javax.xml.transform.TransformerException;
+import javax.xml.transform.dom.DOMSource;
+
+import junit.framework.TestCase;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.queryParser.QueryParser;
+import org.apache.lucene.search.Hits;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.store.RAMDirectory;
+import org.w3c.dom.Document;
+import org.xml.sax.SAXException;
+
+/**
+ * This class illustrates how form input (such as from a web page or Swing gui) can be
+ * turned into Lucene queries using a choice of XSL templates for different styles of queries.
+ * @author maharwood
+ */
+public class TestQueryTemplateManager extends TestCase {
+
+	CoreParser builder;
+	Analyzer analyzer=new StandardAnalyzer();
+	HashMap templates=new HashMap();
+	private IndexSearcher searcher;
+	
+	//A collection of documents' field values for use in our tests
+	String docFieldValues []=
+	{
+			"artist=Jeff Buckley \talbum=Grace \treleaseDate=1999 \tgenre=rock",
+			"artist=Fugazi \talbum=Repeater \treleaseDate=1990 \tgenre=alternative",
+			"artist=Fugazi \talbum=Red Medicine \treleaseDate=1995 \tgenre=alternative",
+			"artist=Peeping Tom \talbum=Peeping Tom \treleaseDate=2006 \tgenre=rock",
+			"artist=Red Snapper \talbum=Prince Blimey \treleaseDate=1996 \tgenre=electronic"
+	};
+	
+	//A collection of example queries, consisting of name/value pairs representing form content plus 
+	// a choice of query style template to use in the test, with expected number of hits
+	String queryForms[]=
+	{
+			"artist=Fugazi \texpectedMatches=2 \ttemplate=albumBooleanQuery.xsl",
+			"artist=Fugazi \treleaseDate=1990 \texpectedMatches=1 \ttemplate=albumBooleanQuery.xsl",
+			"artist=Buckley \tgenre=rock \texpectedMatches=1 \ttemplate=albumFilteredQuery.xsl",
+			"artist=Buckley \tgenre=electronic \texpectedMatches=0 \ttemplate=albumFilteredQuery.xsl",
+			"queryString=artist:buckly~ NOT genre:electronic \texpectedMatches=1 \ttemplate=albumLuceneClassicQuery.xsl"
+	};
+	
+	
+	public void testFormTransforms() throws SAXException, IOException, ParserConfigurationException, TransformerException, ParserException 
+	{
+		//Run all of our test queries
+		for (int i = 0; i < queryForms.length; i++)
+		{
+			Properties queryFormProperties=getPropsFromString(queryForms[i]);
+			
+			//Get the required query XSL template for this test
+			Source template=getTemplate(queryFormProperties.getProperty("template"));
+			
+			//Transform the queryFormProperties into a Lucene XML query
+			Document doc=QueryTemplateManager.getQueryAsDOM(queryFormProperties,template);
+			
+			//Parse the XML query using the XML parser
+			Query q=builder.getQuery(doc.getDocumentElement());
+			
+			//Run the query
+			Hits h=searcher.search(q);
+			
+			//Check we have the expected number of results
+			int expectedHits=Integer.parseInt(queryFormProperties.getProperty("expectedMatches"));
+			assertEquals("Number of results should match for query "+queryForms[i],expectedHits,h.length());
+			
+		}
+	}
+	
+		
+	private Source getTemplate(String templateName) throws ParserConfigurationException, SAXException, IOException 
+	{
+		Source result=(Source) templates.get(templateName);
+		if(result==null)
+		{
+			//Not yet loaded - load the stylesheet
+			result=QueryTemplateManager.getDOMSource(getClass().getResourceAsStream(templateName));
+			templates.put(templateName,result);
+		}
+		return result;
+	}
+
+
+	//Helper method to construct Lucene query forms used in our test
+	Properties getPropsFromString(String nameValuePairs)
+	{
+		Properties result=new Properties();
+		StringTokenizer st=new StringTokenizer(nameValuePairs,"\t=");
+		while(st.hasMoreTokens())
+		{
+			String name=st.nextToken().trim();
+			if(st.hasMoreTokens())
+			{
+				String value=st.nextToken().trim();
+				result.setProperty(name,value);
+			}
+		}
+		return result;
+	}
+	
+	//Helper method to construct Lucene documents used in our tests
+	org.apache.lucene.document.Document getDocumentFromString(String nameValuePairs)
+	{
+		org.apache.lucene.document.Document result=new org.apache.lucene.document.Document();
+		StringTokenizer st=new StringTokenizer(nameValuePairs,"\t=");
+		while(st.hasMoreTokens())
+		{
+			String name=st.nextToken().trim();
+			if(st.hasMoreTokens())
+			{
+				String value=st.nextToken().trim();
+				result.add(new Field(name,value,Field.Store.YES,Field.Index.TOKENIZED));
+			}
+		}
+		return result;
+	}
+	
+	/*
+	 * @see TestCase#setUp()
+	 */
+	protected void setUp() throws Exception {
+		super.setUp();
+		
+		
+		//Create an index
+		RAMDirectory dir=new RAMDirectory();
+		IndexWriter w=new IndexWriter(dir,analyzer,true);
+		for (int i = 0; i < docFieldValues.length; i++)
+		{
+			w.addDocument(getDocumentFromString(docFieldValues[i]));
+		}
+		w.optimize();
+		w.close();
+		searcher=new IndexSearcher(dir);
+		
+		//initialize the parser
+		builder=new CorePlusExtensionsParser(analyzer,new QueryParser("artist", analyzer));
+		
+	}
+	
+	
+	protected void tearDown() throws Exception {
+		searcher.close();
+	}
+}

Added: lucene/java/trunk/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/albumBooleanQuery.xsl
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/albumBooleanQuery.xsl?view=auto&rev=500053
==============================================================================
--- lucene/java/trunk/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/albumBooleanQuery.xsl	(added)
+++ lucene/java/trunk/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/albumBooleanQuery.xsl	Thu Jan 25 15:31:02 2007
@@ -0,0 +1,32 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+<xsl:template match="/Document">
+<!--This template ANDs all fields together. Within a single field all terms are ORed.
+	The query fields are fed directly through an analyzer and so do not need to adhere to  
+	traditional Lucene query syntax.
+ -->	
+<BooleanQuery>
+	<xsl:if test="count(artist)>0">
+	    <Clause occurs="must">
+	      <TermsQuery fieldName="artist"><xsl:value-of select="artist"/></TermsQuery>
+	   </Clause>
+   </xsl:if>
+	<xsl:if test="count(album)>0">
+	    <Clause occurs="must">
+	      <TermsQuery fieldName="album"><xsl:value-of select="album"/></TermsQuery>
+	   </Clause>
+   </xsl:if>
+	<xsl:if test="count(genre)>0">
+	    <Clause occurs="must">
+	      <TermsQuery fieldName="genre"><xsl:value-of select="genre"/></TermsQuery>
+	   </Clause>
+   </xsl:if>
+	<xsl:if test="count(releaseDate)>0">
+	    <Clause occurs="must">
+	      <TermsQuery fieldName="releaseDate"><xsl:value-of select="releaseDate"/></TermsQuery>
+	   </Clause>
+   </xsl:if>
+</BooleanQuery>
+
+</xsl:template>
+</xsl:stylesheet>
\ No newline at end of file

Added: lucene/java/trunk/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/albumFilteredQuery.xsl
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/albumFilteredQuery.xsl?view=auto&rev=500053
==============================================================================
--- lucene/java/trunk/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/albumFilteredQuery.xsl	(added)
+++ lucene/java/trunk/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/albumFilteredQuery.xsl	Thu Jan 25 15:31:02 2007
@@ -0,0 +1,38 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+<xsl:template match="/Document">
+<!-- This template uses an efficient, cached filter for the "genre" field.
+	Other query fields are fed directly through an analyzer and so do not need to adhere to
+	traditional Lucene query syntax. Terms within a field are ORed while different fields are ANDed
+ -->	
+<FilteredQuery>
+	<Query>
+		<BooleanQuery>
+			<xsl:if test="count(artist)>0">
+			    <Clause occurs="must">
+			      <TermsQuery fieldName="artist"><xsl:value-of select="artist"/></TermsQuery>
+			   </Clause>
+		   </xsl:if>
+			<xsl:if test="count(album)>0">
+			    <Clause occurs="must">
+			      <TermsQuery fieldName="album"><xsl:value-of select="album"/></TermsQuery>
+			   </Clause>
+		   </xsl:if>
+			<xsl:if test="count(releaseDate)>0">
+			    <Clause occurs="must">
+			      <TermsQuery fieldName="releaseDate"><xsl:value-of select="releaseDate"/></TermsQuery>
+			   </Clause>
+		   </xsl:if>
+	</BooleanQuery>
+	</Query>
+	<Filter>
+		<CachedFilter>
+			<!-- Example filter to be cached for fast, repeated use -->
+			<TermsFilter fieldName="genre">			
+				<xsl:value-of select="genre"/>
+			</TermsFilter>
+		</CachedFilter>		
+	</Filter>	
+</FilteredQuery>
+</xsl:template>
+</xsl:stylesheet>
\ No newline at end of file

Added: lucene/java/trunk/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/albumLuceneClassicQuery.xsl
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/albumLuceneClassicQuery.xsl?view=auto&rev=500053
==============================================================================
--- lucene/java/trunk/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/albumLuceneClassicQuery.xsl	(added)
+++ lucene/java/trunk/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/albumLuceneClassicQuery.xsl	Thu Jan 25 15:31:02 2007
@@ -0,0 +1,13 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+<xsl:template match="/Document">
+<!-- This template is designed to work with a google-like search form - one edit box and
+	uses the traditional Lucene query syntax
+ -->		
+<BooleanQuery>
+    <Clause occurs="must">
+	      <UserQuery><xsl:value-of select="queryString"/></UserQuery>
+   </Clause>
+</BooleanQuery>
+</xsl:template>
+</xsl:stylesheet>
\ No newline at end of file



Mime
View raw message