lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From va...@apache.org
Subject svn commit: r732916 [12/14] - in /lucene/pylucene/trunk: ./ java/ java/org/ java/org/osafoundation/ java/org/osafoundation/lucene/ java/org/osafoundation/lucene/analysis/ java/org/osafoundation/lucene/queryParser/ java/org/osafoundation/lucene/search/ ...
Date Fri, 09 Jan 2009 03:28:41 GMT
Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/searching/BasicSearchingTest.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/searching/BasicSearchingTest.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/searching/BasicSearchingTest.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/searching/BasicSearchingTest.py Thu Jan  8 19:28:33 2009
@@ -0,0 +1,67 @@
+# ====================================================================
+# Copyright (c) 2004-2007 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions: 
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software. 
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+
+from lia.common.LiaTestCase import LiaTestCase
+
+from lucene import \
+     SimpleAnalyzer, Document, TermQuery, QueryParser, IndexSearcher, Term
+
+
+class BasicSearchingTest(LiaTestCase):
+    # Basic searches over the index in self.directory (not set here; presumably by LiaTestCase).
+    def testTerm(self):
+        # Exact TermQuery lookups on "subject": expects 1 hit for "ant" and 2 for "junit".
+        searcher = IndexSearcher(self.directory)
+        t = Term("subject", "ant")
+        query = TermQuery(t)
+        hits = searcher.search(query)
+        self.assertEqual(1, hits.length(), "JDwA")
+
+        t = Term("subject", "junit")
+        hits = searcher.search(TermQuery(t))
+        self.assertEqual(2, hits.length())
+
+        searcher.close()
+
+    def testKeyword(self):
+        # Finds one book by its exact "isbn" term. NOTE(review): this searcher is never closed.
+        searcher = IndexSearcher(self.directory)
+        t = Term("isbn", "1930110995")
+        query = TermQuery(t)
+        hits = searcher.search(query)
+        self.assertEqual(1, hits.length(), "JUnit in Action")
+
+    def testQueryParser(self):
+        # Parsed queries on "contents". NOTE(review): this searcher is never closed.
+        searcher = IndexSearcher(self.directory)
+        # In query syntax "+" marks a required term and "-" a prohibited one; one hit expected.
+        query = QueryParser("contents",
+                            SimpleAnalyzer()).parse("+JUNIT +ANT -MOCK")
+        hits = searcher.search(query)
+        self.assertEqual(1, hits.length())
+        d = hits.doc(0)
+        self.assertEqual("Java Development with Ant", d.get("title"))
+        # An OR of two terms; two hits are expected.
+        query = QueryParser("contents", SimpleAnalyzer()).parse("mock OR junit")
+        hits = searcher.search(query)
+        self.assertEqual(2, hits.length(), "JDwA and JIA")

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/searching/BasicSearchingTest.py
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/searching/BasicSearchingTest.py
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/searching/BooleanQueryTest.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/searching/BooleanQueryTest.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/searching/BooleanQueryTest.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/searching/BooleanQueryTest.py Thu Jan  8 19:28:33 2009
@@ -0,0 +1,63 @@
+# ====================================================================
+# Copyright (c) 2004-2007 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions: 
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software. 
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+
+from lia.common.LiaTestCase import LiaTestCase
+
+from lucene import\
+    Term, BooleanQuery, IndexSearcher, RangeQuery, TermQuery, BooleanClause
+
+
+class BooleanQueryTest(LiaTestCase):
+    # Combines sub-queries with BooleanQuery; assertHitsIncludeTitle is not defined in this file.
+    def testAnd(self):
+        # A subject term AND a pubmonth range (third RangeQuery argument True) must find the title.
+        searchingBooks = TermQuery(Term("subject", "search"))
+        currentBooks = RangeQuery(Term("pubmonth", "200401"),
+                                  Term("pubmonth", "200412"), True)
+        # Both clauses are added as MUST.
+        currentSearchingBooks = BooleanQuery()
+        currentSearchingBooks.add(searchingBooks, BooleanClause.Occur.MUST)
+        currentSearchingBooks.add(currentBooks, BooleanClause.Occur.MUST)
+
+        searcher = IndexSearcher(self.directory)
+        hits = searcher.search(currentSearchingBooks)
+
+        self.assertHitsIncludeTitle(hits, "Lucene in Action")
+
+    def testOr(self):
+        # Two category terms joined as SHOULD clauses; a title from each category is expected.
+        methodologyBooks = TermQuery(Term("category",
+                                          "/technology/computers/programming/methodology"))
+        easternPhilosophyBooks = TermQuery(Term("category",
+                                                "/philosophy/eastern"))
+
+        enlightenmentBooks = BooleanQuery()
+        enlightenmentBooks.add(methodologyBooks, BooleanClause.Occur.SHOULD)
+        enlightenmentBooks.add(easternPhilosophyBooks, BooleanClause.Occur.SHOULD)
+
+        searcher = IndexSearcher(self.directory)
+        hits = searcher.search(enlightenmentBooks)
+        print "or =", enlightenmentBooks
+        # The second expected title contains non-ASCII characters, hence the unicode literal.
+        self.assertHitsIncludeTitle(hits, "Extreme Programming Explained")
+        self.assertHitsIncludeTitle(hits, u"Tao Te Ching \u9053\u5FB7\u7D93")

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/searching/BooleanQueryTest.py
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/searching/BooleanQueryTest.py
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/searching/Explainer.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/searching/Explainer.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/searching/Explainer.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/searching/Explainer.py Thu Jan  8 19:28:33 2009
@@ -0,0 +1,66 @@
+# ====================================================================
+# Copyright (c) 2004-2007 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions: 
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software. 
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+
+import os
+
+from lucene import \
+     SimpleAnalyzer, Document, QueryParser, Explanation, \
+     IndexSearcher, FSDirectory, Hit
+
+
+class Explainer(object):
+    # Command-line helper: prints the score explanation of every hit for a query on "contents".
+    def main(cls, argv):
+        # argv is expected as [program, <index dir>, <query>]; any other length prints the usage.
+        if len(argv) != 3:
+            print "Usage: Explainer <index dir> <query>"
+
+        else:
+            indexDir = argv[1]
+            queryExpression = argv[2]
+            # NOTE(review): the False argument presumably means "do not create" - TODO confirm.
+            directory = FSDirectory.getDirectory(indexDir, False)
+
+            query = QueryParser("contents",
+                                SimpleAnalyzer()).parse(queryExpression)
+
+            print "Query:", queryExpression
+            # NOTE(review): the searcher opened here is never closed.
+            searcher = IndexSearcher(directory)
+            hits = searcher.search(query)
+            # Each item is cast to Hit to reach its document and id, then explained and printed.
+            for hit in hits:
+                hit = Hit.cast_(hit)
+                doc = hit.getDocument()
+                id = hit.getId()  # NOTE(review): shadows the builtin id()
+                explanation = searcher.explain(query, id)
+                print "----------"
+                print doc["title"].encode('utf-8')
+                print explanation
+        # Turns main into a classmethod without using decorator syntax.
+    main = classmethod(main)
+
+
+if __name__ == "__main__":  # script entry point: hand the raw command line to Explainer.main
+    import sys
+    Explainer.main(sys.argv)

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/searching/Explainer.py
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/searching/Explainer.py
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/searching/PhraseQueryTest.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/searching/PhraseQueryTest.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/searching/PhraseQueryTest.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/searching/PhraseQueryTest.py Thu Jan  8 19:28:33 2009
@@ -0,0 +1,84 @@
+# ====================================================================
+# Copyright (c) 2004-2007 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions: 
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software. 
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+
+from unittest import TestCase
+
+from lucene import \
+     WhitespaceAnalyzer, Document, Field, IndexWriter, Term, \
+     IndexSearcher, PhraseQuery, RAMDirectory
+
+
+class PhraseQueryTest(TestCase):
+    # PhraseQuery slop tests. NOTE(review): no tearDown in this class closes self.searcher.
+    def setUp(self):
+        # Indexes one document into a RAMDirectory and opens self.searcher on it.
+        # set up sample document
+        directory = RAMDirectory()
+        writer = IndexWriter(directory, WhitespaceAnalyzer(), True)
+        doc = Document()
+        doc.add(Field("field", "the quick brown fox jumped over the lazy dog",
+                       Field.Store.YES, Field.Index.TOKENIZED))
+        writer.addDocument(doc)
+        writer.close()
+
+        self.searcher = IndexSearcher(directory)
+
+    def matched(self, phrase, slop):
+        # True when a PhraseQuery built from the words in phrase, with this slop, has a hit.
+        query = PhraseQuery()
+        query.setSlop(slop)
+        # Terms are added in the order given by phrase.
+        for word in phrase:
+            query.add(Term("field", word))
+
+        hits = self.searcher.search(query)
+
+        return len(hits) > 0
+
+    def testSlopComparison(self):
+        # "quick fox" must not match with slop 0 but must match with slop 1.
+        phrase = ["quick", "fox"]
+
+        self.assert_(not self.matched(phrase, 0), "exact phrase not found")
+        self.assert_(self.matched(phrase, 1), "close enough")
+
+    def testReverse(self):
+        # The reversed word order must not match at slop 2 but must match at slop 3.
+        phrase = ["fox", "quick"]
+
+        self.assert_(not self.matched(phrase, 2), "hop flop")
+        self.assert_(self.matched(phrase, 3), "hop hop slop")
+
+    def testMultiple(self):
+        # Three-word phrases: in-order needs slop 4 (3 fails); reversed needs 8 (7 fails).
+        self.assert_(not self.matched(["quick", "jumped", "lazy"], 3),
+                     "not close enough")
+
+        self.assert_(self.matched(["quick", "jumped", "lazy"], 4),
+                     "just enough")
+
+        self.assert_(not self.matched(["lazy", "jumped", "quick"], 7),
+                     "almost but not quite")
+
+        self.assert_(self.matched(["lazy", "jumped", "quick"], 8),
+                     "bingo")

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/searching/PhraseQueryTest.py
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/searching/PhraseQueryTest.py
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/searching/PrefixQueryTest.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/searching/PrefixQueryTest.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/searching/PrefixQueryTest.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/searching/PrefixQueryTest.py Thu Jan  8 19:28:33 2009
@@ -0,0 +1,46 @@
+# ====================================================================
+# Copyright (c) 2004-2007 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions: 
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software. 
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+
+from lia.common.LiaTestCase import LiaTestCase
+
+from lucene import Term, IndexSearcher, PrefixQuery, TermQuery
+
+
+class PrefixQueryTest(LiaTestCase):
+    # Compares a PrefixQuery with a TermQuery built from the same "category" term.
+    def testPrefix(self):
+        # The prefix search must return strictly more hits than the exact-term search.
+        searcher = IndexSearcher(self.directory)
+
+        # search for programming books, including subcategories
+        term = Term("category", "/technology/computers/programming")
+        query = PrefixQuery(term)
+
+        hits = searcher.search(query)
+        programmingAndBelow = hits.length()
+
+        # only programming books, not subcategories
+        hits = searcher.search(TermQuery(term))
+        justProgramming = hits.length()
+
+        self.assert_(programmingAndBelow > justProgramming)

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/searching/PrefixQueryTest.py
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/searching/PrefixQueryTest.py
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/searching/QueryParserTest.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/searching/QueryParserTest.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/searching/QueryParserTest.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/searching/QueryParserTest.py Thu Jan  8 19:28:33 2009
@@ -0,0 +1,157 @@
+# ====================================================================
+# Copyright (c) 2004-2007 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions: 
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software. 
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+
+from lia.common.LiaTestCase import LiaTestCase
+
+from lucene import \
+     WhitespaceAnalyzer, StandardAnalyzer, Term, QueryParser, Locale, \
+     BooleanQuery, FuzzyQuery, IndexSearcher, RangeQuery, TermQuery, \
+     BooleanClause
+
+
+class QueryParserTest(LiaTestCase):
+
+    def setUp(self):
+
+        super(QueryParserTest, self).setUp()
+        self.analyzer = WhitespaceAnalyzer()
+        self.searcher = IndexSearcher(self.directory)
+
+    def testToString(self):
+
+        query = BooleanQuery()
+        query.add(FuzzyQuery(Term("field", "kountry")),
+                  BooleanClause.Occur.MUST)
+        query.add(TermQuery(Term("title", "western")),
+                  BooleanClause.Occur.SHOULD)
+
+        self.assertEqual("+kountry~0.5 title:western",
+                         query.toString("field"), "both kinds")
+
+    def testPrefixQuery(self):
+
+        parser = QueryParser("category", StandardAnalyzer())
+        parser.setLowercaseExpandedTerms(False)
+
+        print parser.parse("/Computers/technology*").toString("category")
+
+    def testGrouping(self):
+
+        query = QueryParser("subject", self.analyzer).parse("(agile OR extreme) AND methodology")
+        hits = self.searcher.search(query)
+
+        self.assertHitsIncludeTitle(hits, "Extreme Programming Explained")
+        self.assertHitsIncludeTitle(hits, "The Pragmatic Programmer")
+
+    def testRangeQuery(self):
+
+        parser = QueryParser("subject", self.analyzer) 
+        parser.setUseOldRangeQuery(True)
+
+        query = parser.parse("pubmonth:[200401 TO 200412]")
+
+        self.assert_(RangeQuery.instance_(query))
+
+        hits = self.searcher.search(query)
+        self.assertHitsIncludeTitle(hits, "Lucene in Action")
+
+        query = QueryParser("pubmonth",
+                            self.analyzer).parse("{200201 TO 200208}")
+
+        hits = self.searcher.search(query)
+        self.assertEqual(0, hits.length(), "JDwA in 200208")
+  
+    def testDateRangeQuery(self):
+
+        # locale diff between jre and gcj 1/1/04 -> 01/01/04
+        # expression = "modified:[1/1/04 TO 12/31/04]"
+        
+        expression = "modified:[01/01/04 TO 12/31/04]"
+        parser = QueryParser("subject", self.analyzer)
+        parser.setLocale(Locale.US)
+        query = parser.parse(expression)
+        print expression, "parsed to", query
+
+        hits = self.searcher.search(query)
+        self.assert_(hits.length() > 0)
+
+    def testSlop(self):
+
+        q = QueryParser("field", self.analyzer).parse('"exact phrase"')
+        self.assertEqual("\"exact phrase\"", q.toString("field"),
+                         "zero slop")
+
+        qp = QueryParser("field", self.analyzer)
+        qp.setPhraseSlop(5)
+        q = qp.parse('"sloppy phrase"')
+        self.assertEqual("\"sloppy phrase\"~5", q.toString("field"),
+                         "sloppy, implicitly")
+
+    def testPhraseQuery(self):
+
+        q = QueryParser("field",
+                        StandardAnalyzer()).parse('"This is Some Phrase*"')
+        self.assertEqual("\"some phrase\"", q.toString("field"), "analyzed")
+
+        q = QueryParser("field", self.analyzer).parse('"term"')
+        self.assert_(TermQuery.instance_(q), "reduced to TermQuery")
+
+    def testLowercasing(self):
+
+        q = QueryParser("field", self.analyzer).parse("PrefixQuery*")
+        self.assertEqual("prefixquery*", q.toString("field"), "lowercased")
+
+        qp = QueryParser("field", self.analyzer)
+        qp.setLowercaseExpandedTerms(False)
+        q = qp.parse("PrefixQuery*")
+        self.assertEqual("PrefixQuery*", q.toString("field"), "not lowercased")
+
+    def testWildcard(self):
+
+        try:
+            QueryParser("field", self.analyzer).parse("*xyz")
+            self.fail("Leading wildcard character should not be allowed")
+        except:
+            self.assert_(True)
+
+    def testBoost(self):
+
+         q = QueryParser("field", self.analyzer).parse("term^2")
+         self.assertEqual("term^2.0", q.toString("field"))
+
+    def testParseException(self):
+
+        try:
+            QueryParser("contents", self.analyzer).parse("^&#")
+        except:
+            # expression is invalid, as expected
+            self.assert_(True)
+        else:
+            self.fail("ParseException expected, but not thrown")
+
+#  public void testStopWord() throws ParseException {
+#    Query q = QueryParser.parse("the AND drag", "field",
+#        StopAnalyzer())
+#    //  QueryParser fails on the previous line - this is a known 
+#    //  issue
+#  }

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/searching/QueryParserTest.py
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/searching/QueryParserTest.py
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/searching/RangeQueryTest.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/searching/RangeQueryTest.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/searching/RangeQueryTest.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/searching/RangeQueryTest.py Thu Jan  8 19:28:33 2009
@@ -0,0 +1,54 @@
+# ====================================================================
+# Copyright (c) 2004-2007 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions: 
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software. 
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+
+from lia.common.LiaTestCase import LiaTestCase
+
+from lucene import Term, IndexSearcher, RangeQuery
+
+
+class RangeQueryTest(LiaTestCase):
+    # Runs the same "pubmonth" range with the third RangeQuery argument set to True and False.
+    def setUp(self):
+        # Runs the base setUp, then stores the two boundary terms used by both tests.
+        super(RangeQueryTest, self).setUp()
+
+        self.begin = Term("pubmonth", "198805")
+
+        # pub date of TTC was October 1988
+        self.end = Term("pubmonth", "198810")
+
+    def testInclusive(self):
+        # With True as the third argument exactly one hit is expected.
+        query = RangeQuery(self.begin, self.end, True)
+        searcher = IndexSearcher(self.directory)
+
+        hits = searcher.search(query)
+        self.assertEqual(1, hits.length(), "tao")
+
+    def testExclusive(self):
+        # With False as the third argument no hit is expected.
+        query = RangeQuery(self.begin, self.end, False)
+        searcher = IndexSearcher(self.directory)
+
+        hits = searcher.search(query)
+        self.assertEqual(0, hits.length(), "there is no tao")

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/searching/RangeQueryTest.py
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/searching/RangeQueryTest.py
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/searching/ScoreTest.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/searching/ScoreTest.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/searching/ScoreTest.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/searching/ScoreTest.py Thu Jan  8 19:28:33 2009
@@ -0,0 +1,123 @@
+# ====================================================================
+# Copyright (c) 2004-2007 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions: 
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software. 
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+
+from lia.common.LiaTestCase import LiaTestCase
+
+from lucene import \
+     WhitespaceAnalyzer, Document, Field, IndexWriter, Term, Explanation, \
+     FuzzyQuery, IndexSearcher, Similarity, TermQuery, WildcardQuery, \
+     RAMDirectory, PythonSimilarity
+
+
+class ScoreTest(LiaTestCase):
+    # Scoring tests that run against a private RAMDirectory filled by indexSingleFieldDocs.
+    def setUp(self):
+        # Runs the base setUp, then replaces self.directory with a new RAMDirectory.
+        super(ScoreTest, self).setUp()
+        self.directory = RAMDirectory()
+
+    def testSimple(self):
+        # Installs a custom similarity and expects the single indexed document to score 1.0.
+        class SimpleSimilarity(PythonSimilarity):
+            # Every factor returns a constant except tf, which returns the frequency unchanged.
+            def lengthNorm(self, field, numTerms):
+                return 1.0
+
+            def queryNorm(self, sumOfSquaredWeights):
+                return 1.0
+
+            def tf(self, freq):
+                return freq
+
+            def sloppyFreq(self, distance):
+                return 2.0
+
+            def idfTerms(self, terms, searcher):
+                return 1.0
+
+            def idfTerm(self, docFreq, numDocs):
+                return 1.0
+
+            def coord(self, overlap, maxOverlap):
+                return 1.0
+
+        self.indexSingleFieldDocs([Field("contents", "x", Field.Store.YES,
+                                         Field.Index.TOKENIZED)])
+        searcher = IndexSearcher(self.directory)
+        searcher.setSimilarity(SimpleSimilarity())
+        # The explanation for document 0 is printed only; it is not asserted on.
+        query = TermQuery(Term("contents", "x"))
+        explanation = searcher.explain(query, 0)
+        print explanation
+
+        hits = searcher.search(query)
+        self.assertEqual(1, hits.length())
+
+        self.assertEqual(hits.score(0), 1.0)
+        searcher.close()
+
+    def indexSingleFieldDocs(self, fields):
+        # Writes one single-field document per Field into self.directory, then optimizes.
+        writer = IndexWriter(self.directory, WhitespaceAnalyzer(), True)
+        # NOTE(review): the True above presumably means "create a new index" - TODO confirm.
+        for field in fields:
+            doc = Document()
+            doc.add(field)
+            writer.addDocument(doc)
+
+        writer.optimize()
+        writer.close()
+
+    def testWildcard(self):
+        # "?ild*" is expected to match three of the four indexed values, all with equal scores.
+        self.indexSingleFieldDocs([Field("contents", "wild", Field.Store.YES,
+                                         Field.Index.TOKENIZED),
+                                   Field("contents", "child", Field.Store.YES,
+                                         Field.Index.TOKENIZED),
+                                   Field("contents", "mild", Field.Store.YES,
+                                         Field.Index.TOKENIZED),
+                                   Field("contents", "mildew", Field.Store.YES,
+                                         Field.Index.TOKENIZED)])
+        # NOTE(review): unlike testSimple, this searcher is never closed.
+        searcher = IndexSearcher(self.directory)
+        query = WildcardQuery(Term("contents", "?ild*"))
+        hits = searcher.search(query)
+        self.assertEqual(3, hits.length(), "child no match")
+
+        self.assertEqual(hits.score(0), hits.score(1), "score the same")
+        self.assertEqual(hits.score(1), hits.score(2), "score the same")
+
+    def testFuzzy(self):
+        # Both values must match "wuzza", with differing scores and "wuzzy" as the first hit.
+        self.indexSingleFieldDocs([Field("contents", "fuzzy", Field.Store.YES,
+                                         Field.Index.TOKENIZED),
+                                   Field("contents", "wuzzy", Field.Store.YES,
+                                         Field.Index.TOKENIZED)])
+        # NOTE(review): unlike testSimple, this searcher is never closed.
+        searcher = IndexSearcher(self.directory)
+        query = FuzzyQuery(Term("contents", "wuzza"))
+        hits = searcher.search(query)
+        self.assertEqual(2, hits.length(), "both close enough")
+
+        self.assert_(hits.score(0) !=  hits.score(1), "wuzzy closer than fuzzy")
+        self.assertEqual("wuzzy", hits.doc(0).get("contents"), "wuzza bear")

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/searching/ScoreTest.py
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/searching/ScoreTest.py
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/searching/__init__.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/searching/__init__.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/searching/__init__.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/searching/__init__.py Thu Jan  8 19:28:33 2009
@@ -0,0 +1 @@
+# searching package

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/searching/__init__.py
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/searching/__init__.py
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/tools/BerkeleyDbIndexer.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/tools/BerkeleyDbIndexer.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/tools/BerkeleyDbIndexer.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/tools/BerkeleyDbIndexer.py Thu Jan  8 19:28:33 2009
@@ -0,0 +1,112 @@
+# ====================================================================
+# Copyright (c) 2004-2005 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions: 
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software. 
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+
+import os
+
+from bsddb.db import DBEnv, DB
+from bsddb.db import \
+     DB_INIT_MPOOL, DB_INIT_LOCK, DB_INIT_TXN, DB_THREAD, DB_CREATE, DB_BTREE
+
+# missing from python interface at the moment
+DB_LOG_INMEMORY = 0x00020000
+
+from lucene import \
+     DbDirectory, IndexWriter, StandardAnalyzer, Document, Field
+
+
+class BerkeleyDbIndexer(object):
+
+    def main(cls, argv):
+
+        if len(argv) < 2:
+            print "Usage: BerkeleyDbIndexer <index dir> -create"
+            return
+
+        dbHome = argv[1]
+        create = len(argv) > 2 and argv[2] == "-create"
+
+        if not os.path.exists(dbHome):
+            os.makedirs(dbHome)
+        elif create:
+            for name in os.listdir(dbHome):
+                if name.startswith('__'):
+                    os.remove(os.path.join(dbHome, name))
+
+        env = DBEnv()
+        env.set_flags(DB_LOG_INMEMORY, 1);
+        if os.name == 'nt':
+            env.set_cachesize(0, 0x4000000, 1)
+        elif os.name == 'posix':
+            from commands import getstatusoutput
+            if getstatusoutput('uname') == (0, 'Linux'):
+                env.set_cachesize(0, 0x4000000, 1)
+
+        env.open(dbHome, (DB_CREATE | DB_THREAD |
+                          DB_INIT_MPOOL | DB_INIT_LOCK | DB_INIT_TXN), 0)
+
+        index = DB(env)
+        blocks = DB(env)
+        txn = None
+        
+        try:
+            txn = env.txn_begin(None)
+            index.open(filename = '__index__', dbtype = DB_BTREE,
+                       flags = DB_CREATE | DB_THREAD, txn = txn)
+            blocks.open(filename = '__blocks__', dbtype = DB_BTREE,
+                        flags = DB_CREATE | DB_THREAD, txn = txn)
+        except:
+            if txn is not None:
+                txn.abort()
+                txn = None
+            raise
+        else:
+            txn.commit()
+            txn = None
+
+        try:
+            txn = env.txn_begin(None)
+            directory = DbDirectory(txn, index, blocks, 0)
+            writer = IndexWriter(directory, StandardAnalyzer(), create)
+            writer.setUseCompoundFile(False)
+
+            doc = Document()
+            doc.add(Field("contents", "The quick brown fox...",
+                          Field.Store.YES, Field.Index.TOKENIZED))
+            writer.addDocument(doc)
+
+            writer.optimize()
+            writer.close()
+        except:
+            if txn is not None:
+                txn.abort()
+                txn = None
+            raise
+        else:
+            txn.commit()
+            index.close()
+            blocks.close()
+            env.close()
+
+        print "Indexing Complete"
+
+    main = classmethod(main)

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/tools/BerkeleyDbIndexer.py
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/tools/BerkeleyDbIndexer.py
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/tools/BerkeleyDbSearcher.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/tools/BerkeleyDbSearcher.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/tools/BerkeleyDbSearcher.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/tools/BerkeleyDbSearcher.py Thu Jan  8 19:28:33 2009
@@ -0,0 +1,97 @@
+# ====================================================================
+# Copyright (c) 2004-2005 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions: 
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software. 
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+
+import os
+
+from bsddb.db import DBEnv, DB
+from bsddb.db import \
+     DB_INIT_MPOOL, DB_INIT_LOCK, DB_INIT_TXN, DB_THREAD, DB_BTREE
+
+# missing from python interface at the moment
+DB_LOG_INMEMORY = 0x00020000
+
+from lucene import DbDirectory, IndexSearcher, Term, TermQuery
+
+
+class BerkeleyDbSearcher(object):
+
+    def main(cls, argv):
+
+        if len(argv) != 2:
+            print "Usage: BerkeleyDbSearcher <index dir>"
+            return
+
+        dbHome = argv[1]
+
+        env = DBEnv()
+        env.set_flags(DB_LOG_INMEMORY, 1);
+        if os.name == 'nt':
+            env.set_cachesize(0, 0x4000000, 1)
+        elif os.name == 'posix':
+            from commands import getstatusoutput
+            if getstatusoutput('uname') == (0, 'Linux'):
+                env.set_cachesize(0, 0x4000000, 1)
+
+        env.open(dbHome, (DB_THREAD |
+                          DB_INIT_MPOOL | DB_INIT_LOCK | DB_INIT_TXN), 0)
+
+        index = DB(env)
+        blocks = DB(env)
+        txn = None
+
+        try:
+            txn = env.txn_begin(None)
+            index.open(filename = '__index__', dbtype = DB_BTREE,
+                       flags = DB_THREAD, txn = txn)
+            blocks.open(filename = '__blocks__', dbtype = DB_BTREE,
+                        flags = DB_THREAD, txn = txn)
+        except:
+            if txn is not None:
+                txn.abort()
+                txn = None
+            raise
+        else:
+            txn.commit()
+            txn = None
+
+        try:
+            txn = env.txn_begin(None)
+            directory = DbDirectory(txn, index, blocks, 0)
+            searcher = IndexSearcher(directory)
+
+            hits = searcher.search(TermQuery(Term("contents", "fox")))
+            print len(hits), "document(s) found"
+            searcher.close()
+        except:
+            if txn is not None:
+                txn.abort()
+                txn = None
+            raise
+        else:
+            txn.abort()
+
+            index.close()
+            blocks.close()
+            env.close()
+
+    main = classmethod(main)

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/tools/BerkeleyDbSearcher.py
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/tools/BerkeleyDbSearcher.py
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/tools/HighlightIt.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/tools/HighlightIt.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/tools/HighlightIt.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/tools/HighlightIt.py Thu Jan  8 19:28:33 2009
@@ -0,0 +1,79 @@
+# ====================================================================
+# Copyright (c) 2004-2007 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions: 
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software. 
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+
+from sys import stdout
+
+from lucene import \
+     StandardAnalyzer, Term, TermQuery, StringReader, \
+     Fragmenter, Highlighter, QueryScorer, SimpleFragmenter, SimpleHTMLFormatter
+
+
+class HighlightIt(object):
+
+    # from http://www.lipsum.com
+    text = \
+      """
+      Contrary to popular belief, Lorem Ipsum is
+      not simply random text. It has roots in a piece of
+      classical Latin literature from 45 BC, making it over
+      2000 years old. Richard McClintock, a Latin professor
+      at Hampden-Sydney College in Virginia, looked up one
+      of the more obscure Latin words, consectetur, from
+      a Lorem Ipsum passage, and going through the cites
+      of the word in classical literature, discovered the
+      undoubtable source. Lorem Ipsum comes from sections
+      1.10.32 and 1.10.33 of "de Finibus Bonorum et
+      Malorum" (The Extremes of Good and Evil) by Cicero,
+      written in 45 BC. This book is a treatise on the
+      theory of ethics, very popular during the
+      Renaissance. The first line of Lorem Ipsum, "Lorem
+      ipsum dolor sit amet..", comes from a line in
+      section 1.10.32.
+      """
+
+    def main(cls, argv):
+
+        query = TermQuery(Term("f", "ipsum"))
+        scorer = QueryScorer(query)
+        formatter = SimpleHTMLFormatter("<span class=\"highlight\">", "</span>")
+        highlighter = Highlighter(formatter, scorer)
+        fragmenter = SimpleFragmenter(50)
+        highlighter.setTextFragmenter(fragmenter)
+
+        analyzer = StandardAnalyzer()
+        tokenStream = analyzer.tokenStream("f", StringReader(cls.text))
+        result = highlighter.getBestFragments(tokenStream, cls.text, 5, "...")
+
+        stdout.write("<html>")
+        stdout.write("<style>\n")
+        stdout.write(".highlight {\n")
+        stdout.write(" background: yellow\n")
+        stdout.write("}\n")
+        stdout.write("</style>")
+
+        stdout.write("<body>")
+        stdout.write(result)
+        stdout.write("</body></html>\n")
+        stdout.flush()
+        
+    main = classmethod(main)

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/tools/HighlightIt.py
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/tools/HighlightIt.py
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/tools/HighlightTest.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/tools/HighlightTest.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/tools/HighlightTest.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/tools/HighlightTest.py Thu Jan  8 19:28:33 2009
@@ -0,0 +1,61 @@
+# ====================================================================
+# Copyright (c) 2004-2007 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions: 
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software. 
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+
+from lia.common.LiaTestCase import LiaTestCase
+
+from lucene import \
+     SimpleAnalyzer, Term, IndexSearcher, TermQuery, \
+     Highlighter, QueryScorer, StringReader, Hit
+
+
+class HighlightTest(LiaTestCase):
+
+    def testHighlighting(self):
+
+        text = "The quick brown fox jumps over the lazy dog"
+
+        query = TermQuery(Term("field", "fox"))
+        scorer = QueryScorer(query)
+        highlighter = Highlighter(scorer)
+
+        tokenStream = SimpleAnalyzer().tokenStream("field", StringReader(text))
+
+        self.assertEqual("The quick brown <B>fox</B> jumps over the lazy dog",
+                         highlighter.getBestFragment(tokenStream, text))
+
+    def testHits(self):
+
+        searcher = IndexSearcher(self.directory)
+        query = TermQuery(Term("title", "action"))
+        hits = searcher.search(query)
+
+        scorer = QueryScorer(query)
+        highlighter = Highlighter(scorer)
+
+        for hit in hits:
+            doc = Hit.cast_(hit).getDocument()
+            title = doc["title"]
+            stream = SimpleAnalyzer().tokenStream("title", StringReader(title))
+            fragment = highlighter.getBestFragment(stream, title)
+    
+            print fragment

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/tools/HighlightTest.py
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/tools/HighlightTest.py
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/tools/SnowballTest.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/tools/SnowballTest.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/tools/SnowballTest.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/tools/SnowballTest.py Thu Jan  8 19:28:33 2009
@@ -0,0 +1,51 @@
+# ====================================================================
+# Copyright (c) 2004-2007 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions: 
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software. 
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+
+from unittest import TestCase
+from lucene import SnowballAnalyzer, Token, StringReader
+
+
+class SnowballTest(TestCase):
+
+    def testEnglish(self):
+
+        analyzer = SnowballAnalyzer("English")
+        self.assertAnalyzesTo(analyzer, "stemming algorithms",
+                              ["stem", "algorithm"])
+
+    def testSpanish(self):
+
+        analyzer = SnowballAnalyzer("Spanish")
+        self.assertAnalyzesTo(analyzer, "algoritmos", ["algoritm"])
+
+    def assertAnalyzesTo(self, analyzer, input, output):
+
+        stream = analyzer.tokenStream("field", StringReader(input))
+
+        for text in output:
+            token = stream.next()
+            self.assert_(token)
+            self.assertEqual(text, token.termText())
+
+        self.assert_(not list(stream))
+        stream.close()

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/tools/SnowballTest.py
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/tools/SnowballTest.py
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/tools/T9er.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/tools/T9er.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/tools/T9er.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/tools/T9er.py Thu Jan  8 19:28:33 2009
@@ -0,0 +1,86 @@
+# ====================================================================
+# Copyright (c) 2004-2007 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions: 
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software. 
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+
+from lucene import \
+     WhitespaceAnalyzer, Document, Field, IndexReader, IndexWriter
+
+
+class T9er(object):
+
+    keys = [         "2abc", "3def",
+            "4ghi",  "5jkl", "6mno",
+            "7pqrs", "8tuv", "9wxyz"]
+
+    keyMap = {}
+
+    def main(cls, argv):
+        
+        if len(argv) != 3:
+            print "Usage: T9er <WordNet index dir> <t9 index>"
+            return
+        
+        for key in cls.keys:
+            c = key[0]
+            k = key[1:]
+            for kc in k:
+                cls.keyMap[kc] = c
+                print kc, "=", c
+
+        indexDir = argv[1]
+        t9dir = argv[2]
+
+        reader = IndexReader.open(indexDir)
+
+        numDocs = reader.maxDoc()
+        print "Processing", numDocs, "words"
+
+        writer = IndexWriter(t9dir, WhitespaceAnalyzer(), True)
+
+        for id in xrange(reader.maxDoc()):
+            origDoc = reader.document(id)
+            word = origDoc.get("word")
+            if word is None or len(word) == 0:
+                continue
+
+            newDoc = Document()
+            newDoc.add(Field("word", word,
+                             Field.Store.YES, Field.Index.UN_TOKENIZED))
+            newDoc.add(Field("t9", cls.t9(word),
+                             Field.Store.YES, Field.Index.UN_TOKENIZED))
+            newDoc.add(Field("length", str(len(word)),
+                             Field.Store.NO, Field.Index.UN_TOKENIZED))
+            writer.addDocument(newDoc)
+            if id % 100 == 0:
+                print "Document", id
+
+        writer.optimize()
+        writer.close()
+
+        reader.close()
+
+    def t9(cls, word):
+
+        return ''.join([cls.keyMap[c] for c in word])
+
+    main = classmethod(main)
+    t9 = classmethod(t9)

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/tools/T9er.py
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/tools/T9er.py
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/tools/__init__.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/tools/__init__.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/tools/__init__.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/tools/__init__.py Thu Jan  8 19:28:33 2009
@@ -0,0 +1 @@
+# tools package

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/tools/__init__.py
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/tools/__init__.py
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/util/ClassLoader.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/util/ClassLoader.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/util/ClassLoader.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/util/ClassLoader.py Thu Jan  8 19:28:33 2009
@@ -0,0 +1,50 @@
+# ====================================================================
+# Copyright (c) 2004-2005 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions: 
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software. 
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+
+import sys
+
+class ClassLoader(object):
+
+    def loadClass(cls, name, module=None):
+
+        if module is None:
+            lastDot = name.rindex('.')
+            module = name[:lastDot]
+            name = name[lastDot+1:]
+
+        try:
+            m = __import__(module, globals(), locals(), name)
+        except ImportError:
+            raise
+        except SyntaxError:
+            raise
+        except:
+            x, value, traceback = sys.exc_info()
+            raise ImportError, value, traceback
+
+        try:
+            return getattr(m, name)
+        except AttributeError:
+            raise ImportError, "Module %s has no class %s" %(module, name)
+
+    loadClass = classmethod(loadClass)

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/util/ClassLoader.py
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/util/ClassLoader.py
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/util/Streams.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/util/Streams.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/util/Streams.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/util/Streams.py Thu Jan  8 19:28:33 2009
@@ -0,0 +1,108 @@
+# ====================================================================
+# Copyright (c) 2004-2007 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions: 
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software. 
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+
+from StringIO import StringIO
+from HTMLParser import HTMLParser
+
+
+class InputStreamReader(object):
+
+    def __init__(self, inputStream, encoding):
+
+        super(InputStreamReader, self).__init__()
+        self.inputStream = inputStream
+        self.encoding = encoding or 'utf-8'
+
+    def _read(self, length):
+
+        return self.inputStream.read(length)
+
+    def read(self, length=-1):
+
+        text = self._read(length)
+        text = unicode(text, self.encoding)
+
+        return text
+
+    def close(self):
+
+        self.inputStream.close()
+
+
+class HTMLReader(object):
+
+    def __init__(self, reader):
+
+        self.reader = reader
+
+        class htmlParser(HTMLParser):
+
+            def __init__(self):
+
+                HTMLParser.__init__(self)
+
+                self.buffer = StringIO()
+                self.position = 0
+
+            def handle_data(self, data):
+
+                self.buffer.write(data)
+
+            def _read(self, length):
+
+                buffer = self.buffer
+                size = buffer.tell() - self.position
+
+                if length > 0 and size > length:
+                    buffer.seek(self.position)
+                    data = buffer.read(length)
+                    self.position += len(data)
+                    buffer.seek(0, 2)
+
+                elif size > 0:
+                    buffer.seek(self.position)
+                    data = buffer.read(size)
+                    self.position = 0
+                    buffer.seek(0)
+
+                else:
+                    data = ''
+
+                return data
+                
+        self.parser = htmlParser()
+
+    def read(self, length=-1):
+
+        while True:
+            data = self.reader.read(length)
+            if len(data) > 0:
+                self.parser.feed(data)
+                data = self.parser._read(length)
+                if len(data) == 0:
+                    continue
+            return data
+
+    def close(self):
+
+        self.reader.close()

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/util/Streams.py
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/util/Streams.py
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/util/__init__.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/util/__init__.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/util/__init__.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/util/__init__.py Thu Jan  8 19:28:33 2009
@@ -0,0 +1 @@
+# util package

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/util/__init__.py
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/util/__init__.py
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: lucene/pylucene/trunk/samples/PorterStemmerAnalyzer.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/PorterStemmerAnalyzer.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/PorterStemmerAnalyzer.py (added)
+++ lucene/pylucene/trunk/samples/PorterStemmerAnalyzer.py Thu Jan  8 19:28:33 2009
@@ -0,0 +1,69 @@
+# ====================================================================
+# Copyright (c) 2004-2007 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions: 
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software. 
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+
+# This sample illustrates how to write an Analyzer 'extension' in Python.
+# 
+#   What is happening behind the scenes ?
+#
+# The PorterStemmerAnalyzer python class extends PythonAnalyzer and provides
+# an implementation for Analyzer's abstract tokenStream() method.
+# PythonAnalyzer is a proper java extension of Analyzer which implements a
+# native tokenStream() method whose job is to call the tokenStream() method
+# on the python instance. When an instance of PorterStemmerAnalyzer is
+# passed to PyLucene, with a call to
+# IndexWriter(store, PorterStemmerAnalyzer(), True) for example, its
+# PythonAnalyzer base class is the Analyzer extension bridge to the python
+# tokenStream() implementation.
+
+import sys, os
+from datetime import datetime
+from lucene import *
+from IndexFiles import IndexFiles
+
+
+class PorterStemmerAnalyzer(PythonAnalyzer):
+
+    def tokenStream(self, fieldName, reader):
+
+        result = StandardTokenizer(reader)
+        result = StandardFilter(result)
+        result = LowerCaseFilter(result)
+        result = PorterStemFilter(result)
+        result = StopFilter(result, StopAnalyzer.ENGLISH_STOP_WORDS)
+
+        return result
+
+
+if __name__ == '__main__':
+    if len(sys.argv) < 2:
+        print IndexFiles.__doc__
+        sys.exit(1)
+    initVM(CLASSPATH)
+    print 'lucene', VERSION
+    start = datetime.now()
+    try:
+        IndexFiles(sys.argv[1], "index", PorterStemmerAnalyzer())
+        end = datetime.now()
+        print end - start
+    except Exception, e:
+        print "Failed: ", e

Propchange: lucene/pylucene/trunk/samples/PorterStemmerAnalyzer.py
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/pylucene/trunk/samples/PorterStemmerAnalyzer.py
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: lucene/pylucene/trunk/samples/SearchFiles.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/SearchFiles.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/SearchFiles.py (added)
+++ lucene/pylucene/trunk/samples/SearchFiles.py Thu Jan  8 19:28:33 2009
@@ -0,0 +1,43 @@
+#!/usr/bin/env python
+from lucene import \
+    QueryParser, IndexSearcher, StandardAnalyzer, FSDirectory, Hit, \
+    VERSION, initVM, CLASSPATH
+
+
+"""
+This script is loosely based on the Lucene (java implementation) demo class 
+org.apache.lucene.demo.SearchFiles.  It will prompt for a search query, then it
+will search the Lucene index in the current directory called 'index' for the
+search query entered against the 'contents' field.  It will then display the
+'path' and 'name' fields for each of the hits it finds in the index.  Entering
+an empty query ends the prompt loop, after which the script calls
+searcher.close().
+"""
+def run(searcher, analyzer):
+    while True:
+        print
+        print "Hit enter with no input to quit."
+        command = raw_input("Query:")
+        if command == '':
+            return
+
+        print
+        print "Searching for:", command
+        query = QueryParser("contents", analyzer).parse(command)
+        hits = searcher.search(query)
+        print "%s total matching documents." % hits.length()
+
+        for hit in hits:
+            doc = Hit.cast_(hit).getDocument()
+            print 'path:', doc.get("path"), 'name:', doc.get("name")
+
+
+if __name__ == '__main__':
+    STORE_DIR = "index"
+    initVM(CLASSPATH)
+    print 'lucene', VERSION
+    directory = FSDirectory.getDirectory(STORE_DIR, False)
+    searcher = IndexSearcher(directory)
+    analyzer = StandardAnalyzer()
+    run(searcher, analyzer)
+    searcher.close()

Propchange: lucene/pylucene/trunk/samples/SearchFiles.py
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/pylucene/trunk/samples/SearchFiles.py
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: lucene/pylucene/trunk/samples/TermPositionVector.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/TermPositionVector.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/TermPositionVector.py (added)
+++ lucene/pylucene/trunk/samples/TermPositionVector.py Thu Jan  8 19:28:33 2009
@@ -0,0 +1,36 @@
+from lucene import \
+    StandardAnalyzer, RAMDirectory, Document, Field, \
+    IndexWriter, IndexReader, TermPositionVector, initVM, CLASSPATH
+
+if __name__ == '__main__':
+    initVM(CLASSPATH)
+
+directory = RAMDirectory()
+iwriter = IndexWriter(directory, StandardAnalyzer(), True)
+ts = ["this bernhard is the text to be index text",
+      "this claudia is the text to be index"]
+for t in ts:
+    doc = Document()
+    doc.add(Field("fieldname", t,
+                  Field.Store.YES, Field.Index.TOKENIZED,
+                  Field.TermVector.WITH_POSITIONS_OFFSETS))
+    iwriter.addDocument(doc)
+iwriter.optimize()
+iwriter.close()
+
+ireader = IndexReader.open(directory)
+tpv = TermPositionVector.cast_(ireader.getTermFreqVector(0, 'fieldname'))
+
+for (t,f,i) in zip(tpv.getTerms(),tpv.getTermFrequencies(),xrange(100000)):
+    print 'term %s' % t
+    print '  freq: %i' % f
+    try:
+        print '  pos: ' + str([p for p in tpv.getTermPositions(i)])
+    except:
+        print '  no pos'
+    try:
+        print '  off: ' + \
+              str(["%i-%i" % (o.getStartOffset(), o.getEndOffset())
+                   for o in tpv.getOffsets(i)])
+    except:
+        print '  no offsets'

Propchange: lucene/pylucene/trunk/samples/TermPositionVector.py
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/pylucene/trunk/samples/TermPositionVector.py
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: lucene/pylucene/trunk/samples/ThreadIndexFiles.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/ThreadIndexFiles.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/ThreadIndexFiles.py (added)
+++ lucene/pylucene/trunk/samples/ThreadIndexFiles.py Thu Jan  8 19:28:33 2009
@@ -0,0 +1,46 @@
+# ====================================================================
+# Copyright (c) 2004-2007 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions: 
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software. 
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+
+# This sample illustrates how to use a thread with PyLucene
+
+import sys, os, threading
+from datetime import datetime
+from lucene import StandardAnalyzer, VERSION, initVM, CLASSPATH
+from IndexFiles import IndexFiles
+
+
+if __name__ == '__main__':
+    if len(sys.argv) < 2:
+        print IndexFiles.__doc__
+        sys.exit(1)
+    env=initVM(CLASSPATH)
+    print 'lucene', VERSION
+
+    def fn():
+        env.attachCurrentThread()
+        start = datetime.now()
+        IndexFiles(sys.argv[1], "index", StandardAnalyzer())
+        end = datetime.now()
+        print end - start
+
+    threading.Thread(target=fn).start()

Propchange: lucene/pylucene/trunk/samples/ThreadIndexFiles.py
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/pylucene/trunk/samples/ThreadIndexFiles.py
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: lucene/pylucene/trunk/samples/manindex.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/manindex.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/manindex.py (added)
+++ lucene/pylucene/trunk/samples/manindex.py Thu Jan  8 19:28:33 2009
@@ -0,0 +1,106 @@
+# ====================================================================
+# Copyright (c) 2004-2007 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions: 
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software. 
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+# Author: Erik Hatcher
+#
+# to index all man pages on $MANPATH or /usr/share/man:
+#   python manindex.py pages
+# ====================================================================
+
+import os, re, sys
+from subprocess import *
+from lucene import IndexWriter, StandardAnalyzer, Document, Field
+from lucene import initVM, CLASSPATH
+
+def indexDirectory(dir):
+
+    for name in os.listdir(dir):
+        path = os.path.join(dir, name)
+        if os.path.isfile(path):
+            indexFile(dir, name)
+
+
+def indexFile(dir,filename):
+
+    path = os.path.join(dir, filename)
+    print "  File: ", filename
+
+    if filename.endswith('.gz'):
+        child = Popen('gunzip -c ' + path + ' | groff -t -e -E -mandoc -Tascii | col -bx', shell=True, stdout=PIPE, cwd=os.path.dirname(dir)).stdout
+        command, section = re.search('^(.*)\.(.*)\.gz$', filename).groups()
+    else:
+        child = Popen('groff -t -e -E -mandoc -Tascii ' + path + ' | col -bx',
+                      shell=True, stdout=PIPE, cwd=os.path.dirname(dir)).stdout
+        command, section = re.search('^(.*)\.(.*)$', filename).groups()
+
+    data = child.read()
+    err = child.close()
+    if err:
+        raise RuntimeError, '%s failed with exit code %d' %(command, err)
+
+    matches = re.search('^NAME$(.*?)^\S', data,
+                        re.MULTILINE | re.DOTALL)
+    name = matches and matches.group(1) or ''
+
+    matches = re.search('^(?:SYNOPSIS|SYNOPSYS)$(.*?)^\S', data,
+                        re.MULTILINE | re.DOTALL)
+    synopsis = matches and matches.group(1) or ''
+
+    matches = re.search('^(?:DESCRIPTION|OVERVIEW)$(.*?)', data,
+                        re.MULTILINE | re.DOTALL)
+    description = matches and matches.group(1) or ''
+
+    doc = Document()
+    doc.add(Field("command", command,
+                  Field.Store.YES, Field.Index.UN_TOKENIZED))
+    doc.add(Field("section", section,
+                  Field.Store.YES, Field.Index.UN_TOKENIZED))
+    doc.add(Field("name", name.strip(),
+                  Field.Store.YES, Field.Index.TOKENIZED))
+    doc.add(Field("synopsis", synopsis.strip(),
+                  Field.Store.YES, Field.Index.TOKENIZED))
+    doc.add(Field("keywords", ' '.join((command, name, synopsis, description)),
+                  Field.Store.NO, Field.Index.TOKENIZED))
+    doc.add(Field("filename", os.path.abspath(path),
+                  Field.Store.YES, Field.Index.UN_TOKENIZED))
+
+    writer.addDocument(doc)
+
+
+if __name__ == '__main__':
+
+    if len(sys.argv) != 2:
+        print "Usage: python manindex.py <index dir>"
+
+    else:
+        initVM(CLASSPATH)
+        indexDir = sys.argv[1]
+        writer = IndexWriter(indexDir, StandardAnalyzer(), True)
+        manpath = os.environ.get('MANPATH', '/usr/share/man').split(os.pathsep)
+        for dir in manpath:
+            print "Crawling", dir
+            for name in os.listdir(dir):
+                path = os.path.join(dir, name)
+                if os.path.isdir(path):
+                    indexDirectory(path)
+        writer.optimize()
+        writer.close()

Propchange: lucene/pylucene/trunk/samples/manindex.py
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/pylucene/trunk/samples/manindex.py
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: lucene/pylucene/trunk/samples/mansearch.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/mansearch.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/mansearch.py (added)
+++ lucene/pylucene/trunk/samples/mansearch.py Thu Jan  8 19:28:33 2009
@@ -0,0 +1,89 @@
+# ====================================================================
+# Copyright (c) 2004-2007 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions: 
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software. 
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+# Author: Erik Hatcher
+#
+# to query the index generated with manindex.py
+#  python mansearch.py <query>
+# by default, the index is stored in 'pages', which can be overridden with
+# the MANDEX environment variable
+# ====================================================================
+
+
+import sys, os
+
+from string import Template
+from datetime import datetime
+from getopt import getopt, GetoptError
+
+from lucene import \
+     Document, IndexSearcher, FSDirectory, QueryParser, StandardAnalyzer, \
+     Hit, Field, initVM, CLASSPATH
+
+if __name__ == '__main__':
+    initVM(CLASSPATH)
+
+def usage():
+    print sys.argv[0], "[--format=<format string>] [--index=<index dir>] [--stats] <query...>"
+    print "default index is found from MANDEX environment variable"
+
+try:
+    options, args = getopt(sys.argv[1:], '', ['format=', 'index=', 'stats'])
+except GetoptError:
+    usage()
+    sys.exit(2)
+
+
+format = "#name"
+indexDir = os.environ.get('MANDEX') or 'pages'
+stats = False
+for o, a in options:
+    if o == "--format":
+        format = a
+    elif o == "--index":
+        indexDir = a
+    elif o == "--stats":
+        stats = True
+
+
+class CustomTemplate(Template):
+    delimiter = '#'
+
+template = CustomTemplate(format)
+
+fsDir = FSDirectory.getDirectory(indexDir, False)
+searcher = IndexSearcher(fsDir)
+
+parser = QueryParser("keywords", StandardAnalyzer())
+parser.setDefaultOperator(QueryParser.Operator.AND)
+query = parser.parse(' '.join(args))
+start = datetime.now()
+hits = searcher.search(query)
+duration = datetime.now() - start
+if stats:
+    print >> sys.stderr, "Found %d document(s) (in %s) that matched query '%s':" %(len(hits), duration, query)
+
+for hit in hits:
+    doc = Hit.cast_(hit).getDocument()
+    table = dict((field.name(), field.stringValue())
+                 for field in (Field.cast_(f) for f in doc.getFields()))
+    print template.substitute(table)

Propchange: lucene/pylucene/trunk/samples/mansearch.py
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/pylucene/trunk/samples/mansearch.py
------------------------------------------------------------------------------
    svn:mime-type = text/plain



Mime
View raw message