lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From va...@apache.org
Subject svn commit: r732916 [9/14] - in /lucene/pylucene/trunk: ./ java/ java/org/ java/org/osafoundation/ java/org/osafoundation/lucene/ java/org/osafoundation/lucene/analysis/ java/org/osafoundation/lucene/queryParser/ java/org/osafoundation/lucene/search/ j...
Date Fri, 09 Jan 2009 03:28:41 GMT
Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/BooksLikeThis.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/BooksLikeThis.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/BooksLikeThis.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/BooksLikeThis.py Thu Jan  8 19:28:33 2009
@@ -0,0 +1,98 @@
+# ====================================================================
+# Copyright (c) 2004-2007 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions: 
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software. 
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+
+import os
+
+from lucene import \
+     Document, IndexReader, Term, BooleanQuery, IndexSearcher, TermQuery, \
+     FSDirectory, System, BooleanClause, Hit
+
+
+class BooksLikeThis(object):
+
+    def main(cls, argv):
+
+        indexDir = System.getProperty("index.dir")
+        directory = FSDirectory.getDirectory(indexDir, False)
+
+        reader = IndexReader.open(directory)
+        blt = BooksLikeThis(reader)
+
+        for id in xrange(reader.maxDoc()):
+            if reader.isDeleted(id):
+                continue
+            doc = reader.document(id)
+            print ''
+            print doc.get("title").encode('utf-8')
+
+            docs = blt.docsLike(id, doc, 10)
+            if not docs:
+                print "  None like this"
+            else:
+                for doc in docs:
+                    print " ->", doc.get("title").encode('utf-8')
+
+    def __init__(self, reader):
+
+        self.reader = reader
+        self.searcher = IndexSearcher(reader)
+
+    def docsLike(self, id, doc, max):
+
+        authors = doc.getValues("author")
+        authorQuery = BooleanQuery()
+        for author in authors:
+            authorQuery.add(TermQuery(Term("author", author)),
+                            BooleanClause.Occur.SHOULD)
+        authorQuery.setBoost(2.0)
+
+        vector = self.reader.getTermFreqVector(id, "subject")
+
+        subjectQuery = BooleanQuery()
+        for term in vector.getTerms():
+            tq = TermQuery(Term("subject", term))
+            subjectQuery.add(tq, BooleanClause.Occur.SHOULD)
+
+        likeThisQuery = BooleanQuery()
+        likeThisQuery.add(authorQuery, BooleanClause.Occur.SHOULD)
+        likeThisQuery.add(subjectQuery, BooleanClause.Occur.SHOULD)
+
+        # exclude myself
+        likeThisQuery.add(TermQuery(Term("isbn", doc.get("isbn"))),
+                          BooleanClause.Occur.MUST_NOT)
+
+        print "  Query:", likeThisQuery.toString("contents")
+        hits = self.searcher.search(likeThisQuery)
+
+        docs = []
+        for hit in hits:
+            hit = Hit.cast_(hit)
+            doc = hit.getDocument()
+            if len(docs) < max:
+                docs.append(doc)
+            else:
+                break
+
+        return docs
+
+    main = classmethod(main)

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/BooksLikeThis.py
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/BooksLikeThis.py
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/CategorizerTest.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/CategorizerTest.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/CategorizerTest.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/CategorizerTest.py Thu Jan  8 19:28:33 2009
@@ -0,0 +1,123 @@
+# ====================================================================
+# Copyright (c) 2004-2007 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions: 
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software. 
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+
+from math import pi, sqrt, acos
+from lia.common.LiaTestCase import LiaTestCase
+
+from lucene import Document, IndexReader 
+
+
+class CategorizerTest(LiaTestCase):
+
+    def setUp(self):
+
+        super(CategorizerTest, self).setUp()
+        self.categoryMap = {}
+
+        self.buildCategoryVectors()
+        self.dumpCategoryVectors()
+
+    def testCategorization(self):
+        
+        self.assertEqual("/technology/computers/programming/methodology",
+                         self.getCategory("extreme agile methodology"))
+        self.assertEqual("/education/pedagogy",
+                         self.getCategory("montessori education philosophy"))
+
+    def dumpCategoryVectors(self):
+
+        for category, vectorMap in self.categoryMap.iteritems():
+            print "Category", category
+            for term, freq in vectorMap.iteritems():
+                print "   ", term, "=", freq
+
+    def buildCategoryVectors(self):
+
+        reader = IndexReader.open(self.directory)
+
+        for id in xrange(reader.maxDoc()):
+            doc = reader.document(id)
+            category = doc.get("category")
+            vectorMap = self.categoryMap.get(category, None)
+            if vectorMap is None:
+                vectorMap = self.categoryMap[category] = {}
+
+            termFreqVector = reader.getTermFreqVector(id, "subject")
+            self.addTermFreqToMap(vectorMap, termFreqVector)
+
+    def addTermFreqToMap(self, vectorMap, termFreqVector):
+
+        terms = termFreqVector.getTerms()
+        freqs = termFreqVector.getTermFrequencies()
+
+        i = 0
+        for term in terms:
+            if term in vectorMap:
+                vectorMap[term] += freqs[i]
+            else:
+                vectorMap[term] = freqs[i]
+            i += 1
+
+    def getCategory(self, subject):
+
+        words = subject.split(' ')
+
+        bestAngle = 2 * pi
+        bestCategory = None
+
+        for category, vectorMap in self.categoryMap.iteritems():
+            angle = self.computeAngle(words, category, vectorMap)
+            if angle != 'nan' and angle < bestAngle:
+                bestAngle = angle
+                bestCategory = category
+
+        return bestCategory
+
+    def computeAngle(self, words, category, vectorMap):
+
+        # assume words are unique and only occur once
+
+        dotProduct = 0
+        sumOfSquares = 0
+
+        for word in words:
+            categoryWordFreq = 0
+
+            if word in vectorMap:
+                categoryWordFreq = vectorMap[word]
+
+            # optimized because we assume frequency in words is 1
+            dotProduct += categoryWordFreq
+            sumOfSquares += categoryWordFreq ** 2
+
+        if sumOfSquares == 0:
+            return 'nan'
+
+        if sumOfSquares == len(words):
+            # avoid precision issues for special case
+            # sqrt x * sqrt x = x
+            denominator = sumOfSquares 
+        else:
+            denominator = sqrt(sumOfSquares) * sqrt(len(words))
+
+        return acos(dotProduct / denominator)

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/CategorizerTest.py
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/CategorizerTest.py
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/FilterTest.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/FilterTest.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/FilterTest.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/FilterTest.py Thu Jan  8 19:28:33 2009
@@ -0,0 +1,100 @@
+# ====================================================================
+# Copyright (c) 2004-2007 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions: 
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software. 
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+
+from lia.common.LiaTestCase import LiaTestCase
+
+from lucene import \
+     Term, BooleanQuery, IndexSearcher, TermQuery, DateField, \
+     CachingWrapperFilter, DateFilter, RangeQuery, QueryFilter, BooleanClause
+     
+
+class FilterTest(LiaTestCase):
+
+    def setUp(self):
+
+        super(FilterTest, self).setUp()
+
+        self.allBooks = RangeQuery(Term("pubmonth", "190001"),
+                                   Term("pubmonth", "200512"), True)
+        self.searcher = IndexSearcher(self.directory)
+        hits = self.searcher.search(self.allBooks)
+        self.numAllBooks = len(hits)
+
+    def testDateFilter(self):
+
+        jan1 = self.parseDate("2004-01-01")
+        jan31 = self.parseDate("2004-01-31")
+        dec31 = self.parseDate("2004-12-31")
+
+        filter = DateFilter("modified", jan1, dec31)
+
+        hits = self.searcher.search(self.allBooks, filter)
+        self.assertEqual(self.numAllBooks, len(hits), "all modified in 2004")
+
+        filter = DateFilter("modified", jan1, jan31)
+        hits = self.searcher.search(self.allBooks, filter)
+        self.assertEqual(0, len(hits), "none modified in January")
+
+    def testQueryFilter(self):
+
+        categoryQuery = TermQuery(Term("category", "/philosophy/eastern"))
+        categoryFilter = QueryFilter(categoryQuery)
+
+        hits = self.searcher.search(self.allBooks, categoryFilter)
+        self.assertEqual(1, len(hits), "only tao te ching")
+
+    def testFilterAlternative(self):
+
+        categoryQuery = TermQuery(Term("category", "/philosophy/eastern"))
+
+        constrainedQuery = BooleanQuery()
+        constrainedQuery.add(self.allBooks, BooleanClause.Occur.MUST)
+        constrainedQuery.add(categoryQuery, BooleanClause.Occur.MUST)
+
+        hits = self.searcher.search(constrainedQuery)
+        self.assertEqual(1, len(hits), "only tao te ching")
+
+    def testQueryFilterWithRangeQuery(self):
+
+        jan1 = self.parseDate("2004-01-01")
+        dec31 = self.parseDate("2004-12-31")
+
+        start = Term("modified", DateField.dateToString(jan1))
+        end = Term("modified", DateField.dateToString(dec31))
+
+        rangeQuery = RangeQuery(start, end, True)
+
+        filter = QueryFilter(rangeQuery)
+        hits = self.searcher.search(self.allBooks, filter)
+        self.assertEqual(self.numAllBooks, len(hits), "all of 'em")
+
+    def testCachingWrapper(self):
+
+        jan1 = self.parseDate("2004-01-01")
+        dec31 = self.parseDate("2004-12-31")
+
+        dateFilter = DateFilter("modified", jan1, dec31)
+        cachingFilter = CachingWrapperFilter(dateFilter)
+
+        hits = self.searcher.search(self.allBooks, cachingFilter)
+        self.assertEqual(self.numAllBooks, len(hits), "all of 'em")

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/FilterTest.py
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/FilterTest.py
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/MultiFieldQueryParserTest.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/MultiFieldQueryParserTest.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/MultiFieldQueryParserTest.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/MultiFieldQueryParserTest.py Thu Jan  8 19:28:33 2009
@@ -0,0 +1,60 @@
+# ====================================================================
+# Copyright (c) 2004-2007 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions: 
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software. 
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+
+from lia.common.LiaTestCase import LiaTestCase
+
+from lucene import \
+    SimpleAnalyzer, MultiFieldQueryParser, IndexSearcher, BooleanClause
+
+
+class MultiFieldQueryParserTest(LiaTestCase):
+
+    def testDefaultOperator(self):
+
+        SHOULD = BooleanClause.Occur.SHOULD
+        query = MultiFieldQueryParser.parse("development",
+                                            ["title", "subject"],
+                                            [SHOULD, SHOULD],
+                                            SimpleAnalyzer())
+
+        searcher = IndexSearcher(self.directory)
+        hits = searcher.search(query)
+
+        self.assertHitsIncludeTitle(hits, "Java Development with Ant")
+
+        # has "development" in the subject field
+        self.assertHitsIncludeTitle(hits, "Extreme Programming Explained")
+
+    def testSpecifiedOperator(self):
+        
+        MUST = BooleanClause.Occur.MUST
+        query = MultiFieldQueryParser.parse("development",
+                                            ["title", "subject"],
+                                            [MUST, MUST],
+                                            SimpleAnalyzer())
+
+        searcher = IndexSearcher(self.directory)
+        hits = searcher.search(query)
+
+        self.assertHitsIncludeTitle(hits, "Java Development with Ant")
+        self.assertEqual(1, hits.length(), "one and only one")

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/MultiFieldQueryParserTest.py
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/MultiFieldQueryParserTest.py
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/MultiSearcherTest.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/MultiSearcherTest.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/MultiSearcherTest.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/MultiSearcherTest.py Thu Jan  8 19:28:33 2009
@@ -0,0 +1,74 @@
+# ====================================================================
+# Copyright (c) 2004-2007 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions: 
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software. 
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+
+from unittest import TestCase
+from lucene import \
+     WhitespaceAnalyzer, Document, Field, IndexWriter, Term, MultiSearcher, \
+     RangeQuery, RAMDirectory, IndexSearcher
+
+
+class MultiSearcherTest(TestCase):
+
+    def setUp(self):
+        
+        animals = [ "aardvark", "beaver", "coati",
+                    "dog", "elephant", "frog", "gila monster",
+                    "horse", "iguana", "javelina", "kangaroo",
+                    "lemur", "moose", "nematode", "orca",
+                    "python", "quokka", "rat", "scorpion",
+                    "tarantula", "uromastyx", "vicuna",
+                    "walrus", "xiphias", "yak", "zebra" ]
+
+        analyzer = WhitespaceAnalyzer()
+
+        aTOmDirectory = RAMDirectory()
+        nTOzDirectory = RAMDirectory()
+
+        aTOmWriter = IndexWriter(aTOmDirectory, analyzer, True)
+        nTOzWriter = IndexWriter(nTOzDirectory, analyzer, True)
+
+        for animal in animals:
+            doc = Document()
+            doc.add(Field("animal", animal,
+                          Field.Store.YES, Field.Index.UN_TOKENIZED))
+
+            if animal[0].lower() < "n":
+                aTOmWriter.addDocument(doc)
+            else:
+                nTOzWriter.addDocument(doc)
+
+        aTOmWriter.close()
+        nTOzWriter.close()
+
+        self.searchers = [ IndexSearcher(aTOmDirectory),
+                           IndexSearcher(nTOzDirectory) ]
+
+    def testMulti(self):
+
+        searcher = MultiSearcher(self.searchers)
+
+        # range spans documents across both indexes
+        query = RangeQuery(Term("animal", "h"), Term("animal", "t"), True)
+
+        hits = searcher.search(query)
+        self.assertEqual(12, hits.length(), "tarantula not included")

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/MultiSearcherTest.py
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/MultiSearcherTest.py
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/PhrasePrefixQueryTest.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/PhrasePrefixQueryTest.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/PhrasePrefixQueryTest.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/PhrasePrefixQueryTest.py Thu Jan  8 19:28:33 2009
@@ -0,0 +1,84 @@
+# ====================================================================
+# Copyright (c) 2004-2007 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions: 
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software. 
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+
+from unittest import TestCase
+from lucene import \
+     WhitespaceAnalyzer, Document, Field, IndexWriter, Term, BooleanQuery, \
+     IndexSearcher, PhrasePrefixQuery, PhraseQuery, RAMDirectory, BooleanClause
+
+
+class PhrasePrefixQueryTest(TestCase):
+
+    def setUp(self):
+
+        directory = RAMDirectory()
+        writer = IndexWriter(directory, WhitespaceAnalyzer(), True)
+
+        doc1 = Document()
+        doc1.add(Field("field", "the quick brown fox jumped over the lazy dog",
+                       Field.Store.YES, Field.Index.TOKENIZED))
+        writer.addDocument(doc1)
+
+        doc2 = Document()
+        doc2.add(Field("field", "the fast fox hopped over the hound",
+                       Field.Store.YES, Field.Index.TOKENIZED))
+        writer.addDocument(doc2)
+        writer.close()
+
+        self.searcher = IndexSearcher(directory)
+
+    def testBasic(self):
+        
+        query = PhrasePrefixQuery()
+        query.add([Term("field", "quick"), Term("field", "fast")])
+        query.add(Term("field", "fox"))
+        print query
+
+        hits = self.searcher.search(query)
+        self.assertEqual(1, len(hits), "fast fox match")
+
+        query.setSlop(1)
+        hits = self.searcher.search(query)
+        self.assertEqual(2, len(hits), "both match")
+
+    def testAgainstOR(self):
+
+        quickFox = PhraseQuery()
+        quickFox.setSlop(1)
+        quickFox.add(Term("field", "quick"))
+        quickFox.add(Term("field", "fox"))
+
+        fastFox = PhraseQuery()
+        fastFox.add(Term("field", "fast"))
+        fastFox.add(Term("field", "fox"))
+
+        query = BooleanQuery()
+        query.add(quickFox, BooleanClause.Occur.SHOULD)
+        query.add(fastFox, BooleanClause.Occur.SHOULD)
+        hits = self.searcher.search(query)
+        self.assertEqual(2, len(hits))
+
+    def debug(self, hits):
+
+        for i, doc in hits:
+            print "%s: %s" %(hits.score(i), doc['field'])

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/PhrasePrefixQueryTest.py
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/PhrasePrefixQueryTest.py
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/SecurityFilterTest.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/SecurityFilterTest.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/SecurityFilterTest.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/SecurityFilterTest.py Thu Jan  8 19:28:33 2009
@@ -0,0 +1,68 @@
+# ====================================================================
+# Copyright (c) 2004-2007 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions: 
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software. 
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+
+from unittest import TestCase
+from lucene import \
+     WhitespaceAnalyzer, Document, Field, IndexWriter, Term, MultiSearcher, \
+     QueryFilter, RAMDirectory, IndexSearcher, TermQuery
+
+
+class SecurityFilterTest(TestCase):
+
+    def setUp(self):
+
+        self.directory = RAMDirectory()
+        writer = IndexWriter(self.directory, WhitespaceAnalyzer(), True)
+
+        # Elwood
+        document = Document()
+        document.add(Field("owner", "elwood",
+                           Field.Store.YES, Field.Index.UN_TOKENIZED))
+        document.add(Field("keywords", "elwoods sensitive info",
+                           Field.Store.YES, Field.Index.TOKENIZED))
+        writer.addDocument(document)
+
+        # Jake
+        document = Document()
+        document.add(Field("owner", "jake",
+                           Field.Store.YES, Field.Index.UN_TOKENIZED))
+        document.add(Field("keywords", "jakes sensitive info",
+                           Field.Store.YES, Field.Index.TOKENIZED))
+        writer.addDocument(document)
+
+        writer.close()
+
+    def testSecurityFilter(self):
+
+        query = TermQuery(Term("keywords", "info"))
+
+        searcher = IndexSearcher(self.directory)
+        hits = searcher.search(query)
+        self.assertEqual(2, len(hits), "Both documents match")
+
+        jakeFilter = QueryFilter(TermQuery(Term("owner", "jake")))
+
+        hits = searcher.search(query, jakeFilter)
+        self.assertEqual(1, len(hits))
+        self.assertEqual("jakes sensitive info", hits[0].get("keywords"),
+                         "elwood is safe")

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/SecurityFilterTest.py
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/SecurityFilterTest.py
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/SortingExample.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/SortingExample.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/SortingExample.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/SortingExample.py Thu Jan  8 19:28:33 2009
@@ -0,0 +1,84 @@
+# ====================================================================
+# Copyright (c) 2004-2007 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions: 
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software. 
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+
+import os
+
+from lucene import \
+     FSDirectory, Document, Field, IndexSearcher, SimpleAnalyzer, \
+     RangeQuery, Sort, SortField, DecimalFormat, System, Term
+
+
+class SortingExample(object):
+    # Prints the hits of one query under several different sort orders.
+    def __init__(self, directory):
+        # directory is later handed to IndexSearcher by displayHits
+        self.directory = directory
+
+    def displayHits(self, query, sort):
+        # Search for query ordered by sort and print a table of the hits.
+        searcher = IndexSearcher(self.directory)
+        hits = searcher.search(query, sort)
+
+        print "\nResults for:", query, "sorted by", sort
+        print "Title".rjust(30), "pubmonth".rjust(10), \
+              "id".center(4), "score".center(15)
+        # one line with title/pubmonth/id/score per hit, then its category
+        scoreFormatter = DecimalFormat("0.######")
+        for i, doc in hits:
+            title = doc["title"]
+            if len(title) > 30:
+                title = title[:30]
+            print title.encode('ascii', 'replace').rjust(30), \
+                  doc["pubmonth"].rjust(10), \
+                  str(hits.id(i)).center(4), \
+                  scoreFormatter.format(hits.score(i)).ljust(12)
+            print "  ", doc["category"]
+            # print searcher.explain(query, hits.id(i))
+
+        searcher.close()
+
+    def main(cls, argv):
+        # pubmonth range 190001..201012; True presumably means inclusive - confirm
+        earliest = Term("pubmonth", "190001")
+        latest = Term("pubmonth", "201012")
+        allBooks = RangeQuery(earliest, latest, True)
+        # the index location comes from the "index.dir" system property
+        indexDir = System.getProperty("index.dir")
+        directory = FSDirectory.getDirectory(indexDir, False)
+        example = SortingExample(directory)
+
+        example.displayHits(allBooks, Sort.RELEVANCE)
+        example.displayHits(allBooks, Sort.INDEXORDER)
+        example.displayHits(allBooks, Sort("category"))
+        example.displayHits(allBooks, Sort("pubmonth", True))
+
+        example.displayHits(allBooks,
+                            Sort([SortField("category"),
+                                  SortField.FIELD_SCORE,
+                                  SortField("pubmonth", SortField.INT, True)]))
+
+        example.displayHits(allBooks,
+                            Sort([SortField.FIELD_SCORE,
+                                  SortField("category")]))
+        # explicit classmethod() call instead of the @classmethod decorator
+    main = classmethod(main)

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/SortingExample.py
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/SortingExample.py
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/SpanQueryTest.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/SpanQueryTest.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/SpanQueryTest.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/SpanQueryTest.py Thu Jan  8 19:28:33 2009
@@ -0,0 +1,221 @@
+# ====================================================================
+# Copyright (c) 2004-2007 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions: 
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software. 
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+
+from unittest import TestCase
+from cStringIO import StringIO
+
+from lucene import \
+     WhitespaceAnalyzer, Document, Field, IndexReader, IndexWriter, Term, \
+     IndexSearcher, PhraseQuery, SpanFirstQuery, SpanNearQuery, SpanNotQuery, \
+     SpanOrQuery, SpanTermQuery, RAMDirectory, Hit
+
+from lia.analysis.AnalyzerUtils import AnalyzerUtils
+
+
+class SpanQueryTest(TestCase):
+
+    def setUp(self):
+
+        self.directory = RAMDirectory()
+        self.analyzer = WhitespaceAnalyzer()
+
+        writer = IndexWriter(self.directory, self.analyzer, True)
+
+        doc = Document()
+        doc.add(Field("f", "the quick brown fox jumps over the lazy dog",
+                      Field.Store.YES, Field.Index.TOKENIZED))
+        writer.addDocument(doc)
+
+        doc = Document()
+        doc.add(Field("f", "the quick red fox jumps over the sleepy cat",
+                      Field.Store.YES, Field.Index.TOKENIZED))
+        writer.addDocument(doc)
+
+        writer.close()
+
+        self.searcher = IndexSearcher(self.directory)
+        self.reader = IndexReader.open(self.directory)
+
+        self.quick = SpanTermQuery(Term("f", "quick"))
+        self.brown = SpanTermQuery(Term("f", "brown"))
+        self.red = SpanTermQuery(Term("f", "red"))
+        self.fox = SpanTermQuery(Term("f", "fox"))
+        self.lazy = SpanTermQuery(Term("f", "lazy"))
+        self.sleepy = SpanTermQuery(Term("f", "sleepy"))
+        self.dog = SpanTermQuery(Term("f", "dog"))
+        self.cat = SpanTermQuery(Term("f", "cat"))
+
+    def assertOnlyBrownFox(self, query):
+
+        hits = self.searcher.search(query)
+        self.assertEqual(1, len(hits))
+        self.assertEqual(0, hits.id(0), "wrong doc")
+
+    def assertBothFoxes(self, query):
+
+        hits = self.searcher.search(query)
+        self.assertEqual(2, len(hits))
+
+    def assertNoMatches(self, query):
+
+        hits = self.searcher.search(query)
+        self.assertEquals(0, len(hits))
+
+    def testSpanTermQuery(self):
+
+        self.assertOnlyBrownFox(self.brown)
+        self.dumpSpans(self.brown)
+
+    def testSpanFirstQuery(self):
+
+        sfq = SpanFirstQuery(self.brown, 2)
+        self.assertNoMatches(sfq)
+
+        self.dumpSpans(sfq)
+
+        sfq = SpanFirstQuery(self.brown, 3)
+        self.dumpSpans(sfq)
+        self.assertOnlyBrownFox(sfq)
+
+    def testSpanNearQuery(self):
+
+        quick_brown_dog = [self.quick, self.brown, self.dog]
+        snq = SpanNearQuery(quick_brown_dog, 0, True)
+        self.assertNoMatches(snq)
+        self.dumpSpans(snq)
+
+        snq = SpanNearQuery(quick_brown_dog, 4, True)
+        self.assertNoMatches(snq)
+        self.dumpSpans(snq)
+
+        snq = SpanNearQuery(quick_brown_dog, 5, True)
+        self.assertOnlyBrownFox(snq)
+        self.dumpSpans(snq)
+
+        # interesting - even a sloppy phrase query would require
+        # more slop to match
+        snq = SpanNearQuery([self.lazy, self.fox], 3, False)
+        self.assertOnlyBrownFox(snq)
+        self.dumpSpans(snq)
+
+        pq = PhraseQuery()
+        pq.add(Term("f", "lazy"))
+        pq.add(Term("f", "fox"))
+        pq.setSlop(4)
+        self.assertNoMatches(pq)
+
+        pq.setSlop(5)
+        self.assertOnlyBrownFox(pq)
+
+    def testSpanNotQuery(self):
+
+        quick_fox = SpanNearQuery([self.quick, self.fox], 1, True)
+        self.assertBothFoxes(quick_fox)
+        self.dumpSpans(quick_fox)
+
+        quick_fox_dog = SpanNotQuery(quick_fox, self.dog)
+        self.assertBothFoxes(quick_fox_dog)
+        self.dumpSpans(quick_fox_dog)
+
+        no_quick_red_fox = SpanNotQuery(quick_fox, self.red)
+        self.assertOnlyBrownFox(no_quick_red_fox)
+        self.dumpSpans(no_quick_red_fox)
+
+    def testSpanOrQuery(self):
+
+        quick_fox = SpanNearQuery([self.quick, self.fox], 1, True)
+        lazy_dog = SpanNearQuery([self.lazy, self.dog], 0, True)
+        sleepy_cat = SpanNearQuery([self.sleepy, self.cat], 0, True)
+        qf_near_ld = SpanNearQuery([quick_fox, lazy_dog], 3, True)
+
+        self.assertOnlyBrownFox(qf_near_ld)
+        self.dumpSpans(qf_near_ld)
+
+        qf_near_sc = SpanNearQuery([quick_fox, sleepy_cat], 3, True)
+        self.dumpSpans(qf_near_sc)
+
+        orQ = SpanOrQuery([qf_near_ld, qf_near_sc])
+        self.assertBothFoxes(orQ)
+        self.dumpSpans(orQ)
+
+    def testPlay(self):
+
+        orQ = SpanOrQuery([self.quick, self.fox])
+        self.dumpSpans(orQ)
+
+        quick_fox = SpanNearQuery([self.quick, self.fox], 1, True)
+        sfq = SpanFirstQuery(quick_fox, 4)
+        self.dumpSpans(sfq)
+
+        self.dumpSpans(SpanTermQuery(Term("f", "the")))
+
+        quick_brown = SpanNearQuery([self.quick, self.brown], 0, False)
+        self.dumpSpans(quick_brown)
+
+    def dumpSpans(self, query):
+
+        spans = query.getSpans(self.reader)
+        print "%s:" % query
+        numSpans = 0
+
+        hits = self.searcher.search(query)
+        scores = [0, 0]
+        for hit in hits:
+            hit = Hit.cast_(hit)
+            scores[hit.getId()] = hit.getScore()
+
+        while spans.next():
+            numSpans += 1
+
+            id = spans.doc()
+            doc = self.reader.document(id)
+
+            # for simplicity - assume tokens are in sequential,
+            # positions, starting from 0
+            tokens = AnalyzerUtils.tokensFromAnalysis(self.analyzer, doc["f"])
+            buffer = StringIO()
+            buffer.write("   ")
+
+            i = 0
+            for token in tokens:
+                if i == spans.start():
+                    buffer.write("<")
+
+                buffer.write(token.termText())
+                if i + 1 == spans.end():
+                    buffer.write(">")
+
+                buffer.write(" ")
+                i += 1
+      
+            buffer.write("(")
+            buffer.write(str(scores[id]))
+            buffer.write(") ")
+
+            print buffer.getvalue()
+            # print self.searcher.explain(query, id)
+
+        if numSpans == 0:
+            print "   No spans"
+
+        print ''

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/SpanQueryTest.py
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/SpanQueryTest.py
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/__init__.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/__init__.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/__init__.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/__init__.py Thu Jan  8 19:28:33 2009
@@ -0,0 +1 @@
+# advsearching package

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/__init__.py
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/advsearching/__init__.py
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/AnalyzerDemo.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/AnalyzerDemo.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/AnalyzerDemo.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/AnalyzerDemo.py Thu Jan  8 19:28:33 2009
@@ -0,0 +1,69 @@
+# ====================================================================
+# Copyright (c) 2004-2007 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions: 
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software. 
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+
+
+from lia.analysis.AnalyzerUtils import AnalyzerUtils
+from lucene import \
+     StopAnalyzer, SimpleAnalyzer, WhitespaceAnalyzer, StandardAnalyzer
+
+
+class AnalyzerDemo(object):
+
+    examples = ["The quick brown fox jumped over the lazy dogs",
+                "XY&Z Corporation - xyz@example.com"]
+    
+    analyzers = [WhitespaceAnalyzer(),
+                 SimpleAnalyzer(),
+                 StopAnalyzer(),
+                 StandardAnalyzer()]
+
+    def main(cls, argv):
+
+        # Use the embedded example strings, unless
+        # command line arguments are specified, then use those.
+        strings = cls.examples
+
+        if len(argv) > 1:
+            strings = argv[1:]
+
+        for string in strings:
+            cls.analyze(string)
+
+    def analyze(cls, text):
+
+        print'"Analyzing "', text, '"'
+
+        for analyzer in cls.analyzers:
+            name = type(analyzer).__name__
+            print " %s:" %(name),
+            AnalyzerUtils.displayTokens(analyzer, text)
+            print ''
+        print ''
+
+    main = classmethod(main)
+    analyze = classmethod(analyze)
+
+
+if __name__ == "__main__":
+    import sys
+    AnalyzerDemo.main(sys.argv)

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/AnalyzerDemo.py
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/AnalyzerDemo.py
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/AnalyzerUtils.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/AnalyzerUtils.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/AnalyzerUtils.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/AnalyzerUtils.py Thu Jan  8 19:28:33 2009
@@ -0,0 +1,94 @@
+# ====================================================================
+# Copyright (c) 2004-2007 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions: 
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software. 
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+
+from lucene import \
+     SimpleAnalyzer, Token, TokenStream, StandardAnalyzer, StringReader
+
+
+class AnalyzerUtils(object):
+    # Helpers for printing or checking the tokens an analyzer produces.
+    def main(cls, argv):
+        # Demo: dump full token details for two sample sentences.
+        print "SimpleAnalyzer"
+        cls.displayTokensWithFullDetails(SimpleAnalyzer(),
+                                         "The quick brown fox....")
+
+        print "\n----"
+        print "StandardAnalyzer"
+        cls.displayTokensWithFullDetails(StandardAnalyzer(),
+                                         "I'll e-mail you at xyz@example.com")
+
+    def tokensFromAnalysis(cls, analyzer, text):
+        # Collect into a list every token of text analyzed as field "contents".
+        return [token for token in analyzer.tokenStream("contents", StringReader(text))]
+
+    def displayTokens(cls, analyzer, text):
+        # Print each token's text in brackets, all on one line.
+        for token in cls.tokensFromAnalysis(analyzer, text):
+            print "[%s]" %(token.termText()),
+
+    def displayTokensWithPositions(cls, analyzer, text):
+        # Like displayTokens, starting a new "N:" line when the position advances.
+        position = 0
+        for token in cls.tokensFromAnalysis(analyzer, text):
+            increment = token.getPositionIncrement()
+            if increment > 0:
+                position += increment
+                print "\n%d:" %(position),
+
+            print "[%s]" %(token.termText()),
+
+    def displayTokensWithFullDetails(cls, analyzer, text):
+        # Per position line, print each token as [text:start->end:type].
+        position = 0
+        for token in cls.tokensFromAnalysis(analyzer, text):
+            increment = token.getPositionIncrement()
+            if increment > 0:
+                position += increment
+                print "\n%s:" %(position),
+
+            print "[%s:%d->%d:%s]" %(token.termText(),
+                                     token.startOffset(),
+                                     token.endOffset(),
+                                     token.type()),
+
+    def assertTokensEqual(cls, unittest, tokens, strings):
+        # unittest: the test object whose assertEqual reports any mismatch
+        unittest.assertEqual(len(strings), len(tokens))
+
+        i = 0
+        for token in tokens:
+            unittest.assertEqual(strings[i], token.termText(), "index %d" %(i))
+            i += 1
+
+    main = classmethod(main)
+    tokensFromAnalysis = classmethod(tokensFromAnalysis)
+    displayTokens = classmethod(displayTokens)
+    displayTokensWithPositions = classmethod(displayTokensWithPositions)
+    displayTokensWithFullDetails = classmethod(displayTokensWithFullDetails)
+    assertTokensEqual = classmethod(assertTokensEqual)
+
+
+if __name__ == "__main__":
+    import sys
+    AnalyzerUtils.main(sys.argv)

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/AnalyzerUtils.py
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/AnalyzerUtils.py
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/UsingAnalyzersExample.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/UsingAnalyzersExample.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/UsingAnalyzersExample.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/UsingAnalyzersExample.py Thu Jan  8 19:28:33 2009
@@ -0,0 +1,53 @@
+# ====================================================================
+# Copyright (c) 2004-2007 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions: 
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software. 
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+
+from lucene import \
+     RAMDirectory, IndexWriter, StandardAnalyzer, Document, Field, \
+     QueryParser
+
+class UsingAnalyzersExample(object):
+
+    #
+    # This method doesn't do anything useful on its own.
+    # It only gathers snippets showing how Analyzers are used.
+    #
+    def someMethod(self):
+        # NOTE(review): Field.Text/Field.UnStored and parseQuery appear nowhere else
+        # in the visible change (Field(...) constructors do); confirm they exist.
+        directory = RAMDirectory()
+        analyzer = StandardAnalyzer()
+        writer = IndexWriter(directory, analyzer, True)
+
+        doc = Document()
+        doc.add(Field.Text("title", "This is the title"))
+        doc.add(Field.UnStored("contents", "...document contents..."))
+        writer.addDocument(doc)
+        # an analyzer can also be passed along with an individual document
+        writer.addDocument(doc, analyzer)
+
+        expression = "some query"
+        # the analyzer is handed either to a class-level parse call ...
+        query = QueryParser.parse(expression, "contents", analyzer)
+        # ... or once to a QueryParser instance that then parses expressions
+        parser = QueryParser("contents", analyzer)
+        query = parser.parseQuery(expression)

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/UsingAnalyzersExample.py
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/UsingAnalyzersExample.py
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/__init__.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/__init__.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/__init__.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/__init__.py Thu Jan  8 19:28:33 2009
@@ -0,0 +1 @@
+# analysis package

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/__init__.py
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/__init__.py
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/i18n/ChineseTest.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/i18n/ChineseTest.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/i18n/ChineseTest.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/i18n/ChineseTest.py Thu Jan  8 19:28:33 2009
@@ -0,0 +1,36 @@
+# -*- coding: utf-8 -*-
+# ====================================================================
+# Copyright (c) 2004-2007 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions: 
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software. 
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+
+from lia.common.LiaTestCase import LiaTestCase
+from lucene import Term, IndexSearcher, TermQuery
+
+
+class ChineseTest(LiaTestCase):
+    # Checks that a single Chinese character can be searched as a term.
+    def testChinese(self):
+        # self.directory is presumably provided by LiaTestCase - confirm
+        searcher = IndexSearcher(self.directory)
+        hits = searcher.search(TermQuery(Term("contents", "道")))
+
+        self.assertEqual(1, hits.length(), "tao")

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/i18n/ChineseTest.py
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/i18n/ChineseTest.py
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/i18n/__init__.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/i18n/__init__.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/i18n/__init__.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/i18n/__init__.py Thu Jan  8 19:28:33 2009
@@ -0,0 +1 @@
+# i18n package

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/i18n/__init__.py
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/i18n/__init__.py
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/keyword/KeywordAnalyzer.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/keyword/KeywordAnalyzer.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/keyword/KeywordAnalyzer.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/keyword/KeywordAnalyzer.py Thu Jan  8 19:28:33 2009
@@ -0,0 +1,48 @@
+# ====================================================================
+# Copyright (c) 2004-2007 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions: 
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software. 
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+
+from lucene import Token, PythonAnalyzer, PythonTokenStream, JArray
+
+#
+# "Tokenizes" the entire stream as a single token.
+#
+
+class KeywordAnalyzer(PythonAnalyzer):
+    
+    def tokenStream(self, fieldName, reader):
+
+        class _tokenStream(PythonTokenStream):
+
+            def __init__(self):
+                super(_tokenStream, self).__init__()
+                self.done = False
+      
+            def next(self):
+                if not self.done:
+                    self.done = True
+                    text = JArray('char')(1024)
+                    size = reader.read(text, 0, 1024)
+                    return Token(text, 0, size, 0, size)
+                return None
+
+        return _tokenStream()

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/keyword/KeywordAnalyzer.py
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/keyword/KeywordAnalyzer.py
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/keyword/KeywordAnalyzerTest.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/keyword/KeywordAnalyzerTest.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/keyword/KeywordAnalyzerTest.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/keyword/KeywordAnalyzerTest.py Thu Jan  8 19:28:33 2009
@@ -0,0 +1,89 @@
+# ====================================================================
+# Copyright (c) 2004-2007 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions: 
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software. 
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+
+from unittest import TestCase
+
+from lucene import \
+     IndexWriter, Term, SimpleAnalyzer, PerFieldAnalyzerWrapper, \
+     RAMDirectory, Document, Field, IndexSearcher, TermQuery, \
+     QueryParser, Analyzer, StringReader, Token, JavaError
+
+from lia.analysis.keyword.KeywordAnalyzer import KeywordAnalyzer
+from lia.analysis.keyword.SimpleKeywordAnalyzer import SimpleKeywordAnalyzer
+
+
+class KeywordAnalyzerTest(TestCase):
+
+    def setUp(self):
+
+        # Build an in-memory index holding one document: an untokenized
+        # part number and a tokenized description.
+        self.directory = RAMDirectory()
+        writer = IndexWriter(self.directory, SimpleAnalyzer(), True)
+
+        doc = Document()
+        doc.add(Field("partnum", "Q36",
+                      Field.Store.YES, Field.Index.UN_TOKENIZED))
+        doc.add(Field("description", "Illidium Space Modulator",
+                      Field.Store.YES, Field.Index.TOKENIZED))
+        writer.addDocument(doc)
+        writer.close()
+
+        self.searcher = IndexSearcher(self.directory)
+
+    def testTermQuery(self):
+
+        # A term query is not analyzed, so the exact part number matches.
+        query = TermQuery(Term("partnum", "Q36"))
+        hits = self.searcher.search(query)
+        self.assertEqual(1, hits.length())
+
+    def testBasicQueryParser(self):
+
+        # SimpleAnalyzer is applied to every field of the query, so the
+        # part number is reduced to "q" and no longer matches.
+        query = QueryParser("description",
+                            SimpleAnalyzer()).parse("partnum:Q36 AND SPACE")
+
+        hits = self.searcher.search(query)
+        self.assertEqual("+partnum:q +space", query.toString("description"),
+                         "note Q36 -> q")
+        self.assertEqual(0, hits.length(), "doc not found :(")
+
+    def testPerFieldAnalyzer(self):
+
+        # Use KeywordAnalyzer for partnum only; other fields keep
+        # SimpleAnalyzer.
+        analyzer = PerFieldAnalyzerWrapper(SimpleAnalyzer())
+        analyzer.addAnalyzer("partnum", KeywordAnalyzer())
+
+        query = QueryParser("description",
+                            analyzer).parse("partnum:Q36 AND SPACE")
+        hits = self.searcher.search(query)
+
+        self.assertEqual("+partnum:Q36 +space", query.toString("description"),
+                         "Q36 kept as-is")
+        self.assertEqual(1, hits.length(), "doc found!")
+
+    def testSimpleKeywordAnalyzer(self):
+
+        analyzer = SimpleKeywordAnalyzer()
+
+        text = "Hello World"
+        ts = analyzer.tokenStream("dummy", StringReader(text))
+        self.assertEqual(ts.next().termText(), text)
+        # The whole input must come back as one token, so the stream has to
+        # be exhausted now. (The previous check, "not list(ts) is None",
+        # could never fail because list() never returns None.)
+        self.assert_(ts.next() is None)
+        ts.close()

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/keyword/KeywordAnalyzerTest.py
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/keyword/KeywordAnalyzerTest.py
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/keyword/SimpleKeywordAnalyzer.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/keyword/SimpleKeywordAnalyzer.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/keyword/SimpleKeywordAnalyzer.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/keyword/SimpleKeywordAnalyzer.py Thu Jan  8 19:28:33 2009
@@ -0,0 +1,44 @@
+# ====================================================================
+# Copyright (c) 2004-2007 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions: 
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software. 
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+
+#
+# CharTokenizer limits token width to 255 characters, so this
+# implementation assumes keywords are 255 characters in length or less.
+#
+
+from lucene import PythonAnalyzer, PythonCharTokenizer
+
+
+class SimpleKeywordAnalyzer(PythonAnalyzer):
+    """Analyzer whose tokenizer accepts every character unchanged."""
+
+    def tokenStream(self, fieldName, reader):
+        """Return a char tokenizer over reader; fieldName is not used."""
+
+        class _keepAllTokenizer(PythonCharTokenizer):
+
+            def __init__(self, reader):
+                super(_keepAllTokenizer, self).__init__(reader)
+
+            def isTokenChar(self, c):
+                # Every character belongs to the token.
+                return True
+
+            def normalize(self, c):
+                # Characters are passed through as they are.
+                return c
+
+        tokenizer = _keepAllTokenizer(reader)
+        return tokenizer

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/keyword/SimpleKeywordAnalyzer.py
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/keyword/SimpleKeywordAnalyzer.py
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/keyword/__init__.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/keyword/__init__.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/keyword/__init__.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/keyword/__init__.py Thu Jan  8 19:28:33 2009
@@ -0,0 +1 @@
+# keyword package

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/keyword/__init__.py
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/keyword/__init__.py
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/positional/PositionalPorterStopAnalyzer.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/positional/PositionalPorterStopAnalyzer.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/positional/PositionalPorterStopAnalyzer.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/positional/PositionalPorterStopAnalyzer.py Thu Jan  8 19:28:33 2009
@@ -0,0 +1,57 @@
+# ====================================================================
+# Copyright (c) 2004-2007 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions: 
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software. 
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+
+import sys
+
+from lucene import \
+     LowerCaseTokenizer, PorterStemFilter, StopAnalyzer, StopFilter, \
+     TokenStream, PythonAnalyzer
+
+from lia.analysis.positional.PositionalStopFilter import PositionalStopFilter
+
+python_ver = '%d.%d.%d' %(sys.version_info[0:3])
+# Compare version tuples rather than strings: string comparison is
+# lexicographic, so e.g. '2.10.0' < '2.4' would be true.
+if sys.version_info < (2, 4):
+    # There is no builtin set type before Python 2.4.
+    from sets import Set as set
+
+
+#
+# An Analyzer extension
+#
+
+class PositionalPorterStopAnalyzer(PythonAnalyzer):
+    """Lower-cases, filters stop words with PositionalStopFilter, then stems."""
+
+    def __init__(self, stopWords=None):
+        """Store stopWords as a set.
+
+        StopAnalyzer.ENGLISH_STOP_WORDS is used when stopWords is None.
+        """
+
+        super(PositionalPorterStopAnalyzer, self).__init__()
+
+        if stopWords is not None:
+            words = stopWords
+        else:
+            words = StopAnalyzer.ENGLISH_STOP_WORDS
+
+        self.stopWords = set(words)
+
+    def tokenStream(self, fieldName, reader):
+        """Return the filter chain over reader; fieldName is not used."""
+
+        # LowerCaseTokenizer -> PositionalStopFilter -> PorterStemFilter
+        return PorterStemFilter(
+            PositionalStopFilter(LowerCaseTokenizer(reader), self.stopWords))

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/positional/PositionalPorterStopAnalyzer.py
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/positional/PositionalPorterStopAnalyzer.py
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/positional/PositionalPorterStopAnalyzerTest.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/positional/PositionalPorterStopAnalyzerTest.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/positional/PositionalPorterStopAnalyzerTest.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/positional/PositionalPorterStopAnalyzerTest.py Thu Jan  8 19:28:33 2009
@@ -0,0 +1,92 @@
+# ====================================================================
+# Copyright (c) 2004-2007 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions: 
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software. 
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+
+from unittest import TestCase
+
+from lucene import \
+     IndexWriter, Term, RAMDirectory, Document, Field, \
+     IndexSearcher, QueryParser
+
+from lia.analysis.AnalyzerUtils import AnalyzerUtils
+from lia.analysis.positional.PositionalPorterStopAnalyzer import \
+     PositionalPorterStopAnalyzer
+
+
+class PositionalPorterStopAnalyzerTest(TestCase):
+
+    # One analyzer shared by indexing and by every query parser below.
+    porterAnalyzer = PositionalPorterStopAnalyzer()
+
+    def setUp(self):
+
+        # Index a single one-field document in memory.
+        self.directory = RAMDirectory()
+
+        doc = Document()
+        doc.add(Field("contents",
+                      "The quick brown fox jumps over the lazy dogs",
+                      Field.Store.YES, Field.Index.TOKENIZED))
+
+        writer = IndexWriter(self.directory, self.porterAnalyzer, True)
+        writer.addDocument(doc)
+        writer.close()
+
+    def testStems(self):
+
+        searcher = IndexSearcher(self.directory)
+
+        parser = QueryParser("contents", self.porterAnalyzer)
+        hits = searcher.search(parser.parse("laziness"))
+        self.assertEqual(1, hits.length(), "lazi")
+
+        parser = QueryParser("contents", self.porterAnalyzer)
+        hits = searcher.search(parser.parse('"fox jumped"'))
+        self.assertEqual(1, hits.length(), "jump jumps jumped jumping")
+
+    def testExactPhrase(self):
+
+        searcher = IndexSearcher(self.directory)
+
+        # Default phrase slop: the phrase is expected not to match.
+        parser = QueryParser("contents", self.porterAnalyzer)
+        hits = searcher.search(parser.parse('"over the lazy"'))
+
+        self.assertEqual(0, hits.length(), "exact match not found!")
+
+    def testWithSlop(self):
+
+        searcher = IndexSearcher(self.directory)
+
+        # Same phrase as in testExactPhrase, but with a phrase slop of 1.
+        parser = QueryParser("contents", self.porterAnalyzer)
+        parser.setPhraseSlop(1)
+        hits = searcher.search(parser.parse('"over the lazy"'))
+
+        self.assertEqual(1, hits.length(), "hole accounted for")
+
+    def main(cls):
+
+        # Print the tokens and positions the analyzer produces.
+        sample = "The quick brown fox jumps over the lazy dogs"
+        AnalyzerUtils.displayTokensWithPositions(cls.porterAnalyzer, sample)
+        print ''
+
+    main = classmethod(main)

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/positional/PositionalPorterStopAnalyzerTest.py
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/positional/PositionalPorterStopAnalyzerTest.py
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/positional/PositionalStopFilter.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/positional/PositionalStopFilter.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/positional/PositionalStopFilter.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/positional/PositionalStopFilter.py Thu Jan  8 19:28:33 2009
@@ -0,0 +1,51 @@
+# ====================================================================
+# Copyright (c) 2004-2007 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions: 
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software. 
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+
+from lucene import PythonTokenFilter
+
+#
+# A TokenFilter extension
+#
+
+class PositionalStopFilter(PythonTokenFilter):
+    """Token filter that drops stop words but keeps their positions.
+
+    The position increment of each returned token is raised by the number
+    of stop words skipped immediately before it.
+    """
+
+    def __init__(self, tokenStream, stopWords):
+        """Wrap tokenStream; stopWords is tested with "in" for each term."""
+
+        super(PositionalStopFilter, self).__init__(tokenStream)
+
+        self.stopWords = stopWords
+        self.input = tokenStream
+
+    def next(self):
+        """Return the next token that is not a stop word, or None."""
+
+        skipped = 0
+
+        for token in self.input:
+            if token.termText() in self.stopWords:
+                # Remember the gap this stop word leaves behind.
+                skipped += 1
+                continue
+
+            token.setPositionIncrement(token.getPositionIncrement() + skipped)
+            return token
+
+        return None

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/positional/PositionalStopFilter.py
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/positional/PositionalStopFilter.py
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/positional/__init__.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/positional/__init__.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/positional/__init__.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/positional/__init__.py Thu Jan  8 19:28:33 2009
@@ -0,0 +1 @@
+# positional package

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/positional/__init__.py
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/positional/__init__.py
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/queryparser/AnalysisParalysisTest.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/queryparser/AnalysisParalysisTest.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/queryparser/AnalysisParalysisTest.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/queryparser/AnalysisParalysisTest.py Thu Jan  8 19:28:33 2009
@@ -0,0 +1,48 @@
+# ====================================================================
+# Copyright (c) 2004-2007 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions: 
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software. 
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+
+from lia.common.LiaTestCase import LiaTestCase
+
+from lucene import \
+     QueryParser, StandardAnalyzer, PerFieldAnalyzerWrapper, WhitespaceAnalyzer
+
+
+class AnalysisParalysisTest(LiaTestCase):
+
+    def testAnalyzer(self):
+
+        queryString = "category:/philosophy/eastern"
+
+        # With StandardAnalyzer alone the path is expected to be split
+        # into a two-word phrase.
+        standard = StandardAnalyzer()
+        parsed = QueryParser("contents", standard).parse(queryString)
+        self.assertEqual("category:\"philosophy eastern\"",
+                         parsed.toString("contents"), "path got split, yikes!")
+
+        # With WhitespaceAnalyzer on the category field the path is
+        # expected to stay in one piece.
+        wrapper = PerFieldAnalyzerWrapper(standard)
+        wrapper.addAnalyzer("category", WhitespaceAnalyzer())
+        parsed = QueryParser("contents", wrapper).parse(queryString)
+        self.assertEqual("category:/philosophy/eastern",
+                         parsed.toString("contents"),
+                         "leave category field alone")

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/queryparser/AnalysisParalysisTest.py
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/queryparser/AnalysisParalysisTest.py
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/queryparser/__init__.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/queryparser/__init__.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/queryparser/__init__.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/queryparser/__init__.py Thu Jan  8 19:28:33 2009
@@ -0,0 +1,2 @@
+# queryparser package
+

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/queryparser/__init__.py
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/queryparser/__init__.py
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/stopanalyzer/StopAnalyzer2.py
URL: http://svn.apache.org/viewvc/lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/stopanalyzer/StopAnalyzer2.py?rev=732916&view=auto
==============================================================================
--- lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/stopanalyzer/StopAnalyzer2.py (added)
+++ lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/stopanalyzer/StopAnalyzer2.py Thu Jan  8 19:28:33 2009
@@ -0,0 +1,43 @@
+# ====================================================================
+# Copyright (c) 2004-2007 Open Source Applications Foundation.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions: 
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software. 
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+# ====================================================================
+#
+
+from lucene import \
+     LetterTokenizer, LowerCaseFilter, PythonAnalyzer, StopAnalyzer, StopFilter
+
+#
+# An Analyzer extension
+#
+
+class StopAnalyzer2(PythonAnalyzer):
+    """Stop-word analyzer built from LetterTokenizer and LowerCaseFilter.
+
+    Derives from PythonAnalyzer, like the other analyzers in these samples,
+    so that instances are real Analyzer extensions and not just objects
+    that happen to have a tokenStream() method.
+    """
+
+    def __init__(self, stopWords=None):
+        """Keep stopWords, or StopAnalyzer.ENGLISH_STOP_WORDS when None."""
+
+        super(StopAnalyzer2, self).__init__()
+
+        if stopWords is None:
+            self.stopWords = StopAnalyzer.ENGLISH_STOP_WORDS
+        else:
+            self.stopWords = stopWords
+
+    def tokenStream(self, fieldName, reader):
+        """Return the filter chain over reader; fieldName is not used."""
+
+        # LetterTokenizer -> LowerCaseFilter -> StopFilter
+        return StopFilter(LowerCaseFilter(LetterTokenizer(reader)),
+                          self.stopWords)

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/stopanalyzer/StopAnalyzer2.py
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/pylucene/trunk/samples/LuceneInAction/lia/analysis/stopanalyzer/StopAnalyzer2.py
------------------------------------------------------------------------------
    svn:mime-type = text/plain



Mime
View raw message