bloodhound-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From and...@apache.org
Subject svn commit: r1454347 - in /incubator/bloodhound/trunk/bloodhound_search/bhsearch: tests/query_parser.py tests/whoosh_backend.py whoosh_backend.py
Date Fri, 08 Mar 2013 12:35:15 GMT
Author: andrej
Date: Fri Mar  8 12:35:14 2013
New Revision: 1454347

URL: http://svn.apache.org/r1454347
Log:
removing stop words from whoosh index - towards #447 (from astaric)

Added:
    incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/query_parser.py
Modified:
    incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/whoosh_backend.py
    incubator/bloodhound/trunk/bloodhound_search/bhsearch/whoosh_backend.py

Added: incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/query_parser.py
URL: http://svn.apache.org/viewvc/incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/query_parser.py?rev=1454347&view=auto
==============================================================================
--- incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/query_parser.py (added)
+++ incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/query_parser.py Fri Mar  8 12:35:14 2013
@@ -0,0 +1,59 @@
+import unittest
+from bhsearch.tests.base import BaseBloodhoundSearchTest
+from bhsearch.query_parser import DefaultQueryParser
+from whoosh.query import terms, nary, wrappers
+
+
+class MetaKeywordsParsingTestCase(BaseBloodhoundSearchTest):
+    def setUp(self):
+        super(MetaKeywordsParsingTestCase, self).setUp()
+        self.parser = DefaultQueryParser(self.env)
+
+    def test_can_parse_keyword_ticket(self):
+        parsed_query = self.parser.parse("$ticket")
+        self.assertEqual(parsed_query, terms.Term('type', 'ticket'))
+
+    def test_can_parse_NOT_keyword_ticket(self):
+        parsed_query = self.parser.parse("NOT $ticket")
+        self.assertEqual(parsed_query,
+                         wrappers.Not(
+                             terms.Term('type', 'ticket')))
+
+    def test_can_parse_keyword_wiki(self):
+        parsed_query = self.parser.parse("$wiki")
+        self.assertEqual(parsed_query, terms.Term('type', 'wiki'))
+
+    def test_can_parse_keyword_resolved(self):
+        parsed_query = self.parser.parse("$resolved")
+        self.assertEqual(parsed_query,
+                         nary.Or([terms.Term('status', 'resolved'),
+                                  terms.Term('status', 'closed')]))
+
+    def test_can_parse_meta_keywords_that_resolve_to_meta_keywords(self):
+        parsed_query = self.parser.parse("$unresolved")
+        self.assertEqual(parsed_query,
+                         wrappers.Not(
+                         nary.Or([terms.Term('status', 'resolved'),
+                                  terms.Term('status', 'closed')])))
+
+    def test_can_parse_complex_query(self):
+        parsed_query = self.parser.parse("content:test $ticket $unresolved")
+
+        self.assertEqual(parsed_query,
+                         nary.And([
+                             terms.Term('content', 'test'),
+                             terms.Term('type', 'ticket'),
+                             wrappers.Not(
+                                 nary.Or([terms.Term('status', 'resolved'),
+                                          terms.Term('status', 'closed')])
+                             )
+                         ]))
+
+
+def suite():
+    test_suite = unittest.TestSuite()
+    test_suite.addTest(unittest.makeSuite(MetaKeywordsParsingTestCase, 'test'))
+    return test_suite
+
+if __name__ == '__main__':
+    unittest.main()

Modified: incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/whoosh_backend.py
URL: http://svn.apache.org/viewvc/incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/whoosh_backend.py?rev=1454347&r1=1454346&r2=1454347&view=diff
==============================================================================
--- incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/whoosh_backend.py (original)
+++ incubator/bloodhound/trunk/bloodhound_search/bhsearch/tests/whoosh_backend.py Fri Mar  8 12:35:14 2013
@@ -31,7 +31,7 @@ from trac.util.datefmt import FixedOffse
 from whoosh import index, sorting, query
 from whoosh.fields import Schema, ID, TEXT, KEYWORD
 from whoosh.qparser import MultifieldPlugin, QueryParser, WhitespacePlugin, \
-    PhrasePlugin
+    PhrasePlugin, MultifieldParser
 
 
 class WhooshBackendTestCase(BaseBloodhoundSearchTest):
@@ -581,6 +581,32 @@ class WhooshFunctionalityTestCase(unitte
             results = s.search(query.Every())
             self.assertEquals(0, len(results))
 
+    def test_handles_stop_words_in_queries(self):
+        schema = WhooshBackend.SCHEMA
+        ix = index.create_in(self.index_dir, schema=schema)
+        with ix.writer() as w:
+            w.add_document(content=u"A nice sentence with stop words.")
+
+        with ix.searcher() as s:
+            query = u"with stop"
+
+            # field_names both ignore stop words
+            q = MultifieldParser(['content', 'summary'],
+                                 WhooshBackend.SCHEMA).parse(query)
+            self.assertEqual(q.simplify(s).__unicode__(),
+                             u'((content:with OR summary:with) AND '
+                             u'(content:stop OR summary:stop))')
+            self.assertEqual(len(s.search(q)), 1)
+
+            # 'content' and 'id' ignores stop words
+            q = MultifieldParser(['content', 'id'],
+                                 WhooshBackend.SCHEMA).parse(query)
+            self.assertEqual(q.simplify(s).__unicode__(),
+                             u'((content:with OR id:with) AND '
+                             u'(content:stop OR id:stop))')
+            self.assertEqual(len(s.search(q)), 1)
+
+
 class WhooshEmptyFacetErrorWorkaroundTestCase(BaseBloodhoundSearchTest):
     def setUp(self):
         super(WhooshEmptyFacetErrorWorkaroundTestCase, self).setUp()

Modified: incubator/bloodhound/trunk/bloodhound_search/bhsearch/whoosh_backend.py
URL: http://svn.apache.org/viewvc/incubator/bloodhound/trunk/bloodhound_search/bhsearch/whoosh_backend.py?rev=1454347&r1=1454346&r2=1454347&view=diff
==============================================================================
--- incubator/bloodhound/trunk/bloodhound_search/bhsearch/whoosh_backend.py (original)
+++ incubator/bloodhound/trunk/bloodhound_search/bhsearch/whoosh_backend.py Fri Mar  8 12:35:14 2013
@@ -29,7 +29,7 @@ from trac.config import Option, IntOptio
 from trac.util.text import empty
 from trac.util.datefmt import utc
 from whoosh.fields import Schema, ID, DATETIME, KEYWORD, TEXT
-from whoosh import index
+from whoosh import index, analysis
 import whoosh
 import whoosh.highlight
 from whoosh.writing import AsyncWriter
@@ -64,9 +64,11 @@ class WhooshBackend(Component):
         status=ID(stored=True),
         resolution=ID(stored=True),
         keywords=KEYWORD(scorable=True),
-        summary=TEXT(stored=True),
-        content=TEXT(stored=True),
-        changes=TEXT(),
+        summary=TEXT(stored=True,
+                     analyzer=analysis.StandardAnalyzer(stoplist=None)),
+        content=TEXT(stored=True,
+                     analyzer=analysis.StandardAnalyzer(stoplist=None)),
+        changes=TEXT(analyzer=analysis.StandardAnalyzer(stoplist=None)),
         )
 
     max_fragment_size = IntOption('bhsearch', 'max_fragment_size', 240,



Mime
View raw message