incubator-bloodhound-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From j...@apache.org
Subject svn commit: r1440987 [2/2] - in /incubator/bloodhound/branches/bep_0003_multiproduct: ./ bloodhound_search/ bloodhound_search/bhsearch/ bloodhound_search/bhsearch/default-pages/ bloodhound_search/bhsearch/search_resources/ bloodhound_search/bhsearch/te...
Date Thu, 31 Jan 2013 14:41:22 GMT
Modified: incubator/bloodhound/branches/bep_0003_multiproduct/bloodhound_search/bhsearch/whoosh_backend.py
URL: http://svn.apache.org/viewvc/incubator/bloodhound/branches/bep_0003_multiproduct/bloodhound_search/bhsearch/whoosh_backend.py?rev=1440987&r1=1440986&r2=1440987&view=diff
==============================================================================
--- incubator/bloodhound/branches/bep_0003_multiproduct/bloodhound_search/bhsearch/whoosh_backend.py
(original)
+++ incubator/bloodhound/branches/bep_0003_multiproduct/bloodhound_search/bhsearch/whoosh_backend.py
Thu Jan 31 14:41:22 2013
@@ -19,18 +19,21 @@
 #  under the License.
 
 r"""Whoosh specific backend for Bloodhound Search plugin."""
-from bhsearch.api import ISearchBackend, DESC, QueryResult, SCORE
+from bhsearch.api import ISearchBackend, DESC, QueryResult, SCORE, \
+    IDocIndexPreprocessor, IResultPostprocessor, IndexFields, \
+    IQueryPreprocessor
 import os
-from trac.core import *
+from trac.core import Component, implements, TracError
 from trac.config import Option
+from trac.util.text import empty
 from trac.util.datefmt import utc
-from whoosh.fields import *
-from whoosh import index, sorting, query
-from whoosh.searching import ResultsPage
+from whoosh.fields import Schema, ID, DATETIME, KEYWORD, TEXT
+from whoosh import index
+import whoosh
 from whoosh.writing import AsyncWriter
 from datetime import datetime
 
-UNIQUE_ID = 'unique_id'
+UNIQUE_ID = "unique_id"
 
 class WhooshBackend(Component):
     """
@@ -43,19 +46,22 @@ class WhooshBackend(Component):
         directory of the environment.""")
 
     #This is schema prototype. It will be changed later
-    #TODO: add other fields support, add dynamic field support
+    #TODO: add other fields support, add dynamic field support.
+    #Schema must be driven by index participants
     SCHEMA = Schema(
         unique_id=ID(stored=True, unique=True),
         id=ID(stored=True),
         type=ID(stored=True),
         product=ID(stored=True),
+        milestone=ID(stored=True),
         time=DATETIME(stored=True),
+        due=DATETIME(stored=True),
+        completed=DATETIME(stored=True),
         author=ID(stored=True),
-        component=KEYWORD(stored=True),
-        status=KEYWORD(stored=True),
-        resolution=KEYWORD(stored=True),
+        component=ID(stored=True),
+        status=ID(stored=True),
+        resolution=ID(stored=True),
         keywords=KEYWORD(scorable=True),
-        milestone=TEXT(spelling=True),
         summary=TEXT(stored=True),
         content=TEXT(stored=True),
         changes=TEXT(),
@@ -65,7 +71,7 @@ class WhooshBackend(Component):
         self.index_dir = self.index_dir_setting
         if not os.path.isabs(self.index_dir):
             self.index_dir = os.path.join(self.env.path, self.index_dir)
-        self.open_or_create_index_if_missing()
+        self.index = self._open_or_create_index_if_missing()
 
     #ISearchBackend methods
     def start_operation(self):
@@ -80,16 +86,13 @@ class WhooshBackend(Component):
         The contents should be a dict with fields matching the search schema.
         The only required fields are type and id, everything else is optional.
         """
-        # Really make sure it's unicode, because Whoosh won't have it any
-        # other way.
         is_local_writer = False
         if writer is None:
             is_local_writer = True
             writer = self._create_writer()
 
-        for key in doc:
-            doc[key] = self._to_whoosh_format(doc[key])
-        doc["unique_id"] = self._create_unique_id(doc["type"], doc["id"])
+        self._reformat_doc(doc)
+        doc[UNIQUE_ID] = self._create_unique_id(doc["type"], doc["id"])
         self.log.debug("Doc to index: %s", doc)
         try:
             writer.update_document(**doc)
@@ -98,9 +101,24 @@ class WhooshBackend(Component):
         except:
             if is_local_writer:
                 writer.cancel()
+            raise
+
+    def _reformat_doc(self, doc):
+        """Clean doc in place for Whoosh: drop a None key, None values and
+        empty strings; pass every other value through _to_whoosh_format.
+        """
+        for key, value in doc.items():  # Python 2 list copy; safe to delete
+            if key is None:
+                del doc[None]
+            elif value is None:
+                del doc[key]
+            elif isinstance(value, basestring) and value == "":
+                del doc[key]
+            else:
+                doc[key] = self._to_whoosh_format(value)
 
-    def delete_doc(self, type, id, writer=None):
-        unique_id = self._create_unique_id(type, id)
+    def delete_doc(self, doc_type, doc_id, writer=None):
+        unique_id = self._create_unique_id(doc_type, doc_id)
         self.log.debug('Removing document from the index: %s', unique_id)
         is_local_writer = False
         if writer is None:
@@ -126,7 +144,7 @@ class WhooshBackend(Component):
     def cancel(self, writer):
         try:
             writer.cancel()
-        except Exception,ex:
+        except Exception, ex:
             self.env.log.error("Error during writer cancellation: %s", ex)
 
     def recreate_index(self):
@@ -134,14 +152,21 @@ class WhooshBackend(Component):
         self._make_dir_if_not_exists()
         return index.create_in(self.index_dir, schema=self.SCHEMA)
 
-    def open_or_create_index_if_missing(self):
+    def _open_or_create_index_if_missing(self):
         if index.exists_in(self.index_dir):
-            self.index = index.open_dir(self.index_dir)
+            return index.open_dir(self.index_dir)
         else:
-            self.index = self.recreate_index()
+            return self.recreate_index()
 
-    def query(self, query, sort = None, fields = None, boost = None, filter = None,
-                  facets = None, pagenum = 1, pagelen = 20):
+    def query(self,
+              query,
+              sort = None,
+              fields = None,
+              boost = None,
+              filter = None,
+              facets = None,
+              pagenum = 1,
+              pagelen = 20):
         """
         Perform query.
 
@@ -157,28 +182,31 @@ class WhooshBackend(Component):
         """
         with self.index.searcher() as searcher:
             sortedby = self._prepare_sortedby(sort)
+
+            #TODO: investigate how faceting is applied to multi-value fields
+            #e.g. keywords. For now, just pass facets list to Whoosh API
+            #groupedby = self._prepare_groupedby(facets)
             groupedby = facets
-            query_filter = self._prepare_filter(filter)
 
             #workaround of Whoosh bug, read method __doc__
             query = self._workaround_join_query_and_filter(
                 query,
-                query_filter)
+                filter)
 
-            search_parameters = dict(
+            query_parameters = dict(
                 query = query,
                 pagenum = pagenum,
                 pagelen = pagelen,
                 sortedby = sortedby,
                 groupedby = groupedby,
-                maptype=sorting.Count,
+                maptype=whoosh.sorting.Count,
                 #workaround of Whoosh bug, read method __doc__
-                #filter = query_filter,
+                #filter = filter,
             )
             self.env.log.debug("Whoosh query to execute: %s",
-                search_parameters)
-            raw_page = searcher.search_page(**search_parameters)
-            results = self._process_results(raw_page, fields, search_parameters)
+                query_parameters)
+            raw_page = searcher.search_page(**query_parameters)
+            results = self._process_results(raw_page, fields, query_parameters)
         return results
 
     def _workaround_join_query_and_filter(
@@ -187,21 +215,10 @@ class WhooshBackend(Component):
             query_filter):
         if not query_filter:
             return query_expression
-        return query.And((query_expression, query_filter))
-
-    def _prepare_filter(self, filters):
-        if not filters:
-            return None
-        and_filters = []
-        for filter in filters:
-            and_filters.append(query.Term(
-                unicode(filter[0]),
-                unicode(filter[1])))
-        return query.And(and_filters)
-
+        return whoosh.query.And((query_expression, query_filter))
 
-    def _create_unique_id(self, type, id):
-        return u"%s:%s" % (type, id)
+    def _create_unique_id(self, doc_type, doc_id):
+        return u"%s:%s" % (doc_type, doc_id)
 
     def _to_whoosh_format(self, value):
         if isinstance(value, basestring):
@@ -212,9 +229,9 @@ class WhooshBackend(Component):
 
     def _convert_date_to_tz_naive_utc(self, value):
         """Convert datetime to naive utc datetime
-        Whoosh can not read  from index datetime value with
+        Whoosh can not read  from index datetime values passed from Trac with
         tzinfo=trac.util.datefmt.FixedOffset because of non-empty
-        constructor"""
+        constructor of FixedOffset"""
         if value.tzinfo:
             utc_time = value.astimezone(utc)
             value = utc_time.replace(tzinfo=None)
@@ -225,13 +242,16 @@ class WhooshBackend(Component):
             value = utc.localize(value)
         return value
 
-#    def _prepare_groupedby(self, facets):
-#        if not facets:
-#            return None
-#        groupedby = sorting.Facets()
-#        for facet_name in facets:
-#            groupedby.add_field(facet_name, allow_overlap=True, maptype=sorting.Count)
-#        return groupedby
+    def _prepare_groupedby(self, facets):  # facets: iterable of field names
+        if not facets:
+            return None  # nothing to group by
+        groupedby = whoosh.sorting.Facets()
+        for facet_name in facets:
+            groupedby.add_field(
+                facet_name,
+                allow_overlap=True,
+                maptype=whoosh.sorting.Count)  # same maptype that query() uses
+        return groupedby
 
     def _prepare_sortedby(self, sort):
         if not sort:
@@ -240,12 +260,15 @@ class WhooshBackend(Component):
         for (field, order) in sort:
             if field.lower() == SCORE:
                 if self._is_desc(order):
-                    #We can implement later our own ScoreFacet with
+                    #We can implement this later by our own ScoreFacet with
                     # "score DESC" support
-                    raise TracError("Whoosh does not support DESC score ordering.")
-                sort_condition = sorting.ScoreFacet()
+                    raise TracError(
+                        "Whoosh does not support DESC score ordering.")
+                sort_condition = whoosh.sorting.ScoreFacet()
             else:
-                sort_condition = sorting.FieldFacet(field, reverse=self._is_desc(order))
+                sort_condition = whoosh.sorting.FieldFacet(
+                    field,
+                    reverse=self._is_desc(order))
             sortedby.append(sort_condition)
         return sortedby
 
@@ -267,7 +290,7 @@ class WhooshBackend(Component):
         results.facets = self._load_facets(page)
 
         docs = []
-        for doc_offset, retrieved_record in enumerate(page):
+        for retrieved_record in page:
             result_doc = self._process_record(fields, retrieved_record)
             docs.append(result_doc)
         results.docs = docs
@@ -294,6 +317,7 @@ class WhooshBackend(Component):
         return result_doc
 
     def _load_facets(self, page):
+        """This method can be also used by unit-tests"""
         non_paged_results = page.results
         facet_names = non_paged_results.facet_names()
         if not facet_names:
@@ -313,3 +337,73 @@ class WhooshBackend(Component):
                  current user."
                 % self.index_dir)
 
+
+class WhooshEmptyFacetErrorWorkaround(Component):
+    """
+        Whoosh 2.4.1 raises "IndexError: list index out of range"
+        when search contains facets on field that is missing in at least one
+        document in the index. The error manifests only when index contains
+        more than one segment.
+
+        The goal of this class is to temporarily solve the problem for
+        prototype phase. For non-prototype phase, the problem should be solved
+        by the next version of Whoosh.
+
+        Remove this class when fixed version of Whoosh is introduced.
+    """
+    implements(IDocIndexPreprocessor)
+    implements(IResultPostprocessor)
+    implements(IQueryPreprocessor)
+
+    NULL_MARKER = u"empty"
+
+    should_not_be_empty_fields = [
+        IndexFields.STATUS,
+        IndexFields.MILESTONE,
+        IndexFields.COMPONENT,
+    ]
+
+    #IDocIndexPreprocessor methods
+    def pre_process(self, doc):  # fill absent/None/empty fields with marker
+        for field in self.should_not_be_empty_fields:
+            if field not in doc or doc[field] is None or doc[field] == empty:
+                doc[field] = self.NULL_MARKER
+
+    #IResultPostprocessor methods
+    def post_process(self, query_result):
+        #fix facets: move the NULL_MARKER count to the None key (in place)
+        if query_result.facets:
+            for count_dict in query_result.facets.values():
+                for field, count in count_dict.items():  # snapshot: we mutate
+                    if field == self.NULL_MARKER:
+                        count_dict[None] = count
+                        del count_dict[self.NULL_MARKER]
+        #we can fix query_result.docs later if needed
+
+    #IQueryPreprocessor methods
+    def query_pre_process(self, query_parameters):
+        """
+        Replace "NOT (field_name:*)" sub-queries of the "filter" and "query"
+        parameters with "field_name:NULL_MARKER" queries (in place).
+        NOTE(review): a top-level NOT is detected but not replaced - confirm.
+        This is a really quick fix to make the prototype work, in the hope
+        that the next Whoosh version will be released soon.
+        """
+        if "filter" in query_parameters and query_parameters["filter"]:
+            self._find_and_fix_condition(query_parameters["filter"])
+        if "query" in query_parameters and query_parameters["query"]:
+            self._find_and_fix_condition(query_parameters["query"])
+
+    def _find_and_fix_condition(self, filter_condition):  # -> Term or None
+        if isinstance(filter_condition, whoosh.query.CompoundQuery):
+            sub_queries = list(filter_condition.subqueries)
+            for i, subquery in enumerate(sub_queries):
+                term_to_replace = self._find_and_fix_condition(subquery)
+                if term_to_replace:
+                    filter_condition.subqueries[i] = term_to_replace
+        elif isinstance(filter_condition, whoosh.query.Not):
+            not_query = filter_condition.query
+            if isinstance(not_query, whoosh.query.Every) and \
+               not_query.fieldname in self.should_not_be_empty_fields:
+                return whoosh.query.Term(not_query.fieldname, self.NULL_MARKER)
+        return None

Modified: incubator/bloodhound/branches/bep_0003_multiproduct/bloodhound_search/setup.py
URL: http://svn.apache.org/viewvc/incubator/bloodhound/branches/bep_0003_multiproduct/bloodhound_search/setup.py?rev=1440987&r1=1440986&r2=1440987&view=diff
==============================================================================
--- incubator/bloodhound/branches/bep_0003_multiproduct/bloodhound_search/setup.py (original)
+++ incubator/bloodhound/branches/bep_0003_multiproduct/bloodhound_search/setup.py Thu Jan
31 14:41:22 2013
@@ -109,35 +109,26 @@ PKG_INFO = {'bhsearch' : ('bhsearch',   
                           ),
             }
 
-#ENTRY_POINTS = r"""
-#               [trac.plugins]
-#               bhsearch.web_ui = bhsearch.web_ui
-#               bhsearch.api = bhsearch.api
-#               bhsearch.admin = bhsearch.admin
-#               bhsearch.ticket_search = bhsearch.ticket_search
-#               bhsearch.query_parser = bhsearch.query_parser
-#               bhsearch.whoosh_backend = bhsearch.whoosh_backend
-#               """
 ENTRY_POINTS = {
-        'trac.plugins': [
-            'bhsearch.web_ui = bhsearch.web_ui',
-            'bhsearch.api = bhsearch.api',
-            'bhsearch.admin = bhsearch.admin',
-            'bhsearch.ticket_search = bhsearch.ticket_search',
-            'bhsearch.wiki_search = bhsearch.wiki_search',
-            'bhsearch.query_parser = bhsearch.query_parser',
-            'bhsearch.whoosh_backend = bhsearch.whoosh_backend',
-        ],
+    'trac.plugins': [
+        'bhsearch.web_ui = bhsearch.web_ui',
+        'bhsearch.api = bhsearch.api',
+        'bhsearch.admin = bhsearch.admin',
+        'bhsearch.search_resources.ticket_search =\
+            bhsearch.search_resources.ticket_search',
+        'bhsearch.search_resources.wiki_search = \
+            bhsearch.search_resources.wiki_search',
+        'bhsearch.search_resources.milestone_search = \
+            bhsearch.search_resources.milestone_search',
+        'bhsearch.query_parser = bhsearch.query_parser',
+        'bhsearch.whoosh_backend = bhsearch.whoosh_backend',
+    ],
     }
-#bhsearch.whoosh_backend = bhsearch.whoosh_backend
-#bhsearch.ticket_search = bhsearch.ticket_search
-
 setup(
     name=DIST_NM,
     version=latest,
     description=DESC.split('\n', 1)[0],
     requires = ['trac'],
-#    tests_require = ['dutest>=0.2.4', 'TracXMLRPC'],
     install_requires = [
         'setuptools>=0.6b1',
         'Trac>=0.11',

Modified: incubator/bloodhound/branches/bep_0003_multiproduct/bloodhound_theme/bhtheme/theme.py
URL: http://svn.apache.org/viewvc/incubator/bloodhound/branches/bep_0003_multiproduct/bloodhound_theme/bhtheme/theme.py?rev=1440987&r1=1440986&r2=1440987&view=diff
==============================================================================
--- incubator/bloodhound/branches/bep_0003_multiproduct/bloodhound_theme/bhtheme/theme.py
(original)
+++ incubator/bloodhound/branches/bep_0003_multiproduct/bloodhound_theme/bhtheme/theme.py
Thu Jan 31 14:41:22 2013
@@ -16,6 +16,8 @@
 #  specific language governing permissions and limitations
 #  under the License.
 
+import sys
+
 from genshi.builder import tag
 from genshi.core import TEXT
 from genshi.filters.transform import Transformer
@@ -213,6 +215,10 @@ class BloodhoundTheme(ThemeBase):
     def post_process_request(self, req, template, data, content_type):
         """Post process request filter.
         Removes all trac provided css if required"""
+        
+        if template is None and data is None and sys.exc_info() == (None, None, None):
+            return template, data, content_type
+        
         def is_active_theme():
             is_active = False
             active_theme = ThemeEngineSystem(self.env).theme



Mime
View raw message