lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From roo...@apache.org
Subject svn commit: r155649 - in incubator/lucene4c/trunk: src/search/scorer.c test/search/scorer_test.c
Date Sun, 27 Feb 2005 23:23:20 GMT
Author: rooneg
Date: Sun Feb 27 15:23:18 2005
New Revision: 155649

URL: http://svn.apache.org/viewcvs?view=rev&rev=155649
Log:
Add support for should queries.

Well, sort of anyway.  It's not quite working, but it's close, and I want
to save this snapshot before I screw it up some more trying to debug it.

* src/search/scorer.c
  (boolean_scorer_baton_t): store the scorer we used to grab our candidate
   document the last time through the loop, so we can move it along to the
   next doc before we use it again.
  (pick_scorer): new, holds the logic for picking the scorer we should use
   for our candidate document.
  (boolean_scorer_find_doc): use pick_scorer, add logic for should queries
   and remove logic that's moved into pick_scorer.

* test/search/scorer_test.c
  (test_boolean_scorer): add some tests for should queries, some of which
   are currently failing.

Modified:
    incubator/lucene4c/trunk/src/search/scorer.c
    incubator/lucene4c/trunk/test/search/scorer_test.c

Modified: incubator/lucene4c/trunk/src/search/scorer.c
URL: http://svn.apache.org/viewcvs/incubator/lucene4c/trunk/src/search/scorer.c?view=diff&r1=155648&r2=155649
==============================================================================
--- incubator/lucene4c/trunk/src/search/scorer.c (original)
+++ incubator/lucene4c/trunk/src/search/scorer.c Sun Feb 27 15:23:18 2005
@@ -64,64 +64,142 @@
   apr_array_header_t *must;
   apr_array_header_t *should;
   apr_array_header_t *must_not;
+
+  /* the scorer we used last time through boolean_scorer_find_doc's main
+   * loop to get our initial document for comparisons. */
+  lcn_scorer_t *last_scorer;
 } boolean_scorer_baton_t;
 
+/* select the scorer from which we should pull our initial candidate doc. */
 static lcn_error_t *
-boolean_scorer_find_doc (apr_uint32_t *doc, boolean_scorer_baton_t *bsb)
+pick_scorer (boolean_scorer_baton_t *bsb)
 {
-  if (bsb->should->nelts)
-    return lcn_error_create (APR_ENOTIMPL, NULL, "should isn't implemented");
-  if (bsb->must_not->nelts)
-    return lcn_error_create (APR_ENOTIMPL, NULL, "must_not isn't implemented");
+  if (bsb->must->nelts == 0
+      && bsb->should->nelts == 0
+      && bsb->must_not->nelts == 0)
+    {
+      return lcn_error_create (APR_EINVAL,
+                               NULL,
+                               "boolean query needs at least one subquery");
+    }
 
-  if (bsb->must->nelts != 0)
+  if ((bsb->must->nelts != 0 || bsb->should->nelts != 0)
+      && bsb->must_not->nelts == 0)
     {
-      for (;;)
+      if (bsb->should->nelts == 0)
+        {
+          /* if there are no should queries just pick an arbitrary must. */
+          bsb->last_scorer = APR_ARRAY_IDX (bsb->must, 0, lcn_scorer_t *);
+        }
+      else
         {
-          lcn_boolean_t got_a_hit = TRUE;
+          /* otherwise we pick the lowest of the should queries. */
+          apr_uint32_t lowest = 4294967295UL; /* 2 ^ 32 - 1*/
           int i;
 
-          /* *doc will only be zero on the first time through, since the
-           * lowest valid document number is one.  if it isn't the first
-           * time through we need to move along to the next document. */
-
-          if (*doc != 0)
-            LCN_ERR (lcn_scorer_next (APR_ARRAY_IDX (bsb->must,
-                                                     0,
-                                                     lcn_scorer_t *)));
+          for (i = 0; i < bsb->should->nelts; ++i)
+            {
+              lcn_scorer_t *scorer = APR_ARRAY_IDX (bsb->should,
+                                                    i,
+                                                    lcn_scorer_t *);
+
+              apr_uint32_t doc = lcn_scorer_doc (scorer);
+
+              if (doc < lowest)
+                {
+                  bsb->last_scorer = scorer;
+                  lowest = doc;
+                }
+            }
+        }
+
+      return LCN_NO_ERROR;
+    }
+  else
+    {
+      return lcn_error_create (APR_ENOTIMPL,
+                               NULL,
+                               "we don't support must_not queries yet");
+    }
+}
+
+static lcn_error_t *
+boolean_scorer_find_doc (apr_uint32_t *doc, boolean_scorer_baton_t *bsb)
+{
+  for (;;)
+    {
+      lcn_boolean_t got_a_hit = TRUE;
+      int i;
+
+      /* if we have a last scorer (i.e. we have already come through this
+       * function) move it along to the next document. */
+      if (bsb->last_scorer != NULL)
+        LCN_ERR (lcn_scorer_next (bsb->last_scorer));
+
+      /* pick the scorer that supplies our candidate document: the should
+       * scorer with the lowest current doc if there are any should
+       * scorers, otherwise an arbitrary must scorer. */
+      LCN_ERR (pick_scorer (bsb));
+
+      *doc = lcn_scorer_doc (bsb->last_scorer);
+
+      /* if there are any should's then we start out as FALSE until we 
+       * find a match... */
+      if (bsb->should->nelts)
+        got_a_hit = FALSE;
+
+      for (i = 0; i < bsb->should->nelts; ++i)
+        {
+          lcn_scorer_t *should_scorer = APR_ARRAY_IDX (bsb->should,
+                                                       i,
+                                                       lcn_scorer_t *);
 
-          *doc = lcn_scorer_doc (APR_ARRAY_IDX (bsb->must, 0, lcn_scorer_t *));
+          apr_uint32_t otherdoc = lcn_scorer_doc (should_scorer);
 
-          for (i = 1; i < bsb->must->nelts; ++i)
+          while (otherdoc < *doc)
             {
-              lcn_scorer_t *must_scorer = APR_ARRAY_IDX (bsb->must,
-                                                         i,
-                                                         lcn_scorer_t *);
+              LCN_ERR (lcn_scorer_next (should_scorer));
 
-              apr_uint32_t otherdoc = lcn_scorer_doc (must_scorer);
+              otherdoc = lcn_scorer_doc (should_scorer);
+            }
 
-              while (otherdoc < *doc)
-                {
-                  LCN_ERR (lcn_scorer_next (must_scorer));
+          if (otherdoc == *doc)
+            {
+              got_a_hit = TRUE;
+              break;
+            }
+        }
 
-                  otherdoc = lcn_scorer_doc (must_scorer);
-                }
+      if (got_a_hit == FALSE)
+        continue;
 
-              if (otherdoc != *doc)
-                {
-                  got_a_hit = FALSE;
-                  break;
-                }
+      for (i = 0; i < bsb->must->nelts; ++i)
+        {
+          lcn_scorer_t *must_scorer = APR_ARRAY_IDX (bsb->must,
+                                                     i,
+                                                     lcn_scorer_t *);
+
+          apr_uint32_t otherdoc = lcn_scorer_doc (must_scorer);
+
+          while (otherdoc < *doc)
+            {
+              LCN_ERR (lcn_scorer_next (must_scorer));
+
+              otherdoc = lcn_scorer_doc (must_scorer);
             }
 
-          if (got_a_hit)
-            return LCN_NO_ERROR;
+          if (otherdoc != *doc)
+            {
+              got_a_hit = FALSE;
+              break;
+            }
         }
+
+      /* XXX check bsb->must_not scorers */
+
+      if (got_a_hit)
+        return LCN_NO_ERROR;
     }
-  else
-    return lcn_error_create (APR_EINVAL,
-                             NULL,
-                             "boolean query needs at least one subquery");
 }
 
 static lcn_error_t *

Modified: incubator/lucene4c/trunk/test/search/scorer_test.c
URL: http://svn.apache.org/viewcvs/incubator/lucene4c/trunk/test/search/scorer_test.c?view=diff&r1=155648&r2=155649
==============================================================================
--- incubator/lucene4c/trunk/test/search/scorer_test.c (original)
+++ incubator/lucene4c/trunk/test/search/scorer_test.c Sun Feb 27 15:23:18 2005
@@ -141,6 +141,42 @@
 
   ABTS_INT_EQUAL (tc, APR_EOF, err->apr_err);
 
+  CHK_ERR (lcn_boolean_query_create (&query, p));
+
+  CHK_ERR (lcn_term_query_create (&tquery,
+                                  lcn_term_create_cstring ("lucene",
+                                                           "contents",
+                                                           p),
+                                  p));
+
+  CHK_ERR (lcn_boolean_query_add (query, tquery, LCN_SHOULD));
+
+  CHK_ERR (lcn_term_query_create (&tquery,
+                                  lcn_term_create_cstring ("cutting",
+                                                           "contents",
+                                                           p),
+                                  p));
+
+  CHK_ERR (lcn_boolean_query_add (query, tquery, LCN_SHOULD));
+
+  CHK_ERR (lcn_query_scorer (&scorer, query, index, p));
+
+  ABTS_INT_EQUAL (tc, 1, lcn_scorer_doc (scorer));
+
+  count = 0;
+
+  while ((err = lcn_scorer_next (scorer)) == LCN_NO_ERROR)
+    {
+      ++count;
+    }
+
+  ABTS_PTR_NOTNULL (tc, err);
+
+  ABTS_INT_EQUAL (tc, APR_EOF, err->apr_err);
+
+  /* expected to fail for now, should queries aren't quite working yet */
+  ABTS_INT_EQUAL (tc, 114, count);
+
   apr_pool_clear (p);
 }
 



Mime
View raw message