lucy-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kasilak <kasi.anbum...@gmail.com>
Subject Re: [lucy-user] ProxmityQuery in C
Date Sat, 18 Feb 2017 00:35:38 GMT
Sharing the complete C code for search.c. 

Please look for g_testProximity to follow the proximity query related
changes.

As it stands, the default QParser handles the following query strings passed as
command-line arguments without issues, and I did not need to make any code changes:
(1) "a AND b"
(2) "a or b"
(3) "a NOT b"
(4) "a b"

What fails is the case "a b"~100, even though my indexed documents contain
the necessary terms within a span of 100 words.

#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#define CFISH_USE_SHORT_NAMES
#define LUCY_USE_SHORT_NAMES
#include "Clownfish/String.h"
#include "Clownfish/Vector.h"
#include "Lucy/Document/HitDoc.h"
#include "Lucy/Highlight/Highlighter.h"
#include "Lucy/Plan/Schema.h"
#include "Lucy/Search/ANDQuery.h"
#include "Lucy/Search/Hits.h"
#include "Lucy/Search/IndexSearcher.h"
#include "Lucy/Search/TermQuery.h"
#include "Lucy/Search/QueryParser.h"
#include "LucyX/Search/ProximityQuery.h"
#include "Lucy/Analysis/Analyzer.h"
#include "Clownfish/TestHarness/TestUtils.h"
#include "QUtils.h"
#include "version.h"

// Base path of the index directory. main() appends a "-<docCount>-<platform>"
// suffix to this buffer in place before opening the index, so the 100-byte
// capacity bounds the total path length.
char path_to_index[100] = "./lucy_index/lucy_index";
// Compile-time switch: when defined, main() creates a Highlighter and prints
// an excerpt with each hit; otherwise only title and URL are printed.
#define ENABLE_HIGHLIGHTER

/*
 * Test configuration: the query modes this program can exercise.
 */
enum {
    g_testDefault = 0,  /* QParser supports BOOLEAN/TERM queries */
    g_testProximity,    /* To support proximity queries */
    g_testMax           /* Count of the modes above */
};

/* Describes one selectable test: currently only its name. */
typedef struct TestOpts_ TestOpts;

struct TestOpts_ {
    const char *name;  /* test name string */
};


/**
 * Print the command-line synopsis to stderr and terminate the process with a
 * failure status.  This function does not return.
 *
 * @param arg0  Program name to show in the synopsis (normally argv[0]).
 */
static void
S_usage_and_exit(const char *arg0) {
    // The message is built from adjacent string literals, which the compiler
    // concatenates; a literal must never span physical source lines.
    fprintf(stderr,
            "Usage: %s [-p <x86_64/aarch64> platform] "
            "[-a <enable(1)/disable(0)> angel signals] "
            "[-s <Docs count>] [-c <category (OPTIONAL)>] "
            "[-T <default/proximity> test] <querystring>\n",
            arg0);
    exit(EXIT_FAILURE);
}

int
main(int argc, char *argv[]) {
    fprintf( stderr, "Search Version: %d.%d\n", MAJOR_VERSION,
MINOR_VERSION);
    bool isEnableAngelSignals = false;
    uint32_t docCount = 0;
    uint32_t numWanted = 10;

    // Initialize the library.
    lucy_bootstrap_parcel();

    const char *category  = NULL;
    const char *platform  = NULL;
    const char *testQuery = NULL;
    TestOpts g_testopts[] =
    {
      { "default"  },
      { "proximity"},
    };
    bool  queryType[g_testMax] = {false};

    int i = 1;
    uint32_t j;

    while (i < argc - 1) {
        if (strcmp(argv[i], "-p") == 0) {
            if (i + 1 >= argc) {
                S_usage_and_exit(argv[0]);
            }
            i += 1;
            platform = argv[i];
        }
        else if (strcmp(argv[i], "-a") == 0) {
            if (i + 1 >= argc) {
                S_usage_and_exit(argv[0]);
            }
            i += 1;
            isEnableAngelSignals = argv[i];
        }
        else if (strcmp(argv[i], "-s") == 0) {
            if (i + 1 >= argc) {
                S_usage_and_exit(argv[0]);
            }
            i += 1;
            docCount = atol(argv[i]);
        }
        else if (strcmp(argv[i], "-c") == 0) {
            if (i + 1 >= argc) {
                //S_usage_and_exit(argv[0]);
            }
            i += 1;
            category = argv[i];
            printf("Category given: %s\n\n", category);
        }
        else if (strcmp(argv[i], "-T") == 0) {
            if (i + 1 >= argc) {
                S_usage_and_exit(argv[0]);
            }
            i += 1;
            testQuery = argv[i];
            char *opt = (char *)&testQuery[0];
            bool found = false;

            for(j = 0; j < sizeof(g_testopts)/sizeof(TestOpts); j++)
            {
              if (strcmp(opt, g_testopts[j].name) == 0)
              {
                queryType[j] = true;
                printf( "Testing Query: %s\n", g_testopts[j].name);
                found = true;
                break;
              }
            }

            if (!found)
            {
              printf("Invalid option: -T=%s\n", testQuery);
              printf("Valid tests: -T=%s", g_testopts[0].name);
              for(j = 1; j < sizeof(g_testopts)/sizeof(TestOpts); j++)
              {
                printf( ",%s", g_testopts[j].name);
              }
              printf( "\n");
              exit(0);
            }
        }
        else {
            S_usage_and_exit(argv[0]);
        }

        i += 1;
    }

    if (i + 1 != argc) {
        S_usage_and_exit(argv[0]);
    }

    const char *query_c = argv[i];

    printf("Searching for: %s with # of hits: %d \n\n", query_c, numWanted);

#ifdef PERF_INSTRUMENT
  perf_event_init(  (enable_perf_events) (ENABLE_HW_CYCLES_PER |
ENABLE_HW_INSTRS_PER) );
#endif

    char buff1[100];
    sprintf(buff1, "%s%d%s%s", "-", docCount, "-", platform);
    strcat(path_to_index, buff1);
    String        *folder   = Str_newf("%s", path_to_index);
    printf("Index file used: %s\n", path_to_index);

#ifdef PERF_INSTRUMENT
  perf_event_enable ( (enable_perf_events) (ENABLE_HW_CYCLES_PER |
ENABLE_HW_INSTRS_PER) );
  uint64_t beginI = perf_per_instr_event_read();
#endif
    double start = (double)clock();

    IndexSearcher *searcher  = IxSearcher_new((Obj*)folder);
    Schema        *schema    = IxSearcher_Get_Schema(searcher);
    String        *query_str = Str_newf("%s", query_c);

    QueryParser *qparser = QParser_new(schema, NULL, NULL, NULL);
    ProximityQuery *pquery = NULL;

    Query *query = NULL;
    query = QParser_Parse(qparser, query_str);

    String *content_str = Str_newf("content");
#ifdef ENABLE_HIGHLIGHTER
    Highlighter *highlighter
        = Highlighter_new((Searcher*)searcher, (Obj*)query, content_str,
200);
#endif

    if (category)
    {
        String *category_name = Str_newf("category");
        String *category_str  = Str_newf("%s", category);
        TermQuery *category_query
            = TermQuery_new(category_name, (Obj*)category_str);

        Vector *children = Vec_new(2);
        Vec_Push(children, (Obj*)query);
        Vec_Push(children, (Obj*)category_query);
        query = (Query*)ANDQuery_new(children);

        DECREF(children);
        DECREF(category_str);
        DECREF(category_name);
    }

    //To handle proximity queries
    if( queryType[g_testProximity] )
    {
        Vector *terms = Vec_new(0);
        Vec_Push(terms, (Obj*)query_str);

      	String *field_name = Str_newf("content");
        pquery = (Query*)ProximityQuery_new(field_name, terms, 100);

        Vector *children = Vec_new(2);
        Vec_Push(children, (Obj*) query);
        Vec_Push(children, (Obj*) pquery);
        query = (Query*) (children); //???

        DECREF(children);
      	DECREF(field_name);
        DECREF(terms);
    }

    Hits *hits;
    if ( queryType[g_testDefault] )
    {
      hits = IxSearcher_Hits(searcher, (Obj*)query, 0, numWanted, NULL);
    }
    else
    {
      hits = IxSearcher_Hits(searcher, (Obj*)query, 0, numWanted, NULL);
    }

    String *title_str = Str_newf("title");
    String *url_str   = Str_newf("url");
    HitDoc *hit;
    i = 1;

    // Loop over search results.
    while (NULL != (hit = Hits_Next(hits))) {
        String *title = (String*)HitDoc_Extract(hit, title_str);
        char *title_c = Str_To_Utf8(title);

        String *url = (String*)HitDoc_Extract(hit, url_str);
        char *url_c = Str_To_Utf8(url);

#ifdef ENABLE_HIGHLIGHTER
        String *excerpt = Highlighter_Create_Excerpt(highlighter, hit);
        char *excerpt_c = Str_To_Utf8(excerpt);

        printf("Result %d: %s (%s)\n%s\n\n", i, title_c, url_c, excerpt_c);
        free(excerpt_c);
        DECREF(excerpt);
#else
        printf("Result %d: %s (%s)\n\n", i, title_c, url_c);
#endif
        free(url_c);
        free(title_c);
        DECREF(url);
        DECREF(title);
        DECREF(hit);
        i++;
    }

    printf("Search: %8.5f QPS\n", (1 *
((double)CLOCKS_PER_SEC/((double)clock()-start))) );

#ifdef PERF_INSTRUMENT
  printf("================================\n");
  printf("For Searching: %lld instructions\n", (perf_per_instr_event_read()
- beginI) );
#endif

    DECREF(url_str);
    DECREF(title_str);
    DECREF(hits);
    DECREF(query);
    DECREF(query_str);
    if( queryType[g_testProximity] )
    {
      DECREF(pquery);
    }
#ifdef ENABLE_HIGHLIGHTER
    DECREF(highlighter);
#endif
    DECREF(content_str);
    DECREF(qparser);
    DECREF(searcher);
    DECREF(folder);
    return 0;
}





--
View this message in context: http://lucene.472066.n3.nabble.com/lucy-user-ProxmityQuery-in-C-tp4320613p4321024.html
Sent from the lucy-user mailing list archive at Nabble.com.

Mime
View raw message