Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 492AE200C2D for ; Sat, 18 Feb 2017 01:35:44 +0100 (CET) Received: by cust-asf.ponee.io (Postfix) id 479B4160B6D; Sat, 18 Feb 2017 00:35:44 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 6C9B3160B57 for ; Sat, 18 Feb 2017 01:35:43 +0100 (CET) Received: (qmail 80003 invoked by uid 500); 18 Feb 2017 00:35:42 -0000 Mailing-List: contact user-help@lucy.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: user@lucy.apache.org Delivered-To: mailing list user@lucy.apache.org Received: (qmail 79992 invoked by uid 500); 18 Feb 2017 00:35:42 -0000 Delivered-To: apmail-incubator-lucy-user@incubator.apache.org Received: (qmail 79989 invoked by uid 500); 18 Feb 2017 00:35:42 -0000 Delivered-To: apmail-lucene-lucy-user@lucene.apache.org Received: (qmail 79986 invoked by uid 99); 18 Feb 2017 00:35:42 -0000 Received: from pnap-us-west-generic-nat.apache.org (HELO spamd4-us-west.apache.org) (209.188.14.142) by apache.org (qpsmtpd/0.29) with ESMTP; Sat, 18 Feb 2017 00:35:42 +0000 Received: from localhost (localhost [127.0.0.1]) by spamd4-us-west.apache.org (ASF Mail Server at spamd4-us-west.apache.org) with ESMTP id EBEA8C023B for ; Sat, 18 Feb 2017 00:35:41 +0000 (UTC) X-Virus-Scanned: Debian amavisd-new at spamd4-us-west.apache.org X-Spam-Flag: NO X-Spam-Score: 3.487 X-Spam-Level: *** X-Spam-Status: No, score=3.487 tagged_above=-999 required=6.31 tests=[DKIM_ADSP_CUSTOM_MED=0.001, NML_ADSP_CUSTOM_MED=1.2, RCVD_IN_DNSWL_NONE=-0.0001, SPF_SOFTFAIL=0.972, URIBL_BLOCKED=0.001, URI_HEX=1.313] autolearn=disabled Received: from mx1-lw-us.apache.org ([10.40.0.8]) by 
localhost (spamd4-us-west.apache.org [10.40.0.11]) (amavisd-new, port 10024) with ESMTP id cKILjx9sZq3i for ; Sat, 18 Feb 2017 00:35:39 +0000 (UTC) Received: from mwork.nabble.com (mwork.nabble.com [162.253.133.43]) by mx1-lw-us.apache.org (ASF Mail Server at mx1-lw-us.apache.org) with ESMTP id 5E57E5F47D for ; Sat, 18 Feb 2017 00:35:39 +0000 (UTC) Received: from mben.nabble.com (unknown [162.253.133.72]) by mwork.nabble.com (Postfix) with ESMTP id A88262D83CCA4 for ; Fri, 17 Feb 2017 17:35:38 -0700 (MST) Date: Fri, 17 Feb 2017 17:35:38 -0700 (MST) From: kasilak To: lucy-user@lucene.apache.org Message-ID: <1487378138687-4321024.post@n3.nabble.com> In-Reply-To: References: <7376c7b4-3863-9fbd-eab6-b850455a6b74@peknet.com> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Transfer-Encoding: 7bit Subject: Re: [lucy-user] ProxmityQuery in C archived-at: Sat, 18 Feb 2017 00:35:44 -0000 Sharing the complete C code for search.c. Please look for g_testProximity to follow the proximity-query-related changes. As such, the default QParser handles the following query strings passed as command-line arguments without issues, and I did not need to make any code changes for them: (1) "a AND b" (2) "a or b" (3) "a NOT b" (4) "a b". What is failing is the case "a b"~100, even though my indexed documents contain the necessary terms within a span of 100 words. 
#include #include #include #define CFISH_USE_SHORT_NAMES #define LUCY_USE_SHORT_NAMES #include "Clownfish/String.h" #include "Clownfish/Vector.h" #include "Lucy/Document/HitDoc.h" #include "Lucy/Highlight/Highlighter.h" #include "Lucy/Plan/Schema.h" #include "Lucy/Search/ANDQuery.h" #include "Lucy/Search/Hits.h" #include "Lucy/Search/IndexSearcher.h" #include "Lucy/Search/TermQuery.h" #include "Lucy/Search/QueryParser.h" #include "LucyX/Search/ProximityQuery.h" #include "Lucy/Analysis/Analyzer.h" #include "Clownfish/TestHarness/TestUtils.h" #include "QUtils.h" #include "version.h" char path_to_index[100] = "./lucy_index/lucy_index"; #define ENABLE_HIGHLIGHTER // Test Configuration enum { g_testDefault = 0, //QParser supports BOOLEAN/TERM queries g_testProximity = 1, //To support proximity queries g_testMax = 2 }; typedef struct TestOpts_ { const char* name; }TestOpts; static void S_usage_and_exit(const char *arg0) { printf("Usage: %s [-p platform] [-a angel signals] [-s ] [-c ] \n", arg0); exit(1); } int main(int argc, char *argv[]) { fprintf( stderr, "Search Version: %d.%d\n", MAJOR_VERSION, MINOR_VERSION); bool isEnableAngelSignals = false; uint32_t docCount = 0; uint32_t numWanted = 10; // Initialize the library. 
lucy_bootstrap_parcel(); const char *category = NULL; const char *platform = NULL; const char *testQuery = NULL; TestOpts g_testopts[] = { { "default" }, { "proximity"}, }; bool queryType[g_testMax] = {false}; int i = 1; uint32_t j; while (i < argc - 1) { if (strcmp(argv[i], "-p") == 0) { if (i + 1 >= argc) { S_usage_and_exit(argv[0]); } i += 1; platform = argv[i]; } else if (strcmp(argv[i], "-a") == 0) { if (i + 1 >= argc) { S_usage_and_exit(argv[0]); } i += 1; isEnableAngelSignals = argv[i]; } else if (strcmp(argv[i], "-s") == 0) { if (i + 1 >= argc) { S_usage_and_exit(argv[0]); } i += 1; docCount = atol(argv[i]); } else if (strcmp(argv[i], "-c") == 0) { if (i + 1 >= argc) { //S_usage_and_exit(argv[0]); } i += 1; category = argv[i]; printf("Category given: %s\n\n", category); } else if (strcmp(argv[i], "-T") == 0) { if (i + 1 >= argc) { S_usage_and_exit(argv[0]); } i += 1; testQuery = argv[i]; char *opt = (char *)&testQuery[0]; bool found = false; for(j = 0; j < sizeof(g_testopts)/sizeof(TestOpts); j++) { if (strcmp(opt, g_testopts[j].name) == 0) { queryType[j] = true; printf( "Testing Query: %s\n", g_testopts[j].name); found = true; break; } } if (!found) { printf("Invalid option: -T=%s\n", testQuery); printf("Valid tests: -T=%s", g_testopts[0].name); for(j = 1; j < sizeof(g_testopts)/sizeof(TestOpts); j++) { printf( ",%s", g_testopts[j].name); } printf( "\n"); exit(0); } } else { S_usage_and_exit(argv[0]); } i += 1; } if (i + 1 != argc) { S_usage_and_exit(argv[0]); } const char *query_c = argv[i]; printf("Searching for: %s with # of hits: %d \n\n", query_c, numWanted); #ifdef PERF_INSTRUMENT perf_event_init( (enable_perf_events) (ENABLE_HW_CYCLES_PER | ENABLE_HW_INSTRS_PER) ); #endif char buff1[100]; sprintf(buff1, "%s%d%s%s", "-", docCount, "-", platform); strcat(path_to_index, buff1); String *folder = Str_newf("%s", path_to_index); printf("Index file used: %s\n", path_to_index); #ifdef PERF_INSTRUMENT perf_event_enable ( (enable_perf_events) 
(ENABLE_HW_CYCLES_PER | ENABLE_HW_INSTRS_PER) ); uint64_t beginI = perf_per_instr_event_read(); #endif double start = (double)clock(); IndexSearcher *searcher = IxSearcher_new((Obj*)folder); Schema *schema = IxSearcher_Get_Schema(searcher); String *query_str = Str_newf("%s", query_c); QueryParser *qparser = QParser_new(schema, NULL, NULL, NULL); ProximityQuery *pquery = NULL; Query *query = NULL; query = QParser_Parse(qparser, query_str); String *content_str = Str_newf("content"); #ifdef ENABLE_HIGHLIGHTER Highlighter *highlighter = Highlighter_new((Searcher*)searcher, (Obj*)query, content_str, 200); #endif if (category) { String *category_name = Str_newf("category"); String *category_str = Str_newf("%s", category); TermQuery *category_query = TermQuery_new(category_name, (Obj*)category_str); Vector *children = Vec_new(2); Vec_Push(children, (Obj*)query); Vec_Push(children, (Obj*)category_query); query = (Query*)ANDQuery_new(children); DECREF(children); DECREF(category_str); DECREF(category_name); } //To handle proximity queries if( queryType[g_testProximity] ) { Vector *terms = Vec_new(0); Vec_Push(terms, (Obj*)query_str); String *field_name = Str_newf("content"); pquery = (Query*)ProximityQuery_new(field_name, terms, 100); Vector *children = Vec_new(2); Vec_Push(children, (Obj*) query); Vec_Push(children, (Obj*) pquery); query = (Query*) (children); //??? DECREF(children); DECREF(field_name); DECREF(terms); } Hits *hits; if ( queryType[g_testDefault] ) { hits = IxSearcher_Hits(searcher, (Obj*)query, 0, numWanted, NULL); } else { hits = IxSearcher_Hits(searcher, (Obj*)query, 0, numWanted, NULL); } String *title_str = Str_newf("title"); String *url_str = Str_newf("url"); HitDoc *hit; i = 1; // Loop over search results. 
while (NULL != (hit = Hits_Next(hits))) { String *title = (String*)HitDoc_Extract(hit, title_str); char *title_c = Str_To_Utf8(title); String *url = (String*)HitDoc_Extract(hit, url_str); char *url_c = Str_To_Utf8(url); #ifdef ENABLE_HIGHLIGHTER String *excerpt = Highlighter_Create_Excerpt(highlighter, hit); char *excerpt_c = Str_To_Utf8(excerpt); printf("Result %d: %s (%s)\n%s\n\n", i, title_c, url_c, excerpt_c); free(excerpt_c); DECREF(excerpt); #else printf("Result %d: %s (%s)\n\n", i, title_c, url_c); #endif free(url_c); free(title_c); DECREF(url); DECREF(title); DECREF(hit); i++; } printf("Search: %8.5f QPS\n", (1 * ((double)CLOCKS_PER_SEC/((double)clock()-start))) ); #ifdef PERF_INSTRUMENT printf("================================\n"); printf("For Searching: %lld instructions\n", (perf_per_instr_event_read() - beginI) ); #endif DECREF(url_str); DECREF(title_str); DECREF(hits); DECREF(query); DECREF(query_str); if( queryType[g_testProximity] ) { DECREF(pquery); } #ifdef ENABLE_HIGHLIGHTER DECREF(highlighter); #endif DECREF(content_str); DECREF(qparser); DECREF(searcher); DECREF(folder); return 0; } -- View this message in context: http://lucene.472066.n3.nabble.com/lucy-user-ProxmityQuery-in-C-tp4320613p4321024.html Sent from the lucy-user mailing list archive at Nabble.com.