incubator-lucy-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From goran kent <gorank...@gmail.com>
Subject Re: [lucy-user] ClusterSearcher and excerpt/highlighting
Date Tue, 29 Nov 2011 07:20:38 GMT
On Mon, Nov 28, 2011 at 10:26 PM, Marvin Humphrey
<marvin@rectangular.com> wrote:
> Do you have a Stopalizer as part of your Analyzer chain?

Yes, here's the skinny (stupid gmail doesn't have fixed-width font, sorry):

###
### Client
###

use Lucy::Search::SortSpec;
use LucyX::Remote::ClusterSearcher;

# Fields handed to the query parser as its default search fields.
my @SEARCH_FIELDS = ( 'title', 'body' );

my $schema = Lucy::Plan::Schema->new();

# Individual analysis stages; both Snowball stages are configured for English.
my $case_folder = Lucy::Analysis::CaseFolder->new();
my $tokenizer   = Lucy::Analysis::RegexTokenizer->new();
my $stemmer     = Lucy::Analysis::SnowballStemmer->new( language => 'en' );
my $stopfilter  = Lucy::Analysis::SnowballStopFilter->new( language => 'en' );

# Chain the stages in this order: case folder, tokenizer, stop filter, stemmer.
# NOTE(review): $polyanalyzer is presumably consumed by the elided field
# definitions below -- confirm.
my $polyanalyzer = Lucy::Analysis::PolyAnalyzer->new(
    analyzers => [
        $case_folder,
        $tokenizer,
        $stopfilter,
        $stemmer,
    ],
);

# usual schema stuff:
$schema->spec_field(...);

my $searcher = LucyX::Remote::ClusterSearcher->new(...);

# Sort hits on 'my_field' with the sort order reversed (reverse => 1).
# Fix: the field name was missing its closing quote, which made the string
# literal swallow the rest of the statement and broke parsing of the script.
my $sort_spec = Lucy::Search::SortSpec->new(
    rules => [
        Lucy::Search::SortRule->new( field => 'my_field', reverse => 1 ),
    ],
);

# Build a parser over @SEARCH_FIELDS whose default boolean operator is AND.
my %parser_args = (
    schema         => $schema,
    fields         => [@SEARCH_FIELDS],
    default_boolop => 'AND',
);
my $query_parser = Lucy::Search::QueryParser->new(%parser_args);

# Turn on heed_colons so the parser honours "field:term" constructs.
$query_parser->set_heed_colons(1);

# Parse the query string, then compile it against the cluster searcher; the
# compiled form is what the search and highlighter code is handed later.
my $parsed_query   = $query_parser->parse($query);
my $query_compiler = $parsed_query->make_compiler( searcher => $searcher );

# Fetch the first 10 hits (offset 0) for the compiled query, ordered by
# $sort_spec.
# NOTE(review): the eval traps any exception thrown by hits(), leaving $hits
# undef; $@ is not inspected in the code shown here -- confirm it is checked
# before $hits is used.
my %hits_args = (
    query      => $query_compiler,
    sort_spec  => $sort_spec,
    offset     => 0,
    num_wanted => 10,
);
my $hits = eval { $searcher->hits(%hits_args) };

# Both highlighters share the same searcher and compiled query; only the
# target field and the excerpt length differ.
my %highlighter_base = (
    searcher => $searcher,
    query    => $query_compiler,
);

my $body_highlighter = Lucy::Highlight::Highlighter->new(
    %highlighter_base,
    field          => 'body',
    excerpt_length => 100,
);

my $title_highlighter = Lucy::Highlight::Highlighter->new(
    %highlighter_base,
    field          => 'title',
    excerpt_length => 50,
);

# Wrap highlighted terms in <em>...</em> for both fields.
for my $highlighter ( $body_highlighter, $title_highlighter ) {
    $highlighter->set_pre_tag('<em>');
    $highlighter->set_post_tag('</em>');
}

...usual display stuff...

###
### Server
###

use Lucy::Search::PolySearcher;
use Lucy::Search::IndexSearcher;
use LucyX::Remote::SearchServer;

my $schema = Lucy::Plan::Schema->new();

# Individual analysis stages; both Snowball stages are configured for English.
my $case_folder = Lucy::Analysis::CaseFolder->new();
my $tokenizer   = Lucy::Analysis::RegexTokenizer->new();
my $stemmer     = Lucy::Analysis::SnowballStemmer->new( language => 'en' );
my $stopfilter  = Lucy::Analysis::SnowballStopFilter->new( language => 'en' );

# Chain the stages in this order: case folder, tokenizer, stop filter, stemmer.
# NOTE(review): $polyanalyzer is presumably consumed by the elided field
# definitions below -- confirm.
my $polyanalyzer = Lucy::Analysis::PolyAnalyzer->new(
    analyzers => [
        $case_folder,
        $tokenizer,
        $stopfilter,
        $stemmer,
    ],
);

# usual schema stuff:
$schema->spec_field(...);

# Open one IndexSearcher per index path and append it to @searcher.
# (@idx_paths and @searcher are declared outside the code shown here.)
push @searcher, Lucy::Search::IndexSearcher->new( index => $_ )
    for @idx_paths;

# Combine the per-index searchers into a single PolySearcher.
my %poly_args = (
    schema    => $schema,
    searchers => \@searcher,
);
my $poly_searcher = Lucy::Search::PolySearcher->new(%poly_args);

# Expose the PolySearcher to remote clients on port 7890.
# NOTE(review): 'pw' looks like a placeholder password -- confirm it is not
# what the real deployment uses.
my %server_args = (
    searcher => $poly_searcher,
    port     => 7890,
    password => 'pw',
);
my $search_server = LucyX::Remote::SearchServer->new(%server_args);

# Start serving search requests.
$search_server->serve();

I think that about covers it.  Pretty basic stuff as you can see.
I've tried to keep things as simple as possible.

--
Regards,
gk

Mime
View raw message