lucene-java-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "Marcelo Neves" <marcelo.ne...@xgen.com.br>
Subject RES: BrazilianAnalyzer don't woks with any BooleanQuery
Date Thu, 12 Jul 2012 18:38:20 GMT
Ok. I'm using positions on the ANALYZED fields, where the search is by terms. For the other fields,
which are "NOT_ANALYZED", the search is by the complete term, such as culture code, URL or document code.
The index has documents in three languages (Spanish, English and Portuguese (BR)). When I perform
a search, I apply filters using TermQuery on the "NOT_ANALYZED" fields, and I use PhraseQuery,
PrefixQuery or TermQuery, added to a BooleanQuery, on the analyzed fields. The same code works
with StandardAnalyzer. When using BrazilianAnalyzer, most terms do not return any of the
stored documents that were indexed with this analyzer.

In the attached code, "searchExpression" is the phrase containing the words entered by the
end user. I can search for all of the words, any of the words, or the whole phrase.
This method builds my "BooleanQuery" object only for the content (full text).

Below is a sample of the settings of the columns I index. Some values are listed here with
the names of the columns.
After that code snippet there is a snippet of the search implementation.

[code]
// Here is the configuration of the fields for each document
		// Builds the index entry for one `node`: every column is registered through the fluent
		// AddField chain below, then the document is handed to the index writer.
		document = new DocumentIndex();
		// Full-text fields: ANALYZED, stored, with WITHPOSITIONSOFFSETS.
		// NOTE(review): the search side highlights with FastVectorHighlighter, which presumably is
		// why positions/offsets are requested here — confirm how StoreTermOccurrencesField maps to
		// Lucene term vectors.
		document.AddField(ColumnsIndexedDocuments.COLUMN_DOCUMENTCONTENT, HtmlConvert.Convert(content),
IndexField.ANALYZED, StoreField.YES, StoreTermOccurrencesField.WITHPOSITIONSOFFSETS)
		.AddField(ColumnsIndexedDocuments.COLUMN_DOCUMENTCONTENT_NODEXML, HtmlConvert.Convert(nodeXml),
IndexField.ANALYZED, StoreField.YES, StoreTermOccurrencesField.WITHPOSITIONSOFFSETS)
		// Exact-match fields: indexed NOTANALYZED, so only the complete value can be matched.
		// Most values are lower-cased with ToLower() first; COLUMN_NODEID and COLUMN_NODEACLID are not.
		.AddField(ColumnsIndexedDocuments.COLUMN_SITENAME, ValidationHelper.GetString(node[ColumnsIndexedDocuments.COLUMN_SITENAME],
"").ToLower(), IndexField.NOTANALYZED, StoreField.YES, StoreTermOccurrencesField.NO)
		.AddField(ColumnsIndexedDocuments.COLUMN_NODESITEID, ValidationHelper.GetString(node[ColumnsIndexedDocuments.COLUMN_NODESITEID],
"").ToLower(), IndexField.NOTANALYZED, StoreField.YES, StoreTermOccurrencesField.NO)
		.AddField(ColumnsIndexedDocuments.COLUMN_CLASSNAME, ValidationHelper.GetString(node[ColumnsIndexedDocuments.COLUMN_CLASSNAME],
"").ToLower(), IndexField.NOTANALYZED, StoreField.YES, StoreTermOccurrencesField.NO)
		.AddField(ColumnsIndexedDocuments.COLUMN_NODEID, ValidationHelper.GetString(node[ColumnsIndexedDocuments.COLUMN_NODEID],
""), IndexField.NOTANALYZED, StoreField.YES, StoreTermOccurrencesField.NO)
		.AddField(ColumnsIndexedDocuments.COLUMN_NODEACLID, ValidationHelper.GetString(node[ColumnsIndexedDocuments.COLUMN_NODEACLID],
""), IndexField.NOTANALYZED, StoreField.YES, StoreTermOccurrencesField.NO)
		.AddField(ColumnsIndexedDocuments.COLUMN_NODEALIASPATH, ValidationHelper.GetString(node[ColumnsIndexedDocuments.COLUMN_NODEALIASPATH],
"").ToLower(), IndexField.NOTANALYZED, StoreField.YES, StoreTermOccurrencesField.NO)
		.AddField(ColumnsIndexedDocuments.COLUMN_DOCUMENTNAMEPATH, ValidationHelper.GetString(node[ColumnsIndexedDocuments.COLUMN_DOCUMENTNAMEPATH],
"").ToLower(), IndexField.NOTANALYZED, StoreField.YES, StoreTermOccurrencesField.NO)
		.AddField(ColumnsIndexedDocuments.COLUMN_DOCUMENTURLPATH, ValidationHelper.GetString(node[ColumnsIndexedDocuments.COLUMN_DOCUMENTURLPATH],
"").ToLower(), IndexField.NOTANALYZED, StoreField.YES, StoreTermOccurrencesField.NO)
		.AddField(ColumnsIndexedDocuments.COLUMN_DOCUMENTMENUITEMHIDEINNAVIGATION, ValidationHelper.GetString(node[ColumnsIndexedDocuments.COLUMN_DOCUMENTMENUITEMHIDEINNAVIGATION],
"").ToLower(), IndexField.NOTANALYZED, StoreField.YES, StoreTermOccurrencesField.NO)
		.AddField(ColumnsIndexedDocuments.COLUMN_DOCUMENTCULTURE, ValidationHelper.GetString(node[ColumnsIndexedDocuments.COLUMN_DOCUMENTCULTURE],
"").ToLower(), IndexField.NOTANALYZED, StoreField.YES, StoreTermOccurrencesField.NO)
		// Name and tag fields: ANALYZED and stored, but with StoreTermOccurrencesField.NO.
		.AddField(ColumnsIndexedDocuments.COLUMN_DOCUMENTNAME, ValidationHelper.GetString(node[ColumnsIndexedDocuments.COLUMN_DOCUMENTNAME],
""), IndexField.ANALYZED, StoreField.YES, StoreTermOccurrencesField.NO)
		.AddField(ColumnsIndexedDocuments.COLUMN_NODENAME, ValidationHelper.GetString(node[ColumnsIndexedDocuments.COLUMN_NODENAME],
""), IndexField.ANALYZED, StoreField.YES, StoreTermOccurrencesField.NO)
		.AddField(ColumnsIndexedDocuments.COLUMN_DOCUMENTTAGS, ValidationHelper.GetString(node[ColumnsIndexedDocuments.COLUMN_DOCUMENTTAGS],
""), IndexField.ANALYZED, StoreField.YES, StoreTermOccurrencesField.NO)
		// Publish-from / publish-to dates: converted with DateTools.DateToString at MILLISECOND
		// resolution and indexed NOTANALYZED. GetDateTime also receives DATETIME_MIN_BD and the
		// document culture string — presumably a fallback value and a parsing culture; confirm.
		.AddField(ColumnsIndexedDocuments.COLUMN_DOCUMENTWASPUBLISHFROM, DateTools.DateToString(ValidationHelper.GetDateTime(node[ColumnsIndexedDocuments.COLUMN_DOCUMENTWASPUBLISHFROM],
DataHelper.DATETIME_MIN_BD, ValidationHelper.GetString(node[ColumnsIndexedDocuments.COLUMN_DOCUMENTCULTURE],
"")), DateTools.Resolution.MILLISECOND), IndexField.NOTANALYZED, StoreField.YES, StoreTermOccurrencesField.NO)
		.AddField(ColumnsIndexedDocuments.COLUMN_DOCUMENTWASPUBLISHTO, DateTools.DateToString(ValidationHelper.GetDateTime(node[ColumnsIndexedDocuments.COLUMN_DOCUMENTWASPUBLISHTO],
DataHelper.DATETIME_MIN_BD, ValidationHelper.GetString(node[ColumnsIndexedDocuments.COLUMN_DOCUMENTCULTURE],
"")), DateTools.Resolution.MILLISECOND), IndexField.NOTANALYZED, StoreField.YES, StoreTermOccurrencesField.NO);
		// Adds the finished document to the index together with the analyzer held in `Analyzer`.
		IndexManagerWriter.AddDocument(document, Analyzer);
[/code]

[code]
// Here is the conjunction of queries
		/// <summary>
		/// Builds the expression query and the parameter filter, picks the analyzer for
		/// <paramref name="cultureCode"/>, re-parses the expression query through a
		/// <c>QueryParser</c> and delegates the actual search to <c>IndexManagerReader.Search</c>.
		/// </summary>
		/// <remarks>
		/// <paramref name="searchChildNodes"/>, <paramref name="filterResultsByReadPermission"/>,
		/// <paramref name="whereCondition"/>, <paramref name="orderBy"/> and
		/// <paramref name="filterCondition"/> are not read by this implementation.
		/// </remarks>
		/// <returns>Whatever <c>IndexManagerReader.Search</c> returns for the parsed query.</returns>
		public override DataSet Search(
            string siteName,
            string searchNodePath,
            string cultureCode,
            string searchExpression,
            SearchModeEnum searchMode,
            bool searchChildNodes,
            string classNames,
            bool filterResultsByReadPermission,
            bool searchOnlyPublished,
            string whereCondition,
            string orderBy,
            bool combineWithDefaultCulture,
            string filterCondition,
            bool searchDocumentHistory,
            bool onlyDocumentHistory)
        {
            // Main boolean query: it only receives the search-expression clause, when there is one.
            BooleanQuery mainQuery = new BooleanQuery();
            if (!string.IsNullOrEmpty(searchExpression))
            {
                BooleanQuery expressionQuery = GetExpressionBooleanQuery(
                    searchExpression, searchMode, searchDocumentHistory, onlyDocumentHistory);
                mainQuery.Add(expressionQuery, BooleanClause.Occur.MUST);
            }

            // The parameter query is applied as a Filter instead of being added to the main
            // query as a MUST clause.
            BooleanQuery parameterQuery = GetParametersBooleanQuery(
                siteName, searchNodePath, cultureCode, combineWithDefaultCulture, searchOnlyPublished, classNames);
            Filter parameterFilter = new QueryFilter(parameterQuery);

            // Select the analyzer for the requested culture.
            Analyzer = GetAnalyzer(cultureCode);

            // Parser bound to the content field and the analyzer chosen above.
            QueryParser parser = new QueryParser(
                Lucene.Net.Util.Version.LUCENE_29,
                ColumnsIndexedDocuments.COLUMN_DOCUMENTCONTENT,
                Analyzer);

            // Open the index for reading.
            IndexManagerReader.OpenReader(SearchProvider.DirectoryPathReaderIndex);

            // Stored fields that are copied into the result.
            string[] returnedColumns =
            {
                ColumnsIndexedDocuments.COLUMN_DOCUMENTCONTENT,
                ColumnsIndexedDocuments.COLUMN_DOCUMENTCONTENT_NODEXML,
                ColumnsIndexedDocuments.COLUMN_DOCUMENTNAME,
                ColumnsIndexedDocuments.COLUMN_DOCUMENTNAMEPATH,
                ColumnsIndexedDocuments.COLUMN_DOCUMENTURLPATH,
                ColumnsIndexedDocuments.COLUMN_NODEALIASPATH,
                ColumnsIndexedDocuments.COLUMN_NODEID,
                ColumnsIndexedDocuments.COLUMN_NODESITEID,
                ColumnsIndexedDocuments.COLUMN_NODEACLID,
                ColumnsIndexedDocuments.COLUMN_SITENAME,
                ColumnsIndexedDocuments.COLUMN_CLASSNAME
            };

            // On the live site the content field is highlighted; in every other view mode the
            // node XML field is highlighted instead.
            string highlightField;
            if (CMSContext.ViewMode == ViewModeEnum.LiveSite)
            {
                highlightField = ColumnsIndexedDocuments.COLUMN_DOCUMENTCONTENT;
            }
            else
            {
                highlightField = ColumnsIndexedDocuments.COLUMN_DOCUMENTCONTENT_NODEXML;
            }

            // The programmatically built query is rendered to text and parsed again, so its
            // terms pass through the parser (and therefore the analyzer) before searching.
            Lucene.Net.Search.Query parsedQuery = parser.Parse(mainQuery.ToString());

            return IndexManagerReader.Search(
                parsedQuery,
                parameterFilter,
                ColumnsIndexedDocuments.COLUMN_DOCUMENTCONTENT,
                Analyzer,
                NumberMaxSearchResults,
                returnedColumns,
                highlightField,
                searchExpression);
        }
[/code]

[code]
// Here is the method that receives the query and the filter, performs the search and
// processes the result
		/// <summary>
		/// Runs <paramref name="query"/>, restricted by <paramref name="filter"/>, against the index
		/// at <c>directoryPath</c> and returns the hits as a <see cref="DataSet"/> with one table.
		/// </summary>
		/// <param name="query">Query to execute; also used to build the highlighter's field query.</param>
		/// <param name="filter">Filter handed to the searcher together with the query.</param>
		/// <param name="mainColumnContentName">Name of the result column that receives the highlighted
		/// fragment. A stored field with this name is never copied into the result as-is.</param>
		/// <param name="analyzer">Not used by this method.</param>
		/// <param name="top">Maximum number of hits requested from the searcher.</param>
		/// <param name="fieldsReturn">Index fields to expose as result columns, in this order.</param>
		/// <param name="fieldHighlightName">Field for which the best highlighted fragment is returned
		/// instead of the stored value.</param>
		/// <param name="searchExpression">Not used by this method.</param>
		/// <returns>
		/// A <see cref="DataSet"/> holding the table "SearchResult" (first column "Index" is the
		/// Lucene document number), or <c>null</c> when the search produced no hits.
		/// </returns>
		public DataSet Search(Lucene.Net.Search.Query query, Filter filter, string mainColumnContentName,
            Analyzer analyzer, int top, string[] fieldsReturn, string fieldHighlightName, string searchExpression)
        {
            Lucene.Net.Store.Directory directory = FSDirectory.Open(new DirectoryInfo(directoryPath));
            try
            {
                IndexSearcher searcher = new IndexSearcher(directory, true);
                try
                {
                    TopDocs topDocs = searcher.Search(query, filter, top);
                    if (topDocs.scoreDocs == null || topDocs.scoreDocs.Length == 0)
                    {
                        // Existing contract: callers get null, not an empty DataSet, when nothing matched.
                        return null;
                    }

                    DataTable docs = new DataTable("SearchResult");
                    docs.Columns.Add("Index");
                    for (int i = 0; i < fieldsReturn.Length; i++)
                    {
                        if (fieldsReturn[i] == fieldHighlightName)
                        {
                            // The highlighted fragment is always published under the main content column.
                            docs.Columns.Add(mainColumnContentName, typeof(string));
                        }
                        else if (fieldsReturn[i] != mainColumnContentName)
                        {
                            docs.Columns.Add(fieldsReturn[i], typeof(string));
                        }
                    }

                    // The highlighter and its field query depend only on `query`, so they are built
                    // once (on first use) instead of once per hit.
                    FastVectorHighlighter highlighter = null;
                    FieldQuery highlightQuery = null;

                    for (int j = 0; j < topDocs.scoreDocs.Length; j++)
                    {
                        int docId = topDocs.scoreDocs[j].doc;
                        Document document = searcher.Doc(docId);

                        List<string> values = new List<string>(fieldsReturn.Length + 1);
                        values.Add(docId.ToString());
                        for (int k = 0; k < fieldsReturn.Length; k++)
                        {
                            if (fieldsReturn[k] == fieldHighlightName)
                            {
                                if (highlighter == null)
                                {
                                    highlighter = new FastVectorHighlighter();
                                    highlightQuery = highlighter.GetFieldQuery(query);
                                }
                                string fragment = highlighter.GetBestFragment(highlightQuery,
                                    searcher.GetIndexReader(), docId, fieldHighlightName, NumberMaxCaractersHighlight);
                                values.Add(fragment);
                            }
                            else if (fieldsReturn[k] != mainColumnContentName)
                            {
                                // Same skip rule as the column loop above, so values stay aligned with columns.
                                values.Add(document.Get(fieldsReturn[k]));
                            }
                        }
                        docs.Rows.Add(values.ToArray());
                    }

                    DataSet result = new DataSet();
                    result.Tables.Add(docs);
                    return result;
                }
                finally
                {
                    // Releases the reader the searcher opened on the directory; previously it was leaked.
                    searcher.Close();
                }
            }
            finally
            {
                directory.Close();
            }
        }
[/code]

It works perfectly, but only with the other analyzers. With BrazilianAnalyzer, I search for the term
"ferramenta" and nothing is returned. There are documents in "es-ES" and "en" in the index with this term.
If I search filtering the field "DocumentCulture" by the value "es-es" (culture field
not_analyzed, text field analyzed with [SnowBallAnalyzer ("Spanish")]), documents are returned. If I filter
by "pt-BR" (culture field not_analyzed, text field analyzed with [BrazilianAnalyzer]), nothing comes back.

Is it a bug in [BrazilianAnalyzer]?


-----Mensagem original-----
De: Simon Willnauer [mailto:simon.willnauer@gmail.com] 
Enviada em: quinta-feira, 12 de julho de 2012 04:48
Para: java-user@lucene.apache.org
Assunto: Re: BrazilianAnalyzer don't woks with any BooleanQuery

Can you tell us more about the index side of things? Are you using positions in the index,
since I see PhraseQuery in your code?
Where are you passing the text you are searching for to the BrazilianAnalyzer? I don't see
it in your code. You need to process your text at search time too to get results.

simon

On Wed, Jul 11, 2012 at 5:32 PM, Marcelo Neves <marcelo.neves@xgen.com.br>wrote:

> Hi all,****
>
> ** **
>
> I created the method below, which generates my boolean query based on many
> parameters. The queries on not-analyzed fields work perfectly in
> debug.
>
> When I start a search using any analyzed field with BrazilianAnalyzer,
> it always returns an empty result (zero docs). I tested this in a separate
> solution with a single field, using BrazilianAnalyzer for both indexing and
> searching. If I use a BooleanQuery instead of the QueryParser, it doesn't work as
> expected: it returns an empty result.
>
> When I change to StandardAnalyzer for indexing and searching, without
> altering any other part of the code, it works.
>
> ** **
>
> Could anyone help me?****
>
> [Code]****
>
> ** **
>
> /// <summary>
> /// Builds the query for the words typed by the end user, over COLUMN_DOCUMENTCONTENT and/or
> /// COLUMN_DOCUMENTCONTENT_NODEXML depending on the two history flags.
> /// </summary>
> /// <param name="searchExpression">Text typed by the user. It is lower-cased, double quotes are
> /// removed, it is split on single spaces and blank tokens are discarded.</param>
> /// <param name="searchMode">How several words are combined: ExactPhrase builds one PhraseQuery
> /// per searched field, AnyWord adds SHOULD term clauses, AllWords adds MUST term clauses; any
> /// other value adds nothing. A single word is always added as MUST term clauses.</param>
> /// <param name="searchDocumentHistory">When true, COLUMN_DOCUMENTCONTENT_NODEXML is searched.</param>
> /// <param name="onlyDocumentHistory">When true, COLUMN_DOCUMENTCONTENT is not searched.</param>
> /// <returns>null when searchExpression is null or empty; otherwise a BooleanQuery, which may
> /// have no clauses.</returns>
> protected virtual BooleanQuery GetExpressionBooleanQuery(string searchExpression,
>     SearchModeEnum searchMode, bool searchDocumentHistory, bool onlyDocumentHistory)
> {
>     if (string.IsNullOrEmpty(searchExpression))
>     {
>         return null;
>     }
>
>     // NOTE(review): the words below go into Term objects exactly as typed (lower-cased only).
>     // If the field was indexed with a stemming analyzer such as BrazilianAnalyzer, the indexed
>     // terms are stems, so the words must go through the same analyzer to match.
>     string[] rawTokens = searchExpression.ToLower().Replace("\"", "").Split(' ');
>
>     // Keep only real words, so that repeated or trailing spaces neither leave gaps in the
>     // phrase positions nor make a single word look like a multi-word expression.
>     System.Collections.Generic.List<string> words =
>         new System.Collections.Generic.List<string>(rawTokens.Length);
>     foreach (string rawToken in rawTokens)
>     {
>         string word = rawToken.Trim();
>         if (word.Length > 0)
>         {
>             words.Add(word);
>         }
>     }
>
>     BooleanQuery queryWords = new BooleanQuery();
>     bool searchContent = !onlyDocumentHistory;
>
>     if (words.Count == 1)
>     {
>         AddWordClauses(queryWords, words[0], BooleanClause.Occur.MUST, searchContent, searchDocumentHistory);
>         return queryWords;
>     }
>
>     switch (searchMode)
>     {
>         case SearchModeEnum.ExactPhrase:
>             // A PhraseQuery only accepts terms of a single field, so each searched field gets
>             // its own phrase; both are required, as in the AllWords mode.
>             if (words.Count > 0 && searchContent)
>             {
>                 queryWords.Add(new BooleanClause(
>                     BuildPhrase(ColumnsIndexedDocuments.COLUMN_DOCUMENTCONTENT, words),
>                     BooleanClause.Occur.MUST));
>             }
>             if (words.Count > 0 && searchDocumentHistory)
>             {
>                 queryWords.Add(new BooleanClause(
>                     BuildPhrase(ColumnsIndexedDocuments.COLUMN_DOCUMENTCONTENT_NODEXML, words),
>                     BooleanClause.Occur.MUST));
>             }
>             break;
>
>         case SearchModeEnum.AnyWord:
>             foreach (string word in words)
>             {
>                 AddWordClauses(queryWords, word, BooleanClause.Occur.SHOULD, searchContent, searchDocumentHistory);
>             }
>             break;
>
>         case SearchModeEnum.AllWords:
>             foreach (string word in words)
>             {
>                 AddWordClauses(queryWords, word, BooleanClause.Occur.MUST, searchContent, searchDocumentHistory);
>             }
>             break;
>
>         default:
>             break;
>     }
>
>     return queryWords;
> }
>
> // Adds one TermQuery clause for `word` to `target` for each searched field.
> private static void AddWordClauses(BooleanQuery target, string word, BooleanClause.Occur occur,
>     bool searchContent, bool searchHistory)
> {
>     if (searchContent)
>     {
>         target.Add(new BooleanClause(
>             new TermQuery(new Term(ColumnsIndexedDocuments.COLUMN_DOCUMENTCONTENT, word)), occur));
>     }
>     if (searchHistory)
>     {
>         target.Add(new BooleanClause(
>             new TermQuery(new Term(ColumnsIndexedDocuments.COLUMN_DOCUMENTCONTENT_NODEXML, word)), occur));
>     }
> }
>
> // Builds a PhraseQuery over `field` with the words at consecutive positions 0, 1, 2, ...
> private static PhraseQuery BuildPhrase(string field, System.Collections.Generic.IList<string> words)
> {
>     PhraseQuery phrase = new PhraseQuery();
>     for (int position = 0; position < words.Count; position++)
>     {
>         phrase.Add(new Term(field, words[position]), position);
>     }
>     return phrase;
> }
>
> ** **
>
> [/Code]****
>
> ** **
>
> I wait for help. Please!****
>
> ** **
>
> Thanks!****
>
> ** **
>
> [image: Descrição: marcelo-neves]****
>
> ** **
>



---------------------------------------------------------------------
To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org
For additional commands, e-mail: java-user-help@lucene.apache.org


Mime
View raw message