lucenenet-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Sergey Zaharov <sergozaha...@gmail.com>
Subject Re: Parse query with special characters
Date Fri, 16 Jun 2017 10:32:25 GMT
Hi,

there is full code of console application

using System;
using System.Collections.Generic;
using System.Linq;
using Lucene.Net.Analysis.Core;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.QueryParsers.Classic;
using Lucene.Net.QueryParsers.Flexible.Standard;
using Lucene.Net.Search;
using Lucene.Net.Store;
using Lucene.Net.Util;

namespace Lucene4TestWSA
{
    class Program
    {
        private const string FIELD_BODY = "postBody";
        private const string FIELD_SECURITY = "Security";

        private static IndexWriter _writer;
        private static Directory _directory;
        private static WhitespaceAnalyzer _analyzer;
        private static IndexReader _indexReader;
        private static IndexSearcher _searcher;
        private static IndexWriterConfig _cfg;

        private static void AddNewItem(FullTextIndexItem item)
        {
            if (_writer == null) return;
            var doc = new Document();

           var objectText = (item.ObjectText ?? "");
            doc.Add(new TextField(FIELD_BODY, objectText, Field.Store.NO));

            var securCodes = (item.Access == null || item.Access.All(x =>
x.Key == 0))
                ? "?"
                : string.Join(" ", item.Access.Where(x => x.Key !=
0).Select(x => x.Key.ToString() + x.Info).ToList());
            doc.Add(new TextField(FIELD_SECURITY, securCodes.ToLower(),
Field.Store.YES));

            _writer.AddDocument(doc);
        }

        static void Main(string[] args)
        {

            var dir = @"c:\TestLuceneDir";
            if (System.IO.Directory.Exists(dir))
            {
                System.IO.Directory.Delete(dir, true);
            }

            var di = System.IO.Directory.CreateDirectory(dir);
            var _directory = FSDirectory.Open(di);
            _analyzer = new WhitespaceAnalyzer(LuceneVersion.LUCENE_48);
            _cfg = new IndexWriterConfig(LuceneVersion.LUCENE_48,
_analyzer);
            var writer = new IndexWriter(_directory, _cfg);
            writer.Commit();
            writer.Dispose();
            _cfg = null;

            _indexReader = DirectoryReader.Open(_directory);
            _searcher = new IndexSearcher(_indexReader);

            var analyzer = new WhitespaceAnalyzer(LuceneVersion.LUCENE_48);
            _cfg = new IndexWriterConfig(LuceneVersion.LUCENE_48, analyzer);
            _writer = new IndexWriter(_directory, _cfg);

            AddNewItem(new FullTextIndexItem
            {
                ObjectText = "111 !!222 333 qqq",

                Access = new List<FullTextIndexItemAccessInfo>()
                {
                    new FullTextIndexItemAccessInfo() { Key = 1037, Info =
"PW???"},
                    new FullTextIndexItemAccessInfo() { Key = 1041, Info =
"P????"}
                }
            });

            AddNewItem(new FullTextIndexItem
            {
                ObjectText = "aaa bbb ccc qqq",
                Access = new List<FullTextIndexItemAccessInfo>()
                {
                    new FullTextIndexItemAccessInfo() { Key = 1037, Info =
"PW???"},
                    new FullTextIndexItemAccessInfo() { Key = 1042, Info =
"PW??C"}
                }
            });

            _writer.Commit();
            _writer.Dispose();
            _writer = null;
            _cfg = null;
            _indexReader = DirectoryReader.Open(_directory);
            _searcher = new IndexSearcher(_indexReader);

            _analyzer = new WhitespaceAnalyzer(LuceneVersion.LUCENE_48);
            var boolQry = new BooleanQuery();

            var parser = new QueryParser(LuceneVersion.LUCENE_48,
FIELD_BODY, _analyzer) { AllowLeadingWildcard = true };
            var textQry = parser.Parse("*22/*");
            boolQry.Add(textQry, Occur.MUST);
            var an = new WhitespaceAnalyzer(LuceneVersion.LUCENE_48);
            var localParser = new QueryParser(LuceneVersion.LUCENE_48,
FIELD_SECURITY, an);

            var localQry = localParser.Parse("1037p????");

            boolQry.Add(localQry, Occur.MUST);

            var qryRes = _searcher.Search(boolQry, 1000);

            Console.WriteLine($"Result found {qryRes.TotalHits}");
            Console.ReadLine();
        }
    }

    public class FullTextIndexItemAccessInfo
    {
        public int Key { get; set; }
        public string Info { get; set; }
    }

    public class FullTextIndexItem
    {
        public string ObjectText { get; set; }
        public List<FullTextIndexItemAccessInfo> Access { get; set; }
    }
}

Hope that would help.

Manz thanks,
Sergey

2017-06-16 9:39 GMT+02:00 Prescott Nasser <geobmx540@hotmail.com>:

> Adding Sergey who isn't subscribed to the mailing list..
>
> ------
>
> Sergey,
>
> Please provide the actual code, not a screenshot. Apparently, the mailing
> list server strips out images from the email, so it is impossible to help
> you without knowing what you are doing.
>
> Thanks,
> Shad Storhaug (NightOwl888)
>
> -----Original Message-----
> From: Sergey Zaharov [mailto:sergozaharov@gmail.com]
> Sent: Friday, June 16, 2017 2:16 PM
> To: user@lucenenet.apache.org
> Subject: Parse query with special characters
>
> Hi all,
>
> could you please help me with next issue: i have a problem with parsing
> strings with some special characters. Example on the screenshot, Also error
> comes when i parse string like "!!22", but in the same time string "!22"
> parsed normally.
> [image: Встроенное изображение 1]
>
> So question is how can i parse ALL possible strings that comes from user?
> may be i need another parser? Otherwise should i clear query string from
> some characters/combinations and if yes, then where can i take that list?
> Probably, it exists some utils that could help normalize query string.
>
> Thanks you in advance.
>
> --
> Best regards, Sergey.
>
> --
> Best regards, Sergey.
>



-- 
Best regards, Sergey.

Mime
  • Unnamed multipart/alternative (inline, None, 0 bytes)
View raw message