Return-Path: X-Original-To: apmail-lucene-lucene-net-commits-archive@www.apache.org Delivered-To: apmail-lucene-lucene-net-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 1E05A227A for ; Sat, 7 May 2011 05:28:52 +0000 (UTC) Received: (qmail 4016 invoked by uid 500); 7 May 2011 05:28:52 -0000 Delivered-To: apmail-lucene-lucene-net-commits-archive@lucene.apache.org Received: (qmail 3972 invoked by uid 500); 7 May 2011 05:28:51 -0000 Mailing-List: contact lucene-net-commits-help@lucene.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: lucene-net-dev@lucene.apache.org Delivered-To: mailing list lucene-net-commits@lucene.apache.org Received: (qmail 3958 invoked by uid 99); 7 May 2011 05:28:49 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Sat, 07 May 2011 05:28:49 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Sat, 07 May 2011 05:28:46 +0000 Received: by eris.apache.org (Postfix, from userid 65534) id C844923888E4; Sat, 7 May 2011 05:28:26 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Date: Sat, 07 May 2011 05:28:26 -0000 To: lucene-net-commits@lucene.apache.org From: pnasser@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20110507052826.C844923888E4@eris.apache.org> Subject: [Lucene.Net] svn commit: r1100452 - in /incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/core: ./ Search/Regex/ Author: pnasser Date: Sat May 7 05:28:26 2011 New Revision: 1100452 URL: http://svn.apache.org/viewvc?rev=1100452&view=rev Log: LUCENENET-330 Search.Regex minimal port Added: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/core/Search/Regex/ 
incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/core/Search/Regex/CSharpRegexCapabilities.cs incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/core/Search/Regex/IRegexCapabilities.cs incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/core/Search/Regex/IRegexQueryCapable.cs incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/core/Search/Regex/RegexQuery.cs incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/core/Search/Regex/RegexTermEnum.cs incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/core/Search/Regex/SpanRegexQuery.cs Modified: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/core/Lucene.Net.csproj Modified: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/core/Lucene.Net.csproj URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/core/Lucene.Net.csproj?rev=1100452&r1=1100451&r2=1100452&view=diff ============================================================================== --- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/core/Lucene.Net.csproj (original) +++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/core/Lucene.Net.csproj Sat May 7 05:28:26 2011 @@ -527,6 +527,7 @@ Code + Code @@ -619,6 +620,8 @@ Code + + Code @@ -681,6 +684,8 @@ Code + + Code @@ -727,6 +732,7 @@ + Code Added: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/core/Search/Regex/CSharpRegexCapabilities.cs URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/core/Search/Regex/CSharpRegexCapabilities.cs?rev=1100452&view=auto ============================================================================== --- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/core/Search/Regex/CSharpRegexCapabilities.cs (added) +++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/core/Search/Regex/CSharpRegexCapabilities.cs Sat May 7 05:28:26 2011 @@ -0,0 +1,91 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Search.Regex +{ + /// + /// C# Regex based implementation of <see cref="IRegexCapabilities"/>. + /// + /// http://www.java2s.com/Open-Source/Java-Document/Net/lucene-connector/org/apache/lucene/search/regex/JavaUtilRegexCapabilities.java.htm + public class CSharpRegexCapabilities : IRegexCapabilities, IEquatable<CSharpRegexCapabilities> + { + private System.Text.RegularExpressions.Regex _rPattern; + + /// + /// Called by the constructor of <see cref="RegexTermEnum"/> allowing implementations to cache + /// a compiled version of the regular expression pattern. + /// + /// regular expression pattern + public void Compile(string pattern) + { + _rPattern = new System.Text.RegularExpressions.Regex(pattern, + System.Text.RegularExpressions.RegexOptions.Compiled); + } + + /// + /// True on match. + /// + /// text to match + /// true on match + public bool Match(string s) + { + return _rPattern.IsMatch(s); + } + + /// + /// A wise prefix implementation can reduce the term enumeration (and thus performance) + /// of RegexQuery dramatically. + /// + /// static non-regex prefix of the pattern last passed to <see cref="Compile"/>. + /// May return null + public string Prefix() + { + return null; + } + + /// + /// Indicates whether the current object is equal to another object of the same type. 
+ /// + /// + /// true if the current object is equal to the parameter; otherwise, false. + /// + /// An object to compare with this object + public bool Equals(CSharpRegexCapabilities other) + { + if (other == null) return false; + if (this == other) return true; + + if (_rPattern != null ? !_rPattern.Equals(other._rPattern) : other._rPattern != null) + return false; + + return true; + } + + public override bool Equals(object obj) + { + if (obj as CSharpRegexCapabilities == null) return false; + return Equals((CSharpRegexCapabilities) obj); + } + + public override int GetHashCode() + { + return (_rPattern != null ? _rPattern.GetHashCode() : 0); + } + } +} Added: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/core/Search/Regex/IRegexCapabilities.cs URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/core/Search/Regex/IRegexCapabilities.cs?rev=1100452&view=auto ============================================================================== --- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/core/Search/Regex/IRegexCapabilities.cs (added) +++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/core/Search/Regex/IRegexCapabilities.cs Sat May 7 05:28:26 2011 @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +namespace Lucene.Net.Search.Regex +{ + /// + /// Defines basic operations needed by {@link RegexQuery} for a regular expression implementation. + /// + /// http://www.java2s.com/Open-Source/Java-Document/Net/lucene-connector/org/apache/lucene/search/regex/RegexCapabilities.java.htm + public interface IRegexCapabilities + { + /// + /// Called by the constructor of <see cref="RegexTermEnum"/> allowing implementations to cache + /// a compiled version of the regular expression pattern. + /// + /// regular expression pattern + void Compile(string pattern); + + /// + /// True on match. + /// + /// text to match + /// true on match + bool Match(string s); + + /// + /// A wise prefix implementation can reduce the term enumeration (and thus performance) + /// of RegexQuery dramatically. + /// + /// static non-regex prefix of the pattern last passed to <see cref="Compile"/>. + /// May return null + string Prefix(); + } +} Added: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/core/Search/Regex/IRegexQueryCapable.cs URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/core/Search/Regex/IRegexQueryCapable.cs?rev=1100452&view=auto ============================================================================== --- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/core/Search/Regex/IRegexQueryCapable.cs (added) +++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/core/Search/Regex/IRegexQueryCapable.cs Sat May 7 05:28:26 2011 @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +namespace Lucene.Net.Search.Regex +{ + /// + /// Defines methods for regular expression supporting queries to use. + /// + /// http://www.java2s.com/Open-Source/Java-Document/Net/lucene-connector/org/apache/lucene/search/regex/RegexQueryCapable.java.htm + public interface IRegexQueryCapable + { + void SetRegexImplementation(IRegexCapabilities impl); + IRegexCapabilities GetRegexImplementation(); + } +} Added: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/core/Search/Regex/RegexQuery.cs URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/core/Search/Regex/RegexQuery.cs?rev=1100452&view=auto ============================================================================== --- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/core/Search/Regex/RegexQuery.cs (added) +++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/core/Search/Regex/RegexQuery.cs Sat May 7 05:28:26 2011 @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Index; + +namespace Lucene.Net.Search.Regex +{ + /// + /// Regular expression based query. + /// + /// http://www.java2s.com/Open-Source/Java-Document/Net/lucene-connector/org/apache/lucene/search/regex/RegexQuery.java.htm + public class RegexQuery : MultiTermQuery, IRegexQueryCapable, IEquatable<RegexQuery> + { + private IRegexCapabilities _regexImpl = new CSharpRegexCapabilities(); + + public RegexQuery(Term term) : base(term) + { + } + + /// Construct the enumeration to be used, expanding the pattern term. + public override FilteredTermEnum GetEnum(IndexReader reader) + { + Term term = new Term(GetTerm().Field(), GetTerm().Text()); + return new RegexTermEnum(reader, term, _regexImpl); + } + + public void SetRegexImplementation(IRegexCapabilities impl) + { + _regexImpl = impl; + } + + public IRegexCapabilities GetRegexImplementation() + { + return _regexImpl; + } + + /// + /// Indicates whether the current object is equal to another object of the same type. + /// + /// + /// true if the current object is equal to the parameter; otherwise, false. 
+ /// + /// An object to compare with this object + public bool Equals(RegexQuery other) + { + if (other == null) return false; + if (this == other) return true; + + if (!base.Equals(other)) return false; + return _regexImpl.Equals(other._regexImpl); + } + + public override bool Equals(object obj) + { + if ((obj == null) || (obj as RegexQuery == null)) return false; + if (this == obj) return true; + + return Equals((RegexQuery) obj); + } + + public override int GetHashCode() + { + return 29 * base.GetHashCode() + _regexImpl.GetHashCode(); + } + } +} Added: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/core/Search/Regex/RegexTermEnum.cs URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/core/Search/Regex/RegexTermEnum.cs?rev=1100452&view=auto ============================================================================== --- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/core/Search/Regex/RegexTermEnum.cs (added) +++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/core/Search/Regex/RegexTermEnum.cs Sat May 7 05:28:26 2011 @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +using Lucene.Net.Index; + +namespace Lucene.Net.Search.Regex +{ + /// + /// Subclass of FilteredTermEnum for enumerating all terms that match the + /// specified regular expression term using the specified regular expression + /// implementation. + /// Term enumerations are always ordered by Term.compareTo(). Each term in + /// the enumeration is greater than all that precede it. + /// + /// http://www.java2s.com/Open-Source/Java-Document/Net/lucene-connector/org/apache/lucene/search/regex/RegexTermEnum.java.htm + public class RegexTermEnum : FilteredTermEnum + { + private string _sField = ""; + private string _sPre = ""; + private bool _bEndEnum; + private readonly IRegexCapabilities _regexImpl; + + public RegexTermEnum(IndexReader reader, Term term, IRegexCapabilities regexImpl) + { + _sField = term.Field(); + string sText = term.Text(); + + _regexImpl = regexImpl; + + _regexImpl.Compile(sText); + + _sPre = _regexImpl.Prefix() ?? ""; + + SetEnum(reader.Terms(new Term(term.Field(), _sPre))); + } + + /// Equality compare on the term + public override bool TermCompare(Term term) + { + if (_sField == term.Field()) + { + string sSearchText = term.Text(); + if (sSearchText.StartsWith(_sPre)) return _regexImpl.Match(sSearchText); + } //eif + + _bEndEnum = true; + return false; + } + + /// Equality measure on the term + public override float Difference() + { + // TODO: adjust difference based on distance of searchTerm.text() and term().text() + return 1.0F; + } + + /// Indicates the end of the enumeration has been reached + public override bool EndEnum() + { + return _bEndEnum; + } + + //public override void Close() + //{ + // base.Close(); + // _sField = null; + //} + } +} Added: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/core/Search/Regex/SpanRegexQuery.cs URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/core/Search/Regex/SpanRegexQuery.cs?rev=1100452&view=auto 
============================================================================== --- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/core/Search/Regex/SpanRegexQuery.cs (added) +++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/core/Search/Regex/SpanRegexQuery.cs Sat May 7 05:28:26 2011 @@ -0,0 +1,161 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections; +using System.Collections.Generic; +using System.Text; +using Lucene.Net.Index; +using Lucene.Net.Search.Spans; +using Lucene.Net.Util; + +namespace Lucene.Net.Search.Regex +{ + /// + /// A SpanQuery version of <see cref="RegexQuery"/> allowing regular expression queries to be nested + /// within other SpanQuery subclasses. 
+ /// + /// http://www.java2s.com/Open-Source/Java-Document/Net/lucene-connector/org/apache/lucene/search/regex/SpanRegexQuery.java.htm + public class SpanRegexQuery : SpanQuery, IRegexQueryCapable, IEquatable<SpanRegexQuery> + { + private IRegexCapabilities _regexImpl = new CSharpRegexCapabilities(); + private readonly Term _term; + + public SpanRegexQuery(Term term) + { + _term = term; + } + + public Term GetTerm() + { + return _term; + } + + public override string ToString(string field) + { + StringBuilder sb = new StringBuilder(); + sb.Append("SpanRegexQuery("); + sb.Append(_term); + sb.Append(')'); + sb.Append(ToStringUtils.Boost(GetBoost())); + return sb.ToString(); + } + + public override Query Rewrite(IndexReader reader) + { + RegexQuery orig = new RegexQuery(_term); + orig.SetRegexImplementation(_regexImpl); + + // RegexQuery (via MultiTermQuery).Rewrite always returns a BooleanQuery + orig.SetRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); //@@ + BooleanQuery bq = (BooleanQuery) orig.Rewrite(reader); + + BooleanClause[] clauses = bq.GetClauses(); + SpanQuery[] sqs = new SpanQuery[clauses.Length]; + for (int i = 0; i < clauses.Length; i++) + { + BooleanClause clause = clauses[i]; + + // Clauses from RegexQuery.Rewrite are always TermQuery's + TermQuery tq = (TermQuery) clause.GetQuery(); + + sqs[i] = new SpanTermQuery(tq.GetTerm()); + sqs[i].SetBoost(tq.GetBoost()); + } //efor + + SpanOrQuery query = new SpanOrQuery(sqs); + query.SetBoost(orig.GetBoost()); + + return query; + } + + /// Expert: Returns the matches for this query in an index. Used internally + /// to search for spans. + /// + public override Spans.Spans GetSpans(IndexReader reader) + { + throw new InvalidOperationException("Query should have been rewritten"); + } + + /// Returns the name of the field matched by this query. + public override string GetField() + { + return _term.Field(); + } + + /// Returns a collection of all terms matched by this query. 
+ /// use extractTerms instead + /// + /// + /// + public override IList GetTerms() + { + IList terms = new List {_term}; + return terms; + } + + public void SetRegexImplementation(IRegexCapabilities impl) + { + _regexImpl = impl; + } + + public IRegexCapabilities GetRegexImplementation() + { + return _regexImpl; + } + + /// + /// Indicates whether the current object is equal to another object of the same type. + /// + /// + /// true if the current object is equal to the parameter; otherwise, false. + /// + /// An object to compare with this object. + /// + public bool Equals(SpanRegexQuery other) + { + if (other == null) return false; + if (this == other) return true; + + if (!_regexImpl.Equals(other._regexImpl)) return false; + if (!_term.Equals(other._term)) return false; + + return true; + } + + /// + /// True if this object equals the specified object. + /// + /// object + /// true on equality + public override bool Equals(object obj) + { + if (obj as SpanRegexQuery == null) return false; + + return Equals((SpanRegexQuery) obj); + } + + /// + /// Get hash code for this object. + /// + /// hash code + public override int GetHashCode() + { + return 29 * _regexImpl.GetHashCode() + _term.GetHashCode(); + } + } +}