Return-Path: Delivered-To: apmail-lucene-java-commits-archive@www.apache.org Received: (qmail 57216 invoked from network); 3 Aug 2009 03:39:47 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.3) by minotaur.apache.org with SMTP; 3 Aug 2009 03:39:47 -0000 Received: (qmail 55426 invoked by uid 500); 3 Aug 2009 03:39:52 -0000 Delivered-To: apmail-lucene-java-commits-archive@lucene.apache.org Received: (qmail 55374 invoked by uid 500); 3 Aug 2009 03:39:52 -0000 Mailing-List: contact java-commits-help@lucene.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: java-dev@lucene.apache.org Delivered-To: mailing list java-commits@lucene.apache.org Received: (qmail 55365 invoked by uid 99); 3 Aug 2009 03:39:52 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 03 Aug 2009 03:39:52 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=10.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 03 Aug 2009 03:39:45 +0000 Received: by eris.apache.org (Postfix, from userid 65534) id D751B23889C1; Mon, 3 Aug 2009 03:38:58 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r800191 [7/12] - in /lucene/java/trunk: ./ contrib/ contrib/queryparser/ contrib/queryparser/src/ contrib/queryparser/src/java/ contrib/queryparser/src/java/org/ contrib/queryparser/src/java/org/apache/ contrib/queryparser/src/java/org/apac... 
Date: Mon, 03 Aug 2009 03:38:50 -0000 To: java-commits@lucene.apache.org From: buschmi@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20090803033858.D751B23889C1@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Added: lucene/java/trunk/contrib/queryparser/src/java/org/apache/lucene/queryParser/original/parser/JavaCharStream.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/queryparser/src/java/org/apache/lucene/queryParser/original/parser/JavaCharStream.java?rev=800191&view=auto ============================================================================== --- lucene/java/trunk/contrib/queryparser/src/java/org/apache/lucene/queryParser/original/parser/JavaCharStream.java (added) +++ lucene/java/trunk/contrib/queryparser/src/java/org/apache/lucene/queryParser/original/parser/JavaCharStream.java Mon Aug 3 03:38:44 2009 @@ -0,0 +1,617 @@ +/* Generated By:JavaCC: Do not edit this line. JavaCharStream.java Version 4.1 */ +/* JavaCCOptions:STATIC=false,SUPPORT_CLASS_VISIBILITY_PUBLIC=true */ +package org.apache.lucene.queryParser.original.parser; + +/** + * An implementation of interface CharStream, where the stream is assumed to + * contain only ASCII characters (with java-like unicode escape processing). + */ + +public +class JavaCharStream +{ + /** Whether parser is static. 
*/ + public static final boolean staticFlag = false; + + static final int hexval(char c) throws java.io.IOException { + switch(c) + { + case '0' : + return 0; + case '1' : + return 1; + case '2' : + return 2; + case '3' : + return 3; + case '4' : + return 4; + case '5' : + return 5; + case '6' : + return 6; + case '7' : + return 7; + case '8' : + return 8; + case '9' : + return 9; + + case 'a' : + case 'A' : + return 10; + case 'b' : + case 'B' : + return 11; + case 'c' : + case 'C' : + return 12; + case 'd' : + case 'D' : + return 13; + case 'e' : + case 'E' : + return 14; + case 'f' : + case 'F' : + return 15; + } + + throw new java.io.IOException(); // Should never come here + } + +/** Position in buffer. */ + public int bufpos = -1; + int bufsize; + int available; + int tokenBegin; + protected int bufline[]; + protected int bufcolumn[]; + + protected int column = 0; + protected int line = 1; + + protected boolean prevCharIsCR = false; + protected boolean prevCharIsLF = false; + + protected java.io.Reader inputStream; + + protected char[] nextCharBuf; + protected char[] buffer; + protected int maxNextCharInd = 0; + protected int nextCharInd = -1; + protected int inBuf = 0; + protected int tabSize = 8; + + protected void setTabSize(int i) { tabSize = i; } + protected int getTabSize(int i) { return tabSize; } + + protected void ExpandBuff(boolean wrapAround) + { + char[] newbuffer = new char[bufsize + 2048]; + int newbufline[] = new int[bufsize + 2048]; + int newbufcolumn[] = new int[bufsize + 2048]; + + try + { + if (wrapAround) + { + System.arraycopy(buffer, tokenBegin, newbuffer, 0, bufsize - tokenBegin); + System.arraycopy(buffer, 0, newbuffer, bufsize - tokenBegin, bufpos); + buffer = newbuffer; + + System.arraycopy(bufline, tokenBegin, newbufline, 0, bufsize - tokenBegin); + System.arraycopy(bufline, 0, newbufline, bufsize - tokenBegin, bufpos); + bufline = newbufline; + + System.arraycopy(bufcolumn, tokenBegin, newbufcolumn, 0, bufsize - tokenBegin); + 
System.arraycopy(bufcolumn, 0, newbufcolumn, bufsize - tokenBegin, bufpos); + bufcolumn = newbufcolumn; + + bufpos += (bufsize - tokenBegin); + } + else + { + System.arraycopy(buffer, tokenBegin, newbuffer, 0, bufsize - tokenBegin); + buffer = newbuffer; + + System.arraycopy(bufline, tokenBegin, newbufline, 0, bufsize - tokenBegin); + bufline = newbufline; + + System.arraycopy(bufcolumn, tokenBegin, newbufcolumn, 0, bufsize - tokenBegin); + bufcolumn = newbufcolumn; + + bufpos -= tokenBegin; + } + } + catch (Throwable t) + { + throw new Error(t.getMessage()); + } + + available = (bufsize += 2048); + tokenBegin = 0; + } + + protected void FillBuff() throws java.io.IOException + { + int i; + if (maxNextCharInd == 4096) + maxNextCharInd = nextCharInd = 0; + + try { + if ((i = inputStream.read(nextCharBuf, maxNextCharInd, + 4096 - maxNextCharInd)) == -1) + { + inputStream.close(); + throw new java.io.IOException(); + } + else + maxNextCharInd += i; + return; + } + catch(java.io.IOException e) { + if (bufpos != 0) + { + --bufpos; + backup(0); + } + else + { + bufline[bufpos] = line; + bufcolumn[bufpos] = column; + } + throw e; + } + } + + protected char ReadByte() throws java.io.IOException + { + if (++nextCharInd >= maxNextCharInd) + FillBuff(); + + return nextCharBuf[nextCharInd]; + } + +/** @return starting character for token. 
*/ + public char BeginToken() throws java.io.IOException + { + if (inBuf > 0) + { + --inBuf; + + if (++bufpos == bufsize) + bufpos = 0; + + tokenBegin = bufpos; + return buffer[bufpos]; + } + + tokenBegin = 0; + bufpos = -1; + + return readChar(); + } + + protected void AdjustBuffSize() + { + if (available == bufsize) + { + if (tokenBegin > 2048) + { + bufpos = 0; + available = tokenBegin; + } + else + ExpandBuff(false); + } + else if (available > tokenBegin) + available = bufsize; + else if ((tokenBegin - available) < 2048) + ExpandBuff(true); + else + available = tokenBegin; + } + + protected void UpdateLineColumn(char c) + { + column++; + + if (prevCharIsLF) + { + prevCharIsLF = false; + line += (column = 1); + } + else if (prevCharIsCR) + { + prevCharIsCR = false; + if (c == '\n') + { + prevCharIsLF = true; + } + else + line += (column = 1); + } + + switch (c) + { + case '\r' : + prevCharIsCR = true; + break; + case '\n' : + prevCharIsLF = true; + break; + case '\t' : + column--; + column += (tabSize - (column % tabSize)); + break; + default : + break; + } + + bufline[bufpos] = line; + bufcolumn[bufpos] = column; + } + +/** Read a character. */ + public char readChar() throws java.io.IOException + { + if (inBuf > 0) + { + --inBuf; + + if (++bufpos == bufsize) + bufpos = 0; + + return buffer[bufpos]; + } + + char c; + + if (++bufpos == available) + AdjustBuffSize(); + + if ((buffer[bufpos] = c = ReadByte()) == '\\') + { + UpdateLineColumn(c); + + int backSlashCnt = 1; + + for (;;) // Read all the backslashes + { + if (++bufpos == available) + AdjustBuffSize(); + + try + { + if ((buffer[bufpos] = c = ReadByte()) != '\\') + { + UpdateLineColumn(c); + // found a non-backslash char. 
+ if ((c == 'u') && ((backSlashCnt & 1) == 1)) + { + if (--bufpos < 0) + bufpos = bufsize - 1; + + break; + } + + backup(backSlashCnt); + return '\\'; + } + } + catch(java.io.IOException e) + { + // We are returning one backslash so we should only backup (count-1) + if (backSlashCnt > 1) + backup(backSlashCnt-1); + + return '\\'; + } + + UpdateLineColumn(c); + backSlashCnt++; + } + + // Here, we have seen an odd number of backslash's followed by a 'u' + try + { + while ((c = ReadByte()) == 'u') + ++column; + + buffer[bufpos] = c = (char)(hexval(c) << 12 | + hexval(ReadByte()) << 8 | + hexval(ReadByte()) << 4 | + hexval(ReadByte())); + + column += 4; + } + catch(java.io.IOException e) + { + throw new Error("Invalid escape character at line " + line + + " column " + column + "."); + } + + if (backSlashCnt == 1) + return c; + else + { + backup(backSlashCnt - 1); + return '\\'; + } + } + else + { + UpdateLineColumn(c); + return c; + } + } + + @Deprecated + /** + * @deprecated + * @see #getEndColumn + */ + public int getColumn() { + return bufcolumn[bufpos]; + } + + @Deprecated + /** + * @deprecated + * @see #getEndLine + */ + public int getLine() { + return bufline[bufpos]; + } + +/** Get end column. */ + public int getEndColumn() { + return bufcolumn[bufpos]; + } + +/** Get end line. */ + public int getEndLine() { + return bufline[bufpos]; + } + +/** @return column of token start */ + public int getBeginColumn() { + return bufcolumn[tokenBegin]; + } + +/** @return line number of token start */ + public int getBeginLine() { + return bufline[tokenBegin]; + } + +/** Retreat. */ + public void backup(int amount) { + + inBuf += amount; + if ((bufpos -= amount) < 0) + bufpos += bufsize; + } + +/** Constructor. 
*/ + public JavaCharStream(java.io.Reader dstream, + int startline, int startcolumn, int buffersize) + { + inputStream = dstream; + line = startline; + column = startcolumn - 1; + + available = bufsize = buffersize; + buffer = new char[buffersize]; + bufline = new int[buffersize]; + bufcolumn = new int[buffersize]; + nextCharBuf = new char[4096]; + } + +/** Constructor. */ + public JavaCharStream(java.io.Reader dstream, + int startline, int startcolumn) + { + this(dstream, startline, startcolumn, 4096); + } + +/** Constructor. */ + public JavaCharStream(java.io.Reader dstream) + { + this(dstream, 1, 1, 4096); + } +/** Reinitialise. */ + public void ReInit(java.io.Reader dstream, + int startline, int startcolumn, int buffersize) + { + inputStream = dstream; + line = startline; + column = startcolumn - 1; + + if (buffer == null || buffersize != buffer.length) + { + available = bufsize = buffersize; + buffer = new char[buffersize]; + bufline = new int[buffersize]; + bufcolumn = new int[buffersize]; + nextCharBuf = new char[4096]; + } + prevCharIsLF = prevCharIsCR = false; + tokenBegin = inBuf = maxNextCharInd = 0; + nextCharInd = bufpos = -1; + } + +/** Reinitialise. */ + public void ReInit(java.io.Reader dstream, + int startline, int startcolumn) + { + ReInit(dstream, startline, startcolumn, 4096); + } + +/** Reinitialise. */ + public void ReInit(java.io.Reader dstream) + { + ReInit(dstream, 1, 1, 4096); + } +/** Constructor. */ + public JavaCharStream(java.io.InputStream dstream, String encoding, int startline, + int startcolumn, int buffersize) throws java.io.UnsupportedEncodingException + { + this(encoding == null ? new java.io.InputStreamReader(dstream) : new java.io.InputStreamReader(dstream, encoding), startline, startcolumn, buffersize); + } + +/** Constructor. 
*/ + public JavaCharStream(java.io.InputStream dstream, int startline, + int startcolumn, int buffersize) + { + this(new java.io.InputStreamReader(dstream), startline, startcolumn, 4096); + } + +/** Constructor. */ + public JavaCharStream(java.io.InputStream dstream, String encoding, int startline, + int startcolumn) throws java.io.UnsupportedEncodingException + { + this(dstream, encoding, startline, startcolumn, 4096); + } + +/** Constructor. */ + public JavaCharStream(java.io.InputStream dstream, int startline, + int startcolumn) + { + this(dstream, startline, startcolumn, 4096); + } + +/** Constructor. */ + public JavaCharStream(java.io.InputStream dstream, String encoding) throws java.io.UnsupportedEncodingException + { + this(dstream, encoding, 1, 1, 4096); + } + +/** Constructor. */ + public JavaCharStream(java.io.InputStream dstream) + { + this(dstream, 1, 1, 4096); + } + +/** Reinitialise. */ + public void ReInit(java.io.InputStream dstream, String encoding, int startline, + int startcolumn, int buffersize) throws java.io.UnsupportedEncodingException + { + ReInit(encoding == null ? new java.io.InputStreamReader(dstream) : new java.io.InputStreamReader(dstream, encoding), startline, startcolumn, buffersize); + } + +/** Reinitialise. */ + public void ReInit(java.io.InputStream dstream, int startline, + int startcolumn, int buffersize) + { + ReInit(new java.io.InputStreamReader(dstream), startline, startcolumn, buffersize); + } +/** Reinitialise. */ + public void ReInit(java.io.InputStream dstream, String encoding, int startline, + int startcolumn) throws java.io.UnsupportedEncodingException + { + ReInit(dstream, encoding, startline, startcolumn, 4096); + } +/** Reinitialise. */ + public void ReInit(java.io.InputStream dstream, int startline, + int startcolumn) + { + ReInit(dstream, startline, startcolumn, 4096); + } +/** Reinitialise. 
*/ + public void ReInit(java.io.InputStream dstream, String encoding) throws java.io.UnsupportedEncodingException + { + ReInit(dstream, encoding, 1, 1, 4096); + } + +/** Reinitialise. */ + public void ReInit(java.io.InputStream dstream) + { + ReInit(dstream, 1, 1, 4096); + } + + /** @return token image as String */ + public String GetImage() + { + if (bufpos >= tokenBegin) + return new String(buffer, tokenBegin, bufpos - tokenBegin + 1); + else + return new String(buffer, tokenBegin, bufsize - tokenBegin) + + new String(buffer, 0, bufpos + 1); + } + + /** @return suffix */ + public char[] GetSuffix(int len) + { + char[] ret = new char[len]; + + if ((bufpos + 1) >= len) + System.arraycopy(buffer, bufpos - len + 1, ret, 0, len); + else + { + System.arraycopy(buffer, bufsize - (len - bufpos - 1), ret, 0, + len - bufpos - 1); + System.arraycopy(buffer, 0, ret, len - bufpos - 1, bufpos + 1); + } + + return ret; + } + + /** Set buffers back to null when finished. */ + public void Done() + { + nextCharBuf = null; + buffer = null; + bufline = null; + bufcolumn = null; + } + + /** + * Method to adjust line and column numbers for the start of a token. 
+ */ + public void adjustBeginLineColumn(int newLine, int newCol) + { + int start = tokenBegin; + int len; + + if (bufpos >= tokenBegin) + { + len = bufpos - tokenBegin + inBuf + 1; + } + else + { + len = bufsize - tokenBegin + bufpos + 1 + inBuf; + } + + int i = 0, j = 0, k = 0; + int nextColDiff = 0, columnDiff = 0; + + while (i < len && bufline[j = start % bufsize] == bufline[k = ++start % bufsize]) + { + bufline[j] = newLine; + nextColDiff = columnDiff + bufcolumn[k] - bufcolumn[j]; + bufcolumn[j] = newCol + columnDiff; + columnDiff = nextColDiff; + i++; + } + + if (i < len) + { + bufline[j] = newLine++; + bufcolumn[j] = newCol + columnDiff; + + while (i++ < len) + { + if (bufline[j = start % bufsize] != bufline[++start % bufsize]) + bufline[j] = newLine++; + else + bufline[j] = newLine; + } + } + + line = bufline[j]; + column = bufcolumn[j]; + } + +} +/* JavaCC - OriginalChecksum=065d79d49fcd02f542903038e37bd9d9 (do not edit this line) */ Propchange: lucene/java/trunk/contrib/queryparser/src/java/org/apache/lucene/queryParser/original/parser/JavaCharStream.java ------------------------------------------------------------------------------ svn:eol-style = native Added: lucene/java/trunk/contrib/queryparser/src/java/org/apache/lucene/queryParser/original/parser/OriginalSyntaxParser.java URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/queryparser/src/java/org/apache/lucene/queryParser/original/parser/OriginalSyntaxParser.java?rev=800191&view=auto ============================================================================== --- lucene/java/trunk/contrib/queryparser/src/java/org/apache/lucene/queryParser/original/parser/OriginalSyntaxParser.java (added) +++ lucene/java/trunk/contrib/queryparser/src/java/org/apache/lucene/queryParser/original/parser/OriginalSyntaxParser.java Mon Aug 3 03:38:44 2009 @@ -0,0 +1,955 @@ +/* Generated By:JavaCC: Do not edit this line. 
OriginalSyntaxParser.java */ +package org.apache.lucene.queryParser.original.parser; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.StringReader; +import java.util.ArrayList; +import java.util.List; +import java.util.Vector; + +import org.apache.lucene.messages.Message; +import org.apache.lucene.messages.MessageImpl; +import org.apache.lucene.queryParser.core.QueryNodeError; +import org.apache.lucene.queryParser.core.QueryNodeException; +import org.apache.lucene.queryParser.core.QueryNodeParseException; +import org.apache.lucene.queryParser.core.messages.QueryParserMessages; +import org.apache.lucene.queryParser.core.nodes.AndQueryNode; +import org.apache.lucene.queryParser.core.nodes.BooleanQueryNode; +import org.apache.lucene.queryParser.core.nodes.BoostQueryNode; +import org.apache.lucene.queryParser.core.nodes.FieldQueryNode; +import org.apache.lucene.queryParser.core.nodes.FuzzyQueryNode; +import org.apache.lucene.queryParser.core.nodes.ModifierQueryNode; +import org.apache.lucene.queryParser.core.nodes.GroupQueryNode; +import org.apache.lucene.queryParser.core.nodes.OpaqueQueryNode; +import org.apache.lucene.queryParser.core.nodes.OrQueryNode; +import 
org.apache.lucene.queryParser.core.nodes.ParametricQueryNode; +import org.apache.lucene.queryParser.core.nodes.ParametricRangeQueryNode; +import org.apache.lucene.queryParser.core.nodes.PrefixWildcardQueryNode; +import org.apache.lucene.queryParser.core.nodes.SlopQueryNode; +import org.apache.lucene.queryParser.core.nodes.ProximityQueryNode; +import org.apache.lucene.queryParser.core.nodes.QueryNode; +import org.apache.lucene.queryParser.core.nodes.QueryNodeImpl; +import org.apache.lucene.queryParser.core.nodes.QuotedFieldQueryNode; +import org.apache.lucene.queryParser.core.nodes.WildcardQueryNode; +import org.apache.lucene.queryParser.core.parser.SyntaxParser; + +@SuppressWarnings("all") +public class OriginalSyntaxParser implements SyntaxParser, OriginalSyntaxParserConstants { + + private static final int CONJ_NONE =0; + private static final int CONJ_AND =2; + private static final int CONJ_OR =2; + + + // syntax parser constructor + public OriginalSyntaxParser() { + this(new StringReader("")); + } + /** Parses a query string, returning a {@link org.apache.lucene.queryParser.core.nodes.QueryNode}. + * @param query the query string to be parsed. 
+ * @throws ParseException if the parsing fails + */ + public QueryNode parse(CharSequence query, CharSequence field) throws QueryNodeParseException { + ReInit(new StringReader(query.toString())); + try { + // TopLevelQuery is a Query followed by the end-of-input (EOF) + QueryNode querynode = TopLevelQuery(field); + return querynode; + } + catch (ParseException tme) { + tme.setQuery(query); + throw tme; + } + catch (Error tme) { + Message message = new MessageImpl(QueryParserMessages.INVALID_SYNTAX_CANNOT_PARSE, query, tme.getMessage()); + QueryNodeParseException e = new QueryNodeParseException(tme); + e.setQuery(query); + e.setNonLocalizedMessage(message); + throw e; + } + } + +// * Query ::= ( Clause )* +// * Clause ::= ["+", "-"] [ ":"] ( | "(" Query ")" ) + final public int Conjunction() throws ParseException { + int ret = CONJ_NONE; + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case AND: + case OR: + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case AND: + jj_consume_token(AND); + ret = CONJ_AND; + break; + case OR: + jj_consume_token(OR); + ret = CONJ_OR; + break; + default: + jj_la1[0] = jj_gen; + jj_consume_token(-1); + throw new ParseException(); + } + break; + default: + jj_la1[1] = jj_gen; + ; + } + {if (true) return ret;} + throw new Error("Missing return statement in function"); + } + + final public ModifierQueryNode.Modifier Modifiers() throws ParseException { + ModifierQueryNode.Modifier ret = ModifierQueryNode.Modifier.MOD_NONE; + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case NOT: + case PLUS: + case MINUS: + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case PLUS: + jj_consume_token(PLUS); + ret = ModifierQueryNode.Modifier.MOD_REQ; + break; + case MINUS: + jj_consume_token(MINUS); + ret = ModifierQueryNode.Modifier.MOD_NOT; + break; + case NOT: + jj_consume_token(NOT); + ret = ModifierQueryNode.Modifier.MOD_NOT; + break; + default: + jj_la1[2] = jj_gen; + jj_consume_token(-1); + throw new ParseException(); + } + break; + default: + jj_la1[3] = jj_gen; + ; 
+ } + {if (true) return ret;} + throw new Error("Missing return statement in function"); + } + +// This makes sure that there is no garbage after the query string + final public QueryNode TopLevelQuery(CharSequence field) throws ParseException { + QueryNode q; + q = Query(field); + jj_consume_token(0); + {if (true) return q;} + throw new Error("Missing return statement in function"); + } + +// These changes were made to introduce operator precedence: +// - Clause() now returns a QueryNode. +// - The modifiers are consumed by Clause() and returned as part of the QueryNode Object +// - Query does not consume conjunctions (AND, OR) anymore. +// - This is now done by two new non-terminals: ConjClause and DisjClause +// The parse tree looks similar to this: +// Query ::= DisjQuery ( DisjQuery )* +// DisjQuery ::= ConjQuery ( OR ConjQuery )* +// ConjQuery ::= Clause ( AND Clause )* +// Clause ::= [ Modifier ] ... + final public QueryNode Query(CharSequence field) throws ParseException { + Vector clauses = null; + QueryNode c, first=null; + first = DisjQuery(field); + label_1: + while (true) { + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case NOT: + case PLUS: + case MINUS: + case LPAREN: + case STAR: + case QUOTED: + case TERM: + case PREFIXTERM: + case WILDTERM: + case RANGEIN_START: + case RANGEEX_START: + case NUMBER: + ; + break; + default: + jj_la1[4] = jj_gen; + break label_1; + } + c = DisjQuery(field); + if (clauses == null) { + clauses = new Vector(); + clauses.addElement(first); + } + clauses.addElement(c); + } + if (clauses != null) { + {if (true) return new BooleanQueryNode(clauses);} + } else { + {if (true) return first;} + } + throw new Error("Missing return statement in function"); + } + + final public QueryNode DisjQuery(CharSequence field) throws ParseException { + QueryNode first, c; + Vector clauses = null; + first = ConjQuery(field); + label_2: + while (true) { + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case OR: + ; + break; + default: + jj_la1[5] = 
jj_gen; + break label_2; + } + jj_consume_token(OR); + c = ConjQuery(field); + if (clauses == null) { + clauses = new Vector(); + clauses.addElement(first); + } + clauses.addElement(c); + } + if (clauses != null) { + {if (true) return new OrQueryNode(clauses);} + } else { + {if (true) return first;} + } + throw new Error("Missing return statement in function"); + } + + final public QueryNode ConjQuery(CharSequence field) throws ParseException { + QueryNode first, c; + Vector clauses = null; + first = ModClause(field); + label_3: + while (true) { + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case AND: + ; + break; + default: + jj_la1[6] = jj_gen; + break label_3; + } + jj_consume_token(AND); + c = ModClause(field); + if (clauses == null) { + clauses = new Vector(); + clauses.addElement(first); + } + clauses.addElement(c); + } + if (clauses != null) { + {if (true) return new AndQueryNode(clauses);} + } else { + {if (true) return first;} + } + throw new Error("Missing return statement in function"); + } + +// QueryNode Query(CharSequence field) : +// { +// List clauses = new ArrayList(); +// List modifiers = new ArrayList(); +// QueryNode q, firstQuery=null; +// ModifierQueryNode.Modifier mods; +// int conj; +// } +// { +// mods=Modifiers() q=Clause(field) +// { +// if (mods == ModifierQueryNode.Modifier.MOD_NONE) firstQuery=q; +// +// // do not create modifier nodes with MOD_NONE +// if (mods != ModifierQueryNode.Modifier.MOD_NONE) { +// q = new ModifierQueryNode(q, mods); +// } +// clauses.add(q); +// } +// ( +// conj=Conjunction() mods=Modifiers() q=Clause(field) +// { +// // do not create modifier nodes with MOD_NONE +// if (mods != ModifierQueryNode.Modifier.MOD_NONE) { +// q = new ModifierQueryNode(q, mods); +// } +// clauses.add(q); +// //TODO: figure out what to do with AND and ORs +// } +// )* +// { +// if (clauses.size() == 1 && firstQuery != null) +// return firstQuery; +// else { +// return new BooleanQueryNode(clauses); +// } +// } +// } + final public 
QueryNode ModClause(CharSequence field) throws ParseException { + QueryNode q; + ModifierQueryNode.Modifier mods; + mods = Modifiers(); + q = Clause(field); + if (mods != ModifierQueryNode.Modifier.MOD_NONE) { + q = new ModifierQueryNode(q, mods); + } + {if (true) return q;} + throw new Error("Missing return statement in function"); + } + + final public QueryNode Clause(CharSequence field) throws ParseException { + QueryNode q; + Token fieldToken=null, boost=null; + boolean group = false; + if (jj_2_1(2)) { + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case TERM: + fieldToken = jj_consume_token(TERM); + jj_consume_token(COLON); + field=EscapeQuerySyntaxImpl.discardEscapeChar(fieldToken.image); + break; + case STAR: + jj_consume_token(STAR); + jj_consume_token(COLON); + field="*"; + break; + default: + jj_la1[7] = jj_gen; + jj_consume_token(-1); + throw new ParseException(); + } + } else { + ; + } + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case STAR: + case QUOTED: + case TERM: + case PREFIXTERM: + case WILDTERM: + case RANGEIN_START: + case RANGEEX_START: + case NUMBER: + q = Term(field); + break; + case LPAREN: + jj_consume_token(LPAREN); + q = Query(field); + jj_consume_token(RPAREN); + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case CARAT: + jj_consume_token(CARAT); + boost = jj_consume_token(NUMBER); + break; + default: + jj_la1[8] = jj_gen; + ; + } + group=true; + break; + default: + jj_la1[9] = jj_gen; + jj_consume_token(-1); + throw new ParseException(); + } + if (boost != null) { + float f = (float)1.0; + try { + f = Float.valueOf(boost.image).floatValue(); + // avoid boosting null queries, such as those caused by stop words + if (q != null) { + q = new BoostQueryNode(q, f); + } + } catch (Exception ignored) { + /* Should this be handled somehow? 
(defaults to "no boost", if + * boost number is invalid) + */ + } + } + if (group) { q = new GroupQueryNode(q);} + {if (true) return q;} + throw new Error("Missing return statement in function"); + } + + final public QueryNode Term(CharSequence field) throws ParseException { + Token term, boost=null, fuzzySlop=null, goop1, goop2; + boolean prefix = false; + boolean wildcard = false; + boolean fuzzy = false; + QueryNode q =null; + ParametricQueryNode qLower, qUpper; + float defaultMinSimilarity = 0.5f; + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case STAR: + case TERM: + case PREFIXTERM: + case WILDTERM: + case NUMBER: + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case TERM: + term = jj_consume_token(TERM); + q = new FieldQueryNode(field, EscapeQuerySyntaxImpl.discardEscapeChar(term.image), term.beginColumn, term.endColumn); + break; + case STAR: + term = jj_consume_token(STAR); + wildcard=true; q = new WildcardQueryNode(field, EscapeQuerySyntaxImpl.discardEscapeChar(term.image), term.beginColumn, term.endColumn); + break; + case PREFIXTERM: + term = jj_consume_token(PREFIXTERM); + prefix=true; q = new PrefixWildcardQueryNode(field, EscapeQuerySyntaxImpl.discardEscapeChar(term.image), term.beginColumn, term.endColumn); + break; + case WILDTERM: + term = jj_consume_token(WILDTERM); + wildcard=true; q = new WildcardQueryNode(field, EscapeQuerySyntaxImpl.discardEscapeChar(term.image), term.beginColumn, term.endColumn); + break; + case NUMBER: + term = jj_consume_token(NUMBER); + break; + default: + jj_la1[10] = jj_gen; + jj_consume_token(-1); + throw new ParseException(); + } + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case FUZZY_SLOP: + fuzzySlop = jj_consume_token(FUZZY_SLOP); + fuzzy=true; + break; + default: + jj_la1[11] = jj_gen; + ; + } + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case CARAT: + jj_consume_token(CARAT); + boost = jj_consume_token(NUMBER); + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case FUZZY_SLOP: + fuzzySlop = jj_consume_token(FUZZY_SLOP); + 
fuzzy=true; + break; + default: + jj_la1[12] = jj_gen; + ; + } + break; + default: + jj_la1[13] = jj_gen; + ; + } + if (!wildcard && !prefix && fuzzy) { + float fms = defaultMinSimilarity; + try { + fms = Float.valueOf(fuzzySlop.image.substring(1)).floatValue(); + } catch (Exception ignored) { } + if(fms < 0.0f || fms > 1.0f){ + {if (true) throw new ParseException(new MessageImpl(QueryParserMessages.INVALID_SYNTAX_FUZZY_LIMITS));} + } + q = new FuzzyQueryNode(field, EscapeQuerySyntaxImpl.discardEscapeChar(term.image), fms, term.beginColumn, term.endColumn); + } + break; + case RANGEIN_START: + jj_consume_token(RANGEIN_START); + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case RANGEIN_GOOP: + goop1 = jj_consume_token(RANGEIN_GOOP); + break; + case RANGEIN_QUOTED: + goop1 = jj_consume_token(RANGEIN_QUOTED); + break; + default: + jj_la1[14] = jj_gen; + jj_consume_token(-1); + throw new ParseException(); + } + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case RANGEIN_TO: + jj_consume_token(RANGEIN_TO); + break; + default: + jj_la1[15] = jj_gen; + ; + } + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case RANGEIN_GOOP: + goop2 = jj_consume_token(RANGEIN_GOOP); + break; + case RANGEIN_QUOTED: + goop2 = jj_consume_token(RANGEIN_QUOTED); + break; + default: + jj_la1[16] = jj_gen; + jj_consume_token(-1); + throw new ParseException(); + } + jj_consume_token(RANGEIN_END); + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case CARAT: + jj_consume_token(CARAT); + boost = jj_consume_token(NUMBER); + break; + default: + jj_la1[17] = jj_gen; + ; + } + if (goop1.kind == RANGEIN_QUOTED) { + goop1.image = goop1.image.substring(1, goop1.image.length()-1); + } + if (goop2.kind == RANGEIN_QUOTED) { + goop2.image = goop2.image.substring(1, goop2.image.length()-1); + } + + qLower = new ParametricQueryNode(field, ParametricQueryNode.CompareOperator.GE, + EscapeQuerySyntaxImpl.discardEscapeChar(goop1.image), goop1.beginColumn, goop1.endColumn); + qUpper = new ParametricQueryNode(field, 
ParametricQueryNode.CompareOperator.LE, + EscapeQuerySyntaxImpl.discardEscapeChar(goop2.image), goop2.beginColumn, goop2.endColumn); + q = new ParametricRangeQueryNode(qLower, qUpper); + break; + case RANGEEX_START: + jj_consume_token(RANGEEX_START); + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case RANGEEX_GOOP: + goop1 = jj_consume_token(RANGEEX_GOOP); + break; + case RANGEEX_QUOTED: + goop1 = jj_consume_token(RANGEEX_QUOTED); + break; + default: + jj_la1[18] = jj_gen; + jj_consume_token(-1); + throw new ParseException(); + } + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case RANGEEX_TO: + jj_consume_token(RANGEEX_TO); + break; + default: + jj_la1[19] = jj_gen; + ; + } + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case RANGEEX_GOOP: + goop2 = jj_consume_token(RANGEEX_GOOP); + break; + case RANGEEX_QUOTED: + goop2 = jj_consume_token(RANGEEX_QUOTED); + break; + default: + jj_la1[20] = jj_gen; + jj_consume_token(-1); + throw new ParseException(); + } + jj_consume_token(RANGEEX_END); + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case CARAT: + jj_consume_token(CARAT); + boost = jj_consume_token(NUMBER); + break; + default: + jj_la1[21] = jj_gen; + ; + } + if (goop1.kind == RANGEEX_QUOTED) { + goop1.image = goop1.image.substring(1, goop1.image.length()-1); + } + if (goop2.kind == RANGEEX_QUOTED) { + goop2.image = goop2.image.substring(1, goop2.image.length()-1); + } + qLower = new ParametricQueryNode(field, ParametricQueryNode.CompareOperator.GT, + EscapeQuerySyntaxImpl.discardEscapeChar(goop1.image), goop1.beginColumn, goop1.endColumn); + qUpper = new ParametricQueryNode(field, ParametricQueryNode.CompareOperator.LT, + EscapeQuerySyntaxImpl.discardEscapeChar(goop2.image), goop2.beginColumn, goop2.endColumn); + q = new ParametricRangeQueryNode(qLower, qUpper); + break; + case QUOTED: + term = jj_consume_token(QUOTED); + q = new QuotedFieldQueryNode(field, EscapeQuerySyntaxImpl.discardEscapeChar(term.image.substring(1, term.image.length()-1)), term.beginColumn + 1, 
term.endColumn - 1); + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case FUZZY_SLOP: + fuzzySlop = jj_consume_token(FUZZY_SLOP); + break; + default: + jj_la1[22] = jj_gen; + ; + } + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case CARAT: + jj_consume_token(CARAT); + boost = jj_consume_token(NUMBER); + break; + default: + jj_la1[23] = jj_gen; + ; + } + int phraseSlop = 0; + + if (fuzzySlop != null) { + try { + phraseSlop = Float.valueOf(fuzzySlop.image.substring(1)).intValue(); + q = new SlopQueryNode(q, phraseSlop); + } + catch (Exception ignored) { + /* Should this be handled somehow? (defaults to "no PhraseSlop", if + * slop number is invalid) + */ + } + } + break; + default: + jj_la1[24] = jj_gen; + jj_consume_token(-1); + throw new ParseException(); + } + if (boost != null) { + float f = (float)1.0; + try { + f = Float.valueOf(boost.image).floatValue(); + // avoid boosting null queries, such as those caused by stop words + if (q != null) { + q = new BoostQueryNode(q, f); + } + } catch (Exception ignored) { + /* Should this be handled somehow? (defaults to "no boost", if + * boost number is invalid) + */ + } + } + {if (true) return q;} + throw new Error("Missing return statement in function"); + } + + private boolean jj_2_1(int xla) { + jj_la = xla; jj_lastpos = jj_scanpos = token; + try { return !jj_3_1(); } + catch(LookaheadSuccess ls) { return true; } + finally { jj_save(0, xla); } + } + + private boolean jj_3R_5() { + if (jj_scan_token(STAR)) return true; + if (jj_scan_token(COLON)) return true; + return false; + } + + private boolean jj_3R_4() { + if (jj_scan_token(TERM)) return true; + if (jj_scan_token(COLON)) return true; + return false; + } + + private boolean jj_3_1() { + Token xsp; + xsp = jj_scanpos; + if (jj_3R_4()) { + jj_scanpos = xsp; + if (jj_3R_5()) return true; + } + return false; + } + + /** Generated Token Manager. */ + public OriginalSyntaxParserTokenManager token_source; + JavaCharStream jj_input_stream; + /** Current token. 
*/ + public Token token; + /** Next token. */ + public Token jj_nt; + private int jj_ntk; + private Token jj_scanpos, jj_lastpos; + private int jj_la; + private int jj_gen; + final private int[] jj_la1 = new int[25]; + static private int[] jj_la1_0; + static private int[] jj_la1_1; + static { + jj_la1_init_0(); + jj_la1_init_1(); + } + private static void jj_la1_init_0() { + jj_la1_0 = new int[] {0x300,0x300,0x1c00,0x1c00,0x3ed3c00,0x200,0x100,0x90000,0x20000,0x3ed2000,0x2690000,0x100000,0x100000,0x20000,0x30000000,0x4000000,0x30000000,0x20000,0x0,0x40000000,0x0,0x20000,0x100000,0x20000,0x3ed0000,}; + } + private static void jj_la1_init_1() { + jj_la1_1 = new int[] {0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x3,0x0,0x3,0x0,0x0,0x0,0x0,}; + } + final private JJCalls[] jj_2_rtns = new JJCalls[1]; + private boolean jj_rescan = false; + private int jj_gc = 0; + + /** Constructor with InputStream. */ + public OriginalSyntaxParser(java.io.InputStream stream) { + this(stream, null); + } + /** Constructor with InputStream and supplied encoding */ + public OriginalSyntaxParser(java.io.InputStream stream, String encoding) { + try { jj_input_stream = new JavaCharStream(stream, encoding, 1, 1); } catch(java.io.UnsupportedEncodingException e) { throw new RuntimeException(e); } + token_source = new OriginalSyntaxParserTokenManager(jj_input_stream); + token = new Token(); + jj_ntk = -1; + jj_gen = 0; + for (int i = 0; i < 25; i++) jj_la1[i] = -1; + for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls(); + } + + /** Reinitialise. */ + public void ReInit(java.io.InputStream stream) { + ReInit(stream, null); + } + /** Reinitialise. 
*/ + public void ReInit(java.io.InputStream stream, String encoding) { + try { jj_input_stream.ReInit(stream, encoding, 1, 1); } catch(java.io.UnsupportedEncodingException e) { throw new RuntimeException(e); } + token_source.ReInit(jj_input_stream); + token = new Token(); + jj_ntk = -1; + jj_gen = 0; + for (int i = 0; i < 25; i++) jj_la1[i] = -1; + for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls(); + } + + /** Constructor. */ + public OriginalSyntaxParser(java.io.Reader stream) { + jj_input_stream = new JavaCharStream(stream, 1, 1); + token_source = new OriginalSyntaxParserTokenManager(jj_input_stream); + token = new Token(); + jj_ntk = -1; + jj_gen = 0; + for (int i = 0; i < 25; i++) jj_la1[i] = -1; + for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls(); + } + + /** Reinitialise. */ + public void ReInit(java.io.Reader stream) { + jj_input_stream.ReInit(stream, 1, 1); + token_source.ReInit(jj_input_stream); + token = new Token(); + jj_ntk = -1; + jj_gen = 0; + for (int i = 0; i < 25; i++) jj_la1[i] = -1; + for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls(); + } + + /** Constructor with generated Token Manager. */ + public OriginalSyntaxParser(OriginalSyntaxParserTokenManager tm) { + token_source = tm; + token = new Token(); + jj_ntk = -1; + jj_gen = 0; + for (int i = 0; i < 25; i++) jj_la1[i] = -1; + for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls(); + } + + /** Reinitialise. 
*/ + public void ReInit(OriginalSyntaxParserTokenManager tm) { + token_source = tm; + token = new Token(); + jj_ntk = -1; + jj_gen = 0; + for (int i = 0; i < 25; i++) jj_la1[i] = -1; + for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls(); + } + + private Token jj_consume_token(int kind) throws ParseException { + Token oldToken; + if ((oldToken = token).next != null) token = token.next; + else token = token.next = token_source.getNextToken(); + jj_ntk = -1; + if (token.kind == kind) { + jj_gen++; + if (++jj_gc > 100) { + jj_gc = 0; + for (int i = 0; i < jj_2_rtns.length; i++) { + JJCalls c = jj_2_rtns[i]; + while (c != null) { + if (c.gen < jj_gen) c.first = null; + c = c.next; + } + } + } + return token; + } + token = oldToken; + jj_kind = kind; + throw generateParseException(); + } + + static private final class LookaheadSuccess extends java.lang.Error { } + final private LookaheadSuccess jj_ls = new LookaheadSuccess(); + private boolean jj_scan_token(int kind) { + if (jj_scanpos == jj_lastpos) { + jj_la--; + if (jj_scanpos.next == null) { + jj_lastpos = jj_scanpos = jj_scanpos.next = token_source.getNextToken(); + } else { + jj_lastpos = jj_scanpos = jj_scanpos.next; + } + } else { + jj_scanpos = jj_scanpos.next; + } + if (jj_rescan) { + int i = 0; Token tok = token; + while (tok != null && tok != jj_scanpos) { i++; tok = tok.next; } + if (tok != null) jj_add_error_token(kind, i); + } + if (jj_scanpos.kind != kind) return true; + if (jj_la == 0 && jj_scanpos == jj_lastpos) throw jj_ls; + return false; + } + + +/** Get the next Token. */ + final public Token getNextToken() { + if (token.next != null) token = token.next; + else token = token.next = token_source.getNextToken(); + jj_ntk = -1; + jj_gen++; + return token; + } + +/** Get the specific Token. 
*/ + final public Token getToken(int index) { + Token t = token; + for (int i = 0; i < index; i++) { + if (t.next != null) t = t.next; + else t = t.next = token_source.getNextToken(); + } + return t; + } + + private int jj_ntk() { + if ((jj_nt=token.next) == null) + return (jj_ntk = (token.next=token_source.getNextToken()).kind); + else + return (jj_ntk = jj_nt.kind); + } + + private java.util.List jj_expentries = new java.util.ArrayList(); + private int[] jj_expentry; + private int jj_kind = -1; + private int[] jj_lasttokens = new int[100]; + private int jj_endpos; + + private void jj_add_error_token(int kind, int pos) { + if (pos >= 100) return; + if (pos == jj_endpos + 1) { + jj_lasttokens[jj_endpos++] = kind; + } else if (jj_endpos != 0) { + jj_expentry = new int[jj_endpos]; + for (int i = 0; i < jj_endpos; i++) { + jj_expentry[i] = jj_lasttokens[i]; + } + jj_entries_loop: for (java.util.Iterator it = jj_expentries.iterator(); it.hasNext();) { + int[] oldentry = (int[])(it.next()); + if (oldentry.length == jj_expentry.length) { + for (int i = 0; i < jj_expentry.length; i++) { + if (oldentry[i] != jj_expentry[i]) { + continue jj_entries_loop; + } + } + jj_expentries.add(jj_expentry); + break jj_entries_loop; + } + } + if (pos != 0) jj_lasttokens[(jj_endpos = pos) - 1] = kind; + } + } + + /** Generate ParseException. 
*/ + public ParseException generateParseException() { + jj_expentries.clear(); + boolean[] la1tokens = new boolean[34]; + if (jj_kind >= 0) { + la1tokens[jj_kind] = true; + jj_kind = -1; + } + for (int i = 0; i < 25; i++) { + if (jj_la1[i] == jj_gen) { + for (int j = 0; j < 32; j++) { + if ((jj_la1_0[i] & (1< jj_gen) { + jj_la = p.arg; jj_lastpos = jj_scanpos = p.first; + switch (i) { + case 0: jj_3_1(); break; + } + } + p = p.next; + } while (p != null); + } catch(LookaheadSuccess ls) { } + } + jj_rescan = false; + } + + private void jj_save(int index, int xla) { + JJCalls p = jj_2_rtns[index]; + while (p.gen > jj_gen) { + if (p.next == null) { p = p.next = new JJCalls(); break; } + p = p.next; + } + p.gen = jj_gen + xla - jj_la; p.first = token; p.arg = xla; + } + + static final class JJCalls { + int gen; + Token first; + int arg; + JJCalls next; + } + +} Propchange: lucene/java/trunk/contrib/queryparser/src/java/org/apache/lucene/queryParser/original/parser/OriginalSyntaxParser.java ------------------------------------------------------------------------------ svn:eol-style = native Added: lucene/java/trunk/contrib/queryparser/src/java/org/apache/lucene/queryParser/original/parser/OriginalSyntaxParser.jj URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/queryparser/src/java/org/apache/lucene/queryParser/original/parser/OriginalSyntaxParser.jj?rev=800191&view=auto ============================================================================== --- lucene/java/trunk/contrib/queryparser/src/java/org/apache/lucene/queryParser/original/parser/OriginalSyntaxParser.jj (added) +++ lucene/java/trunk/contrib/queryparser/src/java/org/apache/lucene/queryParser/original/parser/OriginalSyntaxParser.jj Mon Aug 3 03:38:44 2009 @@ -0,0 +1,484 @@ +/** + * Original file is based on the TextParser.jj from lucene 2.3 + */ + +options { + STATIC=false; + JAVA_UNICODE_ESCAPE=true; + USER_CHAR_STREAM=false; + IGNORE_CASE=false; + JDK_VERSION="1.5"; +} + 
+PARSER_BEGIN(OriginalSyntaxParser)
+package org.apache.lucene.queryParser.original.parser;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Vector;
+
+import org.apache.lucene.messages.Message;
+import org.apache.lucene.messages.MessageImpl;
+import org.apache.lucene.queryParser.core.QueryNodeError;
+import org.apache.lucene.queryParser.core.QueryNodeException;
+import org.apache.lucene.queryParser.core.QueryNodeParseException;
+import org.apache.lucene.queryParser.core.messages.QueryParserMessages;
+import org.apache.lucene.queryParser.core.nodes.AndQueryNode;
+import org.apache.lucene.queryParser.core.nodes.BooleanQueryNode;
+import org.apache.lucene.queryParser.core.nodes.BoostQueryNode;
+import org.apache.lucene.queryParser.core.nodes.FieldQueryNode;
+import org.apache.lucene.queryParser.core.nodes.FuzzyQueryNode;
+import org.apache.lucene.queryParser.core.nodes.ModifierQueryNode;
+import org.apache.lucene.queryParser.core.nodes.GroupQueryNode;
+import org.apache.lucene.queryParser.core.nodes.OpaqueQueryNode;
+import org.apache.lucene.queryParser.core.nodes.OrQueryNode;
+import org.apache.lucene.queryParser.core.nodes.ParametricQueryNode;
+import org.apache.lucene.queryParser.core.nodes.ParametricRangeQueryNode;
+import org.apache.lucene.queryParser.core.nodes.PrefixWildcardQueryNode;
+import org.apache.lucene.queryParser.core.nodes.SlopQueryNode;
+import org.apache.lucene.queryParser.core.nodes.ProximityQueryNode;
+import org.apache.lucene.queryParser.core.nodes.QueryNode;
+import org.apache.lucene.queryParser.core.nodes.QueryNodeImpl;
+import org.apache.lucene.queryParser.core.nodes.QuotedFieldQueryNode;
+import org.apache.lucene.queryParser.core.nodes.WildcardQueryNode;
+import org.apache.lucene.queryParser.core.parser.SyntaxParser;
+
+/**
+ * JavaCC-generated syntax parser that turns a query string into a tree of
+ * {@link org.apache.lucene.queryParser.core.nodes.QueryNode} objects.
+ */
+@SuppressWarnings("all")
+public class OriginalSyntaxParser implements SyntaxParser {
+
+  // Result codes of Conjunction(). The three values must be pairwise distinct,
+  // otherwise a caller cannot tell an AND conjunction from an OR conjunction.
+  private static final int CONJ_NONE = 0;
+  private static final int CONJ_AND  = 1;
+  private static final int CONJ_OR   = 2;
+
+
+  // syntax parser constructor; starts on an empty input, parse() re-initialises it per query
+  public OriginalSyntaxParser() {
+    this(new StringReader(""));
+  }
+
+  /**
+   * Parses a query string, returning a {@link org.apache.lucene.queryParser.core.nodes.QueryNode}.
+   *
+   * @param query the query string to be parsed.
+   * @param field the field handed to the top-level production; it is used for every term that
+   *        does not carry its own field prefix.
+   * @return the root node of the parsed query.
+   * @throws QueryNodeParseException if the parsing fails. A ParseException raised by the grammar
+   *         is rethrown after the query text has been attached to it; any Error raised while
+   *         parsing is wrapped together with an INVALID_SYNTAX_CANNOT_PARSE message.
+   */
+  public QueryNode parse(CharSequence query, CharSequence field) throws QueryNodeParseException {
+    ReInit(new StringReader(query.toString()));
+    try {
+      // TopLevelQuery is a Query followed by the end-of-input (EOF)
+      QueryNode querynode = TopLevelQuery(field);
+      return querynode;
+    }
+    catch (ParseException tme) {
+      tme.setQuery(query);
+      throw tme;
+    }
+    catch (Error tme) {
+      // NOTE(review): presumably meant for the token manager's lexical errors, but this
+      // also intercepts every other java.lang.Error -- confirm that is intended.
+      Message message = new MessageImpl(QueryParserMessages.INVALID_SYNTAX_CANNOT_PARSE, query, tme.getMessage());
+      QueryNodeParseException e = new QueryNodeParseException(tme);
+      e.setQuery(query);
+      e.setNonLocalizedMessage(message);
+      throw e;
+    }
+  }
+
+}
+
+PARSER_END(OriginalSyntaxParser)
+
+/* ***************** */
+/* Token Definitions */
+/* ***************** */
+
+// NOTE(review): the "<...>" token names and lexical-state prefixes in this section were
+// lost as markup in the archived copy and have been restored. Token names, their order and
+// the literal images agree with OriginalSyntaxParserConstants; the bodies of AND/OR/NOT and
+// of the RANGE*_QUOTED / RANGE*_GOOP expressions follow the classic Lucene QueryParser
+// grammar -- confirm against the repository copy of this file.
+
+<*> TOKEN : {
+  <#_NUM_CHAR:   ["0"-"9"] >
+// every character that follows a backslash is considered as an escaped character
+| <#_ESCAPED_CHAR: "\\" ~[] >
+| <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "\u3000", "+", "-", "!", "(", ")", ":", "^",
+                           "[", "]", "\"", "{", "}", "~", "*", "?", "\\" ]
+                       | <_ESCAPED_CHAR> ) >
+| <#_TERM_CHAR: ( <_TERM_START_CHAR> | <_ESCAPED_CHAR> | "-" | "+" ) >
+| <#_WHITESPACE: ( " " | "\t" | "\n" | "\r" | "\u3000") >
+| <#_QUOTED_CHAR: ( ~[ "\"", "\\" ] | <_ESCAPED_CHAR> ) >
+}
+
+<DEFAULT, RangeIn, RangeEx> SKIP : {
+  < <_WHITESPACE>>
+}
+
+<DEFAULT> TOKEN : {
+  <AND:       ("AND" | "&&") >
+| <OR:        ("OR" | "||") >
+| <NOT:       ("NOT" | "!") >
+| <PLUS:      "+" >
+| <MINUS:     "-" >
+| <LPAREN:    "(" >
+| <RPAREN:    ")" >
+| <COLON:     ":" >
+| <STAR:      "*" >
+| <CARAT:     "^" > : Boost
+| <QUOTED:     "\"" (<_QUOTED_CHAR>)* "\"">
+| <TERM:      <_TERM_START_CHAR> (<_TERM_CHAR>)*  >
+| <FUZZY_SLOP:     "~" ( (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? )? >
+| <PREFIXTERM:  ("*") | ( <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" ) >
+| <WILDTERM:  (<_TERM_START_CHAR> | [ "*", "?" ]) (<_TERM_CHAR> | ( [ "*", "?" ] ))* >
+| <RANGEIN_START: "[" > : RangeIn
+| <RANGEEX_START: "{" > : RangeEx
+}
+
+// A number is only recognised directly after "^"; the lexer then returns to DEFAULT.
+<Boost> TOKEN : {
+<NUMBER:    (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? > : DEFAULT
+}
+
+<RangeIn> TOKEN : {
+<RANGEIN_TO: "TO">
+| <RANGEIN_END: "]"> : DEFAULT
+| <RANGEIN_QUOTED: "\"" (~["\""] | "\\\"")+ "\"">
+| <RANGEIN_GOOP: (~[ " ", "]" ])+ >
+}
+
+<RangeEx> TOKEN : {
+<RANGEEX_TO: "TO">
+| <RANGEEX_END: "}"> : DEFAULT
+| <RANGEEX_QUOTED: "\"" (~["\""] | "\\\"")+ "\"">
+| <RANGEEX_GOOP: (~[ " ", "}" ])+ >
+}
+
+// *   Query  ::= ( Clause )*
+// *   Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )
+
+// Optional conjunction keyword: returns CONJ_AND, CONJ_OR, or CONJ_NONE when neither is present.
+int Conjunction() : {
+  int ret = CONJ_NONE;
+}
+{
+  [
+    <AND> { ret = CONJ_AND; }
+    | <OR>  { ret = CONJ_OR; }
+  ]
+  { return ret; }
+}
+
+// Optional clause modifier: "+" is MOD_REQ, "-" and NOT are both MOD_NOT, otherwise MOD_NONE.
+ModifierQueryNode.Modifier Modifiers() : {
+  ModifierQueryNode.Modifier ret = ModifierQueryNode.Modifier.MOD_NONE;
+}
+{
+  [
+     <PLUS> { ret = ModifierQueryNode.Modifier.MOD_REQ; }
+     | <MINUS> { ret = ModifierQueryNode.Modifier.MOD_NOT; }
+     | <NOT> { ret = ModifierQueryNode.Modifier.MOD_NOT; }
+  ]
+  { return ret; }
+}
+
+// This makes sure that there is no garbage after the query string
+QueryNode TopLevelQuery(CharSequence field) :
+{
+  QueryNode q;
+}
+{
+  q=Query(field) <EOF>
+  {
+    return q;
+  }
+}
+
+// These changes were made to introduce operator precedence:
+// - Clause() now returns a QueryNode.
+// - The modifiers are consumed by Clause() and returned as part of the QueryNode Object
+// - Query does not consume conjunctions (AND, OR) anymore.
+// - This is now done by two new non-terminals: ConjQuery and DisjQuery
+// The parse tree looks similar to this:
+//       Query ::= DisjQuery ( DisjQuery )*
+//   DisjQuery ::= ConjQuery ( OR ConjQuery )*
+//   ConjQuery ::= Clause ( AND Clause )*
+//      Clause ::= [ Modifier ] ...
+
+
+// Query ::= DisjQuery ( DisjQuery )*
+// Juxtaposed sub-queries (no operator between them) are collected into one
+// BooleanQueryNode; a single sub-query is returned as is.
+QueryNode Query(CharSequence field) :
+{
+  Vector<QueryNode> clauses = null;
+  QueryNode c, first=null;
+}
+{
+  first=DisjQuery(field)
+  (
+    c=DisjQuery(field)
+    {
+      // the list is only allocated once a second sub-query shows up
+      if (clauses == null) {
+        clauses = new Vector<QueryNode>();
+        clauses.addElement(first);
+      }
+      clauses.addElement(c);
+    }
+  )*
+  {
+    if (clauses != null) {
+      return new BooleanQueryNode(clauses);
+    } else {
+      return first;
+    }
+  }
+}
+
+// DisjQuery ::= ConjQuery ( OR ConjQuery )*
+// The <OR> separator is what distinguishes this production from Query; without it
+// the two loops would compete for the same input.
+QueryNode DisjQuery(CharSequence field) : {
+  QueryNode first, c;
+  Vector<QueryNode> clauses = null;
+}
+{
+  first = ConjQuery(field)
+  (
+    <OR> c=ConjQuery(field)
+    {
+      if (clauses == null) {
+        clauses = new Vector<QueryNode>();
+        clauses.addElement(first);
+      }
+      clauses.addElement(c);
+    }
+  )*
+  {
+    if (clauses != null) {
+      return new OrQueryNode(clauses);
+    } else {
+      return first;
+    }
+  }
+}
+
+// ConjQuery ::= ModClause ( AND ModClause )*
+QueryNode ConjQuery(CharSequence field) : {
+  QueryNode first, c;
+  Vector<QueryNode> clauses = null;
+}
+{
+  first = ModClause(field)
+  (
+    <AND> c=ModClause(field)
+    {
+      if (clauses == null) {
+        clauses = new Vector<QueryNode>();
+        clauses.addElement(first);
+      }
+      clauses.addElement(c);
+    }
+  )*
+  {
+    if (clauses != null) {
+      return new AndQueryNode(clauses);
+    } else {
+      return first;
+    }
+  }
+}
+
+// QueryNode Query(CharSequence field) :
+// {
+// List clauses = new ArrayList();
+//   List modifiers = new ArrayList();
+//   QueryNode q, firstQuery=null;
+//   ModifierQueryNode.Modifier mods;
+//   int conj;
+// }
+// {
+//   mods=Modifiers() q=Clause(field)
+//   {
+//     if (mods == ModifierQueryNode.Modifier.MOD_NONE) firstQuery=q;
+//
+//     // do not create modifier nodes with MOD_NONE
+//    	if (mods != ModifierQueryNode.Modifier.MOD_NONE) {
+//    		q = new ModifierQueryNode(q, mods);
+//    	}
+//    	clauses.add(q);
+//   }
+//   (
+//     conj=Conjunction() mods=Modifiers() q=Clause(field)
+//     {
+// 	    // do not create modifier nodes with MOD_NONE
+// 	   	if (mods != ModifierQueryNode.Modifier.MOD_NONE) {
+// 	   		q = new ModifierQueryNode(q, mods);
+// 	   	}
+// 	   	clauses.add(q);
+// 	   	//TODO: figure out what to do with AND and ORs
+//   }
+//   )*
+//     {
+//      if (clauses.size() == 1 && firstQuery
!= null)
+//         return firstQuery;
+//       else {
+//       		return new BooleanQueryNode(clauses);
+//       }
+//     }
+// }
+
+// ModClause ::= Modifiers Clause
+// Wraps the clause in a ModifierQueryNode unless no modifier was given.
+QueryNode ModClause(CharSequence field) : {
+  QueryNode q;
+  ModifierQueryNode.Modifier mods;
+}
+{
+  mods=Modifiers() q= Clause(field) {
+    if (mods != ModifierQueryNode.Modifier.MOD_NONE) {
+      q = new ModifierQueryNode(q, mods);
+    }
+    return q;
+  }
+}
+
+// Clause ::= [ (<TERM> | "*") ":" ] ( Term | "(" Query ")" [ "^" <NUMBER> ] )
+// An optional field prefix replaces the inherited field for this clause only.
+QueryNode Clause(CharSequence field) : {
+  QueryNode q;
+  Token fieldToken=null, boost=null;
+  boolean group = false;
+}
+{
+  [
+    // two tokens of lookahead are needed to tell "name:" from a plain term
+    LOOKAHEAD(2)
+    (
+    fieldToken=<TERM> <COLON> {field=EscapeQuerySyntaxImpl.discardEscapeChar(fieldToken.image);}
+    | <STAR> <COLON> {field="*";}
+    )
+  ]
+
+  (
+   q=Term(field)
+   | <LPAREN> q=Query(field) <RPAREN> (<CARAT> boost=<NUMBER>)? {group=true;}
+
+  )
+    {
+      if (boost != null) {
+		  float f = (float)1.0;
+		  try {
+		    f = Float.valueOf(boost.image).floatValue();
+		    // avoid boosting null queries, such as those caused by stop words
+	  		if (q != null) {
+	    		q = new BoostQueryNode(q, f);
+	  		}
+		  } catch (Exception ignored) {
+		     /* Should this be handled somehow? (defaults to "no boost", if
+	         * boost number is invalid)
+	         */
+		  }
+      }
+      if (group) { q = new GroupQueryNode(q);}
+      return q;
+    }
+}
+
+
+// Term ::= simple term (optionally fuzzy and/or boosted)
+//        | inclusive range  [a TO b]
+//        | exclusive range  {a TO b}
+//        | quoted phrase    (optionally with slop and/or boost)
+// NOTE(review): the "<...>" token references were lost as markup in the archived copy and
+// have been restored from the token kinds consumed by the generated parser -- confirm
+// against the repository copy of this file.
+QueryNode Term(CharSequence field) : {
+  Token term, boost=null, fuzzySlop=null, goop1, goop2;
+  boolean prefix = false;
+  boolean wildcard = false;
+  boolean fuzzy = false;
+  QueryNode q =null;
+  ParametricQueryNode qLower, qUpper;
+  float defaultMinSimilarity = 0.5f;
+}
+{
+  (
+     (
+ 	   term=<TERM> { q = new FieldQueryNode(field, EscapeQuerySyntaxImpl.discardEscapeChar(term.image), term.beginColumn, term.endColumn); }
+       | term=<STAR> { wildcard=true; q = new WildcardQueryNode(field, EscapeQuerySyntaxImpl.discardEscapeChar(term.image), term.beginColumn, term.endColumn); }
+       | term=<PREFIXTERM> { prefix=true; q = new PrefixWildcardQueryNode(field, EscapeQuerySyntaxImpl.discardEscapeChar(term.image), term.beginColumn, term.endColumn); }
+       | term=<WILDTERM> { wildcard=true; q = new WildcardQueryNode(field, EscapeQuerySyntaxImpl.discardEscapeChar(term.image), term.beginColumn, term.endColumn); }
+       // NOTE(review): no node is built for this alternative, so q stays null unless a
+       // fuzzy suffix follows -- confirm whether <NUMBER> can reach here at all.
+       | term=<NUMBER>
+     )
+     [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ]
+     [ <CARAT> boost=<NUMBER> [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ] ]
+     {
+       if (!wildcard && !prefix && fuzzy) {
+       	  float fms = defaultMinSimilarity;
+       	  try {
+            fms = Float.valueOf(fuzzySlop.image.substring(1)).floatValue();
+       	  } catch (Exception ignored) { /* a bare "~" keeps defaultMinSimilarity */ }
+       	 if(fms < 0.0f || fms > 1.0f){
+       	   throw new ParseException(new MessageImpl(QueryParserMessages.INVALID_SYNTAX_FUZZY_LIMITS));
+       	 }
+       	 q = new FuzzyQueryNode(field, EscapeQuerySyntaxImpl.discardEscapeChar(term.image), fms, term.beginColumn, term.endColumn);
+       }
+     }
+     | ( <RANGEIN_START> ( goop1=<RANGEIN_GOOP>|goop1=<RANGEIN_QUOTED> )
+         [ <RANGEIN_TO> ] ( goop2=<RANGEIN_GOOP>|goop2=<RANGEIN_QUOTED> )
+         <RANGEIN_END> )
+       [ <CARAT> boost=<NUMBER> ]
+        {
+          // quoted bounds drop their surrounding quotes
+          if (goop1.kind == RANGEIN_QUOTED) {
+            goop1.image = goop1.image.substring(1, goop1.image.length()-1);
+          }
+          if (goop2.kind == RANGEIN_QUOTED) {
+            goop2.image = goop2.image.substring(1, goop2.image.length()-1);
+          }
+
+          qLower = new ParametricQueryNode(field, ParametricQueryNode.CompareOperator.GE,
+		                               EscapeQuerySyntaxImpl.discardEscapeChar(goop1.image), goop1.beginColumn, goop1.endColumn);
+		  qUpper = new ParametricQueryNode(field, ParametricQueryNode.CompareOperator.LE,
+		                               EscapeQuerySyntaxImpl.discardEscapeChar(goop2.image), goop2.beginColumn, goop2.endColumn);
+          q = new ParametricRangeQueryNode(qLower, qUpper);
+        }
+     | ( <RANGEEX_START> ( goop1=<RANGEEX_GOOP>|goop1=<RANGEEX_QUOTED> )
+         [ <RANGEEX_TO> ] ( goop2=<RANGEEX_GOOP>|goop2=<RANGEEX_QUOTED> )
+         <RANGEEX_END> )
+       [ <CARAT> boost=<NUMBER> ]
+        {
+          if (goop1.kind == RANGEEX_QUOTED) {
+            goop1.image = goop1.image.substring(1, goop1.image.length()-1);
+          }
+          if (goop2.kind == RANGEEX_QUOTED) {
+            goop2.image = goop2.image.substring(1, goop2.image.length()-1);
+          }
+          qLower = new ParametricQueryNode(field, ParametricQueryNode.CompareOperator.GT,
+		                               EscapeQuerySyntaxImpl.discardEscapeChar(goop1.image), goop1.beginColumn, goop1.endColumn);
+		  qUpper = new ParametricQueryNode(field, ParametricQueryNode.CompareOperator.LT,
+		                               EscapeQuerySyntaxImpl.discardEscapeChar(goop2.image), goop2.beginColumn, goop2.endColumn);
+          q = new ParametricRangeQueryNode(qLower, qUpper);
+        }
+     | term=<QUOTED> {q = new QuotedFieldQueryNode(field, EscapeQuerySyntaxImpl.discardEscapeChar(term.image.substring(1, term.image.length()-1)), term.beginColumn + 1, term.endColumn - 1);}
+       [ fuzzySlop=<FUZZY_SLOP> ]
+       [ <CARAT> boost=<NUMBER> ]
+       {
+         int phraseSlop = 0;
+
+         if (fuzzySlop != null) {
+           try {
+             phraseSlop = Float.valueOf(fuzzySlop.image.substring(1)).intValue();
+             q = new SlopQueryNode(q, phraseSlop);
+           }
+           catch (Exception ignored) {
+            /* Should this be handled somehow? (defaults to "no PhraseSlop", if
+	         * slop number is invalid)
+	         */
+           }
+         }
+
+       }
+  )
+  {
+	  if (boost != null) {
+		  float f = (float)1.0;
+		  try {
+		    f = Float.valueOf(boost.image).floatValue();
+		    // avoid boosting null queries, such as those caused by stop words
+	  		if (q != null) {
+	    		q = new BoostQueryNode(q, f);
+	  		}
+		  } catch (Exception ignored) {
+		     /* Should this be handled somehow? (defaults to "no boost", if
+	         * boost number is invalid)
+	         */
+		  }
+	  }
+      return q;
+  }
+}

Propchange: lucene/java/trunk/contrib/queryparser/src/java/org/apache/lucene/queryParser/original/parser/OriginalSyntaxParser.jj
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/contrib/queryparser/src/java/org/apache/lucene/queryParser/original/parser/OriginalSyntaxParserConstants.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/queryparser/src/java/org/apache/lucene/queryParser/original/parser/OriginalSyntaxParserConstants.java?rev=800191&view=auto
==============================================================================
--- lucene/java/trunk/contrib/queryparser/src/java/org/apache/lucene/queryParser/original/parser/OriginalSyntaxParserConstants.java (added)
+++ lucene/java/trunk/contrib/queryparser/src/java/org/apache/lucene/queryParser/original/parser/OriginalSyntaxParserConstants.java Mon Aug  3 03:38:44 2009
@@ -0,0 +1,125 @@
+/* Generated By:JavaCC: Do not edit this line.
OriginalSyntaxParserConstants.java */ +package org.apache.lucene.queryParser.original.parser; + + +/** + * Token literal values and constants. + * Generated by org.javacc.parser.OtherFilesGen#start() + */ +public interface OriginalSyntaxParserConstants { + + /** End of File. */ + int EOF = 0; + /** RegularExpression Id. */ + int _NUM_CHAR = 1; + /** RegularExpression Id. */ + int _ESCAPED_CHAR = 2; + /** RegularExpression Id. */ + int _TERM_START_CHAR = 3; + /** RegularExpression Id. */ + int _TERM_CHAR = 4; + /** RegularExpression Id. */ + int _WHITESPACE = 5; + /** RegularExpression Id. */ + int _QUOTED_CHAR = 6; + /** RegularExpression Id. */ + int AND = 8; + /** RegularExpression Id. */ + int OR = 9; + /** RegularExpression Id. */ + int NOT = 10; + /** RegularExpression Id. */ + int PLUS = 11; + /** RegularExpression Id. */ + int MINUS = 12; + /** RegularExpression Id. */ + int LPAREN = 13; + /** RegularExpression Id. */ + int RPAREN = 14; + /** RegularExpression Id. */ + int COLON = 15; + /** RegularExpression Id. */ + int STAR = 16; + /** RegularExpression Id. */ + int CARAT = 17; + /** RegularExpression Id. */ + int QUOTED = 18; + /** RegularExpression Id. */ + int TERM = 19; + /** RegularExpression Id. */ + int FUZZY_SLOP = 20; + /** RegularExpression Id. */ + int PREFIXTERM = 21; + /** RegularExpression Id. */ + int WILDTERM = 22; + /** RegularExpression Id. */ + int RANGEIN_START = 23; + /** RegularExpression Id. */ + int RANGEEX_START = 24; + /** RegularExpression Id. */ + int NUMBER = 25; + /** RegularExpression Id. */ + int RANGEIN_TO = 26; + /** RegularExpression Id. */ + int RANGEIN_END = 27; + /** RegularExpression Id. */ + int RANGEIN_QUOTED = 28; + /** RegularExpression Id. */ + int RANGEIN_GOOP = 29; + /** RegularExpression Id. */ + int RANGEEX_TO = 30; + /** RegularExpression Id. */ + int RANGEEX_END = 31; + /** RegularExpression Id. */ + int RANGEEX_QUOTED = 32; + /** RegularExpression Id. */ + int RANGEEX_GOOP = 33; + + /** Lexical state. 
*/ + int Boost = 0; + /** Lexical state. */ + int RangeEx = 1; + /** Lexical state. */ + int RangeIn = 2; + /** Lexical state. */ + int DEFAULT = 3; + + /** Literal token values. */ + String[] tokenImage = { + "", + "<_NUM_CHAR>", + "<_ESCAPED_CHAR>", + "<_TERM_START_CHAR>", + "<_TERM_CHAR>", + "<_WHITESPACE>", + "<_QUOTED_CHAR>", + "", + "", + "", + "", + "\"+\"", + "\"-\"", + "\"(\"", + "\")\"", + "\":\"", + "\"*\"", + "\"^\"", + "", + "", + "", + "", + "", + "\"[\"", + "\"{\"", + "", + "\"TO\"", + "\"]\"", + "", + "", + "\"TO\"", + "\"}\"", + "", + "", + }; + +} Propchange: lucene/java/trunk/contrib/queryparser/src/java/org/apache/lucene/queryParser/original/parser/OriginalSyntaxParserConstants.java ------------------------------------------------------------------------------ svn:eol-style = native