lucene-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From o...@apache.org
Subject cvs commit: jakarta-lucene/src/java/org/apache/lucene/queryParser QueryParser.jj
Date Sun, 14 Jul 2002 17:16:21 GMT
otis        2002/07/14 10:16:21

  Modified:    src/java/org/apache/lucene/queryParser QueryParser.jj
  Log:
  - Added Péter Halácsy's changes that allow setting of default boolean
    operator.
  
  Revision  Changes    Path
  1.19      +105 -62   jakarta-lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj
  
  Index: QueryParser.jj
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj,v
  retrieving revision 1.18
  retrieving revision 1.19
  diff -u -r1.18 -r1.19
  --- QueryParser.jj	25 Jun 2002 00:05:31 -0000	1.18
  +++ QueryParser.jj	14 Jul 2002 17:16:21 -0000	1.19
  @@ -78,7 +78,7 @@
    *
    * The syntax for query strings is as follows:
    * A Query is a series of clauses.
  - * A clause may be prefixed by: 
  + * A clause may be prefixed by:
    * <ul>
    * <li> a plus (<code>+</code>) or a minus (<code>-</code>) sign, indicating
    * that the clause is required or prohibited respectively; or
  @@ -121,11 +121,11 @@
         QueryParser parser = new QueryParser(field, analyzer);
         return parser.parse(query);
       }
  -    catch (TokenMgrError tme) { 
  +    catch (TokenMgrError tme) {
         throw new ParseException(tme.getMessage());
       }
     }
  -       
  +
     Analyzer analyzer;
     String field;
     int phraseSlop = 0;
  @@ -157,8 +157,30 @@
     /** Gets the default slop for phrases. */
     public int getPhraseSlop() { return phraseSlop; }
   
  -  private void addClause(Vector clauses, int conj, int mods, 
  -                        Query q) {
  +    // CODE ADDED BY PETER HALACSY
  +
  +    /** The actual mode that the parser uses to parse queries */
  +    public static final int DEFAULT_OPERATOR_OR  = 0;
  +    public static final int DEFAULT_OPERATOR_AND = 1;
  +
  +    private int mode = DEFAULT_OPERATOR_OR;
  +
  +    /**
  +     * Set the mode of the QueryParser. In classic mode (<code>DEFAULT_OPERATOR_OR</code>)
  +     * terms without any modifiers are considered optional: for example <code>
  +     * capital of Hungary</code> is equal to <code>capital OR of OR Hungary</code>.<br/>
  +     * In <code>DEFAULT_OPERATOR_AND</code> mode terms are considered to be in conjunction: the
  +     * above mentioned query is parsed as <code>capital AND of AND Hungary</code>
  +     */
  +    public void setMode(int mode) {
  +	this.mode = mode;
  +    }
  +
  +    public int getMode() {
  +	return this.mode;
  +    }
  +
  +  private void addClause(Vector clauses, int conj, int mods, Query q) {
       boolean required, prohibited;
   
       // If this term is introduced by AND, make the preceding term required,
  @@ -168,28 +190,49 @@
         if (!c.prohibited)
           c.required = true;
       }
  +    // THIS CODE ADDED BY PETER HALACSY
  +    if(mode == DEFAULT_OPERATOR_AND && conj == CONJ_OR) {
  +	// If this term is introduced by OR, make the preceding term optional,
  +	// unless it's prohibited (that means we leave -a OR b but +a OR b-->a OR b)
  +	// notice if the input is a OR b, first term is parsed as required; without
  +	// this modification a OR b would be parsed as +a OR b
  +	BooleanClause c = (BooleanClause) clauses.elementAt(clauses.size()-1);
  +      if (!c.prohibited)
  +          c.required = false;
  +    }
  +    // THIS CODE ADDED BY PETER HALACSY
   
       // We might have been passed a null query; the term might have been
  -    // filtered away by the analyzer. 
  +    // filtered away by the analyzer.
       if (q == null)
         return;
   
  -    // We set REQUIRED if we're introduced by AND or +; PROHIBITED if
  -    // introduced by NOT or -; make sure not to set both.
  -    prohibited = (mods == MOD_NOT);
  -    required = (mods == MOD_REQ);
  -    if (conj == CONJ_AND && !prohibited)
  -      required = true;
  +    if(mode == DEFAULT_OPERATOR_OR) {
  +	// THIS IS THE ORIGINAL CODE
  +        // We set REQUIRED if we're introduced by AND or +; PROHIBITED if
  +        // introduced by NOT or -; make sure not to set both.
  +	prohibited = (mods == MOD_NOT);
  +	required = (mods == MOD_REQ);
  +	if (conj == CONJ_AND && !prohibited) {
  +	    required = true;
  +	}
  +     } else {
  +	 // THIS CODE ADDED BY PETER HALACSY
  +	 // We set PROHIBITED if we're  introduced by NOT or -; We set REQUIRED
  +	 // if not PROHIBITED and not introduced by OR
  +	 prohibited = (mods == MOD_NOT);
  +	 required   = (!prohibited && conj != CONJ_OR);
  +	}
       clauses.addElement(new BooleanClause(q, required, prohibited));
     }
   
  -  private Query getFieldQuery(String field, 
  -                              Analyzer analyzer, 
  +  private Query getFieldQuery(String field,
  +                              Analyzer analyzer,
                                 String queryText) {
       // Use the analyzer to get all the tokens, and then build a TermQuery,
       // PhraseQuery, or nothing based on the term count
  -    
  -    TokenStream source = analyzer.tokenStream(field, 
  +
  +    TokenStream source = analyzer.tokenStream(field,
                                                 new StringReader(queryText));
       Vector v = new Vector();
       org.apache.lucene.analysis.Token t;
  @@ -197,17 +240,17 @@
       while (true) {
         try {
           t = source.next();
  -      } 
  +      }
         catch (IOException e) {
           t = null;
         }
  -      if (t == null) 
  +      if (t == null)
           break;
         v.addElement(t.termText());
       }
  -    if (v.size() == 0) 
  +    if (v.size() == 0)
         return null;
  -    else if (v.size() == 1) 
  +    else if (v.size() == 1)
         return new TermQuery(new Term(field, (String) v.elementAt(0)));
       else {
         PhraseQuery q = new PhraseQuery();
  @@ -219,11 +262,11 @@
       }
     }
   
  -  private Query getRangeQuery(String field, 
  -                              Analyzer analyzer, 
  -                              String part1, 
  +  private Query getRangeQuery(String field,
  +                              Analyzer analyzer,
  +                              String part1,
                                 String part2,
  -                              boolean inclusive) 
  +                              boolean inclusive)
     {
       boolean isDate = false, isNumber = false;
   
  @@ -242,13 +285,13 @@
         // @@@ Add number support
       }
   
  -    return new RangeQuery(new Term(field, part1), 
  -                          new Term(field, part2), 
  +    return new RangeQuery(new Term(field, part1),
  +                          new Term(field, part2),
                             inclusive);
     }
   
     public static void main(String[] args) throws Exception {
  -    QueryParser qp = new QueryParser("field", 
  +    QueryParser qp = new QueryParser("field",
                              new org.apache.lucene.analysis.SimpleAnalyzer());
       Query q = qp.parse(args[0]);
       System.out.println(q.toString("field"));
  @@ -271,10 +314,10 @@
   
   <*> TOKEN : {
     <#_NUM_CHAR:   ["0"-"9"] >
  -| <#_ESCAPED_CHAR: "\\" [ "\\", "+", "-", "!", "(", ")", ":", "^", 
  +| <#_ESCAPED_CHAR: "\\" [ "\\", "+", "-", "!", "(", ")", ":", "^",
                             "[", "]", "\"", "{", "}", "~", "*", "?" ] >
  -| <#_TERM_START_CHAR: ( ~[ " ", "\t", "+", "-", "!", "(", ")", ":", "^", 
  -                           "[", "]", "\"", "{", "}", "~", "*", "?" ] 
  +| <#_TERM_START_CHAR: ( ~[ " ", "\t", "+", "-", "!", "(", ")", ":", "^",
  +                           "[", "]", "\"", "{", "}", "~", "*", "?" ]
                          | <_ESCAPED_CHAR> ) >
   | <#_TERM_CHAR: ( <_TERM_START_CHAR> | <_ESCAPED_CHAR> ) >
   | <#_WHITESPACE: ( " " | "\t" ) >
  @@ -299,7 +342,7 @@
   | <FUZZY:     "~" >
   | <SLOP:      "~" (<_NUM_CHAR>)+ >
   | <PREFIXTERM:  <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" >
  -| <WILDTERM:  <_TERM_START_CHAR> 
  +| <WILDTERM:  <_TERM_START_CHAR>
                 (<_TERM_CHAR> | ( [ "*", "?" ] ))* >
   | <RANGEIN_START: "[" > : RangeIn
   | <RANGEEX_START: "{" > : RangeEx
  @@ -326,23 +369,23 @@
   // *   Query  ::= ( Clause )*
   // *   Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )
   
  -int Conjunction() : { 
  +int Conjunction() : {
     int ret = CONJ_NONE;
   }
   {
  -  [ 
  -    <AND> { ret = CONJ_AND; } 
  +  [
  +    <AND> { ret = CONJ_AND; }
       | <OR>  { ret = CONJ_OR; }
     ]
     { return ret; }
   }
   
  -int Modifiers() : { 
  +int Modifiers() : {
     int ret = MOD_NONE;
   }
   {
  -  [ 
  -     <PLUS> { ret = MOD_REQ; }  
  +  [
  +     <PLUS> { ret = MOD_REQ; }
        | <MINUS> { ret = MOD_NOT; }
        | <NOT> { ret = MOD_NOT; }
     ]
  @@ -353,17 +396,17 @@
   {
     Vector clauses = new Vector();
     Query q, firstQuery=null;
  -  int conj, mods; 
  +  int conj, mods;
   }
   {
  -  mods=Modifiers() q=Clause(field) 
  -  { 
  -    addClause(clauses, CONJ_NONE, mods, q); 
  -    if (mods == MOD_NONE) 
  -        firstQuery=q; 
  +  mods=Modifiers() q=Clause(field)
  +  {
  +    addClause(clauses, CONJ_NONE, mods, q);
  +    if (mods == MOD_NONE)
  +        firstQuery=q;
     }
  -  ( 
  -    conj=Conjunction() mods=Modifiers() q=Clause(field) 
  +  (
  +    conj=Conjunction() mods=Modifiers() q=Clause(field)
       { addClause(clauses, conj, mods, q); }
     )*
       {
  @@ -389,16 +432,16 @@
     ]
   
     (
  -   q=Term(field) 
  +   q=Term(field)
      | <LPAREN> q=Query(field) <RPAREN>
     )
       {
         return q;
       }
   }
  -    
   
  -Query Term(String field) : { 
  +
  +Query Term(String field) : {
     Token term, boost=null, slop=null, goop1, goop2;
     boolean prefix = false;
     boolean wildcard = false;
  @@ -407,7 +450,7 @@
     Query q;
   }
   {
  -  ( 
  +  (
        (
          term=<TERM>
          | term=<PREFIXTERM> { prefix=true; }
  @@ -416,19 +459,19 @@
        )
        [ <FUZZY> { fuzzy=true; } ]
        [ <CARAT> boost=<NUMBER> [ <FUZZY> { fuzzy=true; } ] ]
  -     { 
  +     {
          if (wildcard)
            q = new WildcardQuery(new Term(field, term.image));
  -       else if (prefix) 
  +       else if (prefix)
            q = new PrefixQuery(new Term(field, term.image.substring
                                         (0, term.image.length()-1)));
          else if (fuzzy)
            q = new FuzzyQuery(new Term(field, term.image));
          else
  -         q = getFieldQuery(field, analyzer, term.image); 
  +         q = getFieldQuery(field, analyzer, term.image);
        }
       | ( <RANGEIN_START> ( goop1=<RANGEIN_GOOP>|goop1=<RANGEIN_QUOTED> )
  -         [ <RANGEIN_TO> ] ( goop2=<RANGEIN_GOOP>|goop2=<RANGEIN_QUOTED> ) 
  +         [ <RANGEIN_TO> ] ( goop2=<RANGEIN_GOOP>|goop2=<RANGEIN_QUOTED> )
            <RANGEIN_END> )
          [ <CARAT> boost=<NUMBER> ]
           {
  @@ -440,7 +483,7 @@
             q = getRangeQuery(field, analyzer, goop1.image, goop2.image, true);
           }
       | ( <RANGEEX_START> ( goop1=<RANGEEX_GOOP>|goop1=<RANGEEX_QUOTED> )
  -         [ <RANGEEX_TO> ] ( goop2=<RANGEEX_GOOP>|goop2=<RANGEEX_QUOTED> ) 
  +         [ <RANGEEX_TO> ] ( goop2=<RANGEEX_GOOP>|goop2=<RANGEEX_QUOTED> )
            <RANGEEX_END> )
          [ <CARAT> boost=<NUMBER> ]
           {
  @@ -451,14 +494,14 @@
   
             q = getRangeQuery(field, analyzer, goop1.image, goop2.image, false);
           }
  -     | term=<QUOTED> 
  +     | term=<QUOTED>
          [ slop=<SLOP> ]
          [ <CARAT> boost=<NUMBER> ]
  -       { 
  -         q = getFieldQuery(field, analyzer, 
  -                           term.image.substring(1, term.image.length()-1)); 
  +       {
  +         q = getFieldQuery(field, analyzer,
  +                           term.image.substring(1, term.image.length()-1));
            if (slop != null && q instanceof PhraseQuery) {
  -           try { 
  +           try {
                int s = Float.valueOf(slop.image.substring(1)).intValue();
                ((PhraseQuery) q).setSlop(s);
              }
  @@ -466,16 +509,16 @@
            }
          }
     )
  -  { 
  +  {
       if (boost != null) {
         float f = (float) 1.0;
  -      try { 
  +      try {
           f = Float.valueOf(boost.image).floatValue();
         }
         catch (Exception ignored) { }
   
         q.setBoost(f);
       }
  -    return q; 
  +    return q;
     }
   }
  
  
  

--
To unsubscribe, e-mail:   <mailto:lucene-dev-unsubscribe@jakarta.apache.org>
For additional commands, e-mail: <mailto:lucene-dev-help@jakarta.apache.org>


Mime
View raw message