incubator-blur-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From amccu...@apache.org
Subject [06/27] git commit: Fixed BLUR-169
Date Mon, 29 Jul 2013 12:57:37 GMT
Fixed BLUR-169


Project: http://git-wip-us.apache.org/repos/asf/incubator-blur/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-blur/commit/f9f6d39b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-blur/tree/f9f6d39b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-blur/diff/f9f6d39b

Branch: refs/heads/0.2.0-newtypesystem
Commit: f9f6d39ba28a044391267741767e20f2dcb0e857
Parents: 5d6feb2
Author: Aaron McCurry <amccurry@gmail.com>
Authored: Mon Jul 22 20:11:39 2013 -0400
Committer: Aaron McCurry <amccurry@gmail.com>
Committed: Mon Jul 22 20:11:39 2013 -0400

----------------------------------------------------------------------
 .../apache/blur/lucene/search/SuperParser.java  | 228 +++++++++----------
 .../blur/lucene/search/SuperParserTest.java     |  29 ++-
 2 files changed, 123 insertions(+), 134 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/f9f6d39b/blur-query/src/main/java/org/apache/blur/lucene/search/SuperParser.java
----------------------------------------------------------------------
diff --git a/blur-query/src/main/java/org/apache/blur/lucene/search/SuperParser.java b/blur-query/src/main/java/org/apache/blur/lucene/search/SuperParser.java
index 77622af..015e13e 100644
--- a/blur-query/src/main/java/org/apache/blur/lucene/search/SuperParser.java
+++ b/blur-query/src/main/java/org/apache/blur/lucene/search/SuperParser.java
@@ -1,44 +1,31 @@
 package org.apache.blur.lucene.search;
 
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.util.List;
+import java.util.HashMap;
+import java.util.Map;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
 import org.apache.blur.analysis.BlurAnalyzer;
+import org.apache.blur.log.Log;
+import org.apache.blur.log.LogFactory;
 import org.apache.blur.thrift.generated.ScoreType;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.queryparser.classic.ParseException;
 import org.apache.lucene.queryparser.classic.QueryParser;
 import org.apache.lucene.search.BooleanClause;
-import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.Filter;
 import org.apache.lucene.search.FilteredQuery;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.util.Version;
 
 public class SuperParser extends BlurQueryParser {
 
-  private static final String MUST_NOT_STRING = "-";
-  private static final String MUST_STRING = "+";
-  private static final Pattern PATTERN = Pattern.compile("([-+]{0,1})\\s*?super\\s*?\\:\\s*?\\<(.*?)\\>");
+  private static Log LOG = LogFactory.getLog(SuperParser.class);
+
+  private final String _defaultField = SUPER;
+  private static final Pattern PATTERN = Pattern.compile("super\\s*?\\:\\s*?\\<(.*?)\\>");
   private static final Pattern CHECK = Pattern.compile("super\\s*?\\:\\s*?\\<");
   private static final String SEP = ".";
   private final boolean _superSearch;
@@ -46,6 +33,7 @@ public class SuperParser extends BlurQueryParser {
   private final ScoreType _scoreType;
   private final Version _matchVersion;
   private final Term _defaultPrimeDocTerm;
+  private final String _prefixToSub = "______SUPERBASEFIELD_";
 
   public SuperParser(Version matchVersion, BlurAnalyzer a, boolean superSearch, Filter queryFilter,
       ScoreType scoreType, Term defaultPrimeDocTerm) {
@@ -59,124 +47,137 @@ public class SuperParser extends BlurQueryParser {
     setAllowLeadingWildcard(true);
   }
 
-  @Override
-  public Query parse(String query) throws ParseException {
-    Matcher matcher = PATTERN.matcher(query);
-    BooleanQuery booleanQuery = null;
-    int lastEnd = -1;
+  public Query parse(String queryStr) throws ParseException {
+    Matcher matcher = PATTERN.matcher(queryStr);
+    Map<String, Query> subQueries = new HashMap<String, Query>();
+    int subQueryIndex = 0;
+    StringBuilder builder = new StringBuilder();
+    int lastStart = 0;
+    int lastEnd = 0;
     while (matcher.find()) {
       int count = matcher.groupCount();
       int start = matcher.start();
       int end = matcher.end();
-
-      booleanQuery = addExtraQueryInfo(query, booleanQuery, lastEnd, start, end);
-
+      if (lastStart != start) {
+        builder.append(queryStr.substring(lastEnd, start));
+      }
+      String realQuery = queryStr.substring(start, end);
+      LOG.debug("Realquery [{0}]", realQuery);
       for (int i = 0; i < count; i++) {
-        String occurString = matcher.group(i + 1);
-        i++;
         String superQueryStr = matcher.group(i + 1);
         Matcher matcherCheck = CHECK.matcher(superQueryStr);
         if (matcherCheck.find()) {
-          throw new ParseException("Embedded super queries are not allowed [" + query + "].");
+          throw new ParseException("Embedded super queries are not allowed [" + queryStr + "].");
         }
-
-        // Adding clause
-        if (booleanQuery == null) {
-          booleanQuery = new BooleanQuery();
+        LOG.debug("Parseable sub query [{0}]", superQueryStr);
+        String key = _prefixToSub + subQueryIndex;
+        QueryParser newParser = getNewParser();
+        Query query = newParser.parse(superQueryStr);
+        if (!isSameGroupName(query)) {
+          throw new ParseException("Super query [" + superQueryStr + "] cannot reference more than one column family.");
         }
-        Occur occur = getOccur(occurString);
-        QueryParser parser = getNewParser();
-        Query subQuery = parser.parse(superQueryStr);
-        if (!isSameGroupName(subQuery)) {
-          throw new ParseException("Super query [" + occurString + superQueryStr
-              + "] cannot reference more than one column family.");
+        if (_superSearch) {
+          query = newSuperQuery(query);
+        } else {
+          query = wrapFilter(query);
         }
-        booleanQuery.add(newSuperQuery(subQuery), occur);
-        lastEnd = end;
+        subQueries.put(key, query);
+        builder.append(_prefixToSub).append(':').append(subQueryIndex);
+        subQueryIndex++;
       }
+      lastStart = start;
+      lastEnd = end;
     }
-    booleanQuery = addExtraQueryInfo(query, booleanQuery, lastEnd);
-    Query result = booleanQuery;
-    if (result == null) {
-      result = reprocess(super.parse(query));
+    if (lastEnd < queryStr.length()) {
+      builder.append(queryStr.substring(lastEnd));
     }
-    return result;
+    Query query = super.parse(builder.toString());
+    return reprocess(replaceRealQueries(query, subQueries));
   }
 
-  private boolean isSameGroupName(Query query) {
-    if (query instanceof BooleanQuery) {
-      return isSameGroupName((BooleanQuery) query);
-    }
-    return true;
-  }
-
-  private BooleanQuery addExtraQueryInfo(String query, BooleanQuery booleanQuery, int lastEnd, int start, int end)
-      throws ParseException {
-    if (lastEnd != -1 && start != lastEnd) {
-      // there was text inbetween the matches
-      String missingMatch = query.substring(lastEnd, start);
-      booleanQuery = addExtraQueryInfo(booleanQuery, missingMatch);
-    } else if (lastEnd == -1 && start != 0) {
-      // this means there was text in front of the first super query
-      String missingMatch = query.substring(0, start);
-      booleanQuery = addExtraQueryInfo(booleanQuery, missingMatch);
-    }
-    return booleanQuery;
+  private SuperQuery newSuperQuery(Query query) {
+    return new SuperQuery(wrapFilter(query), _scoreType, _defaultPrimeDocTerm);
   }
 
-  private BooleanQuery addExtraQueryInfo(String query, BooleanQuery booleanQuery, int lastEnd) throws ParseException {
-    if (lastEnd != -1 && lastEnd < query.length()) {
-      // there was text at the end
-      String missingMatch = query.substring(lastEnd);
-      booleanQuery = addExtraQueryInfo(booleanQuery, missingMatch);
+  private Query wrapFilter(Query query) {
+    if (_queryFilter == null) {
+      return query;
     }
-    return booleanQuery;
+    return new FilteredQuery(query, _queryFilter);
   }
 
-  private BooleanQuery addExtraQueryInfo(BooleanQuery booleanQuery, String missingMatch) throws ParseException {
-    if (missingMatch.trim().isEmpty()) {
+  private Query replaceRealQueries(Query query, Map<String, Query> subQueries) {
+    if (query instanceof BooleanQuery) {
+      BooleanQuery booleanQuery = (BooleanQuery) query;
+      for (BooleanClause clause : booleanQuery) {
+        clause.setQuery(replaceRealQueries(clause.getQuery(), subQueries));
+      }
       return booleanQuery;
-    }
-    // Adding clause
-    if (booleanQuery == null) {
-      booleanQuery = new BooleanQuery();
-    }
-    QueryParser parser = getNewParser();
-    Query subQuery = parser.parse(missingMatch);
-    if (subQuery instanceof BooleanQuery) {
-      BooleanQuery bq = (BooleanQuery) subQuery;
-      for (BooleanClause clause : bq) {
-        booleanQuery.add(newSuperQuery(clause.getQuery()), clause.getOccur());
+    } else if (query instanceof TermQuery) {
+      TermQuery termQuery = (TermQuery) query;
+      Term term = termQuery.getTerm();
+      if (term.field().equals(_prefixToSub)) {
+        return subQueries.get(getKey(term));
+      } else {
+        return query;
       }
     } else {
-      booleanQuery.add(newSuperQuery(subQuery), Occur.SHOULD);
+      return query;
     }
-    return booleanQuery;
+  }
+
+  private String getKey(Term term) {
+    return term.field() + term.text();
   }
 
   private QueryParser getNewParser() {
-    return new BlurQueryParser(_matchVersion, SUPER, _blurAnalyzer,_fieldNames);
+    return new BlurQueryParser(_matchVersion, _defaultField, _blurAnalyzer, _fieldNames);
   }
 
-  private Occur getOccur(String occurString) {
-    if (occurString.equals(MUST_STRING)) {
-      return Occur.MUST;
+  private Query reprocess(Query query) {
+    if (query == null || !_superSearch) {
+      return wrapFilter(query);
     }
-    if (occurString.equals(MUST_NOT_STRING)) {
-      return Occur.MUST_NOT;
+    if (query instanceof BooleanQuery) {
+      BooleanQuery booleanQuery = (BooleanQuery) query;
+      if (containsSuperQueries(booleanQuery)) {
+        for (BooleanClause bc : booleanQuery) {
+          bc.setQuery(reprocess(bc.getQuery()));
+        }
+      } else {
+        for (BooleanClause bc : booleanQuery) {
+          bc.setQuery(newSuperQuery(bc.getQuery()));
+        }
+      }
+      return booleanQuery;
+    } else if (query instanceof SuperQuery) {
+      return query;
+    } else {
+      return newSuperQuery(query);
     }
-    return Occur.SHOULD;
   }
 
-  private SuperQuery newSuperQuery(Query query) {
-    return new SuperQuery(wrapFilter(query), _scoreType, _defaultPrimeDocTerm);
+  private boolean containsSuperQueries(Query query) {
+    if (query instanceof BooleanQuery) {
+      BooleanQuery booleanQuery = (BooleanQuery) query;
+      for (BooleanClause bc : booleanQuery) {
+        if (containsSuperQueries(bc.getQuery())) {
+          return true;
+        }
+      }
+      return false;
+    } else if (query instanceof SuperQuery) {
+      return true;
+    } else {
+      return false;
+    }
   }
 
-  private Query wrapFilter(Query query) {
-    if (_queryFilter == null) {
-      return query;
+  private boolean isSameGroupName(Query query) {
+    if (query instanceof BooleanQuery) {
+      return isSameGroupName((BooleanQuery) query);
     }
-    return new FilteredQuery(query, _queryFilter);
+    return true;
   }
 
   private boolean isSameGroupName(BooleanQuery booleanQuery) {
@@ -232,25 +233,4 @@ public class SuperParser extends BlurQueryParser {
       return getGroupName(fieldName);
     }
   }
-
-  private Query reprocess(Query query) {
-    if (query == null || !isSuperSearch()) {
-      return wrapFilter(query);
-    }
-    if (query instanceof BooleanQuery) {
-      BooleanQuery booleanQuery = (BooleanQuery) query;
-      List<BooleanClause> clauses = booleanQuery.clauses();
-      for (BooleanClause bc : clauses) {
-        Query q = bc.getQuery();
-        bc.setQuery(newSuperQuery(q));
-      }
-      return booleanQuery;
-    } else {
-      return newSuperQuery(query);
-    }
-  }
-
-  public boolean isSuperSearch() {
-    return _superSearch;
-  }
 }

http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/f9f6d39b/blur-query/src/test/java/org/apache/blur/lucene/search/SuperParserTest.java
----------------------------------------------------------------------
diff --git a/blur-query/src/test/java/org/apache/blur/lucene/search/SuperParserTest.java b/blur-query/src/test/java/org/apache/blur/lucene/search/SuperParserTest.java
index 43c5bb9..8d95427 100644
--- a/blur-query/src/test/java/org/apache/blur/lucene/search/SuperParserTest.java
+++ b/blur-query/src/test/java/org/apache/blur/lucene/search/SuperParserTest.java
@@ -71,10 +71,7 @@ public class SuperParserTest {
     booleanQuery.add(new TermQuery(new Term("a.d", "d")), Occur.SHOULD);
     SuperQuery superQuery = new SuperQuery(booleanQuery, ScoreType.SUPER, new Term("_primedoc_"));
 
-    BooleanQuery bq = new BooleanQuery();
-    bq.add(superQuery, Occur.SHOULD);
-
-    assertQuery(bq, query);
+    assertQuery(superQuery, query);
   }
 
   @Test
@@ -85,7 +82,7 @@ public class SuperParserTest {
 
   @Test
   public void test4() throws ParseException {
-    Query query = parser.parse("super:<a.a:a a.d:e a.b:b>  - super:<b.c:c b.d:d>");
+    Query query = parser.parse("super:<a.a:a a.d:e a.b:b>  -super:<b.c:c b.d:d>");
 
     BooleanQuery booleanQuery1 = new BooleanQuery();
     booleanQuery1.add(new TermQuery(new Term("a.a", "a")), Occur.SHOULD);
@@ -110,7 +107,7 @@ public class SuperParserTest {
   public void test5() throws ParseException {
     parser = new SuperParser(LUCENE_VERSION, new BlurAnalyzer(new WhitespaceAnalyzer(LUCENE_VERSION)), true, null,
         ScoreType.SUPER, new Term("_primedoc_"));
-    Query query = parser.parse("super:<a.a:a a.d:{e TO f} a.b:b a.test:hello\\<> - super:<g.c:c g.d:d>");
+    Query query = parser.parse("super:<a.a:a a.d:{e TO f} a.b:b a.test:hello\\<> -super:<g.c:c g.d:d>");
 
     BooleanQuery booleanQuery1 = new BooleanQuery();
     booleanQuery1.add(new TermQuery(new Term("a.a", "a")), Occur.SHOULD);
@@ -147,7 +144,11 @@ public class SuperParserTest {
   @Test
   public void test7() throws ParseException {
     Query q = parseSq("(a.b:cool) (+a.c:cool a.b:cool)");
-    assertQuery(bq(bc(sq(tq("a.b", "cool"))), bc(sq(bq(bc_m(tq("a.c", "cool")), bc(tq("a.b", "cool")))))), q);
+    BooleanQuery bq = bq(bc(sq(tq("a.b", "cool"))), bc(sq(bq(bc_m(tq("a.c", "cool")), bc(tq("a.b", "cool"))))));
+
+    System.out.println(q);
+    System.out.println(bq);
+    assertQuery(bq, q);
   }
 
   @Test
@@ -270,13 +271,21 @@ public class SuperParserTest {
   }
 
   @Test
+  public void test25_AND_ORs() throws ParseException {
+    Query q = parseSq("leading AND super:<f1:word1> OR word6 super:<word3> word2");
+    Query q1 = bq(bc_m(sq(tq("super", "leading"))), bc_m(sq(tq("f1", "word1"))), bc(sq(tq("super", "word6"))),
+        bc(sq(tq("super", "word3"))), bc(sq(tq("super", "word2"))));
+    assertQuery(q1, q);
+  }
+
+  @Test
   public void test26() throws ParseException {
     Query q = parseSq("-leading super:<f1:word1> +word6 super:<word3> word2");
     Query q1 = bq(bc_n(sq(tq("super", "leading"))), bc(sq(tq("f1", "word1"))), bc_m(sq(tq("super", "word6"))),
         bc(sq(tq("super", "word3"))), bc(sq(tq("super", "word2"))));
     assertQuery(q1, q);
   }
-  
+
   @Test
   public void test27() throws ParseException {
     Query q = parseSq("rowid:1");
@@ -297,8 +306,8 @@ public class SuperParserTest {
   }
 
   public static void assertQuery(Query expected, Query actual) {
-    System.out.println(expected);
-    System.out.println(actual);
+    System.out.println("expected =" + expected);
+    System.out.println("actual   =" + actual);
     assertEqualsQuery(expected, actual);
   }
 


Mime
View raw message