incubator-blur-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From amccu...@apache.org
Subject git commit: Many updates to the super parser, found lots of errors and added unit tests.
Date Sun, 16 Jun 2013 14:38:25 GMT
Updated Branches:
  refs/heads/master 65b601c59 -> 50f1eaafd


Many updates to the super parser, found lots of errors and added unit tests.


Project: http://git-wip-us.apache.org/repos/asf/incubator-blur/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-blur/commit/50f1eaaf
Tree: http://git-wip-us.apache.org/repos/asf/incubator-blur/tree/50f1eaaf
Diff: http://git-wip-us.apache.org/repos/asf/incubator-blur/diff/50f1eaaf

Branch: refs/heads/master
Commit: 50f1eaafd24ede9323b101ea4d9d55dc1e95182f
Parents: 65b601c
Author: Aaron McCurry <amccurry@gmail.com>
Authored: Sun Jun 16 10:37:41 2013 -0400
Committer: Aaron McCurry <amccurry@gmail.com>
Committed: Sun Jun 16 10:37:41 2013 -0400

----------------------------------------------------------------------
 .../org/apache/blur/analysis/BlurAnalyzer.java  |   8 +-
 .../analysis/NoStopWordStandardAnalyzer.java    |  81 ++++++++++
 .../apache/blur/lucene/search/SuperParser.java  | 156 +++++++++++++------
 .../blur/lucene/search/SuperParserTest.java     | 115 +++++++++++---
 .../thrift/generated/AnalyzerDefinition.java    |   4 +-
 .../blur/thrift/generated/ColumnDefinition.java |   4 +-
 .../src/main/scripts/interface/Blur.thrift      |   4 +-
 .../main/scripts/interface/gen-html/Blur.html   |   4 +-
 .../thrift/generated/AnalyzerDefinition.java    |   4 +-
 .../blur/thrift/generated/ColumnDefinition.java |   4 +-
 .../main/scripts/interface/gen-js/Blur_types.js |   4 +-
 .../scripts/interface/gen-perl/Blur/Types.pm    |   4 +-
 .../main/scripts/interface/gen-rb/blur_types.rb |   4 +-
 13 files changed, 298 insertions(+), 98 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/50f1eaaf/blur-query/src/main/java/org/apache/blur/analysis/BlurAnalyzer.java
----------------------------------------------------------------------
diff --git a/blur-query/src/main/java/org/apache/blur/analysis/BlurAnalyzer.java b/blur-query/src/main/java/org/apache/blur/analysis/BlurAnalyzer.java
index eca5abc..48d0b90 100644
--- a/blur-query/src/main/java/org/apache/blur/analysis/BlurAnalyzer.java
+++ b/blur-query/src/main/java/org/apache/blur/analysis/BlurAnalyzer.java
@@ -52,7 +52,7 @@ import org.apache.hadoop.fs.Path;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.AnalyzerWrapper;
 import org.apache.lucene.analysis.core.KeywordAnalyzer;
-import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
 import org.apache.lucene.document.DoubleField;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
@@ -82,7 +82,7 @@ public final class BlurAnalyzer extends AnalyzerWrapper {
     }
   };
 
-  private static final String STANDARD = "org.apache.lucene.analysis.standard.StandardAnalyzer";
+  private static final String STANDARD = "org.apache.blur.analysis.NoStopWordStandardAnalyzer";
   public static final BlurAnalyzer BLANK_ANALYZER = new BlurAnalyzer(new KeywordAnalyzer());
 
   private static final Analyzer ERROR_ANALYZER = new Analyzer() {
@@ -127,13 +127,13 @@ public final class BlurAnalyzer extends AnalyzerWrapper {
     _analyzers.put(RECORD_ID, ERROR_ANALYZER);
     _analyzers.put(PRIME_DOC, ERROR_ANALYZER);
     _analyzers.put(FAMILY, ERROR_ANALYZER);
-    _analyzers.put(SUPER, ERROR_ANALYZER);
+    _analyzers.put(SUPER, new WhitespaceAnalyzer(LUCENE_VERSION));
     load(_analyzers, _analyzerDefinition.columnFamilyDefinitions, _fullTextFields, _subIndexNameLookups,
         _subIndexNames, _fullTextColumnFamilies, _typeLookup, _fieldTypes);
   }
 
   public BlurAnalyzer() {
-    this(new StandardAnalyzer(LUCENE_VERSION));
+    this(new NoStopWordStandardAnalyzer());
   }
 
   private Analyzer getAnalyzer(String name) {

http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/50f1eaaf/blur-query/src/main/java/org/apache/blur/analysis/NoStopWordStandardAnalyzer.java
----------------------------------------------------------------------
diff --git a/blur-query/src/main/java/org/apache/blur/analysis/NoStopWordStandardAnalyzer.java b/blur-query/src/main/java/org/apache/blur/analysis/NoStopWordStandardAnalyzer.java
new file mode 100644
index 0000000..c04bcdd
--- /dev/null
+++ b/blur-query/src/main/java/org/apache/blur/analysis/NoStopWordStandardAnalyzer.java
@@ -0,0 +1,81 @@
+package org.apache.blur.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.io.IOException;
+import java.io.Reader;
+
+import org.apache.blur.lucene.LuceneVersionConstant;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.core.LowerCaseFilter;
+import org.apache.lucene.analysis.core.StopAnalyzer;
+import org.apache.lucene.analysis.core.StopFilter;
+import org.apache.lucene.analysis.standard.StandardFilter;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+
+public class NoStopWordStandardAnalyzer extends StopwordAnalyzerBase {
+
+  /** Default maximum allowed token length */
+  public static final int DEFAULT_MAX_TOKEN_LENGTH = 255;
+
+  private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH;
+
+  /**
+   * An unmodifiable set containing some common English words that are usually
+   * not useful for searching.
+   */
+  public static final CharArraySet STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
+
+  public NoStopWordStandardAnalyzer() {
+    super(LuceneVersionConstant.LUCENE_VERSION, CharArraySet.EMPTY_SET);
+  }
+
+  /**
+   * Set maximum allowed token length. If a token is seen that exceeds this
+   * length then it is discarded. This setting only takes effect the next time
+   * tokenStream or tokenStream is called.
+   */
+  public void setMaxTokenLength(int length) {
+    maxTokenLength = length;
+  }
+
+  /**
+   * @see #setMaxTokenLength
+   */
+  public int getMaxTokenLength() {
+    return maxTokenLength;
+  }
+
+  @Override
+  protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
+    final StandardTokenizer src = new StandardTokenizer(matchVersion, reader);
+    src.setMaxTokenLength(maxTokenLength);
+    TokenStream tok = new StandardFilter(matchVersion, src);
+    tok = new LowerCaseFilter(matchVersion, tok);
+    tok = new StopFilter(matchVersion, tok, stopwords);
+    return new TokenStreamComponents(src, tok) {
+      @Override
+      protected void setReader(final Reader reader) throws IOException {
+        src.setMaxTokenLength(NoStopWordStandardAnalyzer.this.maxTokenLength);
+        super.setReader(reader);
+      }
+    };
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/50f1eaaf/blur-query/src/main/java/org/apache/blur/lucene/search/SuperParser.java
----------------------------------------------------------------------
diff --git a/blur-query/src/main/java/org/apache/blur/lucene/search/SuperParser.java b/blur-query/src/main/java/org/apache/blur/lucene/search/SuperParser.java
index 1d00fb8..9c5a323 100644
--- a/blur-query/src/main/java/org/apache/blur/lucene/search/SuperParser.java
+++ b/blur-query/src/main/java/org/apache/blur/lucene/search/SuperParser.java
@@ -48,60 +48,122 @@ public class SuperParser extends QueryParser {
   private static final Pattern PATTERN = Pattern.compile("([-+]{0,1})\\s*?super\\s*?\\:\\s*?\\<(.*?)\\>");
   private static final Pattern CHECK = Pattern.compile("super\\s*?\\:\\s*?\\<");
   private static final String SUPER = "super";
-  private final Map<Query, String> fieldNames = new HashMap<Query, String>();
-  private final boolean superSearch;
-  private final Filter queryFilter;
-  private final ScoreType scoreType;
-  private final BlurAnalyzer blurAnalyzer;
-  private final Version matchVersion;
-  private final Term defaultPrimeDocTerm;
-
-  public SuperParser(Version matchVersion, BlurAnalyzer a, boolean superSearch, Filter queryFilter, ScoreType scoreType, Term defaultPrimeDocTerm) {
+  private final Map<Query, String> _fieldNames = new HashMap<Query, String>();
+  private final boolean _superSearch;
+  private final Filter _queryFilter;
+  private final ScoreType _scoreType;
+  private final BlurAnalyzer _blurAnalyzer;
+  private final Version _matchVersion;
+  private final Term _defaultPrimeDocTerm;
+  private final boolean _autoGrouping;
+
+  public SuperParser(Version matchVersion, BlurAnalyzer a, boolean superSearch, Filter queryFilter,
+      ScoreType scoreType, Term defaultPrimeDocTerm) {
+    this(matchVersion, a, superSearch, queryFilter, scoreType, defaultPrimeDocTerm, false);
+  }
+
+  public SuperParser(Version matchVersion, BlurAnalyzer a, boolean superSearch, Filter queryFilter,
+      ScoreType scoreType, Term defaultPrimeDocTerm, boolean autoGrouping) {
     super(matchVersion, "super", a);
-    this.matchVersion = matchVersion;
-    this.setAutoGeneratePhraseQueries(true);
-    this.setAllowLeadingWildcard(true);
-    this.superSearch = superSearch;
-    this.queryFilter = queryFilter;
-    this.scoreType = scoreType;
-    this.blurAnalyzer = a;
-    this.defaultPrimeDocTerm = defaultPrimeDocTerm;
+    _matchVersion = matchVersion;
+    _superSearch = superSearch;
+    _queryFilter = queryFilter;
+    _scoreType = scoreType;
+    _blurAnalyzer = a;
+    _defaultPrimeDocTerm = defaultPrimeDocTerm;
+    _autoGrouping = autoGrouping;
+    setAutoGeneratePhraseQueries(true);
+    setAllowLeadingWildcard(true);
   }
 
   @Override
   public Query parse(String query) throws ParseException {
     Matcher matcher = PATTERN.matcher(query);
     BooleanQuery booleanQuery = null;
+    int lastEnd = -1;
     while (matcher.find()) {
       int count = matcher.groupCount();
+      int start = matcher.start();
+      int end = matcher.end();
+
+      booleanQuery = addExtraQueryInfo(query, booleanQuery, lastEnd, start, end);
+
       for (int i = 0; i < count; i++) {
         String occurString = matcher.group(i + 1);
         i++;
         String superQueryStr = matcher.group(i + 1);
         Matcher matcherCheck = CHECK.matcher(superQueryStr);
         if (matcherCheck.find()) {
-          throw new ParseException(
-              "Embedded super queries are not allowed [" + query
-                  + "].");
+          throw new ParseException("Embedded super queries are not allowed [" + query + "].");
         }
 
+        // Adding clause
         if (booleanQuery == null) {
           booleanQuery = new BooleanQuery();
         }
-
         Occur occur = getOccur(occurString);
-        QueryParser parser = new QueryParser(matchVersion, SUPER, blurAnalyzer);
-
+        QueryParser parser = getNewParser();
         Query superQuery = parser.parse(superQueryStr);
-        booleanQuery.add(new SuperQuery(superQuery, scoreType, defaultPrimeDocTerm), occur);
+        booleanQuery.add(new SuperQuery(superQuery, _scoreType, _defaultPrimeDocTerm), occur);
+        lastEnd = end;
       }
     }
+    booleanQuery = addExtraQueryInfo(query, booleanQuery, lastEnd);
     if (booleanQuery == null) {
       return reprocess(super.parse(query));
     }
     return booleanQuery;
   }
 
+  private BooleanQuery addExtraQueryInfo(String query, BooleanQuery booleanQuery, int lastEnd, int start, int end)
+      throws ParseException {
+    if (lastEnd != -1 && start != lastEnd) {
+      // there was text inbetween the matches
+      String missingMatch = query.substring(lastEnd, start);
+      booleanQuery = addExtraQueryInfo(booleanQuery, missingMatch);
+    } else if (lastEnd == -1 && start != 0) {
+      // this means there was text in front of the first super query
+      String missingMatch = query.substring(0, start);
+      booleanQuery = addExtraQueryInfo(booleanQuery, missingMatch);
+    }
+    return booleanQuery;
+  }
+
+  private BooleanQuery addExtraQueryInfo(String query, BooleanQuery booleanQuery, int lastEnd) throws ParseException {
+    if (lastEnd != -1 && lastEnd < query.length()) {
+      // there was text at the end
+      String missingMatch = query.substring(lastEnd);
+      booleanQuery = addExtraQueryInfo(booleanQuery, missingMatch);
+    }
+    return booleanQuery;
+  }
+
+  private BooleanQuery addExtraQueryInfo(BooleanQuery booleanQuery, String missingMatch) throws ParseException {
+    if (missingMatch.trim().isEmpty()) {
+      return booleanQuery;
+    }
+    // Adding clause
+    if (booleanQuery == null) {
+      booleanQuery = new BooleanQuery();
+    }
+    QueryParser parser = getNewParser();
+    Query subQuery = parser.parse(missingMatch);
+    if (subQuery instanceof BooleanQuery) {
+      BooleanQuery bq = (BooleanQuery) subQuery;
+      for (BooleanClause clause : bq) {
+        booleanQuery.add(new SuperQuery(clause.getQuery(), _scoreType, _defaultPrimeDocTerm), clause.getOccur());
+      }
+    } else {
+      booleanQuery.add(new SuperQuery(subQuery, _scoreType, _defaultPrimeDocTerm), Occur.SHOULD);
+    }
+    return booleanQuery;
+  }
+
+  private QueryParser getNewParser() {
+    QueryParser parser = new QueryParser(_matchVersion, SUPER, _blurAnalyzer);
+    return parser;
+  }
+
   private Occur getOccur(String occurString) {
     if (occurString.equals(MUST_STRING)) {
       return Occur.MUST;
@@ -113,23 +175,18 @@ public class SuperParser extends QueryParser {
   }
 
   @Override
-  protected Query newFuzzyQuery(Term term, float minimumSimilarity,
-      int prefixLength) {
+  protected Query newFuzzyQuery(Term term, float minimumSimilarity, int prefixLength) {
     String field = term.field();
-    TYPE type = blurAnalyzer.getTypeLookup(field);
+    TYPE type = _blurAnalyzer.getTypeLookup(field);
     if (type != TYPE.TEXT) {
-      throw new RuntimeException("Field [" + field + "] is type [" + type
-          + "] which does not support fuzzy queries.");
+      throw new RuntimeException("Field [" + field + "] is type [" + type + "] which does not support fuzzy queries.");
     }
-    return addField(
-        super.newFuzzyQuery(term, minimumSimilarity, prefixLength),
-        term.field());
+    return addField(super.newFuzzyQuery(term, minimumSimilarity, prefixLength), term.field());
   }
 
   @Override
   protected Query newMatchAllDocsQuery() {
-    return addField(super.newMatchAllDocsQuery(), UUID.randomUUID()
-        .toString());
+    return addField(super.newMatchAllDocsQuery(), UUID.randomUUID().toString());
   }
 
   @Override
@@ -161,31 +218,26 @@ public class SuperParser extends QueryParser {
   @Override
   protected Query newPrefixQuery(Term prefix) {
     String field = prefix.field();
-    TYPE type = blurAnalyzer.getTypeLookup(field);
+    TYPE type = _blurAnalyzer.getTypeLookup(field);
     if (type != TYPE.TEXT) {
-      throw new RuntimeException("Field [" + field + "] is type [" + type
-          + "] which does not support prefix queries.");
+      throw new RuntimeException("Field [" + field + "] is type [" + type + "] which does not support prefix queries.");
     }
     return addField(super.newPrefixQuery(prefix), field);
   }
 
   @Override
-  protected Query newRangeQuery(String field, String part1, String part2,
-      boolean startInclusive, boolean endInclusive) {
-    Query q = blurAnalyzer.getNewRangeQuery(field, part1, part2,
-        startInclusive, endInclusive);
+  protected Query newRangeQuery(String field, String part1, String part2, boolean startInclusive, boolean endInclusive) {
+    Query q = _blurAnalyzer.getNewRangeQuery(field, part1, part2, startInclusive, endInclusive);
     if (q != null) {
       return addField(q, field);
     }
-    return addField(super.newRangeQuery(field, part1, part2,
-        startInclusive, endInclusive), field);
+    return addField(super.newRangeQuery(field, part1, part2, startInclusive, endInclusive), field);
   }
 
   @Override
   protected Query newTermQuery(Term term) {
     String field = term.field();
-    Query q = blurAnalyzer.getNewRangeQuery(field, term.text(),
-        term.text(), true, true);
+    Query q = _blurAnalyzer.getNewRangeQuery(field, term.text(), term.text(), true, true);
     if (q != null) {
       return addField(q, field);
     }
@@ -198,7 +250,7 @@ public class SuperParser extends QueryParser {
       return new MatchAllDocsQuery();
     }
     String field = t.field();
-    TYPE type = blurAnalyzer.getTypeLookup(field);
+    TYPE type = _blurAnalyzer.getTypeLookup(field);
     if (type != TYPE.TEXT) {
       throw new RuntimeException("Field [" + field + "] is type [" + type
           + "] which does not support wildcard queries.");
@@ -207,14 +259,14 @@ public class SuperParser extends QueryParser {
   }
 
   private SuperQuery newSuperQuery(Query query) {
-    return new SuperQuery(wrapFilter(query), scoreType, defaultPrimeDocTerm);
+    return new SuperQuery(wrapFilter(query), _scoreType, _defaultPrimeDocTerm);
   }
 
   private Query wrapFilter(Query query) {
-    if (queryFilter == null) {
+    if (_queryFilter == null) {
       return query;
     }
-    return new FilteredQuery(query, queryFilter);
+    return new FilteredQuery(query, _queryFilter);
   }
 
   // private boolean isSameGroupName(BooleanQuery booleanQuery) {
@@ -267,6 +319,7 @@ public class SuperParser extends QueryParser {
   // return getGroupName(fieldName);
   // }
   // }
+
   private Query reprocess(Query query) {
     if (query == null || !isSuperSearch()) {
       return wrapFilter(query);
@@ -293,12 +346,13 @@ public class SuperParser extends QueryParser {
       return newSuperQuery(query);
     }
   }
+
   private Query addField(Query q, String field) {
-    fieldNames.put(q, field);
+    _fieldNames.put(q, field);
     return q;
   }
 
   public boolean isSuperSearch() {
-    return superSearch;
+    return _superSearch;
   }
 }

http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/50f1eaaf/blur-query/src/test/java/org/apache/blur/lucene/search/SuperParserTest.java
----------------------------------------------------------------------
diff --git a/blur-query/src/test/java/org/apache/blur/lucene/search/SuperParserTest.java b/blur-query/src/test/java/org/apache/blur/lucene/search/SuperParserTest.java
index 92e9f21..497b11a 100644
--- a/blur-query/src/test/java/org/apache/blur/lucene/search/SuperParserTest.java
+++ b/blur-query/src/test/java/org/apache/blur/lucene/search/SuperParserTest.java
@@ -1,21 +1,22 @@
 package org.apache.blur.lucene.search;
 
 import static org.apache.blur.lucene.LuceneVersionConstant.LUCENE_VERSION;
-import static org.junit.Assert.*;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.fail;
 
 import java.util.List;
 
-import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
 import org.apache.blur.analysis.BlurAnalyzer;
+import org.apache.blur.analysis.NoStopWordStandardAnalyzer;
 import org.apache.blur.thrift.generated.AnalyzerDefinition;
 import org.apache.blur.thrift.generated.ColumnDefinition;
 import org.apache.blur.thrift.generated.ColumnFamilyDefinition;
 import org.apache.blur.thrift.generated.ScoreType;
-import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.queryparser.classic.ParseException;
-import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.NumericRangeQuery;
 import org.apache.lucene.search.Query;
@@ -33,7 +34,7 @@ public class SuperParserTest {
   @Before
   public void setup() {
     AnalyzerDefinition ad = new AnalyzerDefinition();
-    ad.setDefaultDefinition(new ColumnDefinition(StandardAnalyzer.class.getName(), true, null));
+    ad.setDefaultDefinition(new ColumnDefinition(NoStopWordStandardAnalyzer.class.getName(), true, null));
     ColumnFamilyDefinition cfDef = new ColumnFamilyDefinition();
     cfDef.putToColumnDefinitions("id_l", new ColumnDefinition("long", false, null));
     cfDef.putToColumnDefinitions("id_d", new ColumnDefinition("double", false, null));
@@ -41,7 +42,7 @@ public class SuperParserTest {
     cfDef.putToColumnDefinitions("id_i", new ColumnDefinition("integer", false, null));
     ad.putToColumnFamilyDefinitions("a", cfDef);
     analyzer = new BlurAnalyzer(ad);
-    parser = new SuperParser(LUCENE_VERSION, new BlurAnalyzer(new WhitespaceAnalyzer(LUCENE_VERSION)), true, null,  ScoreType.SUPER, new Term("_primedoc_"));
+    parser = new SuperParser(LUCENE_VERSION, analyzer, true, null, ScoreType.SUPER, new Term("_primedoc_"));
   }
 
   @Test
@@ -56,8 +57,8 @@ public class SuperParserTest {
 
     BooleanQuery bq = new BooleanQuery();
     bq.add(superQuery, Occur.MUST);
-    
-    assertEquals(bq, query);
+
+    assertQuery(bq, query);
 
   }
 
@@ -73,7 +74,7 @@ public class SuperParserTest {
     BooleanQuery bq = new BooleanQuery();
     bq.add(superQuery, Occur.SHOULD);
 
-    assertEquals(bq, query);
+    assertQuery(bq, query);
   }
 
   @Test
@@ -102,12 +103,13 @@ public class SuperParserTest {
     booleanQuery.add(superQuery1, Occur.SHOULD);
     booleanQuery.add(superQuery2, Occur.MUST_NOT);
 
-    assertEquals(booleanQuery, query);
+    assertQuery(booleanQuery, query);
   }
 
   @Test
   public void testParser5() throws ParseException {
-
+    parser = new SuperParser(LUCENE_VERSION, new BlurAnalyzer(new WhitespaceAnalyzer(LUCENE_VERSION)), true, null,
+        ScoreType.SUPER, new Term("_primedoc_"));
     Query query = parser.parse("super:<a:a d:{e TO f} b:b test:hello\\<> - super:<c:c d:d>");
 
     BooleanQuery booleanQuery1 = new BooleanQuery();
@@ -128,12 +130,12 @@ public class SuperParserTest {
     booleanQuery.add(superQuery1, Occur.SHOULD);
     booleanQuery.add(superQuery2, Occur.MUST_NOT);
 
-    assertEquals(booleanQuery, query);
+    assertQuery(booleanQuery, query);
   }
 
   @Test
   public void testParser6() throws ParseException {
-    SuperParser parser = new SuperParser(LUCENE_VERSION, analyzer, true, null,  ScoreType.SUPER, new Term("_primedoc_"));
+    SuperParser parser = new SuperParser(LUCENE_VERSION, analyzer, true, null, ScoreType.SUPER, new Term("_primedoc_"));
     try {
       parser.parse("super : <a:a d:{e TO d} b:b super:<test:hello\\<>> super:<c:c d:d>");
       fail();
@@ -141,7 +143,7 @@ public class SuperParserTest {
       // should throw an error
     }
   }
-  
+
   @Test
   public void test7() throws ParseException {
     Query q = parseSq("(a.b:cool) (+a.c:cool a.b:cool)");
@@ -196,19 +198,19 @@ public class SuperParserTest {
     Query q1 = rq_i("a.id_l", 0L, 2L);
     assertQuery(sq(q1), q);
   }
-  
+
   @Test
   public void test16() throws ParseException {
     Query q = parseSq("a.id_d:[0 TO 2]");
     assertQuery(sq(rq_i("a.id_d", 0.0D, 2.0D)), q);
   }
-  
+
   @Test
   public void test17() throws ParseException {
     Query q = parseSq("a.id_f:[0 TO 2]");
     assertQuery(sq(rq_i("a.id_f", 0.0F, 2.0F)), q);
   }
-  
+
   @Test
   public void test18() throws ParseException {
     Query q = parseSq("a.id_i:[0 TO 2]");
@@ -216,6 +218,65 @@ public class SuperParserTest {
     assertQuery(sq(q1), q);
   }
 
+  @Test
+  public void test19() throws ParseException {
+    Query q = parseSq("word1");
+    Query q1 = sq(tq("super", "word1"));
+    assertQuery(q1, q);
+  }
+
+  @Test
+  public void test20() throws ParseException {
+    Query q = parseSq("word1 word2");
+    Query q1 = bq(bc(sq(tq("super", "word1"))), bc(sq(tq("super", "word2"))));
+    assertQuery(q1, q);
+  }
+
+  @Test
+  public void test21() throws ParseException {
+    Query q = parseSq("super:<f1:word1> word2");
+    Query q1 = bq(bc(sq(tq("f1", "word1"))), bc(sq(tq("super", "word2"))));
+    assertQuery(q1, q);
+  }
+
+  @Test
+  public void test22() throws ParseException {
+    Query q = parseSq("super:<f1:word1> word2 super:<word3>");
+    Query q1 = bq(bc(sq(tq("f1", "word1"))), bc(sq(tq("super", "word2"))), bc(sq(tq("super", "word3"))));
+    assertQuery(q1, q);
+  }
+
+  @Test
+  public void test23() throws ParseException {
+    Query q = parseSq("super:<f1:word1>  super:<word3> word2");
+    Query q1 = bq(bc(sq(tq("f1", "word1"))), bc(sq(tq("super", "word3"))), bc(sq(tq("super", "word2"))));
+    assertQuery(q1, q);
+  }
+
+  @Test
+  public void test24() throws ParseException {
+    Query q = parseSq("super:<f1:word1> +word6 super:<word3> word2");
+    Query q1 = bq(bc(sq(tq("f1", "word1"))), bc_m(sq(tq("super", "word6"))), bc(sq(tq("super", "word3"))),
+        bc(sq(tq("super", "word2"))));
+    assertQuery(q1, q);
+  }
+
+  @Test
+  public void test25() throws ParseException {
+    Query q = parseSq("+leading super:<f1:word1> +word6 super:<word3> word2");
+    Query q1 = bq(bc_m(sq(tq("super", "leading"))), bc(sq(tq("f1", "word1"))), bc_m(sq(tq("super", "word6"))),
+        bc(sq(tq("super", "word3"))), bc(sq(tq("super", "word2"))));
+    assertQuery(q1, q);
+  }
+
+  @Test
+  public void test26() throws ParseException {
+    Query q = parseSq("-leading super:<f1:word1> +word6 super:<word3> word2");
+    Query q1 = bq(bc_n(sq(tq("super", "leading"))), bc(sq(tq("f1", "word1"))), bc_m(sq(tq("super", "word6"))),
+        bc(sq(tq("super", "word3"))), bc(sq(tq("super", "word2"))));
+    assertQuery(q1, q);
+  }
+
   public static BooleanClause bc_m(Query q) {
     return new BooleanClause(q, Occur.MUST);
   }
@@ -244,8 +305,7 @@ public class SuperParserTest {
       assertEqualsTermQuery((TermQuery) expected, (TermQuery) actual);
     } else if (expected instanceof NumericRangeQuery<?>) {
       assertEqualsNumericRangeQuery((NumericRangeQuery<?>) expected, (NumericRangeQuery<?>) actual);
-    }
-    else {
+    } else {
       fail("Type [" + expected.getClass() + "] not supported");
     }
   }
@@ -259,7 +319,7 @@ public class SuperParserTest {
   public static void assertEqualsNumericRangeQuery(NumericRangeQuery<?> expected, NumericRangeQuery<?> actual) {
     assertEquals(expected, actual);
   }
-  
+
   public static void assertEqualsSuperQuery(SuperQuery expected, SuperQuery actual) {
     assertEquals(expected.getQuery(), actual.getQuery());
   }
@@ -290,15 +350,15 @@ public class SuperParserTest {
     assertEquals(booleanClause1.getOccur(), booleanClause2.getOccur());
     assertEqualsQuery(booleanClause1.getQuery(), booleanClause2.getQuery());
   }
-  
+
   private Query rq_i(String field, float min, float max) {
     return NumericRangeQuery.newFloatRange(field, min, max, true, true);
   }
-  
+
   private Query rq_i(String field, int min, int max) {
     return NumericRangeQuery.newIntRange(field, min, max, true, true);
   }
-  
+
   private Query rq_i(String field, double min, double max) {
     return NumericRangeQuery.newDoubleRange(field, min, max, true, true);
   }
@@ -324,8 +384,13 @@ public class SuperParserTest {
   }
 
   private Query parseSq(String qstr) throws ParseException {
-    SuperParser superParser = new SuperParser(LUCENE_VERSION, analyzer, true, null,  ScoreType.SUPER, new Term("_primedoc_"));
+    return parseSq(qstr, false);
+  }
+
+  private Query parseSq(String qstr, boolean autoGrouping) throws ParseException {
+    SuperParser superParser = new SuperParser(LUCENE_VERSION, analyzer, true, null, ScoreType.SUPER, new Term(
+        "_primedoc_"), autoGrouping);
     return superParser.parse(qstr);
   }
-  
+
 }

http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/50f1eaaf/blur-thrift/src/main/java/org/apache/blur/thrift/generated/AnalyzerDefinition.java
----------------------------------------------------------------------
diff --git a/blur-thrift/src/main/java/org/apache/blur/thrift/generated/AnalyzerDefinition.java b/blur-thrift/src/main/java/org/apache/blur/thrift/generated/AnalyzerDefinition.java
index 37adc0b..57fa502 100644
--- a/blur-thrift/src/main/java/org/apache/blur/thrift/generated/AnalyzerDefinition.java
+++ b/blur-thrift/src/main/java/org/apache/blur/thrift/generated/AnalyzerDefinition.java
@@ -168,7 +168,7 @@ public class AnalyzerDefinition implements org.apache.blur.thirdparty.thrift_0_9
   }
 
   public AnalyzerDefinition() {
-    this.fullTextAnalyzerClassName = "org.apache.lucene.analysis.standard.StandardAnalyzer";
+    this.fullTextAnalyzerClassName = "org.apache.blur.analysis.NoStopWordStandardAnalyzer";
 
   }
 
@@ -217,7 +217,7 @@ public class AnalyzerDefinition implements org.apache.blur.thirdparty.thrift_0_9
   @Override
   public void clear() {
     this.defaultDefinition = null;
-    this.fullTextAnalyzerClassName = "org.apache.lucene.analysis.standard.StandardAnalyzer";
+    this.fullTextAnalyzerClassName = "org.apache.blur.analysis.NoStopWordStandardAnalyzer";
 
     this.columnFamilyDefinitions = null;
   }

http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/50f1eaaf/blur-thrift/src/main/java/org/apache/blur/thrift/generated/ColumnDefinition.java
----------------------------------------------------------------------
diff --git a/blur-thrift/src/main/java/org/apache/blur/thrift/generated/ColumnDefinition.java b/blur-thrift/src/main/java/org/apache/blur/thrift/generated/ColumnDefinition.java
index 5b1f20e..3c66dcc 100644
--- a/blur-thrift/src/main/java/org/apache/blur/thrift/generated/ColumnDefinition.java
+++ b/blur-thrift/src/main/java/org/apache/blur/thrift/generated/ColumnDefinition.java
@@ -152,7 +152,7 @@ public class ColumnDefinition implements org.apache.blur.thirdparty.thrift_0_9_0
   }
 
   public ColumnDefinition() {
-    this.analyzerClassName = "org.apache.lucene.analysis.standard.StandardAnalyzer";
+    this.analyzerClassName = "org.apache.blur.analysis.NoStopWordStandardAnalyzer";
 
   }
 
@@ -200,7 +200,7 @@ public class ColumnDefinition implements org.apache.blur.thirdparty.thrift_0_9_0
 
   @Override
   public void clear() {
-    this.analyzerClassName = "org.apache.lucene.analysis.standard.StandardAnalyzer";
+    this.analyzerClassName = "org.apache.blur.analysis.NoStopWordStandardAnalyzer";
 
     setFullTextIndexIsSet(false);
     this.fullTextIndex = false;

http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/50f1eaaf/distribution/src/main/scripts/interface/Blur.thrift
----------------------------------------------------------------------
diff --git a/distribution/src/main/scripts/interface/Blur.thrift b/distribution/src/main/scripts/interface/Blur.thrift
index 448d9ed..a0f5c87 100644
--- a/distribution/src/main/scripts/interface/Blur.thrift
+++ b/distribution/src/main/scripts/interface/Blur.thrift
@@ -587,7 +587,7 @@ struct AlternateColumnDefinition {
  *
  */
 struct ColumnDefinition {
-  1:string analyzerClassName = "org.apache.lucene.analysis.standard.StandardAnalyzer",
+  1:string analyzerClassName = "org.apache.blur.analysis.NoStopWordStandardAnalyzer",
   2:bool fullTextIndex,
   3:map<string,AlternateColumnDefinition> alternateColumnDefinitions
 }
@@ -617,7 +617,7 @@ struct AnalyzerDefinition {
   /**
    *
    */
-  2:string fullTextAnalyzerClassName = "org.apache.lucene.analysis.standard.StandardAnalyzer",
+  2:string fullTextAnalyzerClassName = "org.apache.blur.analysis.NoStopWordStandardAnalyzer",
   /**
    *
    */

http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/50f1eaaf/distribution/src/main/scripts/interface/gen-html/Blur.html
----------------------------------------------------------------------
diff --git a/distribution/src/main/scripts/interface/gen-html/Blur.html b/distribution/src/main/scripts/interface/gen-html/Blur.html
index 85f9d62..063415b 100644
--- a/distribution/src/main/scripts/interface/gen-html/Blur.html
+++ b/distribution/src/main/scripts/interface/gen-html/Blur.html
@@ -392,7 +392,7 @@ shards that are complete, etc.
 </table><br/><p/>
 <br/></div><div class="definition"><h3 id="Struct_ColumnDefinition">Struct: ColumnDefinition</h3>
 <table class="table-bordered table-striped table-condensed"><thead><th>Key</th><th>Field</th><th>Type</th><th>Description</th><th>Requiredness</th><th>Default value</th></thead>
-<tr><td>1</td><td>analyzerClassName</td><td><code>string</code></td><td></td><td>default</td><td>"org.apache.lucene.analysis.standard.StandardAnalyzer"</td></tr>
+<tr><td>1</td><td>analyzerClassName</td><td><code>string</code></td><td></td><td>default</td><td>"org.apache.blur.analysis.NoStopWordStandardAnalyzer"</td></tr>
 <tr><td>2</td><td>fullTextIndex</td><td><code>bool</code></td><td></td><td>default</td><td></td></tr>
 <tr><td>3</td><td>alternateColumnDefinitions</td><td><code>map&lt;<code>string</code>, <code><a href="Blur.html#Struct_AlternateColumnDefinition">AlternateColumnDefinition</a></code>&gt;</code></td><td></td><td>default</td><td></td></tr>
 </table><br/><p/>
@@ -408,7 +408,7 @@ shards that are complete, etc.
 <tr><td>1</td><td>defaultDefinition</td><td><code><a href="Blur.html#Struct_ColumnDefinition">ColumnDefinition</a></code></td><td>
 </td><td>default</td><td></td></tr>
 <tr><td>2</td><td>fullTextAnalyzerClassName</td><td><code>string</code></td><td>
-</td><td>default</td><td>"org.apache.lucene.analysis.standard.StandardAnalyzer"</td></tr>
+</td><td>default</td><td>"org.apache.blur.analysis.NoStopWordStandardAnalyzer"</td></tr>
 <tr><td>3</td><td>columnFamilyDefinitions</td><td><code>map&lt;<code>string</code>, <code><a href="Blur.html#Struct_ColumnFamilyDefinition">ColumnFamilyDefinition</a></code>&gt;</code></td><td>
 </td><td>default</td><td></td></tr>
 </table><br/><p/>

http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/50f1eaaf/distribution/src/main/scripts/interface/gen-java/org/apache/blur/thrift/generated/AnalyzerDefinition.java
----------------------------------------------------------------------
diff --git a/distribution/src/main/scripts/interface/gen-java/org/apache/blur/thrift/generated/AnalyzerDefinition.java b/distribution/src/main/scripts/interface/gen-java/org/apache/blur/thrift/generated/AnalyzerDefinition.java
index 37adc0b..57fa502 100644
--- a/distribution/src/main/scripts/interface/gen-java/org/apache/blur/thrift/generated/AnalyzerDefinition.java
+++ b/distribution/src/main/scripts/interface/gen-java/org/apache/blur/thrift/generated/AnalyzerDefinition.java
@@ -168,7 +168,7 @@ public class AnalyzerDefinition implements org.apache.blur.thirdparty.thrift_0_9
   }
 
   public AnalyzerDefinition() {
-    this.fullTextAnalyzerClassName = "org.apache.lucene.analysis.standard.StandardAnalyzer";
+    this.fullTextAnalyzerClassName = "org.apache.blur.analysis.NoStopWordStandardAnalyzer";
 
   }
 
@@ -217,7 +217,7 @@ public class AnalyzerDefinition implements org.apache.blur.thirdparty.thrift_0_9
   @Override
   public void clear() {
     this.defaultDefinition = null;
-    this.fullTextAnalyzerClassName = "org.apache.lucene.analysis.standard.StandardAnalyzer";
+    this.fullTextAnalyzerClassName = "org.apache.blur.analysis.NoStopWordStandardAnalyzer";
 
     this.columnFamilyDefinitions = null;
   }

http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/50f1eaaf/distribution/src/main/scripts/interface/gen-java/org/apache/blur/thrift/generated/ColumnDefinition.java
----------------------------------------------------------------------
diff --git a/distribution/src/main/scripts/interface/gen-java/org/apache/blur/thrift/generated/ColumnDefinition.java b/distribution/src/main/scripts/interface/gen-java/org/apache/blur/thrift/generated/ColumnDefinition.java
index 5b1f20e..3c66dcc 100644
--- a/distribution/src/main/scripts/interface/gen-java/org/apache/blur/thrift/generated/ColumnDefinition.java
+++ b/distribution/src/main/scripts/interface/gen-java/org/apache/blur/thrift/generated/ColumnDefinition.java
@@ -152,7 +152,7 @@ public class ColumnDefinition implements org.apache.blur.thirdparty.thrift_0_9_0
   }
 
   public ColumnDefinition() {
-    this.analyzerClassName = "org.apache.lucene.analysis.standard.StandardAnalyzer";
+    this.analyzerClassName = "org.apache.blur.analysis.NoStopWordStandardAnalyzer";
 
   }
 
@@ -200,7 +200,7 @@ public class ColumnDefinition implements org.apache.blur.thirdparty.thrift_0_9_0
 
   @Override
   public void clear() {
-    this.analyzerClassName = "org.apache.lucene.analysis.standard.StandardAnalyzer";
+    this.analyzerClassName = "org.apache.blur.analysis.NoStopWordStandardAnalyzer";
 
     setFullTextIndexIsSet(false);
     this.fullTextIndex = false;

http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/50f1eaaf/distribution/src/main/scripts/interface/gen-js/Blur_types.js
----------------------------------------------------------------------
diff --git a/distribution/src/main/scripts/interface/gen-js/Blur_types.js b/distribution/src/main/scripts/interface/gen-js/Blur_types.js
index 9b6d6af..56581fd 100644
--- a/distribution/src/main/scripts/interface/gen-js/Blur_types.js
+++ b/distribution/src/main/scripts/interface/gen-js/Blur_types.js
@@ -2566,7 +2566,7 @@ AlternateColumnDefinition.prototype.write = function(output) {
 };
 
 ColumnDefinition = function(args) {
-  this.analyzerClassName = 'org.apache.lucene.analysis.standard.StandardAnalyzer';
+  this.analyzerClassName = 'org.apache.blur.analysis.NoStopWordStandardAnalyzer';
   this.fullTextIndex = null;
   this.alternateColumnDefinitions = null;
   if (args) {
@@ -2782,7 +2782,7 @@ ColumnFamilyDefinition.prototype.write = function(output) {
 
 AnalyzerDefinition = function(args) {
   this.defaultDefinition = null;
-  this.fullTextAnalyzerClassName = 'org.apache.lucene.analysis.standard.StandardAnalyzer';
+  this.fullTextAnalyzerClassName = 'org.apache.blur.analysis.NoStopWordStandardAnalyzer';
   this.columnFamilyDefinitions = null;
   if (args) {
     if (args.defaultDefinition !== undefined) {

http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/50f1eaaf/distribution/src/main/scripts/interface/gen-perl/Blur/Types.pm
----------------------------------------------------------------------
diff --git a/distribution/src/main/scripts/interface/gen-perl/Blur/Types.pm b/distribution/src/main/scripts/interface/gen-perl/Blur/Types.pm
index dec017f..42a1a87 100644
--- a/distribution/src/main/scripts/interface/gen-perl/Blur/Types.pm
+++ b/distribution/src/main/scripts/interface/gen-perl/Blur/Types.pm
@@ -2770,7 +2770,7 @@ sub new {
   my $classname = shift;
   my $self      = {};
   my $vals      = shift || {};
-  $self->{analyzerClassName} = "org.apache.lucene.analysis.standard.StandardAnalyzer";
+  $self->{analyzerClassName} = "org.apache.blur.analysis.NoStopWordStandardAnalyzer";
   $self->{fullTextIndex} = undef;
   $self->{alternateColumnDefinitions} = undef;
   if (UNIVERSAL::isa($vals,'HASH')) {
@@ -2997,7 +2997,7 @@ sub new {
   my $self      = {};
   my $vals      = shift || {};
   $self->{defaultDefinition} = undef;
-  $self->{fullTextAnalyzerClassName} = "org.apache.lucene.analysis.standard.StandardAnalyzer";
+  $self->{fullTextAnalyzerClassName} = "org.apache.blur.analysis.NoStopWordStandardAnalyzer";
   $self->{columnFamilyDefinitions} = undef;
   if (UNIVERSAL::isa($vals,'HASH')) {
     if (defined $vals->{defaultDefinition}) {

http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/50f1eaaf/distribution/src/main/scripts/interface/gen-rb/blur_types.rb
----------------------------------------------------------------------
diff --git a/distribution/src/main/scripts/interface/gen-rb/blur_types.rb b/distribution/src/main/scripts/interface/gen-rb/blur_types.rb
index cc2f45c..646d5eb 100644
--- a/distribution/src/main/scripts/interface/gen-rb/blur_types.rb
+++ b/distribution/src/main/scripts/interface/gen-rb/blur_types.rb
@@ -682,7 +682,7 @@ module Blur
     ALTERNATECOLUMNDEFINITIONS = 3
 
     FIELDS = {
-      ANALYZERCLASSNAME => {:type => ::Thrift::Types::STRING, :name => 'analyzerClassName', :default => %q"org.apache.lucene.analysis.standard.StandardAnalyzer"},
+      ANALYZERCLASSNAME => {:type => ::Thrift::Types::STRING, :name => 'analyzerClassName', :default => %q"org.apache.blur.analysis.NoStopWordStandardAnalyzer"},
       FULLTEXTINDEX => {:type => ::Thrift::Types::BOOL, :name => 'fullTextIndex'},
       ALTERNATECOLUMNDEFINITIONS => {:type => ::Thrift::Types::MAP, :name => 'alternateColumnDefinitions', :key => {:type => ::Thrift::Types::STRING}, :value => {:type => ::Thrift::Types::STRUCT, :class => ::Blur::AlternateColumnDefinition}}
     }
@@ -727,7 +727,7 @@ module Blur
       # 
       DEFAULTDEFINITION => {:type => ::Thrift::Types::STRUCT, :name => 'defaultDefinition', :class => ::Blur::ColumnDefinition},
       # 
-      FULLTEXTANALYZERCLASSNAME => {:type => ::Thrift::Types::STRING, :name => 'fullTextAnalyzerClassName', :default => %q"org.apache.lucene.analysis.standard.StandardAnalyzer"},
+      FULLTEXTANALYZERCLASSNAME => {:type => ::Thrift::Types::STRING, :name => 'fullTextAnalyzerClassName', :default => %q"org.apache.blur.analysis.NoStopWordStandardAnalyzer"},
       # 
       COLUMNFAMILYDEFINITIONS => {:type => ::Thrift::Types::MAP, :name => 'columnFamilyDefinitions', :key => {:type => ::Thrift::Types::STRING}, :value => {:type => ::Thrift::Types::STRUCT, :class => ::Blur::ColumnFamilyDefinition}}
     }


Mime
View raw message