incubator-blur-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Aaron McCurry <amccu...@gmail.com>
Subject Re: [1/2] git commit: Adding stop word option to the text type.
Date Tue, 27 Aug 2013 13:12:37 GMT
Not sure I follow your question, but if I understand correctly, the
difference is that you can define properties per FieldTypeDefinition
instance.  So even though the text type is the same class, it can be
configured separately for each column definition.  That means you could
have a different stopWord file for a "doc.body" column than for a
"summary.body" column.  Is that your question?

Aaron


On Tue, Aug 27, 2013 at 8:55 AM, Tim Williams <williamstw@gmail.com> wrote:

> I admit I've not followed along very closely with the new type system,
> but the new configure signatures seem confusing. How do you know
> whether to put a given key/val in properties vs. configuration?
>
> --tim
>
>
> On Tue, Aug 27, 2013 at 8:34 AM,  <amccurry@apache.org> wrote:
> > Updated Branches:
> >   refs/heads/master 4cfcce30a -> d9f51871c
> >
> >
> > Adding stop word option to the text type.
> >
> >
> > Project: http://git-wip-us.apache.org/repos/asf/incubator-blur/repo
> > Commit:
> http://git-wip-us.apache.org/repos/asf/incubator-blur/commit/5e417475
> > Tree:
> http://git-wip-us.apache.org/repos/asf/incubator-blur/tree/5e417475
> > Diff:
> http://git-wip-us.apache.org/repos/asf/incubator-blur/diff/5e417475
> >
> > Branch: refs/heads/master
> > Commit: 5e417475390fa5e89333270933a98a6538db8367
> > Parents: 4c6ef4d
> > Author: Aaron McCurry <amccurry@gmail.com>
> > Authored: Tue Aug 27 08:33:50 2013 -0400
> > Committer: Aaron McCurry <amccurry@gmail.com>
> > Committed: Tue Aug 27 08:33:50 2013 -0400
> >
> > ----------------------------------------------------------------------
> >  .../apache/blur/analysis/BaseFieldManager.java  | 15 +++++----
> >  .../blur/analysis/FieldTypeDefinition.java      |  5 +--
> >  .../apache/blur/analysis/HdfsFieldManager.java  |  4 +--
> >  .../analysis/type/DateFieldTypeDefinition.java  |  3 +-
> >  .../type/DoubleFieldTypeDefinition.java         |  3 +-
> >  .../type/FieldLessFieldTypeDefinition.java      |  3 +-
> >  .../analysis/type/FloatFieldTypeDefinition.java |  3 +-
> >  .../analysis/type/IntFieldTypeDefinition.java   |  3 +-
> >  .../analysis/type/LongFieldTypeDefinition.java  |  3 +-
> >  .../type/StoredFieldTypeDefinition.java         |  3 +-
> >  .../type/StringFieldTypeDefinition.java         |  3 +-
> >  .../analysis/type/TextFieldTypeDefinition.java  | 32
> +++++++++++++++++---
> >  ...lPointVectorStrategyFieldTypeDefinition.java |  3 +-
> >  ...vePrefixTreeStrategyFieldTypeDefinition.java |  3 +-
> >  ...ryPrefixTreeStrategyFieldTypeDefinition.java |  3 +-
> >  .../blur/analysis/BaseFieldManagerTest.java     |  3 +-
> >  .../blur/lucene/search/SuperParserTest.java     |  3 +-
> >  docs/data-model.html                            |  3 +-
> >  18 files changed, 70 insertions(+), 28 deletions(-)
> > ----------------------------------------------------------------------
> >
> >
> >
> http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/5e417475/blur-query/src/main/java/org/apache/blur/analysis/BaseFieldManager.java
> > ----------------------------------------------------------------------
> > diff --git
> a/blur-query/src/main/java/org/apache/blur/analysis/BaseFieldManager.java
> b/blur-query/src/main/java/org/apache/blur/analysis/BaseFieldManager.java
> > index 62c900e..55c7d70 100644
> > ---
> a/blur-query/src/main/java/org/apache/blur/analysis/BaseFieldManager.java
> > +++
> b/blur-query/src/main/java/org/apache/blur/analysis/BaseFieldManager.java
> > @@ -47,6 +47,7 @@ import org.apache.blur.log.LogFactory;
> >  import org.apache.blur.thrift.generated.Column;
> >  import org.apache.blur.thrift.generated.Record;
> >  import org.apache.blur.utils.BlurConstants;
> > +import org.apache.hadoop.conf.Configuration;
> >  import org.apache.lucene.analysis.Analyzer;
> >  import org.apache.lucene.analysis.AnalyzerWrapper;
> >  import org.apache.lucene.document.Field;
> > @@ -72,6 +73,7 @@ public abstract class BaseFieldManager extends
> FieldManager {
> >    private final boolean _defaultMissingFieldLessIndexing;
> >    private final boolean _strict;
> >    private final FieldTypeDefinition _fieldLessFieldTypeDefinition;
> > +  private final Configuration _configuration;
> >
> >    public static FieldType ID_TYPE;
> >    static {
> > @@ -88,13 +90,14 @@ public abstract class BaseFieldManager extends
> FieldManager {
> >      SUPER_FIELD_TYPE.setOmitNorms(true);
> >    }
> >
> > -  public BaseFieldManager(String fieldLessField, final Analyzer
> defaultAnalyzerForQuerying) throws IOException {
> > -    this(fieldLessField, defaultAnalyzerForQuerying, true, null, false,
> null);
> > +  public BaseFieldManager(String fieldLessField, final Analyzer
> defaultAnalyzerForQuerying, Configuration configuration)
> > +      throws IOException {
> > +    this(fieldLessField, defaultAnalyzerForQuerying, true, null, false,
> null, configuration);
> >    }
> >
> >    public BaseFieldManager(String fieldLessField, final Analyzer
> defaultAnalyzerForQuerying, boolean strict,
> >        String defaultMissingFieldType, boolean
> defaultMissingFieldLessIndexing,
> > -      Map<String, String> defaultMissingFieldProps) throws IOException {
> > +      Map<String, String> defaultMissingFieldProps, Configuration
> configuration) throws IOException {
> >      registerType(TextFieldTypeDefinition.class);
> >      registerType(StringFieldTypeDefinition.class);
> >      registerType(StoredFieldTypeDefinition.class);
> > @@ -111,7 +114,7 @@ public abstract class BaseFieldManager extends
> FieldManager {
> >      _defaultMissingFieldLessIndexing = defaultMissingFieldLessIndexing;
> >      _defaultMissingFieldType = defaultMissingFieldType;
> >      _defaultMissingFieldProps = defaultMissingFieldProps;
> > -
> > +    _configuration = configuration;
> >      _fieldLessFieldTypeDefinition = new FieldLessFieldTypeDefinition();
> >
> >      _baseAnalyzerForQuery = new AnalyzerWrapper() {
> > @@ -390,9 +393,9 @@ public abstract class BaseFieldManager extends
> FieldManager {
> >        throw new RuntimeException(e);
> >      }
> >      if (props == null) {
> > -      fieldTypeDefinition.configure(fieldName, EMPTY_MAP);
> > +      fieldTypeDefinition.configure(fieldName, EMPTY_MAP,
> _configuration);
> >      } else {
> > -      fieldTypeDefinition.configure(fieldName, props);
> > +      fieldTypeDefinition.configure(fieldName, props, _configuration);
> >      }
> >      fieldTypeDefinition.setFieldLessIndexed(fieldLessIndexed);
> >      return fieldTypeDefinition;
> >
> >
> http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/5e417475/blur-query/src/main/java/org/apache/blur/analysis/FieldTypeDefinition.java
> > ----------------------------------------------------------------------
> > diff --git
> a/blur-query/src/main/java/org/apache/blur/analysis/FieldTypeDefinition.java
> b/blur-query/src/main/java/org/apache/blur/analysis/FieldTypeDefinition.java
> > index 19a67a3..9898ff2 100644
> > ---
> a/blur-query/src/main/java/org/apache/blur/analysis/FieldTypeDefinition.java
> > +++
> b/blur-query/src/main/java/org/apache/blur/analysis/FieldTypeDefinition.java
> > @@ -22,6 +22,7 @@ import java.util.Iterator;
> >  import java.util.Map;
> >
> >  import org.apache.blur.thrift.generated.Column;
> > +import org.apache.hadoop.conf.Configuration;
> >  import org.apache.lucene.analysis.Analyzer;
> >  import org.apache.lucene.document.Field;
> >  import org.apache.lucene.search.Query;
> > @@ -68,7 +69,8 @@ public abstract class FieldTypeDefinition {
> >     * @param properties
> >     *          the properties.
> >     */
> > -  public abstract void configure(String fieldNameForThisInstance,
> Map<String, String> properties);
> > +  public abstract void configure(String fieldNameForThisInstance,
> Map<String, String> properties,
> > +      Configuration configuration);
> >
> >    /**
> >     * Gets the {@link Field}s for indexing from a single Column.
> > @@ -212,5 +214,4 @@ public abstract class FieldTypeDefinition {
> >      this._properties = properties;
> >    }
> >
> > -
> >  }
> >
> >
> http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/5e417475/blur-query/src/main/java/org/apache/blur/analysis/HdfsFieldManager.java
> > ----------------------------------------------------------------------
> > diff --git
> a/blur-query/src/main/java/org/apache/blur/analysis/HdfsFieldManager.java
> b/blur-query/src/main/java/org/apache/blur/analysis/HdfsFieldManager.java
> > index 3bd6a93..a293c91 100644
> > ---
> a/blur-query/src/main/java/org/apache/blur/analysis/HdfsFieldManager.java
> > +++
> b/blur-query/src/main/java/org/apache/blur/analysis/HdfsFieldManager.java
> > @@ -39,7 +39,7 @@ import org.apache.lucene.analysis.Analyzer;
> >
> >  public class HdfsFieldManager extends BaseFieldManager {
> >
> > -  private static final List<String> EMPTY_LIST = Arrays.asList(new
> String[]{});
> > +  private static final List<String> EMPTY_LIST = Arrays.asList(new
> String[] {});
> >
> >    public static abstract class Lock {
> >
> > @@ -84,7 +84,7 @@ public class HdfsFieldManager extends BaseFieldManager
> {
> >        Configuration configuration, boolean strict, String
> defaultMissingFieldType,
> >        boolean defaultMissingFieldLessIndexing, Map<String, String>
> defaultMissingFieldProps) throws IOException {
> >      super(fieldLessField, defaultAnalyzerForQuerying, strict,
> defaultMissingFieldType, defaultMissingFieldLessIndexing,
> > -        defaultMissingFieldProps);
> > +        defaultMissingFieldProps, configuration);
> >      _storagePath = storagePath;
> >      _configuration = configuration;
> >      _fileSystem = _storagePath.getFileSystem(_configuration);
> >
> >
> http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/5e417475/blur-query/src/main/java/org/apache/blur/analysis/type/DateFieldTypeDefinition.java
> > ----------------------------------------------------------------------
> > diff --git
> a/blur-query/src/main/java/org/apache/blur/analysis/type/DateFieldTypeDefinition.java
> b/blur-query/src/main/java/org/apache/blur/analysis/type/DateFieldTypeDefinition.java
> > index adfd03b..e3c8cc3 100644
> > ---
> a/blur-query/src/main/java/org/apache/blur/analysis/type/DateFieldTypeDefinition.java
> > +++
> b/blur-query/src/main/java/org/apache/blur/analysis/type/DateFieldTypeDefinition.java
> > @@ -25,6 +25,7 @@ import java.util.Map;
> >  import java.util.concurrent.TimeUnit;
> >
> >  import org.apache.blur.thrift.generated.Column;
> > +import org.apache.hadoop.conf.Configuration;
> >  import org.apache.lucene.document.Field;
> >  import org.apache.lucene.document.FieldType;
> >  import org.apache.lucene.document.LongField;
> > @@ -47,7 +48,7 @@ public class DateFieldTypeDefinition extends
> NumericFieldTypeDefinition {
> >    }
> >
> >    @Override
> > -  public void configure(String fieldNameForThisInstance, Map<String,
> String> properties) {
> > +  public void configure(String fieldNameForThisInstance, Map<String,
> String> properties, Configuration configuration) {
> >      final String dateFormat = properties.get(DATE_FORMAT);
> >      if (dateFormat == null) {
> >        throw new RuntimeException("The property [" + DATE_FORMAT + "]
> can not be null.");
> >
> >
> http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/5e417475/blur-query/src/main/java/org/apache/blur/analysis/type/DoubleFieldTypeDefinition.java
> > ----------------------------------------------------------------------
> > diff --git
> a/blur-query/src/main/java/org/apache/blur/analysis/type/DoubleFieldTypeDefinition.java
> b/blur-query/src/main/java/org/apache/blur/analysis/type/DoubleFieldTypeDefinition.java
> > index 4c38d33..3e2db14 100644
> > ---
> a/blur-query/src/main/java/org/apache/blur/analysis/type/DoubleFieldTypeDefinition.java
> > +++
> b/blur-query/src/main/java/org/apache/blur/analysis/type/DoubleFieldTypeDefinition.java
> > @@ -19,6 +19,7 @@ package org.apache.blur.analysis.type;
> >  import java.util.Map;
> >
> >  import org.apache.blur.thrift.generated.Column;
> > +import org.apache.hadoop.conf.Configuration;
> >  import org.apache.lucene.document.DoubleField;
> >  import org.apache.lucene.document.Field;
> >  import org.apache.lucene.document.FieldType;
> > @@ -37,7 +38,7 @@ public class DoubleFieldTypeDefinition extends
> NumericFieldTypeDefinition {
> >    }
> >
> >    @Override
> > -  public void configure(String fieldNameForThisInstance, Map<String,
> String> properties) {
> > +  public void configure(String fieldNameForThisInstance, Map<String,
> String> properties, Configuration configuration) {
> >      String precisionStepStr = properties.get(NUMERIC_PRECISION_STEP);
> >      if (precisionStepStr != null) {
> >        _precisionStep = Integer.parseInt(precisionStepStr);
> >
> >
> http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/5e417475/blur-query/src/main/java/org/apache/blur/analysis/type/FieldLessFieldTypeDefinition.java
> > ----------------------------------------------------------------------
> > diff --git
> a/blur-query/src/main/java/org/apache/blur/analysis/type/FieldLessFieldTypeDefinition.java
> b/blur-query/src/main/java/org/apache/blur/analysis/type/FieldLessFieldTypeDefinition.java
> > index 2824d85..3d76e72 100644
> > ---
> a/blur-query/src/main/java/org/apache/blur/analysis/type/FieldLessFieldTypeDefinition.java
> > +++
> b/blur-query/src/main/java/org/apache/blur/analysis/type/FieldLessFieldTypeDefinition.java
> > @@ -21,6 +21,7 @@ import java.util.Map;
> >  import org.apache.blur.analysis.FieldTypeDefinition;
> >  import org.apache.blur.analysis.NoStopWordStandardAnalyzer;
> >  import org.apache.blur.thrift.generated.Column;
> > +import org.apache.hadoop.conf.Configuration;
> >  import org.apache.lucene.analysis.Analyzer;
> >  import org.apache.lucene.document.Field;
> >  import org.apache.lucene.document.FieldType;
> > @@ -43,7 +44,7 @@ public class FieldLessFieldTypeDefinition extends
> FieldTypeDefinition {
> >    }
> >
> >    @Override
> > -  public void configure(String fieldNameForThisInstance, Map<String,
> String> properties) {
> > +  public void configure(String fieldNameForThisInstance, Map<String,
> String> properties, Configuration configuration) {
> >
> >    }
> >
> >
> >
> http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/5e417475/blur-query/src/main/java/org/apache/blur/analysis/type/FloatFieldTypeDefinition.java
> > ----------------------------------------------------------------------
> > diff --git
> a/blur-query/src/main/java/org/apache/blur/analysis/type/FloatFieldTypeDefinition.java
> b/blur-query/src/main/java/org/apache/blur/analysis/type/FloatFieldTypeDefinition.java
> > index fcbdc20..66567c8 100644
> > ---
> a/blur-query/src/main/java/org/apache/blur/analysis/type/FloatFieldTypeDefinition.java
> > +++
> b/blur-query/src/main/java/org/apache/blur/analysis/type/FloatFieldTypeDefinition.java
> > @@ -19,6 +19,7 @@ package org.apache.blur.analysis.type;
> >  import java.util.Map;
> >
> >  import org.apache.blur.thrift.generated.Column;
> > +import org.apache.hadoop.conf.Configuration;
> >  import org.apache.lucene.document.Field;
> >  import org.apache.lucene.document.FieldType;
> >  import org.apache.lucene.document.FloatField;
> > @@ -37,7 +38,7 @@ public class FloatFieldTypeDefinition extends
> NumericFieldTypeDefinition {
> >    }
> >
> >    @Override
> > -  public void configure(String fieldNameForThisInstance, Map<String,
> String> properties) {
> > +  public void configure(String fieldNameForThisInstance, Map<String,
> String> properties, Configuration configuration) {
> >      String precisionStepStr = properties.get(NUMERIC_PRECISION_STEP);
> >      if (precisionStepStr != null) {
> >        _precisionStep = Integer.parseInt(precisionStepStr);
> >
> >
> http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/5e417475/blur-query/src/main/java/org/apache/blur/analysis/type/IntFieldTypeDefinition.java
> > ----------------------------------------------------------------------
> > diff --git
> a/blur-query/src/main/java/org/apache/blur/analysis/type/IntFieldTypeDefinition.java
> b/blur-query/src/main/java/org/apache/blur/analysis/type/IntFieldTypeDefinition.java
> > index cd26f08..f20694e 100644
> > ---
> a/blur-query/src/main/java/org/apache/blur/analysis/type/IntFieldTypeDefinition.java
> > +++
> b/blur-query/src/main/java/org/apache/blur/analysis/type/IntFieldTypeDefinition.java
> > @@ -19,6 +19,7 @@ package org.apache.blur.analysis.type;
> >  import java.util.Map;
> >
> >  import org.apache.blur.thrift.generated.Column;
> > +import org.apache.hadoop.conf.Configuration;
> >  import org.apache.lucene.document.Field;
> >  import org.apache.lucene.document.FieldType;
> >  import org.apache.lucene.document.IntField;
> > @@ -37,7 +38,7 @@ public class IntFieldTypeDefinition extends
> NumericFieldTypeDefinition {
> >    }
> >
> >    @Override
> > -  public void configure(String fieldNameForThisInstance, Map<String,
> String> properties) {
> > +  public void configure(String fieldNameForThisInstance, Map<String,
> String> properties, Configuration configuration) {
> >      String precisionStepStr = properties.get(NUMERIC_PRECISION_STEP);
> >      if (precisionStepStr != null) {
> >        _precisionStep = Integer.parseInt(precisionStepStr);
> >
> >
> http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/5e417475/blur-query/src/main/java/org/apache/blur/analysis/type/LongFieldTypeDefinition.java
> > ----------------------------------------------------------------------
> > diff --git
> a/blur-query/src/main/java/org/apache/blur/analysis/type/LongFieldTypeDefinition.java
> b/blur-query/src/main/java/org/apache/blur/analysis/type/LongFieldTypeDefinition.java
> > index aa7d08d..d28b726 100644
> > ---
> a/blur-query/src/main/java/org/apache/blur/analysis/type/LongFieldTypeDefinition.java
> > +++
> b/blur-query/src/main/java/org/apache/blur/analysis/type/LongFieldTypeDefinition.java
> > @@ -19,6 +19,7 @@ package org.apache.blur.analysis.type;
> >  import java.util.Map;
> >
> >  import org.apache.blur.thrift.generated.Column;
> > +import org.apache.hadoop.conf.Configuration;
> >  import org.apache.lucene.document.Field;
> >  import org.apache.lucene.document.FieldType;
> >  import org.apache.lucene.document.LongField;
> > @@ -37,7 +38,7 @@ public class LongFieldTypeDefinition extends
> NumericFieldTypeDefinition {
> >    }
> >
> >    @Override
> > -  public void configure(String fieldNameForThisInstance, Map<String,
> String> properties) {
> > +  public void configure(String fieldNameForThisInstance, Map<String,
> String> properties, Configuration configuration) {
> >      String precisionStepStr = properties.get(NUMERIC_PRECISION_STEP);
> >      if (precisionStepStr != null) {
> >        _precisionStep = Integer.parseInt(precisionStepStr);
> >
> >
> http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/5e417475/blur-query/src/main/java/org/apache/blur/analysis/type/StoredFieldTypeDefinition.java
> > ----------------------------------------------------------------------
> > diff --git
> a/blur-query/src/main/java/org/apache/blur/analysis/type/StoredFieldTypeDefinition.java
> b/blur-query/src/main/java/org/apache/blur/analysis/type/StoredFieldTypeDefinition.java
> > index 215ee4f..0f40d79 100644
> > ---
> a/blur-query/src/main/java/org/apache/blur/analysis/type/StoredFieldTypeDefinition.java
> > +++
> b/blur-query/src/main/java/org/apache/blur/analysis/type/StoredFieldTypeDefinition.java
> > @@ -21,6 +21,7 @@ import java.util.Map;
> >
> >  import org.apache.blur.analysis.FieldTypeDefinition;
> >  import org.apache.blur.thrift.generated.Column;
> > +import org.apache.hadoop.conf.Configuration;
> >  import org.apache.lucene.analysis.Analyzer;
> >  import org.apache.lucene.analysis.core.KeywordAnalyzer;
> >  import org.apache.lucene.document.Field;
> > @@ -37,7 +38,7 @@ public class StoredFieldTypeDefinition extends
> FieldTypeDefinition {
> >    }
> >
> >    @Override
> > -  public void configure(String fieldNameForThisInstance, Map<String,
> String> properties) {
> > +  public void configure(String fieldNameForThisInstance, Map<String,
> String> properties, Configuration configuration) {
> >
> >    }
> >
> >
> >
> http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/5e417475/blur-query/src/main/java/org/apache/blur/analysis/type/StringFieldTypeDefinition.java
> > ----------------------------------------------------------------------
> > diff --git
> a/blur-query/src/main/java/org/apache/blur/analysis/type/StringFieldTypeDefinition.java
> b/blur-query/src/main/java/org/apache/blur/analysis/type/StringFieldTypeDefinition.java
> > index 5e3ac2f..8791905 100644
> > ---
> a/blur-query/src/main/java/org/apache/blur/analysis/type/StringFieldTypeDefinition.java
> > +++
> b/blur-query/src/main/java/org/apache/blur/analysis/type/StringFieldTypeDefinition.java
> > @@ -20,6 +20,7 @@ import java.util.Map;
> >
> >  import org.apache.blur.analysis.FieldTypeDefinition;
> >  import org.apache.blur.thrift.generated.Column;
> > +import org.apache.hadoop.conf.Configuration;
> >  import org.apache.lucene.analysis.Analyzer;
> >  import org.apache.lucene.analysis.core.KeywordAnalyzer;
> >  import org.apache.lucene.document.Field;
> > @@ -35,7 +36,7 @@ public class StringFieldTypeDefinition extends
> FieldTypeDefinition {
> >    }
> >
> >    @Override
> > -  public void configure(String fieldNameForThisInstance, Map<String,
> String> properties) {
> > +  public void configure(String fieldNameForThisInstance, Map<String,
> String> properties, Configuration configuration) {
> >
> >    }
> >
> >
> >
> http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/5e417475/blur-query/src/main/java/org/apache/blur/analysis/type/TextFieldTypeDefinition.java
> > ----------------------------------------------------------------------
> > diff --git
> a/blur-query/src/main/java/org/apache/blur/analysis/type/TextFieldTypeDefinition.java
> b/blur-query/src/main/java/org/apache/blur/analysis/type/TextFieldTypeDefinition.java
> > index 3ff4b23..dd1a515 100644
> > ---
> a/blur-query/src/main/java/org/apache/blur/analysis/type/TextFieldTypeDefinition.java
> > +++
> b/blur-query/src/main/java/org/apache/blur/analysis/type/TextFieldTypeDefinition.java
> > @@ -16,22 +16,33 @@ package org.apache.blur.analysis.type;
> >   * See the License for the specific language governing permissions and
> >   * limitations under the License.
> >   */
> > +import java.io.IOException;
> > +import java.io.InputStreamReader;
> > +import java.io.Reader;
> >  import java.util.Map;
> >
> >  import org.apache.blur.analysis.FieldTypeDefinition;
> >  import org.apache.blur.analysis.NoStopWordStandardAnalyzer;
> > +import org.apache.blur.lucene.LuceneVersionConstant;
> >  import org.apache.blur.thrift.generated.Column;
> > +import org.apache.hadoop.conf.Configuration;
> > +import org.apache.hadoop.fs.FileSystem;
> > +import org.apache.hadoop.fs.Path;
> >  import org.apache.lucene.analysis.Analyzer;
> > +import org.apache.lucene.analysis.standard.StandardAnalyzer;
> >  import org.apache.lucene.document.Field;
> >  import org.apache.lucene.document.FieldType;
> >  import org.apache.lucene.document.TextField;
> >
> >  public class TextFieldTypeDefinition extends FieldTypeDefinition {
> >
> > +  public static final String STOP_WORD_PATH = "stopWordPath";
> >    public static final String NAME = "text";
> >    public static final FieldType TYPE_NOT_STORED;
> >    public static final FieldType TYPE_STORED;
> >
> > +  private Analyzer _analyzer;
> > +
> >    static {
> >      TYPE_STORED = new FieldType(TextField.TYPE_STORED);
> >      TYPE_STORED.setOmitNorms(true);
> > @@ -48,8 +59,21 @@ public class TextFieldTypeDefinition extends
> FieldTypeDefinition {
> >    }
> >
> >    @Override
> > -  public void configure(String fieldNameForThisInstance, Map<String,
> String> properties) {
> > -
> > +  public void configure(String fieldNameForThisInstance, Map<String,
> String> properties, Configuration configuration) {
> > +    String stopWordUri = properties.get(STOP_WORD_PATH);
> > +    if (stopWordUri == null) {
> > +      _analyzer = new NoStopWordStandardAnalyzer();
> > +    } else {
> > +      try {
> > +        Path path = new Path(stopWordUri);
> > +        FileSystem fileSystem = path.getFileSystem(configuration);
> > +        Reader reader = new InputStreamReader(fileSystem.open(path));
> > +        // Reader closed by analyzer
> > +        _analyzer = new
> StandardAnalyzer(LuceneVersionConstant.LUCENE_VERSION, reader);
> > +      } catch (IOException e) {
> > +        throw new RuntimeException(e);
> > +      }
> > +    }
> >    }
> >
> >    @Override
> > @@ -68,12 +92,12 @@ public class TextFieldTypeDefinition extends
> FieldTypeDefinition {
> >
> >    @Override
> >    public Analyzer getAnalyzerForIndex(String fieldName) {
> > -    return new NoStopWordStandardAnalyzer();
> > +    return _analyzer;
> >    }
> >
> >    @Override
> >    public Analyzer getAnalyzerForQuery(String fieldName) {
> > -    return new NoStopWordStandardAnalyzer();
> > +    return _analyzer;
> >    }
> >
> >    @Override
> >
> >
> http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/5e417475/blur-query/src/main/java/org/apache/blur/analysis/type/spatial/SpatialPointVectorStrategyFieldTypeDefinition.java
> > ----------------------------------------------------------------------
> > diff --git
> a/blur-query/src/main/java/org/apache/blur/analysis/type/spatial/SpatialPointVectorStrategyFieldTypeDefinition.java
> b/blur-query/src/main/java/org/apache/blur/analysis/type/spatial/SpatialPointVectorStrategyFieldTypeDefinition.java
> > index 093016e..fa3eec6 100644
> > ---
> a/blur-query/src/main/java/org/apache/blur/analysis/type/spatial/SpatialPointVectorStrategyFieldTypeDefinition.java
> > +++
> b/blur-query/src/main/java/org/apache/blur/analysis/type/spatial/SpatialPointVectorStrategyFieldTypeDefinition.java
> > @@ -20,6 +20,7 @@ import java.util.Arrays;
> >  import java.util.Collection;
> >  import java.util.Map;
> >
> > +import org.apache.hadoop.conf.Configuration;
> >  import org.apache.lucene.analysis.Analyzer;
> >  import org.apache.lucene.analysis.core.KeywordAnalyzer;
> >  import org.apache.lucene.spatial.query.SpatialOperation;
> > @@ -40,7 +41,7 @@ public class
> SpatialPointVectorStrategyFieldTypeDefinition extends BaseSpatialFi
> >    }
> >
> >    @Override
> > -  public void configure(String fieldNameForThisInstance, Map<String,
> String> properties) {
> > +  public void configure(String fieldNameForThisInstance, Map<String,
> String> properties, Configuration configuration) {
> >      _ctx = SpatialContext.GEO;
> >      _strategy = new PointVectorStrategy(_ctx, fieldNameForThisInstance);
> >      _shapeReadWriter = new ShapeReadWriter<SpatialContext>(_ctx);
> >
> >
> http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/5e417475/blur-query/src/main/java/org/apache/blur/analysis/type/spatial/SpatialRecursivePrefixTreeStrategyFieldTypeDefinition.java
> > ----------------------------------------------------------------------
> > diff --git
> a/blur-query/src/main/java/org/apache/blur/analysis/type/spatial/SpatialRecursivePrefixTreeStrategyFieldTypeDefinition.java
> b/blur-query/src/main/java/org/apache/blur/analysis/type/spatial/SpatialRecursivePrefixTreeStrategyFieldTypeDefinition.java
> > index b6c7798..060ee46 100644
> > ---
> a/blur-query/src/main/java/org/apache/blur/analysis/type/spatial/SpatialRecursivePrefixTreeStrategyFieldTypeDefinition.java
> > +++
> b/blur-query/src/main/java/org/apache/blur/analysis/type/spatial/SpatialRecursivePrefixTreeStrategyFieldTypeDefinition.java
> > @@ -18,6 +18,7 @@ package org.apache.blur.analysis.type.spatial;
> >   */
> >  import java.util.Map;
> >
> > +import org.apache.hadoop.conf.Configuration;
> >  import org.apache.lucene.spatial.prefix.RecursivePrefixTreeStrategy;
> >  import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
> >  import org.apache.lucene.spatial.query.SpatialOperation;
> > @@ -35,7 +36,7 @@ public class
> SpatialRecursivePrefixTreeStrategyFieldTypeDefinition extends BaseS
> >    }
> >
> >    @Override
> > -  public void configure(String fieldNameForThisInstance, Map<String,
> String> properties) {
> > +  public void configure(String fieldNameForThisInstance, Map<String,
> String> properties, Configuration configuration) {
> >      _ctx = SpatialContext.GEO;
> >      SpatialPrefixTree grid = getSpatialPrefixTree(properties);
> >      _strategy = new RecursivePrefixTreeStrategy(grid,
> fieldNameForThisInstance);
> >
> >
> http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/5e417475/blur-query/src/main/java/org/apache/blur/analysis/type/spatial/SpatialTermQueryPrefixTreeStrategyFieldTypeDefinition.java
> > ----------------------------------------------------------------------
> > diff --git
> a/blur-query/src/main/java/org/apache/blur/analysis/type/spatial/SpatialTermQueryPrefixTreeStrategyFieldTypeDefinition.java
> b/blur-query/src/main/java/org/apache/blur/analysis/type/spatial/SpatialTermQueryPrefixTreeStrategyFieldTypeDefinition.java
> > index a97a813..ed83e1e 100644
> > ---
> a/blur-query/src/main/java/org/apache/blur/analysis/type/spatial/SpatialTermQueryPrefixTreeStrategyFieldTypeDefinition.java
> > +++
> b/blur-query/src/main/java/org/apache/blur/analysis/type/spatial/SpatialTermQueryPrefixTreeStrategyFieldTypeDefinition.java
> > @@ -18,6 +18,7 @@ package org.apache.blur.analysis.type.spatial;
> >   */
> >  import java.util.Map;
> >
> > +import org.apache.hadoop.conf.Configuration;
> >  import org.apache.lucene.spatial.prefix.TermQueryPrefixTreeStrategy;
> >  import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
> >  import org.apache.lucene.spatial.query.SpatialOperation;
> > @@ -35,7 +36,7 @@ public class
> SpatialTermQueryPrefixTreeStrategyFieldTypeDefinition extends BaseS
> >    }
> >
> >    @Override
> > -  public void configure(String fieldNameForThisInstance, Map<String,
> String> properties) {
> > +  public void configure(String fieldNameForThisInstance, Map<String,
> String> properties, Configuration configuration) {
> >      _ctx = SpatialContext.GEO;
> >      SpatialPrefixTree grid = getSpatialPrefixTree(properties);
> >      _strategy = new TermQueryPrefixTreeStrategy(grid,
> fieldNameForThisInstance);
> >
> >
> http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/5e417475/blur-query/src/test/java/org/apache/blur/analysis/BaseFieldManagerTest.java
> > ----------------------------------------------------------------------
> > diff --git
> a/blur-query/src/test/java/org/apache/blur/analysis/BaseFieldManagerTest.java
> b/blur-query/src/test/java/org/apache/blur/analysis/BaseFieldManagerTest.java
> > index bee4b88..9b1a39d 100644
> > ---
> a/blur-query/src/test/java/org/apache/blur/analysis/BaseFieldManagerTest.java
> > +++
> b/blur-query/src/test/java/org/apache/blur/analysis/BaseFieldManagerTest.java
> > @@ -27,6 +27,7 @@ import
> org.apache.blur.analysis.type.TextFieldTypeDefinition;
> >  import org.apache.blur.thrift.generated.Column;
> >  import org.apache.blur.thrift.generated.Record;
> >  import org.apache.blur.utils.BlurConstants;
> > +import org.apache.hadoop.conf.Configuration;
> >  import org.apache.lucene.analysis.core.KeywordAnalyzer;
> >  import org.apache.lucene.document.Field;
> >  import org.junit.Test;
> > @@ -175,7 +176,7 @@ public class BaseFieldManagerTest {
> >    }
> >
> >    protected BaseFieldManager newFieldManager(boolean create) throws
> IOException {
> > -    return new BaseFieldManager(_fieldLessField, new KeywordAnalyzer())
> {
> > +    return new BaseFieldManager(_fieldLessField, new KeywordAnalyzer(),
> new Configuration()) {
> >        @Override
> >        protected boolean tryToStore(FieldTypeDefinition
> fieldTypeDefinition, String fieldName) {
> >          return true;
> >
> >
> http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/5e417475/blur-query/src/test/java/org/apache/blur/lucene/search/SuperParserTest.java
> > ----------------------------------------------------------------------
> > diff --git
> a/blur-query/src/test/java/org/apache/blur/lucene/search/SuperParserTest.java
> b/blur-query/src/test/java/org/apache/blur/lucene/search/SuperParserTest.java
> > index 738fd8c..baf5ca8 100644
> > ---
> a/blur-query/src/test/java/org/apache/blur/lucene/search/SuperParserTest.java
> > +++
> b/blur-query/src/test/java/org/apache/blur/lucene/search/SuperParserTest.java
> > @@ -19,6 +19,7 @@ import
> org.apache.blur.analysis.type.spatial.ShapeReadWriter;
> >  import org.apache.blur.analysis.type.spatial.SpatialArgsParser;
> >  import org.apache.blur.thrift.generated.ScoreType;
> >  import org.apache.blur.utils.BlurConstants;
> > +import org.apache.hadoop.conf.Configuration;
> >  import org.apache.lucene.analysis.Analyzer;
> >  import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
> >  import org.apache.lucene.index.Term;
> > @@ -55,7 +56,7 @@ public class SuperParserTest {
> >    }
> >
> >    private BaseFieldManager getFieldManager(Analyzer a) throws
> IOException {
> > -    BaseFieldManager fieldManager = new
> BaseFieldManager(BlurConstants.SUPER, a) {
> > +    BaseFieldManager fieldManager = new
> BaseFieldManager(BlurConstants.SUPER, a, new Configuration()) {
> >        @Override
> >        protected boolean tryToStore(FieldTypeDefinition
> fieldTypeDefinition, String fieldName) {
> >          return true;
> >
> >
> http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/5e417475/docs/data-model.html
> > ----------------------------------------------------------------------
> > diff --git a/docs/data-model.html b/docs/data-model.html
> > index 7c2da03..c3df371 100644
> > --- a/docs/data-model.html
> > +++ b/docs/data-model.html
> > @@ -300,7 +300,8 @@ To run a query to find all the rows that contain a
> location within 10 miles of g
> >  <pre><code class="json">text</code></pre>
> >                <h5>Property Options:</h5>
> >                <ul>
> > -              <li>None</li>
> > +              <li>&quot;stopWordPath&quot; -Optional- default value
> is no stop words.  This should be a HDFS path.
> > +       <br/>This will load stop words into the StandardAnalyzer for
> this field, one term per line.</li>
> >                </ul>
> >                   </p>
> >                <h3 id="string_type">String</h3>
> >
>

Mime
  • Unnamed multipart/alternative (inline, None, 0 bytes)
View raw message