incubator-blur-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From amccu...@apache.org
Subject git commit: Adding a configurable analyzer class name.
Date Tue, 05 Nov 2013 02:23:00 GMT
Updated Branches:
  refs/heads/master f25c729b2 -> ee752f43b


Adding a configurable analyzer class name.


Project: http://git-wip-us.apache.org/repos/asf/incubator-blur/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-blur/commit/ee752f43
Tree: http://git-wip-us.apache.org/repos/asf/incubator-blur/tree/ee752f43
Diff: http://git-wip-us.apache.org/repos/asf/incubator-blur/diff/ee752f43

Branch: refs/heads/master
Commit: ee752f43b1a419a5965a69ca6b978234aa5ae8e6
Parents: f25c729
Author: Aaron McCurry <amccurry@gmail.com>
Authored: Mon Nov 4 21:22:40 2013 -0500
Committer: Aaron McCurry <amccurry@gmail.com>
Committed: Mon Nov 4 21:23:03 2013 -0500

----------------------------------------------------------------------
 .../analysis/type/TextFieldTypeDefinition.java  | 47 +++++++++++++++++++-
 docs/data-model.html                            |  2 +
 2 files changed, 47 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/ee752f43/blur-query/src/main/java/org/apache/blur/analysis/type/TextFieldTypeDefinition.java
----------------------------------------------------------------------
diff --git a/blur-query/src/main/java/org/apache/blur/analysis/type/TextFieldTypeDefinition.java b/blur-query/src/main/java/org/apache/blur/analysis/type/TextFieldTypeDefinition.java
index 6b40101..a625247 100644
--- a/blur-query/src/main/java/org/apache/blur/analysis/type/TextFieldTypeDefinition.java
+++ b/blur-query/src/main/java/org/apache/blur/analysis/type/TextFieldTypeDefinition.java
@@ -19,10 +19,13 @@ package org.apache.blur.analysis.type;
 import java.io.IOException;
 import java.io.InputStreamReader;
 import java.io.Reader;
+import java.lang.reflect.Constructor;
 import java.util.Map;
 
 import org.apache.blur.analysis.FieldTypeDefinition;
 import org.apache.blur.analysis.NoStopWordStandardAnalyzer;
+import org.apache.blur.log.Log;
+import org.apache.blur.log.LogFactory;
 import org.apache.blur.lucene.LuceneVersionConstant;
 import org.apache.blur.thrift.generated.Column;
 import org.apache.hadoop.conf.Configuration;
@@ -33,9 +36,13 @@ import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.TextField;
+import org.apache.lucene.util.Version;
 
 public class TextFieldTypeDefinition extends FieldTypeDefinition {
 
+  private static final Log LOG = LogFactory.getLog(TextFieldTypeDefinition.class);
+
+  public static final String ANALYZER_CLASS = "analyzerClass";
   public static final String STOP_WORD_PATH = "stopWordPath";
   public static final String NAME = "text";
   public static final FieldType TYPE_NOT_STORED;
@@ -61,9 +68,17 @@ public class TextFieldTypeDefinition extends FieldTypeDefinition {
   @Override
   public void configure(String fieldNameForThisInstance, Map<String, String> properties, Configuration configuration) {
     String stopWordUri = properties.get(STOP_WORD_PATH);
+    String className = properties.get(ANALYZER_CLASS);
     if (stopWordUri == null) {
-      _analyzer = new NoStopWordStandardAnalyzer();
+      if (className == null) {
+        _analyzer = new NoStopWordStandardAnalyzer();
+      } else {
+        _analyzer = instance(className);
+      }
     } else {
+      if (className != null) {
+        LOG.warn("Class name [{0}] ignored do due to the [{1}] property being set.", className, STOP_WORD_PATH);
+      }
       try {
         Path path = new Path(stopWordUri);
         FileSystem fileSystem = path.getFileSystem(configuration);
@@ -75,6 +90,34 @@ public class TextFieldTypeDefinition extends FieldTypeDefinition {
       }
     }
   }
+  
+  private static Analyzer instance(String name) {
+    try {
+
+      Class<?> clazz = Class.forName(name);
+      Constructor<?>[] constructors = clazz.getConstructors();
+      for (Constructor<?> constructor : constructors) {
+        Class<?>[] parameterTypes = constructor.getParameterTypes();
+        if (parameterTypes.length == 0) {
+          return (Analyzer) constructor.newInstance(new Object[] {});
+        }
+      }
+
+      for (Constructor<?> constructor : constructors) {
+        Class<?>[] parameterTypes = constructor.getParameterTypes();
+        if (parameterTypes.length == 1) {
+          Class<?> type = parameterTypes[0];
+          if (type.equals(Version.class)) {
+            return (Analyzer) constructor.newInstance(new Object[] { LuceneVersionConstant.LUCENE_VERSION });
+          }
+        }
+      }
+      throw new RuntimeException("Cannot find a default constructor or a constructor that takes a ["
+          + Version.class.getName() + "]");
+    } catch (Exception e) {
+      throw new RuntimeException(e);
+    }
+  }
 
   @Override
   public Iterable<? extends Field> getFieldsForColumn(String family, Column column) {
@@ -114,7 +157,7 @@ public class TextFieldTypeDefinition extends FieldTypeDefinition {
   public boolean checkSupportForPrefixQuery() {
     return true;
   }
-  
+
   @Override
   public boolean checkSupportForRegexQuery() {
     return true;

http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/ee752f43/docs/data-model.html
----------------------------------------------------------------------
diff --git a/docs/data-model.html b/docs/data-model.html
index fb7b3f9..4eecbe1 100644
--- a/docs/data-model.html
+++ b/docs/data-model.html
@@ -338,6 +338,8 @@ To run a query to find all the rows that contain a location within 10 miles of g
               <ul>
               <li>&quot;stopWordPath&quot; -Optional- default value is no stop words.  This should be a HDFS path.
 	<br/>This will load stop words into the StandardAnalyzer for this field, one term per line.</li>
+	
+	          <li>&quot;analyzerClass&quot; -Optional- default value is a standard analyzer with no stop words.<br/>This could be any Analyzer class that has a default constructor or one that takes a Lucene Version enum.</li>
               </ul>
 		  </p>
               <h3 id="string_type">String</h3>


Mime
View raw message