incubator-blur-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From amccu...@apache.org
Subject [12/47] git commit: The first Blur type of long! BLUR-1
Date Mon, 03 Sep 2012 00:32:19 GMT
The first Blur type of long! BLUR-1


Project: http://git-wip-us.apache.org/repos/asf/incubator-blur/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-blur/commit/3f29c70d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-blur/tree/3f29c70d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-blur/diff/3f29c70d

Branch: refs/heads/master
Commit: 3f29c70ddca660cd3983c6afe9d0d7ad989feaeb
Parents: ebbe065
Author: Aaron McCurry <amccurry@gmail.com>
Authored: Tue Aug 28 21:40:42 2012 -0400
Committer: Aaron McCurry <amccurry@gmail.com>
Committed: Tue Aug 28 21:40:42 2012 -0400

----------------------------------------------------------------------
 .../nearinfinity/blur/analysis/BlurAnalyzer.java   |   30 +++++--
 .../nearinfinity/blur/analysis/FieldConverter.java |    9 ++
 .../blur/analysis/FieldConverterUtil.java          |   66 +++++++++++++++
 .../nearinfinity/blur/analysis/LongAnalyzer.java   |   47 ++++++++++
 .../blur/manager/writer/TransactionRecorder.java   |    2 +
 .../nearinfinity/blur/utils/RowIndexWriter.java    |    2 +
 .../blur/analysis/LongAnalyzerTest.java            |   26 ++++--
 .../blur/mapreduce/lib/BlurRecordWriter.java       |   20 +++--
 8 files changed, 178 insertions(+), 24 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/3f29c70d/src/blur-core/src/main/java/com/nearinfinity/blur/analysis/BlurAnalyzer.java
----------------------------------------------------------------------
diff --git a/src/blur-core/src/main/java/com/nearinfinity/blur/analysis/BlurAnalyzer.java b/src/blur-core/src/main/java/com/nearinfinity/blur/analysis/BlurAnalyzer.java
index b6d808b..713bf65 100644
--- a/src/blur-core/src/main/java/com/nearinfinity/blur/analysis/BlurAnalyzer.java
+++ b/src/blur-core/src/main/java/com/nearinfinity/blur/analysis/BlurAnalyzer.java
@@ -73,6 +73,8 @@ public class BlurAnalyzer extends Analyzer {
   private PerFieldAnalyzerWrapper _wrapper;
   private Analyzer _fullTextAnalyzer = new StandardAnalyzer(LUCENE_VERSION);
 
+  private HashMap<String, Analyzer> _analyzers;
+
   public void addSubField(String name) {
     int lastIndexOf = name.lastIndexOf('.');
     String mainFieldName = name.substring(0, lastIndexOf);
@@ -106,13 +108,24 @@ public class BlurAnalyzer extends Analyzer {
     }
     Analyzer defaultAnalyzer = getAnalyzerByClassName(defaultDefinition.getAnalyzerClassName(), aliases);
     KeywordAnalyzer keywordAnalyzer = new KeywordAnalyzer();
-    Map<String,Analyzer> analyzers = new HashMap<String, Analyzer>();
-    analyzers.put(ROW_ID, keywordAnalyzer);
-    analyzers.put(RECORD_ID, keywordAnalyzer);
-    analyzers.put(PRIME_DOC, keywordAnalyzer);
-    analyzers.put(SUPER, _fullTextAnalyzer);
-    load(analyzers);
-    _wrapper = new PerFieldAnalyzerWrapper(defaultAnalyzer,analyzers);
+    _analyzers = new HashMap<String, Analyzer>();
+    _analyzers.put(ROW_ID, keywordAnalyzer);
+    _analyzers.put(RECORD_ID, keywordAnalyzer);
+    _analyzers.put(PRIME_DOC, keywordAnalyzer);
+    _analyzers.put(SUPER, _fullTextAnalyzer);
+    load(_analyzers);
+    _wrapper = new PerFieldAnalyzerWrapper(defaultAnalyzer,_analyzers);
+  }
+  
+  public FieldConverter getFieldConverter(String name) {
+    if (_analyzers == null) {
+      return null;
+    }
+    Analyzer analyzer = _analyzers.get(name);
+    if (analyzer != null && analyzer instanceof FieldConverter) {
+      return (FieldConverter) analyzer;
+    }
+    return null;
   }
 
   private void load(Map<String, Analyzer> analyzers) {
@@ -164,6 +177,9 @@ public class BlurAnalyzer extends Analyzer {
 
   @SuppressWarnings("unchecked")
   private static Analyzer getAnalyzerByClassName(String className, Map<String, Class<? extends Analyzer>> aliases) {
+    if (FieldConverterUtil.isType(className)) {
+      return FieldConverterUtil.getAnalyzer(className);
+    }
     try {
       Class<? extends Analyzer> clazz = aliases.get(className);
       if (clazz == null) {

http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/3f29c70d/src/blur-core/src/main/java/com/nearinfinity/blur/analysis/FieldConverter.java
----------------------------------------------------------------------
diff --git a/src/blur-core/src/main/java/com/nearinfinity/blur/analysis/FieldConverter.java b/src/blur-core/src/main/java/com/nearinfinity/blur/analysis/FieldConverter.java
new file mode 100644
index 0000000..548fd69
--- /dev/null
+++ b/src/blur-core/src/main/java/com/nearinfinity/blur/analysis/FieldConverter.java
@@ -0,0 +1,9 @@
+package com.nearinfinity.blur.analysis;
+
+import org.apache.lucene.document.Fieldable;
+
+public interface FieldConverter {
+
+  Fieldable convert(Fieldable orig);
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/3f29c70d/src/blur-core/src/main/java/com/nearinfinity/blur/analysis/FieldConverterUtil.java
----------------------------------------------------------------------
diff --git a/src/blur-core/src/main/java/com/nearinfinity/blur/analysis/FieldConverterUtil.java b/src/blur-core/src/main/java/com/nearinfinity/blur/analysis/FieldConverterUtil.java
new file mode 100644
index 0000000..01554af
--- /dev/null
+++ b/src/blur-core/src/main/java/com/nearinfinity/blur/analysis/FieldConverterUtil.java
@@ -0,0 +1,66 @@
+package com.nearinfinity.blur.analysis;
+
+import java.util.List;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Fieldable;
+
+public class FieldConverterUtil {
+
+  private static final String LONG = "long";
+
+  /**
+   * This method runs the converter on each of the fields in the document and
+   * returns the original document.
+   * 
+   * @param document
+   *          the original document.
+   * @param converter
+   *          the converter.
+   * @return the original document.
+   */
+  public static Document convert(Document document, FieldConverter converter) {
+    List<Fieldable> fields = document.getFields();
+    int size = fields.size();
+    for (int i = 0; i < size; i++) {
+      Fieldable origField = fields.get(i);
+      Fieldable newField = converter.convert(origField);
+      if (newField != null) {
+        fields.set(i, newField);
+      }
+    }
+    return document;
+  }
+
+  public static Document convert(Document document, BlurAnalyzer analyzer) {
+    List<Fieldable> fields = document.getFields();
+    int size = fields.size();
+    for (int i = 0; i < size; i++) {
+      Fieldable origField = fields.get(i);
+      FieldConverter converter = analyzer.getFieldConverter(origField.name());
+      if (converter != null) {
+        Fieldable newField = converter.convert(origField);
+        if (newField != null) {
+          fields.set(i, newField);
+        }
+      }
+    }
+    return document;
+  }
+
+  public static boolean isType(String type) {
+    if (type.startsWith(LONG)) {
+      return true;
+    }
+    return false;
+  }
+
+  public static Analyzer getAnalyzer(String type) {
+    if (type.startsWith(LONG)) {
+      return new LongAnalyzer(type);
+    }
+    throw new RuntimeException("Type [" + type + "] not found.");
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/3f29c70d/src/blur-core/src/main/java/com/nearinfinity/blur/analysis/LongAnalyzer.java
----------------------------------------------------------------------
diff --git a/src/blur-core/src/main/java/com/nearinfinity/blur/analysis/LongAnalyzer.java b/src/blur-core/src/main/java/com/nearinfinity/blur/analysis/LongAnalyzer.java
new file mode 100644
index 0000000..4a58042
--- /dev/null
+++ b/src/blur-core/src/main/java/com/nearinfinity/blur/analysis/LongAnalyzer.java
@@ -0,0 +1,47 @@
+package com.nearinfinity.blur.analysis;
+
+import java.io.Reader;
+
+import org.apache.lucene.analysis.KeywordTokenizer;
+import org.apache.lucene.analysis.ReusableAnalyzerBase;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.Fieldable;
+import org.apache.lucene.document.NumericField;
+import org.apache.lucene.util.NumericUtils;
+
+public class LongAnalyzer extends ReusableAnalyzerBase implements FieldConverter {
+
+  private static final String TYPE = "long";
+
+  private int precisionStepDefault = NumericUtils.PRECISION_STEP_DEFAULT;
+
+  public LongAnalyzer(String typeStr) {
+    if (typeStr.startsWith(TYPE)) {
+      int index = typeStr.indexOf(',');
+      if (index > 0) {
+        String s = typeStr.substring(index + 1);
+        try {
+          precisionStepDefault = Integer.parseInt(s);
+        } catch (NumberFormatException e) {
+          throw new RuntimeException("Can not parser [" + s + "] into an integer for the precisionStepDefault.");
+        }
+      }
+    } else {
+      throw new RuntimeException("Long type can not parser [" + typeStr + "]");
+    }
+  }
+
+  @Override
+  protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
+    return new TokenStreamComponents(new KeywordTokenizer(reader));
+  }
+
+  @Override
+  public Fieldable convert(Fieldable fieldable) {
+    long value = Long.parseLong(fieldable.stringValue().trim());
+    NumericField field = new NumericField(fieldable.name(), precisionStepDefault, fieldable.isStored() ? Store.YES : Store.NO, true);
+    field.setLongValue(value);
+    return field;
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/3f29c70d/src/blur-core/src/main/java/com/nearinfinity/blur/manager/writer/TransactionRecorder.java
----------------------------------------------------------------------
diff --git a/src/blur-core/src/main/java/com/nearinfinity/blur/manager/writer/TransactionRecorder.java b/src/blur-core/src/main/java/com/nearinfinity/blur/manager/writer/TransactionRecorder.java
index 2d9e0b4..0cdd3c2 100644
--- a/src/blur-core/src/main/java/com/nearinfinity/blur/manager/writer/TransactionRecorder.java
+++ b/src/blur-core/src/main/java/com/nearinfinity/blur/manager/writer/TransactionRecorder.java
@@ -28,6 +28,7 @@ import org.apache.lucene.index.Term;
 import org.apache.lucene.search.NRTManager.TrackingIndexWriter;
 
 import com.nearinfinity.blur.analysis.BlurAnalyzer;
+import com.nearinfinity.blur.analysis.FieldConverterUtil;
 import com.nearinfinity.blur.index.IndexWriter;
 import com.nearinfinity.blur.log.Log;
 import com.nearinfinity.blur.log.LogFactory;
@@ -317,6 +318,7 @@ public class TransactionRecorder {
     document.add(new Field(BlurConstants.ROW_ID, rowId, Store.YES, Index.NOT_ANALYZED_NO_NORMS));
     document.add(new Field(BlurConstants.RECORD_ID, record.recordId, Store.YES, Index.NOT_ANALYZED_NO_NORMS));
     RowIndexWriter.addColumns(document, analyzer, builder, record.family, record.columns);
+    FieldConverterUtil.convert(document, analyzer);
     return document;
   }
 

http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/3f29c70d/src/blur-core/src/main/java/com/nearinfinity/blur/utils/RowIndexWriter.java
----------------------------------------------------------------------
diff --git a/src/blur-core/src/main/java/com/nearinfinity/blur/utils/RowIndexWriter.java b/src/blur-core/src/main/java/com/nearinfinity/blur/utils/RowIndexWriter.java
index 2f3d5d6..15efe4d 100644
--- a/src/blur-core/src/main/java/com/nearinfinity/blur/utils/RowIndexWriter.java
+++ b/src/blur-core/src/main/java/com/nearinfinity/blur/utils/RowIndexWriter.java
@@ -34,6 +34,7 @@ import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.Term;
 
 import com.nearinfinity.blur.analysis.BlurAnalyzer;
+import com.nearinfinity.blur.analysis.FieldConverterUtil;
 import com.nearinfinity.blur.thrift.generated.Column;
 import com.nearinfinity.blur.thrift.generated.Record;
 import com.nearinfinity.blur.thrift.generated.Row;
@@ -93,6 +94,7 @@ public class RowIndexWriter {
     document.add(new Field(ROW_ID, rowId, Store.YES, Index.NOT_ANALYZED_NO_NORMS));
     document.add(new Field(RECORD_ID, recordId, Store.YES, Index.NOT_ANALYZED_NO_NORMS));
     if (addColumns(document, _analyzer, builder, family, record.columns)) {
+      FieldConverterUtil.convert(document, _analyzer);
       if (!primeDocSet) {
         document.add(BlurConstants.PRIME_DOC_FIELD);
         primeDocSet = true;

http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/3f29c70d/src/blur-core/src/test/java/com/nearinfinity/blur/analysis/LongAnalyzerTest.java
----------------------------------------------------------------------
diff --git a/src/blur-core/src/test/java/com/nearinfinity/blur/analysis/LongAnalyzerTest.java b/src/blur-core/src/test/java/com/nearinfinity/blur/analysis/LongAnalyzerTest.java
index c4af678..5d781af 100644
--- a/src/blur-core/src/test/java/com/nearinfinity/blur/analysis/LongAnalyzerTest.java
+++ b/src/blur-core/src/test/java/com/nearinfinity/blur/analysis/LongAnalyzerTest.java
@@ -1,9 +1,9 @@
 package com.nearinfinity.blur.analysis;
 
+import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 
 import java.io.IOException;
-import java.util.Random;
 
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
@@ -22,35 +22,41 @@ import org.apache.lucene.util.Version;
 import org.junit.Test;
 
 import com.nearinfinity.blur.index.IndexWriter;
+import com.nearinfinity.blur.thrift.generated.AnalyzerDefinition;
+import com.nearinfinity.blur.thrift.generated.ColumnDefinition;
+import com.nearinfinity.blur.thrift.generated.ColumnFamilyDefinition;
 
 public class LongAnalyzerTest {
 
   @Test
   public void testLongAnalyzer() throws IOException {
-    LongAnalyzer analyzer = new LongAnalyzer("long");
+    AnalyzerDefinition analyzerDefinition = new AnalyzerDefinition();
+    ColumnFamilyDefinition cfDef = new ColumnFamilyDefinition();
+    cfDef.putToColumnDefinitions("test", new ColumnDefinition("long", true, null));
+    analyzerDefinition.putToColumnFamilyDefinitions("test", cfDef);
+    BlurAnalyzer analyzer = new BlurAnalyzer(analyzerDefinition);
+
     IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_36, analyzer);
     Directory dir = new RAMDirectory();
     IndexWriter indexWriter = new IndexWriter(dir, conf);
-    Random random = new Random();
-    int max = 17;
-    for (int i = 0; i < 100; i++) {
-      int v = random.nextInt(max);
+    for (int i = 0; i < 1000; i++) {
       Document document = new Document();
-      document.add(new Field("f1", Long.toString(v), Store.YES, Index.ANALYZED_NO_NORMS));
+      String value = Long.toString(i);
+      document.add(new Field("test.test", value, Store.YES, Index.ANALYZED_NO_NORMS));
       FieldConverterUtil.convert(document, analyzer);
       indexWriter.addDocument(document);
     }
     indexWriter.close();
 
     IndexSearcher searcher = new IndexSearcher(IndexReader.open(dir));
-    NumericRangeQuery<Long> query = NumericRangeQuery.newLongRange("f1", 0L, 2L, true, true);
+    NumericRangeQuery<Long> query = NumericRangeQuery.newLongRange("test.test", 0L, 2L, true, true);
     Query rewrite = searcher.rewrite(query);
     TopDocs docs = searcher.search(rewrite, 100);
     ScoreDoc[] scoreDocs = docs.scoreDocs;
+    assertEquals(3, docs.totalHits);
     for (int i = 0; i < docs.totalHits; i++) {
       Document document = searcher.doc(scoreDocs[i].doc);
-      assertTrue(Integer.parseInt(document.get("f1")) < 3);
+      assertTrue(Integer.parseInt(document.get("test.test")) < 3);
     }
   }
-
 }

http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/3f29c70d/src/blur-mapred/src/main/java/com/nearinfinity/blur/mapreduce/lib/BlurRecordWriter.java
----------------------------------------------------------------------
diff --git a/src/blur-mapred/src/main/java/com/nearinfinity/blur/mapreduce/lib/BlurRecordWriter.java b/src/blur-mapred/src/main/java/com/nearinfinity/blur/mapreduce/lib/BlurRecordWriter.java
index da657c8..48912b2 100644
--- a/src/blur-mapred/src/main/java/com/nearinfinity/blur/mapreduce/lib/BlurRecordWriter.java
+++ b/src/blur-mapred/src/main/java/com/nearinfinity/blur/mapreduce/lib/BlurRecordWriter.java
@@ -22,6 +22,8 @@ import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.NoLockFactory;
 import org.apache.lucene.util.Version;
 
+import com.nearinfinity.blur.log.Log;
+import com.nearinfinity.blur.log.LogFactory;
 import com.nearinfinity.blur.mapreduce.BlurColumn;
 import com.nearinfinity.blur.mapreduce.BlurRecord;
 import com.nearinfinity.blur.store.hdfs.HdfsDirectory;
@@ -30,6 +32,8 @@ import com.nearinfinity.blur.utils.BlurUtil;
 
 public class BlurRecordWriter extends RecordWriter<Text, BlurRecord> {
 
+  private static Log LOG = LogFactory.getLog(BlurRecordWriter.class);
+
   private Text prevKey = new Text();
   private List<Document> documents = new ArrayList<Document>();
   private IndexWriter writer;
@@ -40,17 +44,18 @@ public class BlurRecordWriter extends RecordWriter<Text, BlurRecord> {
     int id = context.getTaskAttemptID().getTaskID().getId();
     String shardName = BlurUtil.getShardName(BlurConstants.SHARD_PREFIX, id);
     Path basePath = new Path(outputPath);
-    Path indexPath = new Path(basePath,shardName);
-    
-    //@TODO
+    Path indexPath = new Path(basePath, shardName);
+
+    // @TODO
     Analyzer analyzer = new KeywordAnalyzer();
-    
+
     IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_35, analyzer);
-    
-    //@TODO setup compressed directory, read compression codec from config, setup progressable dir, setup lock factory
+
+    // @TODO setup compressed directory, read compression codec from config,
+    // setup progressable dir, setup lock factory
     Directory dir = new HdfsDirectory(indexPath);
     dir.setLockFactory(NoLockFactory.getNoLockFactory());
-    writer = new IndexWriter(dir,conf);
+    writer = new IndexWriter(dir, conf);
   }
 
   @Override
@@ -72,6 +77,7 @@ public class BlurRecordWriter extends RecordWriter<Text, BlurRecord> {
       document.add(convert(family, column));
     }
     documents.add(document);
+    LOG.error("Needs to use blur analyzer and field converter");
   }
 
   private Field convert(String family, BlurColumn column) {


Mime
View raw message