incubator-blur-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From amccu...@apache.org
Subject git commit: Updating field manager to be lucene based instead of Blur Row Record based.
Date Tue, 09 Sep 2014 14:13:24 GMT
Repository: incubator-blur
Updated Branches:
  refs/heads/FieldManagerUpdates [created] 50066bef2


Updating field manager to be lucene based instead of Blur Row Record based.


Project: http://git-wip-us.apache.org/repos/asf/incubator-blur/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-blur/commit/50066bef
Tree: http://git-wip-us.apache.org/repos/asf/incubator-blur/tree/50066bef
Diff: http://git-wip-us.apache.org/repos/asf/incubator-blur/diff/50066bef

Branch: refs/heads/FieldManagerUpdates
Commit: 50066bef2c4ed7b7f35403ae1c71dfa4bfc32325
Parents: 3e3ed54
Author: Aaron McCurry <amccurry@gmail.com>
Authored: Tue Sep 9 10:13:13 2014 -0400
Committer: Aaron McCurry <amccurry@gmail.com>
Committed: Tue Sep 9 10:13:13 2014 -0400

----------------------------------------------------------------------
 .../mapreduce/lib/v2/DirectIndexingDriver.java  | 93 ++++++++++++++++++--
 .../mapreduce/lib/v2/LuceneKeyWritable.java     | 22 ++++-
 .../lib/v2/DirectIndexingDriverTest.java        | 13 +--
 .../apache/blur/analysis/BaseFieldManager.java  | 31 +++++--
 4 files changed, 142 insertions(+), 17 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/50066bef/blur-mapred-hadoop1/src/main/java/org/apache/blur/mapreduce/lib/v2/DirectIndexingDriver.java
----------------------------------------------------------------------
diff --git a/blur-mapred-hadoop1/src/main/java/org/apache/blur/mapreduce/lib/v2/DirectIndexingDriver.java b/blur-mapred-hadoop1/src/main/java/org/apache/blur/mapreduce/lib/v2/DirectIndexingDriver.java
index bd56003..6cde1c1 100644
--- a/blur-mapred-hadoop1/src/main/java/org/apache/blur/mapreduce/lib/v2/DirectIndexingDriver.java
+++ b/blur-mapred-hadoop1/src/main/java/org/apache/blur/mapreduce/lib/v2/DirectIndexingDriver.java
@@ -17,21 +17,30 @@
 package org.apache.blur.mapreduce.lib.v2;
 
 import java.io.IOException;
+import java.util.List;
 
 import org.apache.blur.analysis.FieldManager;
+import org.apache.blur.mapreduce.lib.BlurOutputFormat;
+import org.apache.blur.server.TableContext;
+import org.apache.blur.thrift.generated.TableDescriptor;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hdfs.tools.DFSAdmin;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.mapreduce.Job;
 import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.Mapper.Context;
+import org.apache.hadoop.mapreduce.Reducer;
 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
 import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
 import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
 import org.apache.hadoop.util.Tool;
 import org.apache.hadoop.util.ToolRunner;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.util.BytesRef;
 
 public class DirectIndexingDriver implements Tool {
 
@@ -39,17 +48,69 @@ public class DirectIndexingDriver implements Tool {
       Mapper<IntWritable, DocumentWritable, LuceneKeyWritable, NullWritable> {
 
     private FieldManager _fieldManager;
+    private TableContext _tableContext;
 
     @Override
-    protected void setup(Context context) throws IOException, InterruptedException {
+    public void run(Context context) throws IOException, InterruptedException {
+      try {
+        super.run(context);
+      } catch (Throwable t) {
+        t.printStackTrace();
+        throw new IOException(t);
+      }
+    }
 
+    @Override
+    protected void setup(Context context) throws IOException, InterruptedException {
+      TableDescriptor tableDescriptor = BlurOutputFormat.getTableDescriptor(context.getConfiguration());
+      _tableContext = TableContext.create(tableDescriptor);
+      _fieldManager = _tableContext.getFieldManager();
     }
 
     @Override
     protected void map(IntWritable key, DocumentWritable value, Context context) throws IOException,
         InterruptedException {
+      System.out.println(key);
+      int shardId = getShardId();
       int documentId = key.get();
+      List<IndexableField> document = value.getDocument();
+      for (IndexableField field : document) {
+        writeField(shardId, documentId, field, context);
+      }
+    }
+
+    private void writeField(int shardId, int documentId, IndexableField field, Context context) throws IOException,
+        InterruptedException {
+      int fieldId = _fieldManager.getFieldId(field.name());
+      LUCENE_FIELD_TYPE type = LUCENE_FIELD_TYPE.lookupByClass(field.getClass());
+      switch (type) {
+      case StringField:
+        writeStringField(shardId, documentId, fieldId, (StringField) field, context);
+        break;
+      default:
+        throw new IOException("Type [" + type + "] not supported.");
+      }
+    }
+
+    private void writeStringField(int shardId, int documentId, int fieldId, StringField field, Context context)
+        throws IOException, InterruptedException {
+      String value = field.stringValue();
+      context.write(new LuceneKeyWritable(shardId, fieldId, new BytesRef(value), documentId), NullWritable.get());
+    }
+
+    private int getShardId() {
+      // TODO Get Shard Id
+      return 0;
+    }
+  }
+
+  public static class DirectIndexingReducer extends
+      Reducer<LuceneKeyWritable, NullWritable, NullWritable, NullWritable> {
 
+    @Override
+    protected void reduce(LuceneKeyWritable key, Iterable<NullWritable> values, Context context) throws IOException,
+        InterruptedException {
+      System.out.println(key);
     }
 
   }
@@ -68,19 +129,41 @@ public class DirectIndexingDriver implements Tool {
 
   @Override
   public int run(String[] args) throws Exception {
-
+    Configuration configuration = getConf();
     String in = args[0];
+    String out = args[1];
+
+    Path outputPath = new Path(out);
+    FileSystem fileSystem = outputPath.getFileSystem(configuration);
+    outputPath = fileSystem.makeQualified(outputPath);
 
-    Job job = new Job(getConf(), "Lucene Direct Indexing");
+    TableDescriptor tableDescriptor = new TableDescriptor();
+    tableDescriptor.setTableUri(outputPath.toString());
+    tableDescriptor.setName("test");
+    tableDescriptor.setShardCount(1);
+
+    BlurOutputFormat.setTableDescriptor(configuration, tableDescriptor);
+
+    Job job = new Job(configuration, "Lucene Direct Indexing");
     job.setJarByClass(DirectIndexingDriver.class);
     job.setMapperClass(DirectIndexingMapper.class);
+    job.setReducerClass(DirectIndexingReducer.class);
+
     job.setInputFormatClass(SequenceFileInputFormat.class);
 
     job.setOutputFormatClass(NullOutputFormat.class);
     job.setOutputKeyClass(LuceneKeyWritable.class);
     job.setOutputValueClass(NullWritable.class);
 
-    FileInputFormat.addInputPath(job, new Path(in));
+    job.setNumReduceTasks(1);
+
+    Path inputPath = new Path(in);
+
+    FileInputFormat.addInputPath(job, inputPath);
+    FileStatus[] listStatus = fileSystem.listStatus(inputPath);
+    for (FileStatus status : listStatus) {
+      System.out.println(status.getPath());
+    }
 
     if (!job.waitForCompletion(true)) {
       return 1;

http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/50066bef/blur-mapred-hadoop1/src/main/java/org/apache/blur/mapreduce/lib/v2/LuceneKeyWritable.java
----------------------------------------------------------------------
diff --git a/blur-mapred-hadoop1/src/main/java/org/apache/blur/mapreduce/lib/v2/LuceneKeyWritable.java b/blur-mapred-hadoop1/src/main/java/org/apache/blur/mapreduce/lib/v2/LuceneKeyWritable.java
index bca3c4f..f48b83f 100644
--- a/blur-mapred-hadoop1/src/main/java/org/apache/blur/mapreduce/lib/v2/LuceneKeyWritable.java
+++ b/blur-mapred-hadoop1/src/main/java/org/apache/blur/mapreduce/lib/v2/LuceneKeyWritable.java
@@ -63,11 +63,29 @@ public class LuceneKeyWritable implements WritableComparable<LuceneKeyWritable>
 
   }
 
-  public LuceneKeyWritable(int shardId, int fieldId, BytesRef text, Type type, int documentId, int position) {
+  public LuceneKeyWritable(int shardId, int fieldId, BytesRef text) {
     _shardId = shardId;
     _fieldId = fieldId;
     _text = text;
-    _type = type;
+    _type = Type.SHARD_FIELD_TEXT;
+    _documentId = -1;
+    _position = -1;
+  }
+
+  public LuceneKeyWritable(int shardId, int fieldId, BytesRef text, int documentId) {
+    _shardId = shardId;
+    _fieldId = fieldId;
+    _text = text;
+    _type = Type.SHARD_FIELD_TEXT_DOCUMENTID;
+    _documentId = documentId;
+    _position = -1;
+  }
+
+  public LuceneKeyWritable(int shardId, int fieldId, BytesRef text, int documentId, int position) {
+    _shardId = shardId;
+    _fieldId = fieldId;
+    _text = text;
+    _type = Type.SHARD_FIELD_TEXT_DOCUMENTID_POSITION;
     _documentId = documentId;
     _position = position;
   }

http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/50066bef/blur-mapred-hadoop1/src/test/java/org/apache/blur/mapreduce/lib/v2/DirectIndexingDriverTest.java
----------------------------------------------------------------------
diff --git a/blur-mapred-hadoop1/src/test/java/org/apache/blur/mapreduce/lib/v2/DirectIndexingDriverTest.java b/blur-mapred-hadoop1/src/test/java/org/apache/blur/mapreduce/lib/v2/DirectIndexingDriverTest.java
index d8f9a68..04e2f92 100644
--- a/blur-mapred-hadoop1/src/test/java/org/apache/blur/mapreduce/lib/v2/DirectIndexingDriverTest.java
+++ b/blur-mapred-hadoop1/src/test/java/org/apache/blur/mapreduce/lib/v2/DirectIndexingDriverTest.java
@@ -16,6 +16,8 @@
  */
 package org.apache.blur.mapreduce.lib.v2;
 
+import static org.junit.Assert.*;
+
 import java.io.IOException;
 import java.util.Random;
 
@@ -31,15 +33,16 @@ import org.junit.Test;
 
 public class DirectIndexingDriverTest {
 
-//  @Test
+  @Test
   public void testIndexing() throws Exception {
     Configuration configuration = new Configuration();
-    Path path = new Path("./tmp/test_DirectIndexingDriverTest_input/");
-    FileSystem fileSystem = path.getFileSystem(configuration);
-    createInputDocument(fileSystem, configuration, path);
+    Path inputPath = new Path("./tmp/test_DirectIndexingDriverTest_input/");
+    Path outputPath = new Path("./tmp/test_DirectIndexingDriverTest_output/");
+    FileSystem fileSystem = inputPath.getFileSystem(configuration);
+    createInputDocument(fileSystem, configuration, inputPath);
     DirectIndexingDriver directIndexingDriver = new DirectIndexingDriver();
     directIndexingDriver.setConf(configuration);
-    directIndexingDriver.run(new String[] { path.toString() });
+    assertEquals(0, directIndexingDriver.run(new String[] { inputPath.toString(), outputPath.toString() }));
   }
 
   public static void createInputDocument(FileSystem fileSystem, Configuration configuration, Path path)

http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/50066bef/blur-query/src/main/java/org/apache/blur/analysis/BaseFieldManager.java
----------------------------------------------------------------------
diff --git a/blur-query/src/main/java/org/apache/blur/analysis/BaseFieldManager.java b/blur-query/src/main/java/org/apache/blur/analysis/BaseFieldManager.java
index bc4c486..cc527f8 100644
--- a/blur-query/src/main/java/org/apache/blur/analysis/BaseFieldManager.java
+++ b/blur-query/src/main/java/org/apache/blur/analysis/BaseFieldManager.java
@@ -361,11 +361,32 @@ public abstract class BaseFieldManager extends FieldManager {
     } else {
       fieldName = baseFieldName;
     }
-    return addFieldTypeDefinition(family, columnName, subColumnName, fieldName, fieldLessIndexed, fieldType, sortable,
+    return addFieldTypeDefinition(baseFieldName, subColumnName, fieldName, fieldLessIndexed, fieldType, sortable,
+        props);
+  }
+  
+  public boolean addColumnDefinition(String baseFieldName, String subFieldName, boolean fieldLessIndexed,
+      String fieldType, boolean sortable, Map<String, String> props) throws IOException {
+    String fieldName;
+    if (subFieldName != null) {
+      FieldTypeDefinition primeFieldTypeDefinition = getFieldTypeDefinition(baseFieldName);
+      if (primeFieldTypeDefinition == null) {
+        throw new IllegalArgumentException("Base field of [" + baseFieldName
+            + "] not found, please add base before adding sub field.");
+      }
+      if (fieldLessIndexed) {
+        throw new IllegalArgumentException("Sub field of [" + subFieldName + "] from base of [" + baseFieldName
+            + "] cannot be added with fieldLessIndexing set to true.");
+      }
+      fieldName = baseFieldName + "." + subFieldName;
+    } else {
+      fieldName = baseFieldName;
+    }
+    return addFieldTypeDefinition(baseFieldName, subFieldName, fieldName, fieldLessIndexed, fieldType, sortable,
         props);
   }
 
-  private boolean addFieldTypeDefinition(String family, String columnName, String subColumnName, String fieldName,
+  private boolean addFieldTypeDefinition(String baseFieldName, String subFieldName, String fieldName,
       boolean fieldLessIndexed, String fieldType, boolean sortable, Map<String, String> props) throws IOException {
     FieldTypeDefinition fieldTypeDefinition = getFieldTypeDefinition(fieldName);
     if (fieldTypeDefinition != null) {
@@ -380,7 +401,7 @@ public abstract class BaseFieldManager extends FieldManager {
               + "], this field type definition cannot be added.");
         }
       }
-      setFields(fieldTypeDefinition, family, columnName, subColumnName, fieldLessIndexed, fieldType, props);
+      setFields(fieldTypeDefinition, baseFieldName, subFieldName, fieldLessIndexed, fieldType, props);
       if (!tryToStore(fieldTypeDefinition, fieldName)) {
         return false;
       }
@@ -389,8 +410,8 @@ public abstract class BaseFieldManager extends FieldManager {
     return true;
   }
 
-  private void setFields(FieldTypeDefinition fieldTypeDefinition, String family, String columnName,
-      String subColumnName, boolean fieldLessIndexed, String fieldType, Map<String, String> props) {
+  private void setFields(FieldTypeDefinition fieldTypeDefinition, String baseFieldName,
+      String subFieldName, boolean fieldLessIndexed, String fieldType, Map<String, String> props) {
     fieldTypeDefinition.setFamily(family);
     fieldTypeDefinition.setColumnName(columnName);
     fieldTypeDefinition.setSubColumnName(subColumnName);


Mime
View raw message