incubator-blur-commits mailing list archives

From amccu...@apache.org
Subject git commit: Starting new indexing process.
Date Tue, 09 Sep 2014 13:07:25 GMT
Repository: incubator-blur
Updated Branches:
  refs/heads/master 77a4f8499 -> 3e3ed548b


Starting new indexing process.


Project: http://git-wip-us.apache.org/repos/asf/incubator-blur/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-blur/commit/3e3ed548
Tree: http://git-wip-us.apache.org/repos/asf/incubator-blur/tree/3e3ed548
Diff: http://git-wip-us.apache.org/repos/asf/incubator-blur/diff/3e3ed548

Branch: refs/heads/master
Commit: 3e3ed548bb273377e2fe1db4c967b26477235244
Parents: 77a4f84
Author: Aaron McCurry <amccurry@gmail.com>
Authored: Tue Sep 9 09:06:33 2014 -0400
Committer: Aaron McCurry <amccurry@gmail.com>
Committed: Tue Sep 9 09:06:33 2014 -0400

----------------------------------------------------------------------
 .../mapreduce/lib/v2/DirectIndexingDriver.java  |  95 ++++++++++
 .../blur/mapreduce/lib/v2/DocumentWritable.java | 141 ++++++++++++++
 .../mapreduce/lib/v2/LUCENE_FIELD_TYPE.java     |  94 ++++++++++
 .../mapreduce/lib/v2/LuceneKeyWritable.java     | 182 +++++++++++++++++++
 .../v2/DirectIndexingDriverMiniClusterTest.java |  99 ++++++++++
 .../lib/v2/DirectIndexingDriverTest.java        |  68 +++++++
 .../apache/blur/analysis/BaseFieldManager.java  |   9 +
 .../org/apache/blur/analysis/FieldManager.java  |   5 +
 8 files changed, 693 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/3e3ed548/blur-mapred-hadoop1/src/main/java/org/apache/blur/mapreduce/lib/v2/DirectIndexingDriver.java
----------------------------------------------------------------------
diff --git a/blur-mapred-hadoop1/src/main/java/org/apache/blur/mapreduce/lib/v2/DirectIndexingDriver.java
b/blur-mapred-hadoop1/src/main/java/org/apache/blur/mapreduce/lib/v2/DirectIndexingDriver.java
new file mode 100644
index 0000000..bd56003
--- /dev/null
+++ b/blur-mapred-hadoop1/src/main/java/org/apache/blur/mapreduce/lib/v2/DirectIndexingDriver.java
@@ -0,0 +1,95 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.blur.mapreduce.lib.v2;
+
+import java.io.IOException;
+
+import org.apache.blur.analysis.FieldManager;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Mapper.Context;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+public class DirectIndexingDriver implements Tool {
+
+  public static class DirectIndexingMapper extends
+      Mapper<IntWritable, DocumentWritable, LuceneKeyWritable, NullWritable> {
+
+    private FieldManager _fieldManager;
+
+    @Override
+    protected void setup(Context context) throws IOException, InterruptedException {
+
+    }
+
+    @Override
+    protected void map(IntWritable key, DocumentWritable value, Context context) throws IOException,
+        InterruptedException {
+      int documentId = key.get();
+
+    }
+
+  }
+
+  private Configuration _conf;
+
+  @Override
+  public Configuration getConf() {
+    return _conf;
+  }
+
+  @Override
+  public void setConf(Configuration conf) {
+    _conf = conf;
+  }
+
+  @Override
+  public int run(String[] args) throws Exception {
+
+    String in = args[0];
+
+    Job job = new Job(getConf(), "Lucene Direct Indexing");
+    job.setJarByClass(DirectIndexingDriver.class);
+    job.setMapperClass(DirectIndexingMapper.class);
+    job.setInputFormatClass(SequenceFileInputFormat.class);
+
+    job.setOutputFormatClass(NullOutputFormat.class);
+    job.setOutputKeyClass(LuceneKeyWritable.class);
+    job.setOutputValueClass(NullWritable.class);
+
+    FileInputFormat.addInputPath(job, new Path(in));
+
+    if (!job.waitForCompletion(true)) {
+      return 1;
+    }
+    return 0;
+  }
+
+  public static void main(String[] args) throws Exception {
+    System.exit(ToolRunner.run(new DirectIndexingDriver(), args));
+  }
+
+}
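
For context, a minimal launch sketch (not part of the commit): it assumes the input path already holds a SequenceFile of <IntWritable, DocumentWritable> records, and the class name DirectIndexingLauncher and the path "/tmp/direct-indexing-input" are placeholders.

import org.apache.blur.mapreduce.lib.v2.DirectIndexingDriver;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ToolRunner;

public class DirectIndexingLauncher {
  public static void main(String[] args) throws Exception {
    // Hypothetical input location; replace with a real SequenceFile of
    // <IntWritable, DocumentWritable> pairs.
    String input = "/tmp/direct-indexing-input";
    Configuration conf = new Configuration();
    // ToolRunner wires the Configuration into the driver before run() is invoked.
    int exitCode = ToolRunner.run(conf, new DirectIndexingDriver(), new String[] { input });
    System.exit(exitCode);
  }
}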

http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/3e3ed548/blur-mapred-hadoop1/src/main/java/org/apache/blur/mapreduce/lib/v2/DocumentWritable.java
----------------------------------------------------------------------
diff --git a/blur-mapred-hadoop1/src/main/java/org/apache/blur/mapreduce/lib/v2/DocumentWritable.java
b/blur-mapred-hadoop1/src/main/java/org/apache/blur/mapreduce/lib/v2/DocumentWritable.java
new file mode 100644
index 0000000..8a88d8a
--- /dev/null
+++ b/blur-mapred-hadoop1/src/main/java/org/apache/blur/mapreduce/lib/v2/DocumentWritable.java
@@ -0,0 +1,141 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.blur.mapreduce.lib.v2;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableUtils;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.index.IndexableField;
+
+public class DocumentWritable implements Writable {
+
+  private static final String UTF_8 = "UTF-8";
+  private List<IndexableField> _document = new ArrayList<IndexableField>();
+
+  public DocumentWritable() {
+
+  }
+
+  public DocumentWritable(List<IndexableField> document) {
+    _document = document;
+  }
+
+  public void clear() {
+    _document.clear();
+  }
+
+  public void add(IndexableField field) {
+    _document.add(field);
+  }
+
+  public DocumentWritable(Document document) {
+    _document = document.getFields();
+  }
+
+  public List<IndexableField> getDocument() {
+    return _document;
+  }
+
+  @Override
+  public void write(DataOutput out) throws IOException {
+    int numberOfFields = _document.size();
+    WritableUtils.writeVInt(out, numberOfFields);
+    for (int i = 0; i < numberOfFields; i++) {
+      IndexableField field = _document.get(i);
+      write(out, field);
+    }
+  }
+
+  @Override
+  public void readFields(DataInput in) throws IOException {
+    _document.clear();
+    int numberOfFields = WritableUtils.readVInt(in);
+    for (int i = 0; i < numberOfFields; i++) {
+      IndexableField field = readField(in);
+      _document.add(field);
+    }
+  }
+
+  private IndexableField readField(DataInput in) throws IOException {
+    LUCENE_FIELD_TYPE type = LUCENE_FIELD_TYPE.lookupByValue(in.readByte());
+    String name = readString(in);
+    switch (type) {
+    case StringField:
+      return readStringField(in, name);
+    default:
+      throw new IOException("Type [" + type + "] not supported.");
+    }
+  }
+
+  private void write(DataOutput out, IndexableField field) throws IOException {
+    LUCENE_FIELD_TYPE type = LUCENE_FIELD_TYPE.lookupByClass(field.getClass());
+    out.writeByte(type.value());
+    writeString(out, field.name());
+    switch (type) {
+    case StringField:
+      writeStringField(out, (StringField) field);
+      return;
+    default:
+      throw new IOException("Type [" + type + "] not supported.");
+    }
+  }
+
+  private void writeStringField(DataOutput out, StringField stringField) throws IOException {
+    FieldType fieldType = stringField.fieldType();
+    if (fieldType.equals(StringField.TYPE_STORED)) {
+      out.writeBoolean(true);
+    } else if (fieldType.equals(StringField.TYPE_NOT_STORED)) {
+      out.writeBoolean(false);
+    } else {
+      throw new IOException("Non default FieldTypes for StringField not supported.");
+    }
+    writeString(out, stringField.stringValue());
+  }
+
+  private IndexableField readStringField(DataInput in, String name) throws IOException {
+    boolean stored = in.readBoolean();
+    String value = readString(in);
+    if (stored) {
+      return new StringField(name, value, Store.YES);
+    } else {
+      return new StringField(name, value, Store.NO);
+    }
+  }
+
+  private String readString(DataInput in) throws IOException {
+    int length = WritableUtils.readVInt(in);
+    byte[] buf = new byte[length];
+    in.readFully(buf);
+    return new String(buf, UTF_8);
+  }
+
+  private void writeString(DataOutput out, String value) throws IOException {
+    byte[] bs = value.getBytes(UTF_8);
+    WritableUtils.writeVInt(out, bs.length);
+    out.write(bs);
+  }
+
+}
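
As a rough illustration of the serialization contract above (this example is not in the commit), a DocumentWritable can be round-tripped through plain DataOutput/DataInput streams; note that only StringField with the default stored/not-stored field types is supported so far. The class name DocumentWritableRoundTrip is hypothetical.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;

import org.apache.blur.mapreduce.lib.v2.DocumentWritable;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;

public class DocumentWritableRoundTrip {
  public static void main(String[] args) throws Exception {
    DocumentWritable original = new DocumentWritable();
    original.add(new StringField("id", "row-1", Store.YES));

    // Serialize through the Writable interface.
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    original.write(new DataOutputStream(bytes));

    // Deserialize into a fresh instance.
    DocumentWritable copy = new DocumentWritable();
    copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));

    // Prints a single StringField carrying "row-1".
    System.out.println(copy.getDocument());
  }
}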

http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/3e3ed548/blur-mapred-hadoop1/src/main/java/org/apache/blur/mapreduce/lib/v2/LUCENE_FIELD_TYPE.java
----------------------------------------------------------------------
diff --git a/blur-mapred-hadoop1/src/main/java/org/apache/blur/mapreduce/lib/v2/LUCENE_FIELD_TYPE.java
b/blur-mapred-hadoop1/src/main/java/org/apache/blur/mapreduce/lib/v2/LUCENE_FIELD_TYPE.java
new file mode 100644
index 0000000..373f4ac
--- /dev/null
+++ b/blur-mapred-hadoop1/src/main/java/org/apache/blur/mapreduce/lib/v2/LUCENE_FIELD_TYPE.java
@@ -0,0 +1,94 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.blur.mapreduce.lib.v2;
+
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+
+import org.apache.lucene.index.IndexableField;
+
+public enum LUCENE_FIELD_TYPE {
+
+  BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField.class, (byte) 0),
+
+  DoubleField(org.apache.lucene.document.DoubleField.class, (byte) 1),
+
+  FloatField(org.apache.lucene.document.FloatField.class, (byte) 2),
+
+  IntField(org.apache.lucene.document.IntField.class, (byte) 3),
+
+  LongField(org.apache.lucene.document.LongField.class, (byte) 4),
+
+  NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField.class, (byte) 5),
+
+  DoubleDocValuesField(org.apache.lucene.document.DoubleDocValuesField.class, (byte) 6),
+
+  FloatDocValuesField(org.apache.lucene.document.FloatDocValuesField.class, (byte) 7),
+
+  SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField.class, (byte) 8),
+
+  SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField.class, (byte) 9),
+
+  StoredField(org.apache.lucene.document.StoredField.class, (byte) 10),
+
+  StringField(org.apache.lucene.document.StringField.class, (byte) 11),
+
+  TextField(org.apache.lucene.document.TextField.class, (byte) 12);
+
+  private final byte _value;
+  private final Class<? extends IndexableField> _clazz;
+
+  private LUCENE_FIELD_TYPE(Class<? extends IndexableField> clazz, byte b) {
+    _value = b;
+    _clazz = clazz;
+  }
+
+  public Class<? extends IndexableField> fieldClass() {
+    return _clazz;
+  }
+
+  public byte value() {
+    return _value;
+  }
+
+  public static LUCENE_FIELD_TYPE lookupByValue(byte value) {
+    LUCENE_FIELD_TYPE type = _lookupByValue.get(value);
+    if (type == null) {
+      throw new RuntimeException("Type for [" + value + "] not found.");
+    }
+    return type;
+  }
+
+  public static LUCENE_FIELD_TYPE lookupByClass(Class<? extends IndexableField> value) {
+    LUCENE_FIELD_TYPE type = _lookupByClass.get(value);
+    if (type == null) {
+      throw new RuntimeException("Type for [" + value + "] not found.");
+    }
+    return type;
+  }
+
+  private final static Map<Class<? extends IndexableField>, LUCENE_FIELD_TYPE> _lookupByClass = new ConcurrentHashMap<Class<? extends IndexableField>, LUCENE_FIELD_TYPE>();
+  private final static Map<Byte, LUCENE_FIELD_TYPE> _lookupByValue = new ConcurrentHashMap<Byte, LUCENE_FIELD_TYPE>();
+
+  static {
+    LUCENE_FIELD_TYPE[] values = LUCENE_FIELD_TYPE.values();
+    for (LUCENE_FIELD_TYPE type : values) {
+      _lookupByClass.put(type.fieldClass(), type);
+      _lookupByValue.put(type.value(), type);
+    }
+  }
+}
\ No newline at end of file
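
A small usage sketch (not from the commit) of the byte-tag/class mapping that DocumentWritable relies on; StringField maps to tag 11 as defined above, and the class name FieldTypeLookupExample is hypothetical.

import org.apache.blur.mapreduce.lib.v2.LUCENE_FIELD_TYPE;
import org.apache.lucene.document.StringField;

public class FieldTypeLookupExample {
  public static void main(String[] args) {
    // Class -> byte tag, as used when writing a field.
    LUCENE_FIELD_TYPE type = LUCENE_FIELD_TYPE.lookupByClass(StringField.class);
    System.out.println(type + " = " + type.value()); // StringField = 11

    // Byte tag -> class, as used when reading a field back.
    System.out.println(LUCENE_FIELD_TYPE.lookupByValue((byte) 11).fieldClass().getSimpleName());
  }
}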

http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/3e3ed548/blur-mapred-hadoop1/src/main/java/org/apache/blur/mapreduce/lib/v2/LuceneKeyWritable.java
----------------------------------------------------------------------
diff --git a/blur-mapred-hadoop1/src/main/java/org/apache/blur/mapreduce/lib/v2/LuceneKeyWritable.java
b/blur-mapred-hadoop1/src/main/java/org/apache/blur/mapreduce/lib/v2/LuceneKeyWritable.java
new file mode 100644
index 0000000..bca3c4f
--- /dev/null
+++ b/blur-mapred-hadoop1/src/main/java/org/apache/blur/mapreduce/lib/v2/LuceneKeyWritable.java
@@ -0,0 +1,182 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.blur.mapreduce.lib.v2;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.lucene.util.BytesRef;
+
+public class LuceneKeyWritable implements WritableComparable<LuceneKeyWritable> {
+
+  public static enum Type {
+    SHARD_FIELD_TEXT((byte) 0), SHARD_FIELD_TEXT_DOCUMENTID((byte) 1), SHARD_FIELD_TEXT_DOCUMENTID_POSITION((byte) 2);
+
+    private final byte _value;
+
+    private Type(byte value) {
+      _value = value;
+    }
+
+    public byte value() {
+      return _value;
+    }
+
+    public static Type lookup(byte value) {
+      switch (value) {
+      case 0:
+        return SHARD_FIELD_TEXT;
+      case 1:
+        return SHARD_FIELD_TEXT_DOCUMENTID;
+      case 2:
+        return SHARD_FIELD_TEXT_DOCUMENTID_POSITION;
+      default:
+        throw new RuntimeException("Value [" + value + "] not found.");
+      }
+    }
+  }
+
+  private int _shardId;
+  private int _fieldId;
+  private BytesRef _text = new BytesRef();
+  private Type _type;
+  private int _documentId;
+  private int _position;
+
+  public LuceneKeyWritable() {
+
+  }
+
+  public LuceneKeyWritable(int shardId, int fieldId, BytesRef text, Type type, int documentId, int position) {
+    _shardId = shardId;
+    _fieldId = fieldId;
+    _text = text;
+    _type = type;
+    _documentId = documentId;
+    _position = position;
+  }
+
+  public int getShardId() {
+    return _shardId;
+  }
+
+  public int getFieldId() {
+    return _fieldId;
+  }
+
+  public BytesRef getText() {
+    return _text;
+  }
+
+  public Type getType() {
+    return _type;
+  }
+
+  public int getDocumentId() {
+    return _documentId;
+  }
+
+  public int getPosition() {
+    return _position;
+  }
+
+  @Override
+  public void readFields(DataInput in) throws IOException {
+    _shardId = in.readInt();
+    _fieldId = in.readInt();
+    read(in, _text);
+    _type = Type.lookup(in.readByte());
+    switch (_type) {
+    case SHARD_FIELD_TEXT:
+      return;
+    case SHARD_FIELD_TEXT_DOCUMENTID:
+      _documentId = in.readInt();
+      return;
+    case SHARD_FIELD_TEXT_DOCUMENTID_POSITION:
+      _documentId = in.readInt();
+      _position = in.readInt();
+      return;
+    default:
+      throw new IOException("Type [" + _type + "] not supported.");
+    }
+  }
+
+  @Override
+  public void write(DataOutput out) throws IOException {
+    out.writeInt(_shardId);
+    out.writeInt(_fieldId);
+    write(out, _text);
+    out.writeByte(_type.value());
+    switch (_type) {
+    case SHARD_FIELD_TEXT:
+      return;
+    case SHARD_FIELD_TEXT_DOCUMENTID:
+      out.writeInt(_documentId);
+      return;
+    case SHARD_FIELD_TEXT_DOCUMENTID_POSITION:
+      out.writeInt(_documentId);
+      out.writeInt(_position);
+      return;
+    default:
+      throw new IOException("Type [" + _type + "] not supported.");
+    }
+  }
+
+  private void write(DataOutput out, BytesRef ref) throws IOException {
+    out.writeInt(ref.length);
+    out.write(ref.bytes, ref.offset, ref.length);
+  }
+
+  private void read(DataInput in, BytesRef ref) throws IOException {
+    int len = in.readInt();
+    if (ref.bytes.length < len) {
+      ref.grow(len);
+    }
+    in.readFully(ref.bytes, 0, len);
+    ref.offset = 0;
+    ref.length = len;
+  }
+
+  @Override
+  public int compareTo(LuceneKeyWritable o) {
+    int compareTo = _shardId - o._shardId;
+    if (compareTo == 0) {
+      compareTo = _fieldId - o._fieldId;
+      if (compareTo == 0) {
+        compareTo = _text.compareTo(o._text);
+        // Only fall through to document id and position when the term bytes are
+        // equal; otherwise the term comparison decides the composite ordering.
+        if (compareTo != 0) {
+          return compareTo;
+        }
+        switch (_type) {
+        case SHARD_FIELD_TEXT:
+          return 0;
+        case SHARD_FIELD_TEXT_DOCUMENTID:
+          return _documentId - o._documentId;
+        case SHARD_FIELD_TEXT_DOCUMENTID_POSITION:
+          compareTo = _documentId - o._documentId;
+          if (compareTo == 0) {
+            return _position - o._position;
+          }
+          return compareTo;
+        default:
+          throw new RuntimeException("Type [" + _type + "] not supported.");
+        }
+      }
+    }
+    return compareTo;
+  }
+}
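
To illustrate the intended composite ordering (shard, then field, then term bytes, then document id and position as the key type allows), here is a short sketch that is not part of the commit; the class name LuceneKeyOrderingExample is hypothetical.

import org.apache.blur.mapreduce.lib.v2.LuceneKeyWritable;
import org.apache.blur.mapreduce.lib.v2.LuceneKeyWritable.Type;
import org.apache.lucene.util.BytesRef;

public class LuceneKeyOrderingExample {
  public static void main(String[] args) {
    LuceneKeyWritable a = new LuceneKeyWritable(0, 1, new BytesRef("apple"),
        Type.SHARD_FIELD_TEXT_DOCUMENTID, 5, 0);
    LuceneKeyWritable b = new LuceneKeyWritable(0, 1, new BytesRef("banana"),
        Type.SHARD_FIELD_TEXT_DOCUMENTID, 2, 0);

    // "apple" sorts before "banana", so the term comparison decides the order
    // even though key a has the larger document id.
    System.out.println(a.compareTo(b) < 0); // true
  }
}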

http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/3e3ed548/blur-mapred-hadoop1/src/test/java/org/apache/blur/mapreduce/lib/v2/DirectIndexingDriverMiniClusterTest.java
----------------------------------------------------------------------
diff --git a/blur-mapred-hadoop1/src/test/java/org/apache/blur/mapreduce/lib/v2/DirectIndexingDriverMiniClusterTest.java
b/blur-mapred-hadoop1/src/test/java/org/apache/blur/mapreduce/lib/v2/DirectIndexingDriverMiniClusterTest.java
new file mode 100644
index 0000000..2adfbfb
--- /dev/null
+++ b/blur-mapred-hadoop1/src/test/java/org/apache/blur/mapreduce/lib/v2/DirectIndexingDriverMiniClusterTest.java
@@ -0,0 +1,99 @@
+package org.apache.blur.mapreduce.lib.v2;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import static org.junit.Assert.assertTrue;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.blur.server.TableContext;
+import org.apache.blur.store.buffer.BufferStore;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MiniMRCluster;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class DirectIndexingDriverMiniClusterTest {
+
+  private static Configuration conf = new Configuration();
+  private static FileSystem localFs;
+  private static MiniMRCluster mr;
+  private static final Path TEST_ROOT_DIR = new Path("./target/tmp/DirectIndexingDriverTest_tmp");
+  private static JobConf jobConf;
+  private static final Path outDir = new Path(TEST_ROOT_DIR + "/out");
+  private static final Path inDir = new Path(TEST_ROOT_DIR + "/in");
+
+  @BeforeClass
+  public static void setupTest() throws Exception {
+    System.setProperty("test.build.data", "./target/DirectIndexingDriverTest/data");
+    System.setProperty("hadoop.log.dir", "./target/DirectIndexingDriverTest/hadoop_log");
+    try {
+      localFs = FileSystem.getLocal(conf);
+    } catch (IOException io) {
+      throw new RuntimeException("problem getting local fs", io);
+    }
+    mr = new MiniMRCluster(1, "file:///", 1);
+    jobConf = mr.createJobConf();
+    BufferStore.initNewBuffer(128, 128 * 128);
+  }
+
+  @AfterClass
+  public static void teardown() {
+    if (mr != null) {
+      mr.shutdown();
+    }
+    rm(new File("build"));
+  }
+
+  private static void rm(File file) {
+    if (!file.exists()) {
+      return;
+    }
+    if (file.isDirectory()) {
+      for (File f : file.listFiles()) {
+        rm(f);
+      }
+    }
+    file.delete();
+  }
+
+  @Before
+  public void setup() throws IllegalArgumentException, IOException {
+    TableContext.clear();
+    if (localFs.exists(inDir)) {
+      assertTrue(localFs.delete(inDir, true));
+    }
+    if (localFs.exists(outDir)) {
+      assertTrue(localFs.delete(outDir, true));
+    }
+  }
+
+  @Test
+  public void testBlurOutputFormat() throws Exception {
+//    DirectIndexingDriverTest.createInputDocument(localFs, jobConf, inDir);
+//    DirectIndexingDriver driver = new DirectIndexingDriver();
+//    driver.setConf(jobConf);
+//    driver.run(new String[] { inDir.toString(), outDir.toString() });
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/3e3ed548/blur-mapred-hadoop1/src/test/java/org/apache/blur/mapreduce/lib/v2/DirectIndexingDriverTest.java
----------------------------------------------------------------------
diff --git a/blur-mapred-hadoop1/src/test/java/org/apache/blur/mapreduce/lib/v2/DirectIndexingDriverTest.java
b/blur-mapred-hadoop1/src/test/java/org/apache/blur/mapreduce/lib/v2/DirectIndexingDriverTest.java
new file mode 100644
index 0000000..d8f9a68
--- /dev/null
+++ b/blur-mapred-hadoop1/src/test/java/org/apache/blur/mapreduce/lib/v2/DirectIndexingDriverTest.java
@@ -0,0 +1,68 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.blur.mapreduce.lib.v2;
+
+import java.io.IOException;
+import java.util.Random;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.SequenceFile.Writer;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.Field.Store;
+import org.junit.Test;
+
+public class DirectIndexingDriverTest {
+
+//  @Test
+  public void testIndexing() throws Exception {
+    Configuration configuration = new Configuration();
+    Path path = new Path("./tmp/test_DirectIndexingDriverTest_input/");
+    FileSystem fileSystem = path.getFileSystem(configuration);
+    createInputDocument(fileSystem, configuration, path);
+    DirectIndexingDriver directIndexingDriver = new DirectIndexingDriver();
+    directIndexingDriver.setConf(configuration);
+    directIndexingDriver.run(new String[] { path.toString() });
+  }
+
+  public static void createInputDocument(FileSystem fileSystem, Configuration configuration, Path path)
+      throws IOException {
+    Writer writer = SequenceFile.createWriter(fileSystem, configuration, new Path(path, "data"), IntWritable.class,
+        DocumentWritable.class);
+    IntWritable docId = new IntWritable();
+    DocumentWritable documentWritable = new DocumentWritable();
+    int numberOfFields = 10;
+    Random random = new Random();
+    for (int i = 0; i < 100; i++) {
+      docId.set(i);
+      documentWritable.clear();
+      populate(numberOfFields, random, documentWritable);
+      writer.append(docId, documentWritable);
+    }
+    writer.close();
+  }
+
+  public static void populate(int numberOfFields, Random random, DocumentWritable documentWritable) {
+    for (int i = 0; i < numberOfFields; i++) {
+      long l = random.nextLong();
+      documentWritable.add(new StringField("f" + i, Long.toString(l), Store.YES));
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/3e3ed548/blur-query/src/main/java/org/apache/blur/analysis/BaseFieldManager.java
----------------------------------------------------------------------
diff --git a/blur-query/src/main/java/org/apache/blur/analysis/BaseFieldManager.java b/blur-query/src/main/java/org/apache/blur/analysis/BaseFieldManager.java
index 1316161..bc4c486 100644
--- a/blur-query/src/main/java/org/apache/blur/analysis/BaseFieldManager.java
+++ b/blur-query/src/main/java/org/apache/blur/analysis/BaseFieldManager.java
@@ -56,6 +56,7 @@ import org.apache.lucene.document.Field.Store;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.StringField;
 import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.SortField;
 
@@ -248,6 +249,14 @@ public abstract class BaseFieldManager extends FieldManager {
     return fields;
   }
 
+  public List<IndexableField> getIndexableFields(String fieldname, String fieldValue) throws IOException {
+    throw new RuntimeException("Not implemented.");
+  }
+
+  public int getFieldId(String fieldName) throws IOException {
+    throw new RuntimeException("Not implemented.");
+  }
+
   private void addFieldExistance(List<Field> fields, Record record) {
     String family = record.getFamily();
     if (family == null) {

http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/3e3ed548/blur-query/src/main/java/org/apache/blur/analysis/FieldManager.java
----------------------------------------------------------------------
diff --git a/blur-query/src/main/java/org/apache/blur/analysis/FieldManager.java b/blur-query/src/main/java/org/apache/blur/analysis/FieldManager.java
index bb17ca2..217d0cf 100644
--- a/blur-query/src/main/java/org/apache/blur/analysis/FieldManager.java
+++ b/blur-query/src/main/java/org/apache/blur/analysis/FieldManager.java
@@ -24,6 +24,7 @@ import java.util.Set;
 import org.apache.blur.thrift.generated.Record;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.SortField;
 
@@ -39,6 +40,10 @@ public abstract class FieldManager {
    */
   public abstract List<Field> getFields(String rowId, Record record) throws IOException;
 
+  public abstract List<IndexableField> getIndexableFields(String fieldname, String fieldValue) throws IOException;
+  
+  public abstract int getFieldId(String fieldName) throws IOException;
+
   /**
    * Adds a column definition.
    * 

