incubator-blur-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From amccu...@apache.org
Subject [01/27] git commit: Fixed BLUR-194
Date Fri, 09 Aug 2013 17:24:06 GMT
Updated Branches:
  refs/heads/0.2.0-newtypesystem 5f77a2b12 -> 2378815f1
  refs/heads/master e110cd739 -> 775bb2154


Fixed BLUR-194


Project: http://git-wip-us.apache.org/repos/asf/incubator-blur/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-blur/commit/9073d5ad
Tree: http://git-wip-us.apache.org/repos/asf/incubator-blur/tree/9073d5ad
Diff: http://git-wip-us.apache.org/repos/asf/incubator-blur/diff/9073d5ad

Branch: refs/heads/0.2.0-newtypesystem
Commit: 9073d5ad7c22832763fda598e83b50d6b6de2d46
Parents: 76bf31e
Author: Aaron McCurry <amccurry@gmail.com>
Authored: Tue Aug 6 15:06:16 2013 -0400
Committer: Aaron McCurry <amccurry@gmail.com>
Committed: Tue Aug 6 15:06:16 2013 -0400

----------------------------------------------------------------------
 .../blur/mapreduce/lib/BlurOutputFormat.java    | 27 +++++++++++++-------
 1 file changed, 18 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/9073d5ad/blur-mapred/src/main/java/org/apache/blur/mapreduce/lib/BlurOutputFormat.java
----------------------------------------------------------------------
diff --git a/blur-mapred/src/main/java/org/apache/blur/mapreduce/lib/BlurOutputFormat.java b/blur-mapred/src/main/java/org/apache/blur/mapreduce/lib/BlurOutputFormat.java
index 39d670b..981fbc9 100644
--- a/blur-mapred/src/main/java/org/apache/blur/mapreduce/lib/BlurOutputFormat.java
+++ b/blur-mapred/src/main/java/org/apache/blur/mapreduce/lib/BlurOutputFormat.java
@@ -20,6 +20,8 @@ import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
 import java.io.File;
 import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
 import java.util.Map;
 import java.util.TreeMap;
 import java.util.UUID;
@@ -61,6 +63,7 @@ import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.NoMergePolicy;
 import org.apache.lucene.index.TieredMergePolicy;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
@@ -349,6 +352,7 @@ public class BlurOutputFormat extends OutputFormat<Text, BlurMutate> {
     private final File _localPath;
     private final int _maxDocumentBufferSize;
     private final IndexWriterConfig _conf;
+    private final IndexWriterConfig _overFlowConf;
     private final Path _newIndex;
     private final boolean _indexLocally;
     private final boolean _optimizeInFlight;
@@ -368,7 +372,6 @@ public class BlurOutputFormat extends OutputFormat<Text, BlurMutate> {
     private File _localTmpPath;
     private ProgressableDirectory _localTmpDir;
     private String _deletedRowId;
-    
 
     public BlurRecordWriter(Configuration configuration, BlurAnalyzer blurAnalyzer, int attemptId, String tmpDirName)
         throws IOException {
@@ -394,6 +397,9 @@ public class BlurOutputFormat extends OutputFormat<Text, BlurMutate> {
       TieredMergePolicy mergePolicy = (TieredMergePolicy) _conf.getMergePolicy();
       mergePolicy.setUseCompoundFile(false);
 
+      _overFlowConf = new IndexWriterConfig(LuceneVersionConstant.LUCENE_VERSION, _analyzer);
+      _overFlowConf.setMergePolicy(NoMergePolicy.NO_COMPOUND_FILES);
+
       if (_indexLocally) {
         String localDirPath = System.getProperty(JAVA_IO_TMPDIR);
         _localPath = new File(localDirPath, UUID.randomUUID().toString() + ".tmp");
@@ -448,9 +454,6 @@ public class BlurOutputFormat extends OutputFormat<Text, BlurMutate> {
       }
       _columnCount.increment(record.getColumns().size());
       Document document = TransactionRecorder.convert(blurRecord.getRowId(), record, _analyzer);
-      if (_documents.size() == 0) {
-        document.add(new StringField(BlurConstants.PRIME_DOC, BlurConstants.PRIME_DOC_VALUE, Store.NO));
-      }
       Document dup = _documents.put(recordId, document);
       if (dup != null) {
         _recordDuplicateCount.increment(1);
@@ -476,9 +479,15 @@ public class BlurOutputFormat extends OutputFormat<Text, BlurMutate> {
         String localDirPath = System.getProperty(JAVA_IO_TMPDIR);
         _localTmpPath = new File(localDirPath, UUID.randomUUID().toString() + ".tmp");
         _localTmpDir = new ProgressableDirectory(FSDirectory.open(_localTmpPath), BlurOutputFormat.getProgressable());
-        _localTmpWriter = new IndexWriter(_localTmpDir, _conf.clone());
+        _localTmpWriter = new IndexWriter(_localTmpDir, _overFlowConf.clone());
+        //The local tmp writer has merging disabled so the first document in is going to be doc 0.
+        //Therefore the first document added is the prime doc
+        List<Document> docs = new ArrayList<Document>(_documents.values());
+        docs.get(0).add(new StringField(BlurConstants.PRIME_DOC, BlurConstants.PRIME_DOC_VALUE, Store.NO));
+        _localTmpWriter.addDocuments(docs);
+      } else {
+        _localTmpWriter.addDocuments(_documents.values());
       }
-      _localTmpWriter.addDocuments(_documents.values());
       _documents.clear();
     }
 
@@ -517,11 +526,11 @@ public class BlurOutputFormat extends OutputFormat<Text, BlurMutate> {
           if (_deletedRowId != null) {
             _writer.addDocument(getDeleteDoc());
             _rowDeleteCount.increment(1);
-          } else {
-            LOG.info("This case should never happen, no records to index and no row deletes to emit.");
           }
         } else {
-          _writer.addDocuments(_documents.values());
+          List<Document> docs = new ArrayList<Document>(_documents.values());
+          docs.get(0).add(new StringField(BlurConstants.PRIME_DOC, BlurConstants.PRIME_DOC_VALUE, Store.NO));
+          _writer.addDocuments(docs);
           _recordRateCounter.mark(_documents.size());
           _documents.clear();
         }


Mime
View raw message