incubator-blur-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From amccu...@apache.org
Subject [4/4] git commit: Adding the memory and merge factors to the bulk indexing.
Date Mon, 02 Mar 2015 14:54:52 GMT
Adding the memory and merge factors to the bulk indexing.


Project: http://git-wip-us.apache.org/repos/asf/incubator-blur/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-blur/commit/da4ad62d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-blur/tree/da4ad62d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-blur/diff/da4ad62d

Branch: refs/heads/master
Commit: da4ad62d6a409af3354dd00d10b1eeea284625c8
Parents: 9b83279
Author: Aaron McCurry <amccurry@gmail.com>
Authored: Mon Mar 2 09:54:41 2015 -0500
Committer: Aaron McCurry <amccurry@gmail.com>
Committed: Mon Mar 2 09:54:41 2015 -0500

----------------------------------------------------------------------
 .../apache/blur/manager/writer/BlurIndexSimpleWriter.java    | 8 ++++++++
 .../src/main/java/org/apache/blur/utils/BlurConstants.java   | 2 ++
 blur-util/src/main/resources/blur-default.properties         | 6 ++++++
 3 files changed, 16 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/da4ad62d/blur-core/src/main/java/org/apache/blur/manager/writer/BlurIndexSimpleWriter.java
----------------------------------------------------------------------
diff --git a/blur-core/src/main/java/org/apache/blur/manager/writer/BlurIndexSimpleWriter.java b/blur-core/src/main/java/org/apache/blur/manager/writer/BlurIndexSimpleWriter.java
index 975022a..8ed070b 100644
--- a/blur-core/src/main/java/org/apache/blur/manager/writer/BlurIndexSimpleWriter.java
+++ b/blur-core/src/main/java/org/apache/blur/manager/writer/BlurIndexSimpleWriter.java
@@ -19,6 +19,8 @@ package org.apache.blur.manager.writer;
 import static org.apache.blur.lucene.LuceneVersionConstant.LUCENE_VERSION;
 import static org.apache.blur.utils.BlurConstants.ACL_DISCOVER;
 import static org.apache.blur.utils.BlurConstants.ACL_READ;
+import static org.apache.blur.utils.BlurConstants.BLUR_SHARD_INDEX_WRITER_SORT_FACTOR;
+import static org.apache.blur.utils.BlurConstants.BLUR_SHARD_INDEX_WRITER_SORT_MEMORY;
 import static org.apache.blur.utils.BlurConstants.BLUR_SHARD_QUEUE_MAX_INMEMORY_LENGTH;
 
 import java.io.Closeable;
@@ -683,7 +685,13 @@ public class BlurIndexSimpleWriter extends BlurIndex {
         public void performMutate(IndexSearcherCloseable searcher, IndexWriter writer) throws
IOException {
           Configuration configuration = _tableContext.getConfiguration();
 
+          BlurConfiguration blurConfiguration = _tableContext.getBlurConfiguration();
+
           SequenceFile.Sorter sorter = new Sorter(_fileSystem, Text.class, RowMutationWritable.class,
configuration);
+          // This should support up to ~100 GB per shard; we will probably have
+          // incremental updates in that batch size.
+          sorter.setFactor(blurConfiguration.getInt(BLUR_SHARD_INDEX_WRITER_SORT_FACTOR,
10000));
+          sorter.setMemory(blurConfiguration.getInt(BLUR_SHARD_INDEX_WRITER_SORT_MEMORY,
10 * 1024 * 1024));
 
           _unsortedPaths = getUnsortedFiles();
 

http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/da4ad62d/blur-util/src/main/java/org/apache/blur/utils/BlurConstants.java
----------------------------------------------------------------------
diff --git a/blur-util/src/main/java/org/apache/blur/utils/BlurConstants.java b/blur-util/src/main/java/org/apache/blur/utils/BlurConstants.java
index 0cae234..0c4f8ec 100644
--- a/blur-util/src/main/java/org/apache/blur/utils/BlurConstants.java
+++ b/blur-util/src/main/java/org/apache/blur/utils/BlurConstants.java
@@ -76,6 +76,8 @@ public class BlurConstants {
   public static final String BLUR_MAX_RECORDS_PER_ROW_FETCH_REQUEST = "blur.max.records.per.row.fetch.request";
   public static final String BLUR_SHARD_READ_INTERCEPTOR = "blur.shard.read.interceptor";
   public static final String BLUR_SHARD_INTERNAL_SEARCH_THREAD_COUNT = "blur.shard.internal.search.thread.count";
+  public static final String BLUR_SHARD_INDEX_WRITER_SORT_MEMORY = "blur.shard.index.writer.sort.memory";
+  public static final String BLUR_SHARD_INDEX_WRITER_SORT_FACTOR = "blur.shard.index.writer.sort.factor";
 
   public static final String BLUR_SHARD_SERVER_THRIFT_THREAD_COUNT = "blur.shard.server.thrift.thread.count";
   public static final String BLUR_SHARD_CACHE_MAX_TIMETOLIVE = "blur.shard.cache.max.timetolive";

http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/da4ad62d/blur-util/src/main/resources/blur-default.properties
----------------------------------------------------------------------
diff --git a/blur-util/src/main/resources/blur-default.properties b/blur-util/src/main/resources/blur-default.properties
index 82ac9a1..5b2c599 100644
--- a/blur-util/src/main/resources/blur-default.properties
+++ b/blur-util/src/main/resources/blur-default.properties
@@ -195,6 +195,12 @@ blur.shard.merge.thread.count=8
 # The threshold for a small merge in bytes. 
 blur.shard.small.merge.threshold=128000000
 
+# The amount of memory, in bytes, used to sort during bulk indexing.
+blur.shard.index.writer.sort.memory=10485760
+
+# The merge factor used when sorting during bulk indexing.
+blur.shard.index.writer.sort.factor=10000
+
 # The maximum number of clauses in a BooleanQuery
 blur.max.clause.count=1024
 


Mime
View raw message