incubator-allura-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From brond...@apache.org
Subject git commit: [#6346] Add a 'max items in solr index batch' option
Date Thu, 13 Jun 2013 21:03:32 GMT
Updated Branches:
  refs/heads/master 3be1c6c9b -> 1a49d067b


[#6346] Add a 'max items in solr index batch' option

Signed-off-by: Tim Van Steenburgh <tvansteenburgh@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/incubator-allura/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-allura/commit/1a49d067
Tree: http://git-wip-us.apache.org/repos/asf/incubator-allura/tree/1a49d067
Diff: http://git-wip-us.apache.org/repos/asf/incubator-allura/diff/1a49d067

Branch: refs/heads/master
Commit: 1a49d067b5343f2c98c87e2a80b2dff79119050c
Parents: 3be1c6c
Author: Tim Van Steenburgh <tvansteenburgh@gmail.com>
Authored: Tue Jun 11 00:54:09 2013 +0000
Committer: Dave Brondsema <dbrondsema@slashdotmedia.com>
Committed: Thu Jun 13 20:59:15 2013 +0000

----------------------------------------------------------------------
 Allura/allura/command/show_models.py | 31 +++++++++++++++++--------------
 Allura/allura/tests/test_commands.py | 21 +++++++++++----------
 2 files changed, 28 insertions(+), 24 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/1a49d067/Allura/allura/command/show_models.py
----------------------------------------------------------------------
diff --git a/Allura/allura/command/show_models.py b/Allura/allura/command/show_models.py
index 68a55ac..29d2091 100644
--- a/Allura/allura/command/show_models.py
+++ b/Allura/allura/command/show_models.py
@@ -70,6 +70,8 @@ class ReindexCommand(base.Command):
                            'which are needed for some markdown macros to run properly')
     parser.add_option('--solr-hosts', dest='solr_hosts',
                       help='Override the solr host(s) to post to.  Comma-separated list of solr server URLs')
+    parser.add_option('--max-chunk', dest='max_chunk', type=int, default=100*1000,
+                      help='Max number of artifacts to index in one Solr update command')
 
     def command(self):
         from allura import model as M
@@ -90,11 +92,6 @@ class ReindexCommand(base.Command):
         if not self.options.solr and not self.options.refs:
             self.options.solr = self.options.refs = True
 
-        if self.options.solr_hosts:
-            self.add_artifact_kwargs = {'solr_hosts': self.options.solr_hosts.split(',')}
-        else:
-            self.add_artifact_kwargs = {}
-
         for projects in utils.chunked_find(M.Project, q_project):
             for p in projects:
                 c.project = p
@@ -126,13 +123,7 @@ class ReindexCommand(base.Command):
                     M.main_orm_session.flush()
                     M.artifact_orm_session.clear()
                     try:
-                        if self.options.tasks:
-                            self._chunked_add_artifacts(ref_ids)
-                        else:
-                            add_artifacts(ref_ids,
-                                          update_solr=self.options.solr,
-                                          update_refs=self.options.refs,
-                                          **self.add_artifact_kwargs)
+                        self._chunked_add_artifacts(ref_ids)
                     except CompoundError, err:
                         base.log.exception('Error indexing artifacts:\n%r', err)
                         base.log.error('%s', err.format_error())
@@ -140,12 +131,24 @@ class ReindexCommand(base.Command):
                     M.main_orm_session.clear()
         base.log.info('Reindex %s', 'queued' if self.options.tasks else 'done')
 
+    @property
+    def add_artifact_kwargs(self):
+        if self.options.solr_hosts:
+           return {'solr_hosts': self.options.solr_hosts.split(',')}
+        return {}
+
     def _chunked_add_artifacts(self, ref_ids):
         # ref_ids contains solr index ids which can easily be over
         # 100 bytes. Here we allow for 160 bytes avg, plus
         # room for other document overhead.
-        for chunk in utils.chunked_list(ref_ids, 100 * 1000):
-            self._post_add_artifacts(chunk)
+        for chunk in utils.chunked_list(ref_ids, self.options.max_chunk):
+            if self.options.tasks:
+                self._post_add_artifacts(chunk)
+            else:
+                add_artifacts(chunk,
+                              update_solr=self.options.solr,
+                              update_refs=self.options.refs,
+                              **self.add_artifact_kwargs)
 
     def _post_add_artifacts(self, chunk):
         """

http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/1a49d067/Allura/allura/tests/test_commands.py
----------------------------------------------------------------------
diff --git a/Allura/allura/tests/test_commands.py b/Allura/allura/tests/test_commands.py
index 1d72ff6..5545466 100644
--- a/Allura/allura/tests/test_commands.py
+++ b/Allura/allura/tests/test_commands.py
@@ -365,13 +365,17 @@ class TestReindexCommand(object):
     @patch('pysolr.Solr')
     def test_solr_hosts_1(self, Solr):
         cmd = show_models.ReindexCommand('reindex')
-        cmd.run([test_config, '-p', 'test', '--solr', '--solr-hosts=http://blah.com/solr/forge'])
+        cmd.options, args = cmd.parser.parse_args([
+            '-p', 'test', '--solr', '--solr-hosts=http://blah.com/solr/forge'])
+        cmd._chunked_add_artifacts(list(range(10)))
         assert_equal(Solr.call_args[0][0], 'http://blah.com/solr/forge')
 
     @patch('pysolr.Solr')
     def test_solr_hosts_list(self, Solr):
         cmd = show_models.ReindexCommand('reindex')
-        cmd.run([test_config, '-p', 'test', '--solr', '--solr-hosts=http://blah.com/solr/forge,https://other.net/solr/forge'])
+        cmd.options, args = cmd.parser.parse_args([
+            '-p', 'test', '--solr', '--solr-hosts=http://blah.com/solr/forge,https://other.net/solr/forge'])
+        cmd._chunked_add_artifacts(list(range(10)))
         # check constructors of first and second Solr() instantiations
         assert_equal(set([Solr.call_args_list[0][0][0], Solr.call_args_list[1][0][0]]),
                      set(['http://blah.com/solr/forge', 'https://other.net/solr/forge'])
@@ -387,13 +391,12 @@ class TestReindexCommand(object):
     @patch('allura.command.show_models.add_artifacts')
     def test_chunked_add_artifacts(self, add_artifacts):
         cmd = show_models.ReindexCommand('reindex')
-        cmd.options = Mock()
-        cmd.add_artifact_kwargs = {}
-        ref_ids = list(range(100 * 1000 * 2 + 20))
+        cmd.options = Mock(tasks=True, max_chunk=10*1000)
+        ref_ids = list(range(10 * 1000 * 2 + 20))
         cmd._chunked_add_artifacts(ref_ids)
         assert_equal(len(add_artifacts.post.call_args_list), 3)
-        assert_equal(len(add_artifacts.post.call_args_list[0][0][0]), 100 * 1000)
-        assert_equal(len(add_artifacts.post.call_args_list[1][0][0]), 100 * 1000)
+        assert_equal(len(add_artifacts.post.call_args_list[0][0][0]), 10 * 1000)
+        assert_equal(len(add_artifacts.post.call_args_list[1][0][0]), 10 * 1000)
         assert_equal(len(add_artifacts.post.call_args_list[2][0][0]), 20)
 
     @patch('allura.command.show_models.add_artifacts')
@@ -404,8 +407,7 @@ class TestReindexCommand(object):
                         "BSON document too large (16906035 bytes) - the connected server supports BSON document sizes up to 16777216 bytes.")
         add_artifacts.post.side_effect = on_post
         cmd = show_models.ReindexCommand('reindex')
-        cmd.options = Mock()
-        cmd.add_artifact_kwargs = {}
+        cmd.options, args = cmd.parser.parse_args([])
         cmd._post_add_artifacts(range(5))
         kw = {'update_solr': cmd.options.solr, 'update_refs': cmd.options.refs}
         expected = [
@@ -428,6 +430,5 @@ class TestReindexCommand(object):
         add_artifacts.post.side_effect = on_post
         cmd = show_models.ReindexCommand('reindex')
         cmd.options = Mock()
-        cmd.add_artifact_kwargs = {}
         with td.raises(pymongo.errors.InvalidDocument):
             cmd._post_add_artifacts(range(5))


Mime
View raw message