Return-Path: X-Original-To: apmail-incubator-allura-commits-archive@minotaur.apache.org Delivered-To: apmail-incubator-allura-commits-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id E7BE71014A for ; Thu, 13 Jun 2013 21:03:32 +0000 (UTC) Received: (qmail 74260 invoked by uid 500); 13 Jun 2013 21:03:32 -0000 Delivered-To: apmail-incubator-allura-commits-archive@incubator.apache.org Received: (qmail 74240 invoked by uid 500); 13 Jun 2013 21:03:32 -0000 Mailing-List: contact allura-commits-help@incubator.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: allura-dev@incubator.apache.org Delivered-To: mailing list allura-commits@incubator.apache.org Received: (qmail 74232 invoked by uid 99); 13 Jun 2013 21:03:32 -0000 Received: from tyr.zones.apache.org (HELO tyr.zones.apache.org) (140.211.11.114) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 13 Jun 2013 21:03:32 +0000 Received: by tyr.zones.apache.org (Postfix, from userid 65534) id 96A0F8162E3; Thu, 13 Jun 2013 21:03:32 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: brondsem@apache.org To: allura-commits@incubator.apache.org Message-Id: X-Mailer: ASF-Git Admin Mailer Subject: git commit: [#6346] Add a 'max items in solr index batch' option Date: Thu, 13 Jun 2013 21:03:32 +0000 (UTC) Updated Branches: refs/heads/master 3be1c6c9b -> 1a49d067b [#6346] Add a 'max items in solr index batch' option Signed-off-by: Tim Van Steenburgh Project: http://git-wip-us.apache.org/repos/asf/incubator-allura/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-allura/commit/1a49d067 Tree: http://git-wip-us.apache.org/repos/asf/incubator-allura/tree/1a49d067 Diff: http://git-wip-us.apache.org/repos/asf/incubator-allura/diff/1a49d067 Branch: refs/heads/master Commit: 1a49d067b5343f2c98c87e2a80b2dff79119050c Parents: 3be1c6c Author: Tim Van Steenburgh Authored: Tue Jun 11 00:54:09 2013 +0000 Committer: Dave Brondsema Committed: Thu Jun 13 20:59:15 2013 +0000 ---------------------------------------------------------------------- Allura/allura/command/show_models.py | 31 +++++++++++++++++-------------- Allura/allura/tests/test_commands.py | 21 +++++++++++---------- 2 files changed, 28 insertions(+), 24 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/1a49d067/Allura/allura/command/show_models.py ---------------------------------------------------------------------- diff --git a/Allura/allura/command/show_models.py b/Allura/allura/command/show_models.py index 68a55ac..29d2091 100644 --- a/Allura/allura/command/show_models.py +++ b/Allura/allura/command/show_models.py @@ -70,6 +70,8 @@ class ReindexCommand(base.Command): 'which are needed for some markdown macros to run properly') parser.add_option('--solr-hosts', dest='solr_hosts', help='Override the solr host(s) to post to. Comma-separated list of solr server URLs') + parser.add_option('--max-chunk', dest='max_chunk', type=int, default=100*1000, + help='Max number of artifacts to index in one Solr update command') def command(self): from allura import model as M @@ -90,11 +92,6 @@ class ReindexCommand(base.Command): if not self.options.solr and not self.options.refs: self.options.solr = self.options.refs = True - if self.options.solr_hosts: - self.add_artifact_kwargs = {'solr_hosts': self.options.solr_hosts.split(',')} - else: - self.add_artifact_kwargs = {} - for projects in utils.chunked_find(M.Project, q_project): for p in projects: c.project = p @@ -126,13 +123,7 @@ class ReindexCommand(base.Command): M.main_orm_session.flush() M.artifact_orm_session.clear() try: - if self.options.tasks: - self._chunked_add_artifacts(ref_ids) - else: - add_artifacts(ref_ids, - update_solr=self.options.solr, - update_refs=self.options.refs, - **self.add_artifact_kwargs) + self._chunked_add_artifacts(ref_ids) except CompoundError, err: base.log.exception('Error indexing artifacts:\n%r', err) base.log.error('%s', err.format_error()) @@ -140,12 +131,24 @@ class ReindexCommand(base.Command): M.main_orm_session.clear() base.log.info('Reindex %s', 'queued' if self.options.tasks else 'done') + @property + def add_artifact_kwargs(self): + if self.options.solr_hosts: + return {'solr_hosts': self.options.solr_hosts.split(',')} + return {} + def _chunked_add_artifacts(self, ref_ids): # ref_ids contains solr index ids which can easily be over # 100 bytes. Here we allow for 160 bytes avg, plus # room for other document overhead. - for chunk in utils.chunked_list(ref_ids, 100 * 1000): - self._post_add_artifacts(chunk) + for chunk in utils.chunked_list(ref_ids, self.options.max_chunk): + if self.options.tasks: + self._post_add_artifacts(chunk) + else: + add_artifacts(chunk, + update_solr=self.options.solr, + update_refs=self.options.refs, + **self.add_artifact_kwargs) def _post_add_artifacts(self, chunk): """ http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/1a49d067/Allura/allura/tests/test_commands.py ---------------------------------------------------------------------- diff --git a/Allura/allura/tests/test_commands.py b/Allura/allura/tests/test_commands.py index 1d72ff6..5545466 100644 --- a/Allura/allura/tests/test_commands.py +++ b/Allura/allura/tests/test_commands.py @@ -365,13 +365,17 @@ class TestReindexCommand(object): @patch('pysolr.Solr') def test_solr_hosts_1(self, Solr): cmd = show_models.ReindexCommand('reindex') - cmd.run([test_config, '-p', 'test', '--solr', '--solr-hosts=http://blah.com/solr/forge']) + cmd.options, args = cmd.parser.parse_args([ + '-p', 'test', '--solr', '--solr-hosts=http://blah.com/solr/forge']) + cmd._chunked_add_artifacts(list(range(10))) assert_equal(Solr.call_args[0][0], 'http://blah.com/solr/forge') @patch('pysolr.Solr') def test_solr_hosts_list(self, Solr): cmd = show_models.ReindexCommand('reindex') - cmd.run([test_config, '-p', 'test', '--solr', '--solr-hosts=http://blah.com/solr/forge,https://other.net/solr/forge']) + cmd.options, args = cmd.parser.parse_args([ + '-p', 'test', '--solr', '--solr-hosts=http://blah.com/solr/forge,https://other.net/solr/forge']) + cmd._chunked_add_artifacts(list(range(10))) # check constructors of first and second Solr() instantiations assert_equal(set([Solr.call_args_list[0][0][0], Solr.call_args_list[1][0][0]]), set(['http://blah.com/solr/forge', 'https://other.net/solr/forge']) @@ -387,13 +391,12 @@ class TestReindexCommand(object): @patch('allura.command.show_models.add_artifacts') def test_chunked_add_artifacts(self, add_artifacts): cmd = show_models.ReindexCommand('reindex') - cmd.options = Mock() - cmd.add_artifact_kwargs = {} - ref_ids = list(range(100 * 1000 * 2 + 20)) + cmd.options = Mock(tasks=True, max_chunk=10*1000) + ref_ids = list(range(10 * 1000 * 2 + 20)) cmd._chunked_add_artifacts(ref_ids) assert_equal(len(add_artifacts.post.call_args_list), 3) - assert_equal(len(add_artifacts.post.call_args_list[0][0][0]), 100 * 1000) - assert_equal(len(add_artifacts.post.call_args_list[1][0][0]), 100 * 1000) + assert_equal(len(add_artifacts.post.call_args_list[0][0][0]), 10 * 1000) + assert_equal(len(add_artifacts.post.call_args_list[1][0][0]), 10 * 1000) assert_equal(len(add_artifacts.post.call_args_list[2][0][0]), 20) @patch('allura.command.show_models.add_artifacts') @@ -404,8 +407,7 @@ class TestReindexCommand(object): "BSON document too large (16906035 bytes) - the connected server supports BSON document sizes up to 16777216 bytes.") add_artifacts.post.side_effect = on_post cmd = show_models.ReindexCommand('reindex') - cmd.options = Mock() - cmd.add_artifact_kwargs = {} + cmd.options, args = cmd.parser.parse_args([]) cmd._post_add_artifacts(range(5)) kw = {'update_solr': cmd.options.solr, 'update_refs': cmd.options.refs} expected = [ @@ -428,6 +430,5 @@ class TestReindexCommand(object): add_artifacts.post.side_effect = on_post cmd = show_models.ReindexCommand('reindex') cmd.options = Mock() - cmd.add_artifact_kwargs = {} with td.raises(pymongo.errors.InvalidDocument): cmd._post_add_artifacts(range(5))