Return-Path: X-Original-To: apmail-incubator-allura-commits-archive@minotaur.apache.org Delivered-To: apmail-incubator-allura-commits-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 80480E670 for ; Fri, 1 Mar 2013 15:58:05 +0000 (UTC) Received: (qmail 14409 invoked by uid 500); 1 Mar 2013 15:58:05 -0000 Delivered-To: apmail-incubator-allura-commits-archive@incubator.apache.org Received: (qmail 14329 invoked by uid 500); 1 Mar 2013 15:58:05 -0000 Mailing-List: contact allura-commits-help@incubator.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: allura-dev@incubator.apache.org Delivered-To: mailing list allura-commits@incubator.apache.org Received: (qmail 14119 invoked by uid 99); 1 Mar 2013 15:58:05 -0000 Received: from tyr.zones.apache.org (HELO tyr.zones.apache.org) (140.211.11.114) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 01 Mar 2013 15:58:05 +0000 Received: by tyr.zones.apache.org (Postfix, from userid 65534) id D06231CD92; Fri, 1 Mar 2013 15:58:04 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: johnsca@apache.org To: allura-commits@incubator.apache.org X-Mailer: ASF-Git Admin Mailer Subject: [10/24] git commit: [#5788] Converted refresh_last_commits to a ScriptTask Message-Id: <20130301155804.D06231CD92@tyr.zones.apache.org> Date: Fri, 1 Mar 2013 15:58:04 +0000 (UTC) [#5788] Converted refresh_last_commits to a ScriptTask Also did a bit of cleanup Signed-off-by: Cory Johns Project: http://git-wip-us.apache.org/repos/asf/incubator-allura/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-allura/commit/871ae8aa Tree: http://git-wip-us.apache.org/repos/asf/incubator-allura/tree/871ae8aa Diff: http://git-wip-us.apache.org/repos/asf/incubator-allura/diff/871ae8aa Branch: refs/heads/cj/5411 Commit: 871ae8aab668771adc5f9013964846a47927fc3a Parents: 3eaf690 Author: Cory Johns Authored: Mon Feb 25 19:50:59 2013 +0000 Committer: Tim Van Steenburgh Committed: Thu Feb 28 21:48:32 2013 +0000 ---------------------------------------------------------------------- Allura/allura/scripts/refresh_last_commits.py | 181 ++++++++++++++++++ scripts/refresh-last-commits.py | 195 -------------------- 2 files changed, 181 insertions(+), 195 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/871ae8aa/Allura/allura/scripts/refresh_last_commits.py ---------------------------------------------------------------------- diff --git a/Allura/allura/scripts/refresh_last_commits.py b/Allura/allura/scripts/refresh_last_commits.py new file mode 100644 index 0000000..b665d99 --- /dev/null +++ b/Allura/allura/scripts/refresh_last_commits.py @@ -0,0 +1,181 @@ +import sys +import argparse +import logging +import re +from math import pow, log10 +from datetime import datetime +from contextlib import contextmanager + +import faulthandler +from pylons import tmpl_context as c +from ming.orm import ThreadLocalORMSession, session + +from allura import model as M +from allura.lib.utils import chunked_find +from allura.tasks.repo_tasks import refresh +from allura.scripts import ScriptTask + +log = logging.getLogger(__name__) + + +class RefreshLastCommits(ScriptTask): + @classmethod + def parser(cls): + def _repo_type_list(s): + repo_types = [] + for repo_type in s.split(','): + repo_type = repo_type.strip() + if repo_type not in ['git', 'hg']: + raise argparse.ArgumentTypeError( + '{0} is not a valid repo type.'.format(repo_type)) + repo_types.append(repo_type) + return repo_types + parser = argparse.ArgumentParser(description='Using existing commit data, ' + 'refresh the last commit metadata in MongoDB. Run for all repos (no args), ' + 'or restrict by neighborhood, project, or code tool mount point.') + parser.add_argument('--nbhd', action='store', default='', dest='nbhd', + help='Restrict update to a particular neighborhood, e.g. /p/.') + parser.add_argument('--project', action='store', default='', dest='project', + help='Restrict update to a particular project. To specify a ' + 'subproject, use a slash: project/subproject.') + parser.add_argument('--project-regex', action='store', default='', + dest='project_regex', + help='Restrict update to projects for which the shortname matches ' + 'the provided regex.') + parser.add_argument('--repo-types', action='store', type=_repo_type_list, + default=['git', 'hg'], dest='repo_types', + help='Only refresh last commits for repos of the given type(s). Defaults to: ' + 'git,hg. Example: --repo-types=git') + parser.add_argument('--mount-point', default='', dest='mount_point', + help='Restrict update to repos at the given tool mount point. ') + parser.add_argument('--clean', action='store_true', dest='clean', + default=False, help='Remove last commit mongo docs for ' + 'project(s) being refreshed before doing the refresh.') + parser.add_argument('--dry-run', action='store_true', dest='dry_run', + default=False, help='Log names of projects that would have their ') + parser.add_argument('--diffs', action='store_true', dest='diffs', + default=False, help='Refresh / clean diffs as well as LCDs') + return parser + + @classmethod + def execute(cls, options): + q_project = {} + if options.nbhd: + nbhd = M.Neighborhood.query.get(url_prefix=options.nbhd) + if not nbhd: + return "Invalid neighborhood url prefix." + q_project['neighborhood_id'] = nbhd._id + if options.project: + q_project['shortname'] = options.project + elif options.project_regex: + q_project['shortname'] = {'$regex': options.project_regex} + + log.info('Refreshing last commit data') + + for chunk in chunked_find(M.Project, q_project): + for p in chunk: + log.info("Refreshing last commit data for project '%s'." % p.shortname) + if options.dry_run: + continue + c.project = p + if options.mount_point: + mount_points = [options.mount_point] + else: + mount_points = [ac.options.mount_point for ac in + M.AppConfig.query.find(dict(project_id=p._id))] + for app in (p.app_instance(mp) for mp in mount_points): + c.app = app + if not hasattr(app, 'repo'): + continue + if c.app.repo.tool.lower() not in options.repo_types: + log.info("Skipping %r: wrong type (%s)", c.app.repo, + c.app.repo.tool.lower()) + continue + + c.app.repo.status = 'analyzing' + session(c.app.repo).flush(c.app.repo) + try: + ci_ids = list(reversed(list(c.app.repo.all_commit_ids()))) + if options.clean: + cls._clean(ci_ids, options.diffs) + + log.info('Refreshing all last commits in %r', c.app.repo) + cls.refresh_repo_lcds(ci_ids, options) + new_commit_ids = app.repo.unknown_commit_ids() + if len(new_commit_ids) > 0: + refresh.post() + except: + log.exception('Error refreshing %r', c.app.repo) + raise + finally: + c.app.repo.status = 'ready' + session(c.app.repo).flush(c.app.repo) + ThreadLocalORMSession.flush_all() + ThreadLocalORMSession.close_all() + + @classmethod + def refresh_repo_lcds(cls, commit_ids, options): + tree_cache = {} + timings = [] + if options.diffs: + print 'Processing diffs' + for i, commit_id in enumerate(commit_ids): + commit = M.repo.Commit.query.get(_id=commit_id) + with time(timings): + M.repo_refresh.compute_diffs(c.app.repo._id, tree_cache, commit) + if i % 1000 == 0: + cls._print_stats(i, timings, 1000) + + lcd_cache = M.repo.ModelCache( + max_instances={M.repo.LastCommit: 4000}, + max_queries={M.repo.LastCommit: 4000}, + ) + timings = [] + print 'Processing last commits' + for i, commit_id in enumerate(commit_ids): + commit = M.repo.Commit.query.get(_id=commit_id) + if commit is None: + print "Commit missing, skipping: %s" % commit_id + continue + commit.set_context(c.app.repo) + with time(timings): + M.repo_refresh.compute_lcds(commit, lcd_cache) + ThreadLocalORMSession.flush_all() + if i % 100 == 0: + cls._print_stats(i, timings, 100) + ThreadLocalORMSession.flush_all() + + @classmethod + def _clean(cls, commit_ids, clean_diffs): + if clean_diffs: + # delete DiffInfoDocs + i = M.repo.DiffInfoDoc.m.find(dict(commit_ids={'$in': commit_ids})).count() + log.info("Deleting %i DiffInfoDoc docs for %i commits...", i, len(commit_ids)) + M.repo.LastCommitDoc.m.remove(dict(commit_ids={'$in': commit_ids})) + + # delete LastCommitDocs + i = M.repo.LastCommitDoc.m.find(dict(commit_ids={'$in': commit_ids})).count() + log.info("Deleting %i LastCommitDoc docs for %i commits...", i, len(commit_ids)) + M.repo.LastCommitDoc.m.remove(dict(commit_ids={'$in': commit_ids})) + + @classmethod + def _print_stats(cls, processed, timings, debug_step): + mt = max(timings) + tt = sum(timings) + at = tt / len(timings) + mat = sum(timings[-debug_step:]) / debug_step + print ' Processed %d commits (max: %f, avg: %f, mavg: %f, tot: %f)' % ( + processed, mt, at, mat, tt) + + +@contextmanager +def time(timings): + s = datetime.now() + yield + timings.append((datetime.now() - s).total_seconds()) + + + +if __name__ == '__main__': + faulthandler.enable() + RefreshLastCommits.main() http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/871ae8aa/scripts/refresh-last-commits.py ---------------------------------------------------------------------- diff --git a/scripts/refresh-last-commits.py b/scripts/refresh-last-commits.py deleted file mode 100644 index 858f335..0000000 --- a/scripts/refresh-last-commits.py +++ /dev/null @@ -1,195 +0,0 @@ -import sys -import argparse -import logging -import re -from math import pow, log10 -from datetime import datetime -from contextlib import contextmanager - -import faulthandler -from pylons import tmpl_context as c -from ming.orm import ThreadLocalORMSession, session - -from allura import model as M -from allura.lib.utils import chunked_find, chunked_list -from allura.tasks.repo_tasks import refresh - -log = logging.getLogger(__name__) - - -def main(options): - q_project = {} - if options.nbhd: - nbhd = M.Neighborhood.query.get(url_prefix=options.nbhd) - if not nbhd: - return "Invalid neighborhood url prefix." - q_project['neighborhood_id'] = nbhd._id - if options.project: - q_project['shortname'] = options.project - elif options.project_regex: - q_project['shortname'] = {'$regex': options.project_regex} - - log.info('Refreshing last commit data') - - for chunk in chunked_find(M.Project, q_project): - for p in chunk: - log.info("Refreshing last commit data for project '%s'." % p.shortname) - if options.dry_run: - continue - c.project = p - if options.mount_point: - mount_points = [options.mount_point] - else: - mount_points = [ac.options.mount_point for ac in - M.AppConfig.query.find(dict(project_id=p._id))] - for app in (p.app_instance(mp) for mp in mount_points): - c.app = app - if not hasattr(app, 'repo'): - continue - if c.app.repo.tool.lower() not in options.repo_types: - log.info("Skipping %r: wrong type (%s)", c.app.repo, - c.app.repo.tool.lower()) - continue - - c.app.repo.status = 'analyzing' - session(c.app.repo).flush(c.app.repo) - try: - ci_ids = list(reversed(list(c.app.repo.all_commit_ids()))) - if options.clean: - if options.diffs: - # delete DiffInfoDocs - i = M.repo.DiffInfoDoc.m.find(dict(commit_ids={'$in': ci_ids})).count() - log.info("Deleting %i DiffInfoDoc docs, by repo id...", i) - M.repo.LastCommitDoc.m.remove(dict(commit_ids={'$in': ci_ids})) - - # delete LastCommitDocs - i = M.repo.LastCommitDoc.m.find(dict(commit_ids={'$in': ci_ids})).count() - log.info("Deleting %i LastCommitDoc docs, by repo id...", i) - M.repo.LastCommitDoc.m.remove(dict(commit_ids={'$in': ci_ids})) - - log.info('Refreshing all last commits in %r', c.app.repo) - if options.profile: - import cProfile - cProfile.runctx('refresh_repo_lcds(ci_ids, options)', - globals(), locals(), '/tmp/refresh_lcds.profile') - else: - refresh_repo_lcds(ci_ids, options) - new_commit_ids = app.repo.unknown_commit_ids() - if len(new_commit_ids) > 0: - refresh.post() - except: - log.exception('Error refreshing %r', c.app.repo) - raise - finally: - c.app.repo.status = 'ready' - session(c.app.repo).flush(c.app.repo) - ThreadLocalORMSession.flush_all() - ThreadLocalORMSession.close_all() - - -def enum_step(iter, step): - for i,elem in enumerate(iter): - if i % step == 0: - yield i, elem - -def refresh_repo_lcds(commit_ids, options): - tree_cache = {} - timings = [] - if options.diffs: - print 'Processing diffs' - for commit_id in commit_ids: - commit = M.repo.Commit.query.get(_id=commit_id) - with time(timings): - M.repo_refresh.compute_diffs(c.app.repo._id, tree_cache, commit) - if len(timings) % 1000 == 0: - _print_stats(timings, 1000) - lcd_cache = M.repo.ModelCache( - max_instances={M.repo.LastCommit: 4000}, - max_queries={M.repo.LastCommit: 10000}, - ) - timings = [] - print 'Processing last commits' - debug_step = int(pow(10, max(0, int(log10(len(commit_ids)) - log10(options.step) - 1)))) - _cids = commit_ids[options.skip:] - for i, commit_id in enum_step(_cids, options.step): - commit = M.repo.Commit.query.get(_id=commit_id) - commit.set_context(c.app.repo) - with time(timings): - M.repo_refresh.compute_lcds(commit, lcd_cache) - ThreadLocalORMSession.flush_all() - # ensure new LCDs get fully refreshed in the cache - # so that every commit sees the same copy - lcd_cache.expire_new_instances(M.repo.LastCommit) - if len(timings) % debug_step == 0: - _print_stats(timings, debug_step) - ThreadLocalORMSession.flush_all() - - -def _print_stats(timings, debug_step): - mt = max(timings) - tt = sum(timings) - at = tt / len(timings) - mat = sum(timings[-debug_step:]) / debug_step - print ' Processed %d commits (max: %f, avg: %f, mavg: %f, tot: %f)' % ( - len(timings), mt, at, mat, tt) - -@contextmanager -def time(timings): - s = datetime.now() - yield - timings.append((datetime.now() - s).total_seconds()) - - -def repo_type_list(s): - repo_types = [] - for repo_type in s.split(','): - repo_type = repo_type.strip() - if repo_type not in ['git', 'hg']: - raise argparse.ArgumentTypeError( - '{0} is not a valid repo type.'.format(repo_type)) - repo_types.append(repo_type) - return repo_types - - -def parse_options(): - parser = argparse.ArgumentParser(description='Using existing commit data, ' - 'refresh the last commit metadata in MongoDB. Run for all repos (no args), ' - 'or restrict by neighborhood, project, or code tool mount point.') - parser.add_argument('--nbhd', action='store', default='', dest='nbhd', - help='Restrict update to a particular neighborhood, e.g. /p/.') - parser.add_argument('--project', action='store', default='', dest='project', - help='Restrict update to a particular project. To specify a ' - 'subproject, use a slash: project/subproject.') - parser.add_argument('--project-regex', action='store', default='', - dest='project_regex', - help='Restrict update to projects for which the shortname matches ' - 'the provided regex.') - parser.add_argument('--repo-types', action='store', type=repo_type_list, - default=['git', 'hg'], dest='repo_types', - help='Only refresh last commits for repos of the given type(s). Defaults to: ' - 'git,hg. Example: --repo-types=git') - parser.add_argument('--mount_point', default='', dest='mount_point', - help='Restrict update to repos at the given tool mount point. ') - parser.add_argument('--clean', action='store_true', dest='clean', - default=False, help='Remove last commit mongo docs for ' - 'project(s) being refreshed before doing the refresh.') - parser.add_argument('--dry-run', action='store_true', dest='dry_run', - default=False, help='Log names of projects that would have their ' - 'last commits refreshed, but do not perform the actual refresh.') - parser.add_argument('--profile', action='store_true', dest='profile', - default=False, help='Enable the profiler (slow). Will log ' - 'profiling output to ./refresh.profile') - parser.add_argument('--diffs', action='store_true', dest='diffs', - default=False, help='Refresh diffs as well as LCDs') - parser.add_argument('--all', action='store_const', dest='step', - const=1, default=100, help='Refresh the LCD for every commit instead of every 100th') - parser.add_argument('--step', action='store', dest='step', - type=int, default=100, help='Refresh the LCD for every Nth commit instead of every 100th') - parser.add_argument('--skip', action='store', dest='skip', - type=int, default=0, help='Skip a number of commits') - return parser.parse_args() - -if __name__ == '__main__': - import sys - faulthandler.enable() - sys.exit(main(parse_options()))