Return-Path: X-Original-To: apmail-incubator-allura-commits-archive@minotaur.apache.org Delivered-To: apmail-incubator-allura-commits-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 31C18DF80 for ; Fri, 7 Dec 2012 16:11:49 +0000 (UTC) Received: (qmail 92795 invoked by uid 500); 7 Dec 2012 16:11:49 -0000 Delivered-To: apmail-incubator-allura-commits-archive@incubator.apache.org Received: (qmail 92708 invoked by uid 500); 7 Dec 2012 16:11:49 -0000 Mailing-List: contact allura-commits-help@incubator.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: allura-dev@incubator.apache.org Delivered-To: mailing list allura-commits@incubator.apache.org Received: (qmail 92486 invoked by uid 99); 7 Dec 2012 16:11:48 -0000 Received: from tyr.zones.apache.org (HELO tyr.zones.apache.org) (140.211.11.114) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 07 Dec 2012 16:11:48 +0000 Received: by tyr.zones.apache.org (Postfix, from userid 65534) id 1CE9031D219; Fri, 7 Dec 2012 16:11:48 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: johnsca@apache.org To: allura-commits@incubator.apache.org X-Mailer: ASF-Git Admin Mailer Subject: [5/21] git commit: [#4691] ModelCache improvements and more debugging for refresh-last-commits.py Message-Id: <20121207161148.1CE9031D219@tyr.zones.apache.org> Date: Fri, 7 Dec 2012 16:11:48 +0000 (UTC) [#4691] ModelCache improvements and more debugging for refresh-last-commits.py Signed-off-by: Cory Johns Project: http://git-wip-us.apache.org/repos/asf/incubator-allura/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-allura/commit/12f487b7 Tree: http://git-wip-us.apache.org/repos/asf/incubator-allura/tree/12f487b7 Diff: http://git-wip-us.apache.org/repos/asf/incubator-allura/diff/12f487b7 Branch: refs/heads/cj/4691 Commit: 
12f487b72a4cc46dbb545470f846d336dd078c5e Parents: 1bcbaf1 Author: Cory Johns Authored: Fri Nov 30 19:35:32 2012 +0000 Committer: Cory Johns Committed: Fri Dec 7 16:11:27 2012 +0000 ---------------------------------------------------------------------- Allura/allura/model/repo.py | 34 ++++++++++++++------------ Allura/allura/model/repo_refresh.py | 2 +- Allura/allura/tests/model/test_repo.py | 16 ++++++++++-- scripts/refresh-last-commits.py | 24 +++++++++++++----- 4 files changed, 49 insertions(+), 27 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/12f487b7/Allura/allura/model/repo.py ---------------------------------------------------------------------- diff --git a/Allura/allura/model/repo.py b/Allura/allura/model/repo.py index 2bc22a9..5a7f002 100644 --- a/Allura/allura/model/repo.py +++ b/Allura/allura/model/repo.py @@ -5,7 +5,7 @@ import logging from hashlib import sha1 from itertools import chain from datetime import datetime -from collections import defaultdict +from collections import defaultdict, OrderedDict from difflib import SequenceMatcher, unified_diff from pylons import c @@ -824,12 +824,11 @@ class ModelCache(object): Commit instances and 2000 Tree instances in the cache at once with the default value. 
''' - self._cache = defaultdict(dict) + self._cache = defaultdict(OrderedDict) self.max_size = max_size - self._insertion_order = defaultdict(list) # temporary, for performance testing - self._hits = 0 - self._misses = 0 + self._hits = defaultdict(int) + self._accesses = defaultdict(int) self._get_calls = 0 self._get_walks = 0 self._get_walks_max = 0 @@ -847,12 +846,13 @@ class ModelCache(object): def get(self, cls, key): _key = self._normalize_key(key) + self._manage_cache(cls, _key) + self._accesses[cls] += 1 if _key not in self._cache[cls]: - self._misses += 1 query = getattr(cls, 'query', getattr(cls, 'm', None)) self.set(cls, _key, query.get(**key)) else: - self._hits += 1 + self._hits[cls] += 1 return self._cache[cls][_key] def set(self, cls, key, val): @@ -866,23 +866,25 @@ class ModelCache(object): and expire from the cache in a FIFO manner. ''' if key in self._cache[cls]: - return - self._insertion_order[cls].append(key) - if len(self._insertion_order[cls]) > self.max_size: - _key = self._insertion_order[cls].pop(0) - self._cache[cls].pop(_key) + # refresh access time in cache + val = self._cache[cls].pop(key) + self._cache[cls][key] = val + elif len(self._cache[cls]) >= self.max_size: + # remove the least-recently-used cache item + self._cache[cls].popitem(last=False) def size(self): - return sum([len(c) for c in self._insertion_order.values()]) + return sum([len(c) for c in self._cache.values()]) - def keys(self, cls): + def keys(self, cls, as_dict=True): ''' Returns all the cache keys for a given class. Each cache key will be a dict. 
''' - if self._cache[cls]: + if as_dict: return [dict(k) for k in self._cache[cls].keys()] - return [] + else: + return self._cache[cls].keys() def batch_load(self, cls, query, attrs=None): ''' http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/12f487b7/Allura/allura/model/repo_refresh.py ---------------------------------------------------------------------- diff --git a/Allura/allura/model/repo_refresh.py b/Allura/allura/model/repo_refresh.py index 149fcae..4796daa 100644 --- a/Allura/allura/model/repo_refresh.py +++ b/Allura/allura/model/repo_refresh.py @@ -515,6 +515,6 @@ def _walk_commit_tree(commit, cache): def _update_tree_cache(tree_ids, cache): current_ids = set(tree_ids) - cached_ids = set([k['_id'] for k in cache.keys(Tree)]) + cached_ids = set([k[0][1] for k in cache.keys(Tree, as_dict=False)]) new_ids = current_ids - cached_ids cache.batch_load(Tree, {'_id': {'$in': list(new_ids)}}) http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/12f487b7/Allura/allura/tests/model/test_repo.py ---------------------------------------------------------------------- diff --git a/Allura/allura/tests/model/test_repo.py b/Allura/allura/tests/model/test_repo.py index 0dbff66..040c750 100644 --- a/Allura/allura/tests/model/test_repo.py +++ b/Allura/allura/tests/model/test_repo.py @@ -571,6 +571,12 @@ class TestModelCache(unittest.TestCase): self.assertEqual(self.cache.keys(M.repo.Tree), [{'_id': 'test_keys', 'text': 'tko'}, {'fubar': 'scm'}]) self.assertEqual(self.cache.keys(M.repo.LastCommit), []) + def test_keys_not_as_dict(self): + self.cache._cache[M.repo.Tree][(('_id', 'test_keys'), ('text', 'tko'))] = 'foo' + self.cache._cache[M.repo.Tree][(('fubar', 'scm'),)] = 'bar' + self.assertEqual(self.cache.keys(M.repo.Tree, as_dict=False), [(('_id', 'test_keys'), ('text', 'tko')), (('fubar', 'scm'),)]) + self.assertEqual(self.cache.keys(M.repo.LastCommit), []) + @mock.patch.object(M.repo.Tree.query, 'find') def test_batch_load(self, tr_find): # cls, query, 
attrs @@ -600,14 +606,18 @@ class TestModelCache(unittest.TestCase): }) def test_pruning(self): - self.cache.max_size = 2 + self.cache.max_size = 3 + # ensure cache expires as LRU self.cache.set(M.repo.Tree, {'_id': 'foo'}, 'bar') self.cache.set(M.repo.Tree, {'_id': 'qux'}, 'zaz') self.cache.set(M.repo.Tree, {'_id': 'f00'}, 'b4r') - self.cache.set(M.repo.Tree, {'_id': 'qux'}, 'zaz') + self.cache.set(M.repo.Tree, {'_id': 'foo'}, 'zaz') + self.cache.get(M.repo.Tree, {'_id': 'f00'}) + self.cache.set(M.repo.Tree, {'_id': 'mee'}, 'you') self.assertEqual(self.cache._cache, { M.repo.Tree: { - (('_id', 'qux'),): 'zaz', + (('_id', 'foo'),): 'zaz', (('_id', 'f00'),): 'b4r', + (('_id', 'mee'),): 'you', }, }) http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/12f487b7/scripts/refresh-last-commits.py ---------------------------------------------------------------------- diff --git a/scripts/refresh-last-commits.py b/scripts/refresh-last-commits.py index 0a59b31..ed753ab 100644 --- a/scripts/refresh-last-commits.py +++ b/scripts/refresh-last-commits.py @@ -2,6 +2,7 @@ import sys import argparse import logging import re +from math import pow, log10 from datetime import datetime from contextlib import contextmanager @@ -97,29 +98,38 @@ def refresh_repo_lcds(commit_ids, options): at = tt / len(timings) print ' Processed %d commits (max: %f, avg: %f, tot: %f, cl: %d)' % ( len(timings), mt, at, tt, len(tree_cache)) - lcd_cache = M.repo.ModelCache(80000) + lcd_cache = M.repo.ModelCache(20000) timings = [] print 'Processing last commits' + debug_step = int(pow(10, max(0, int(log10(len(commit_ids)) - log10(options.step) - 1)))) for i, commit_id in enum_step(commit_ids, options.step): - print ' Processing commit %s...' % commit_id, - sys.stdout.flush() + #print ' Processing commit %s...' 
% commit_id, + #sys.stdout.flush() commit = M.repo.Commit.query.get(_id=commit_id) with time(timings): M.repo_refresh.compute_lcds(commit, lcd_cache) - print 'done in %fs' % timings[-1] - if len(timings) % 10 == 0: + #print 'done in %fs [%d%% in %d]' % ( + # timings[-1], + # lcd_cache._hits[M.repo.LastCommit] * 100 / lcd_cache._accesses[M.repo.LastCommit], + # len(lcd_cache._cache[M.repo.LastCommit]), + # ) + if len(timings) % debug_step == 0: mt = max(timings) tt = sum(timings) at = tt / len(timings) - mat = sum(timings[-10:]) / 10 + mat = sum(timings[-debug_step:]) / debug_step + hits = sum(lcd_cache._hits.values()) + accs = sum(lcd_cache._accesses.values()) print ' Processed %d commits (max: %f, avg: %f, mavg: %f, tot: %f, lc: %d, lcl: %d, hits: %d, agw: %d, mgw: %d, gh: %d, abw: %d, mbw: %d, ts: %d)' % ( len(timings), mt, at, mat, tt, lcd_cache.size(), len(lcd_cache._cache[M.repo.LastCommit]), - lcd_cache._hits * 100 / (lcd_cache._hits + lcd_cache._misses), + hits * 100 / accs, lcd_cache._get_walks / lcd_cache._get_calls, lcd_cache._get_walks_max, lcd_cache._get_hits * 100 / lcd_cache._get_calls, lcd_cache._build_walks / lcd_cache._build_calls, lcd_cache._build_walks_max, len(lcd_cache.get(M.repo.TreesDoc, dict(_id=commit._id)).tree_ids)) ThreadLocalORMSession.flush_all() ThreadLocalORMSession.close_all() + ThreadLocalORMSession.flush_all() + ThreadLocalORMSession.close_all() @contextmanager