incubator-allura-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From john...@apache.org
Subject [34/50] git commit: [#4691] Optimizations to building LCDs and fixes to SVN.compute_tree_new
Date Wed, 06 Feb 2013 15:42:52 GMT
[#4691] Optimizations to building LCDs and fixes to SVN.compute_tree_new

Signed-off-by: Cory Johns <johnsca@geek.net>


Project: http://git-wip-us.apache.org/repos/asf/incubator-allura/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-allura/commit/4d8ce02d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-allura/tree/4d8ce02d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-allura/diff/4d8ce02d

Branch: refs/heads/cj/4691
Commit: 4d8ce02df6d239d16a0d34803309eec325232499
Parents: 9459a7b
Author: Cory Johns <johnsca@geek.net>
Authored: Fri Jan 25 15:00:49 2013 +0000
Committer: Tim Van Steenburgh <tvansteenburgh@gmail.com>
Committed: Tue Feb 5 20:22:52 2013 +0000

----------------------------------------------------------------------
 Allura/allura/model/repo.py            |   30 +++++++----
 Allura/allura/model/repository.py      |   16 ++++++
 Allura/allura/tests/model/test_repo.py |    2 +
 ForgeGit/forgegit/model/git_repo.py    |    1 -
 ForgeSVN/forgesvn/model/svn.py         |   78 +++++++++++++++------------
 5 files changed, 82 insertions(+), 45 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/4d8ce02d/Allura/allura/model/repo.py
----------------------------------------------------------------------
diff --git a/Allura/allura/model/repo.py b/Allura/allura/model/repo.py
index e99db42..e5c6210 100644
--- a/Allura/allura/model/repo.py
+++ b/Allura/allura/model/repo.py
@@ -747,16 +747,26 @@ class LastCommit(RepoObject):
                 prev_lcd = cls.get(parent.get_path(path), create=False)
             except KeyError as e:
                 prev_lcd = None  # will fail if path was added this commit
-        for node in chain(tree.tree_ids, tree.blob_ids, tree.other_ids):
-            not_changed = os.path.join(path, node.name) not in tree.commit.changed_paths
-            if not_changed and prev_lcd:
-                commit_id = prev_lcd.by_name[node.name]
-            else:
-                commit_id = cls._last_commit_id(tree.commit, os.path.join(path, node.name))
-            entries.append(dict(
-                    name=node.name,
-                    commit_id=commit_id,
-                ))
+        entries = {}
+        nodes = set([node.name for node in chain(tree.tree_ids, tree.blob_ids, tree.other_ids)])
+        changed = set([node for node in nodes if os.path.join(path, node) in tree.commit.changed_paths])
+        if prev_lcd:
+            # get unchanged entries from previously computed LCD
+            entries = prev_lcd.by_name
+        else:
+            # no previously computed LCD, so get unchanged entries from SCM
+            # (but only ask for the ones that we know we need)
+            unchanged = [os.path.join(path, node) for node in nodes - changed]
+            entries = tree.commit.repo.last_commit_ids(tree.commit, unchanged)
+            if entries is None:
+                # something strange went wrong; bail out and possibly try again later
+                return None
+            # paths are fully-qualified; shorten them back to just node names
+            entries = {os.path.basename(path):commit_id for path,commit_id in entries.iteritems()}
+        # update with the nodes changed in this tree's commit
+        entries.update({node: tree.commit._id for node in changed})
+        # convert to a list of dicts, since mongo doesn't handle arbitrary keys well (i.e., . and $ not allowed)
+        entries = [{'name':name, 'commit_id':value} for name,value in entries.iteritems()]
         lcd = cls(
                 commit_id=tree.commit._id,
                 path=path,

http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/4d8ce02d/Allura/allura/model/repository.py
----------------------------------------------------------------------
diff --git a/Allura/allura/model/repository.py b/Allura/allura/model/repository.py
index c322658..7df624e 100644
--- a/Allura/allura/model/repository.py
+++ b/Allura/allura/model/repository.py
@@ -112,6 +112,20 @@ class RepositoryImplementation(object):
         '''Return count of the commits related to path'''
         raise NotImplementedError, 'commits_count'
 
+    def last_commit_ids(self, commit, paths):
+        '''
+        Return a mapping {path: commit_id} of the _id of the last
+        commit to touch each path, starting from the given commit.
+        '''
+        paths = set(paths)
+        result = {}
+        while commit:
+            changed = paths & set(commit.changed_paths)
+            result.update({path: commit._id for path in changed})
+            paths = paths - changed
+            commit = commit.get_parent()
+        return result
+
     @classmethod
     def shorthand_for_commit(cls, oid):
         return '[%s]' % oid[:6]
@@ -227,6 +241,8 @@ class Repository(Artifact, ActivityObject):
         return self._impl.commits(path, rev, skip, limit)
     def commits_count(self, path=None, rev=None):
         return self._impl.commits_count(path, rev)
+    def last_commit_ids(self, commit, paths):
+        return self._impl.last_commit_ids(commit, paths)
 
     def _log(self, rev, skip, limit):
         head = self.commit(rev)

http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/4d8ce02d/Allura/allura/tests/model/test_repo.py
----------------------------------------------------------------------
diff --git a/Allura/allura/tests/model/test_repo.py b/Allura/allura/tests/model/test_repo.py
index 9745441..6ce0364 100644
--- a/Allura/allura/tests/model/test_repo.py
+++ b/Allura/allura/tests/model/test_repo.py
@@ -82,6 +82,8 @@ class TestLastCommit(unittest.TestCase):
         self.repo = mock.Mock('repo', _commits=OrderedDict(), _last_commit=None)
         self.repo.shorthand_for_commit = lambda _id: _id[:6]
         self.repo.commits = self._commits
+        lcids = M.repository.RepositoryImplementation.last_commit_ids.__func__
+        self.repo.last_commit_ids = lambda *a, **k: lcids(self.repo, *a, **k)
 
     def _build_tree(self, commit, path, tree_paths):
         tree_nodes = []

http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/4d8ce02d/ForgeGit/forgegit/model/git_repo.py
----------------------------------------------------------------------
diff --git a/ForgeGit/forgegit/model/git_repo.py b/ForgeGit/forgegit/model/git_repo.py
index 111c863..6360dfd 100644
--- a/ForgeGit/forgegit/model/git_repo.py
+++ b/ForgeGit/forgegit/model/git_repo.py
@@ -255,7 +255,6 @@ class GitImplementation(M.RepositoryImplementation):
                     break
             commit = commit.get_parent()
 
-
     def commits_count(self, path=None, rev=None):
         commit = self._git.commit(rev)
         return commit.count(path)

http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/4d8ce02d/ForgeSVN/forgesvn/model/svn.py
----------------------------------------------------------------------
diff --git a/ForgeSVN/forgesvn/model/svn.py b/ForgeSVN/forgesvn/model/svn.py
index 0a77d89..33bac43 100644
--- a/ForgeSVN/forgesvn/model/svn.py
+++ b/ForgeSVN/forgesvn/model/svn.py
@@ -416,27 +416,8 @@ class SVNImplementation(M.RepositoryImplementation):
         log.debug('Compute tree for %d paths', len(infos))
         tree_ids = []
         blob_ids = []
-        chg_revno = infos[0][1]['last_changed_rev'].number
-        cur_revno = self._revno(commit._id)
-        commit_ids = [self._oid(revno) for revno in range(chg_revno, cur_revno+1)]
-        lcd = M.repo.LastCommit.query.get(
-                commit_ids=self._oid(chg_revno),
-                path=tree_path.strip('/'),
-            )
-        if lcd:
-            lcd.commit_ids = list(set(lcd.commit_ids + commit_ids))
-            lcd_is_new = False
-        else:
-            # we can't use the normal auto-vivification, because
-            # SVN repos don't have their diff infos filled out :(
-            lcd = M.repo.LastCommit(
-                commit_ids=commit_ids,
-                path=tree_path.strip('/'),
-            )
-            lcd_is_new = True
+        lcd_entries = []
         for path, info in infos[1:]:
-            last_commit_id = self._oid(info['last_changed_rev'].number)
-            last_commit = M.repo.Commit.query.get(_id=last_commit_id)
             if info.kind == pysvn.node_kind.dir:
                 tree_ids.append(Object(
                         id=self._tree_oid(commit._id, path),
@@ -447,26 +428,26 @@ class SVNImplementation(M.RepositoryImplementation):
                         name=path))
             else:
                 assert False
-            if lcd_is_new:
-                lcd.entries.append(dict(
-                        name=path,
-                        type='DIR' if info.kind == pysvn.node_kind.dir else 'BLOB',
-                        commit_info=last_commit.info,
-                    ))
-        session(lcd).flush(lcd)
+            lcd_entries.append(dict(
+                    name=path,
+                    commit_id=self._oid(info.last_changed_rev.number),
+                ))
         tree, is_new = RM.Tree.upsert(tree_id,
                 tree_ids=tree_ids,
                 blob_ids=blob_ids,
                 other_ids=[],
             )
         if is_new:
-            trees_doc = RM.TreesDoc.m.get(_id=commit._id)
-            if not trees_doc:
-                trees_doc = RM.TreesDoc(dict(
-                    _id=commit._id,
-                    tree_ids=[]))
-            trees_doc.tree_ids.append(tree_id)
-            trees_doc.m.save(safe=False)
+            commit_id = self._oid(infos[0][1].last_changed_rev.number)
+            path = tree_path.strip('/')
+            RM.TreesDoc.m.update_partial(
+                    {'_id': commit._id},
+                    {'$addToSet': {'tree_ids': tree_id}},
+                    upsert=True)
+            RM.LastCommitDoc.m.update_partial(
+                    {'commit_id': commit_id, 'path': path},
+                    {'commit_id': commit_id, 'path': path, 'entries': lcd_entries},
+                    upsert=True)
         return tree_id
 
     def _tree_oid(self, commit_id, path):
@@ -593,5 +574,34 @@ class SVNImplementation(M.RepositoryImplementation):
             log.info('ClientError processing commits for path %s, rev %s, treating as empty', path, rev, exc_info=True)
             return 0
 
+    def last_commit_ids(self, commit, paths):
+        '''
+        Return a mapping {path: commit_id} of the _id of the last
+        commit to touch each path, starting from the given commit.
+
+        Since SVN Diffs are computed on-demand, we can't walk the
+        commit tree to find these.  However, we can ask SVN for it
+        with a single call, so it shouldn't be too expensive.
+
+        NB: This assumes that all paths are direct children of a
+        single common parent path (i.e., you are only asking for
+        a subset of the nodes of a single tree, one level deep).
+        '''
+        tree_path = os.path.commonprefix(paths).strip('/')
+        rev = self._revision(commit._id)
+        try:
+            infos = self._svn.info2(
+                self._url + tree_path,
+                revision=rev,
+                depth=pysvn.depth.immediates)
+        except pysvn.ClientError:
+            log.exception('Error computing tree for %s: %s(%s)',
+                          self._repo, commit, tree_path)
+            return None
+        entries = {}
+        for path, info in infos[1:]:
+            if path in paths:
+                entries[path] = self._oid(info.last_changed_rev.number)
+        return entries
 
 Mapper.compile_all()


Mime
View raw message