From: brondsem@apache.org
To: allura-commits@incubator.apache.org
Reply-To: allura-dev@incubator.apache.org
Date: Thu, 19 Sep 2013 16:21:58 -0000
Subject: [26/41] git commit: [#6535] ticket:424 Handle pagination

[#6535] ticket:424 Handle pagination


Project: http://git-wip-us.apache.org/repos/asf/incubator-allura/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-allura/commit/37751659
Tree: http://git-wip-us.apache.org/repos/asf/incubator-allura/tree/37751659
Diff: http://git-wip-us.apache.org/repos/asf/incubator-allura/diff/37751659

Branch: refs/heads/db/5822
Commit: 37751659315b4300b27bbd2be0e544991ab42954
Parents: a2833b3
Author: Igor Bondarenko
Authored: Tue Sep 17 12:35:11 2013 +0300
Committer: Dave Brondsema
Committed: Thu Sep 19 14:46:51 2013 +0000

----------------------------------------------------------------------
 .../forgeimporters/github/__init__.py        | 25 ++++++++++-
 .../tests/github/test_extractor.py           | 47 +++++++++++++++-----
 2 files changed, 58 insertions(+), 14 deletions(-)
----------------------------------------------------------------------
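The change below parses GitHub's "Link" response header to follow rel="next"
pages. As a rough illustration only (not part of the commit; the sample header
value is made up), the pattern added as NEXT_PAGE_URL_RE pulls the next-page
URL out of a header shaped like GitHub's pagination links:

    import re

    # Same pattern the commit adds as NEXT_PAGE_URL_RE: capture the URL inside
    # <...> that is immediately followed by rel="next".  Because re.match() is
    # anchored at the start of the string, this assumes the "next" link is
    # listed first, as GitHub does.
    NEXT_PAGE_URL_RE = re.compile(r'<([^>]*)>; rel="next"')

    # Made-up example of a paginated Link header value
    link = ('<https://api.github.com/repos/owner/project/issues?page=2>; rel="next", '
            '<https://api.github.com/repos/owner/project/issues?page=5>; rel="last"')

    m = NEXT_PAGE_URL_RE.match(link)
    print(m.group(1) if m else None)
    # -> https://api.github.com/repos/owner/project/issues?page=2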

http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/37751659/ForgeImporters/forgeimporters/github/__init__.py
----------------------------------------------------------------------
diff --git a/ForgeImporters/forgeimporters/github/__init__.py b/ForgeImporters/forgeimporters/github/__init__.py
index 438fc8a..35bbff7 100644
--- a/ForgeImporters/forgeimporters/github/__init__.py
+++ b/ForgeImporters/forgeimporters/github/__init__.py
@@ -15,6 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
+import re
 import logging
 import json
 import urllib
@@ -31,9 +32,29 @@ class GitHubProjectExtractor(base.ProjectExtractor):
         'issues': 'https://api.github.com/repos/{project_name}/issues',
     }
     POSSIBLE_STATES = ('opened', 'closed')
+    NEXT_PAGE_URL_RE = re.compile(r'<([^>]*)>; rel="next"')
+
+    def get_next_page_url(self, link):
+        if not link:
+            return
+        m = self.NEXT_PAGE_URL_RE.match(link)
+        return m.group(1) if m else None
 
     def parse_page(self, page):
-        return json.loads(page.read().decode('utf8'))
+        # Look at link header to handle pagination
+        link = page.info().get('Link')
+        next_page_url = self.get_next_page_url(link)
+        return json.loads(page.read().decode('utf8')), next_page_url
+
+    def get_page(self, page_name_or_url, **kw):
+        page = super(GitHubProjectExtractor, self).get_page(page_name_or_url, **kw)
+        page, next_page_url = page
+        while next_page_url:
+            p = super(GitHubProjectExtractor, self).get_page(next_page_url, **kw)
+            p, next_page_url = p
+            page += p
+        self.page = page
+        return self.page
 
     def get_summary(self):
         return self.get_page('project_info').get('description')
@@ -52,7 +73,7 @@ class GitHubProjectExtractor(base.ProjectExtractor):
             issue_list_url = url.format(
                 state=state,
             )
-            issues += json.loads(self.urlopen(issue_list_url).read().decode('utf8'))
+            issues += self.get_page(issue_list_url)
         issues.sort(key=lambda x: x['number'])
         for issue in issues:
             yield (issue['number'], issue)
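With the change above, parse_page() returns a (data, next_page_url) tuple and
get_page() keeps calling the base implementation until there is no rel="next"
link, concatenating the JSON lists. A minimal standalone sketch of that
accumulate-while-next loop, with a made-up fetch_page() standing in for the
base ProjectExtractor.get_page():

    def fetch_page(url):
        # Stand-in for one request: returns (parsed JSON list, next page URL or None).
        pages = {
            'https://example.invalid/issues?page=1':
                ([{'number': 1}, {'number': 2}], 'https://example.invalid/issues?page=2'),
            'https://example.invalid/issues?page=2':
                ([{'number': 3}], None),
        }
        return pages[url]

    def get_all(url):
        # Mirrors the loop in GitHubProjectExtractor.get_page: fetch the first
        # page, then follow next-page URLs, concatenating results as we go.
        data, next_url = fetch_page(url)
        while next_url:
            more, next_url = fetch_page(next_url)
            data += more
        return data

    print(get_all('https://example.invalid/issues?page=1'))
    # -> [{'number': 1}, {'number': 2}, {'number': 3}]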

http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/37751659/ForgeImporters/forgeimporters/tests/github/test_extractor.py
----------------------------------------------------------------------
diff --git a/ForgeImporters/forgeimporters/tests/github/test_extractor.py b/ForgeImporters/forgeimporters/tests/github/test_extractor.py
index 8eb5811..c7734fa 100644
--- a/ForgeImporters/forgeimporters/tests/github/test_extractor.py
+++ b/ForgeImporters/forgeimporters/tests/github/test_extractor.py
@@ -20,12 +20,9 @@ from unittest import TestCase
 
 from ... import github
 
-try:
-    from cStringIO import StringIO
-except ImportError:
-    from StringIO import StringIO
-
-
+# Can't use cStringIO here, because we cannot set attributes or subclass it,
+# and this is needed in mocked_urlopen below
+from StringIO import StringIO
 
 
 class TestGitHubProjectExtractor(TestCase):
@@ -39,23 +36,48 @@ class TestGitHubProjectExtractor(TestCase):
     ]
     OPENED_ISSUES_LIST = [
         {u'number': 3},
+        {u'number': 4},
+        {u'number': 5},
+    ]
+    OPENED_ISSUES_LIST_PAGE2 = [
+        {u'number': 6},
+        {u'number': 7},
+        {u'number': 8},
     ]
     ISSUE_COMMENTS = [u'hello', u'mocked_comment']
+    ISSUE_COMMENTS_PAGE2 = [u'hello2', u'mocked_comment2']
 
     def mocked_urlopen(self, url):
+        headers = {}
         if url.endswith('/test_project'):
-            return StringIO(json.dumps(self.PROJECT_INFO))
+            response = StringIO(json.dumps(self.PROJECT_INFO))
         elif url.endswith('/issues?state=closed'):
-            return StringIO(json.dumps(self.CLOSED_ISSUES_LIST))
+            response = StringIO(json.dumps(self.CLOSED_ISSUES_LIST))
         elif url.endswith('/issues?state=opened'):
-            return StringIO(json.dumps(self.OPENED_ISSUES_LIST))
+            response = StringIO(json.dumps(self.OPENED_ISSUES_LIST))
+            headers = {'Link': '<https://api.github.com/repos/test_project/issues?state=opened&page=2>; rel="next"'}
+        elif url.endswith('/issues?state=opened&page=2'):
+            response = StringIO(json.dumps(self.OPENED_ISSUES_LIST_PAGE2))
        elif url.endswith('/comments'):
-            return StringIO(json.dumps(self.ISSUE_COMMENTS))
+            response = StringIO(json.dumps(self.ISSUE_COMMENTS))
+            headers = {'Link': '</issues/1/comments?page=2>; rel="next"'}
+        elif url.endswith('/comments?page=2'):
+            response = StringIO(json.dumps(self.ISSUE_COMMENTS_PAGE2))
+
+        response.info = lambda: headers
+        return response
 
     def setUp(self):
         self.extractor = github.GitHubProjectExtractor('test_project')
         self.extractor.urlopen = self.mocked_urlopen
 
+    def test_get_next_page_url(self):
+        self.assertIsNone(self.extractor.get_next_page_url(None))
+        self.assertIsNone(self.extractor.get_next_page_url(''))
+        link = '<https://api.github.com/repositories/8560576/issues?state=open&page=2>; rel="next", <https://api.github.com/repositories/8560576/issues?state=open&page=3>; rel="last"'
+        self.assertEqual(self.extractor.get_next_page_url(link),
+                'https://api.github.com/repositories/8560576/issues?state=open&page=2')
+
     def test_get_summary(self):
         self.assertEqual(self.extractor.get_summary(), 'project description')
 
@@ -65,10 +87,11 @@ class TestGitHubProjectExtractor(TestCase):
     def test_iter_issues(self):
         issues = list(self.extractor.iter_issues())
         all_issues = zip((1,2), self.CLOSED_ISSUES_LIST)
-        all_issues.append((3, self.OPENED_ISSUES_LIST[0]))
+        all_issues += zip((3, 4, 5), self.OPENED_ISSUES_LIST)
+        all_issues += zip((6, 7, 8), self.OPENED_ISSUES_LIST_PAGE2)
         self.assertEqual(issues, all_issues)
 
     def test_iter_comments(self):
         mock_issue = {'comments_url': '/issues/1/comments'}
         comments = list(self.extractor.iter_comments(mock_issue))
-        self.assertEqual(comments, self.ISSUE_COMMENTS)
+        self.assertEqual(comments, self.ISSUE_COMMENTS + self.ISSUE_COMMENTS_PAGE2)
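
Based on the methods exercised by the tests above, usage would look roughly
like the sketch below (the project name is made up and live GitHub API access
is assumed; this is not taken from the commit itself):

    from forgeimporters.github import GitHubProjectExtractor

    # Hypothetical project name; the tests construct the extractor the same way.
    extractor = GitHubProjectExtractor('some-user/some-project')
    print(extractor.get_summary())

    # iter_issues() and iter_comments() now walk every page transparently.
    for number, issue in extractor.iter_issues():
        for comment in extractor.iter_comments(issue):
            print(comment)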