labs-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From a..@apache.org
Subject svn commit: r1604694 - in /labs/panopticon/pan-utils: requirements.txt src/asf/data/releases.py src/asf/utils/test.py tests/test_releases.py
Date Mon, 23 Jun 2014 05:17:16 GMT
Author: adc
Date: Mon Jun 23 05:17:16 2014
New Revision: 1604694

URL: http://svn.apache.org/r1604694
Log:
Added some release checking utilities

Added:
    labs/panopticon/pan-utils/src/asf/data/releases.py
    labs/panopticon/pan-utils/tests/test_releases.py
Modified:
    labs/panopticon/pan-utils/requirements.txt
    labs/panopticon/pan-utils/src/asf/utils/test.py

Modified: labs/panopticon/pan-utils/requirements.txt
URL: http://svn.apache.org/viewvc/labs/panopticon/pan-utils/requirements.txt?rev=1604694&r1=1604693&r2=1604694&view=diff
==============================================================================
--- labs/panopticon/pan-utils/requirements.txt (original)
+++ labs/panopticon/pan-utils/requirements.txt Mon Jun 23 05:17:16 2014
@@ -1,5 +1,7 @@
-brownie
+beautifulsoup4==4.3.2
+brownie==0.5.1
 keyring==1.6.1
-PyCrypto
-python-ldap
-restkit
+python-gnupg==0.3.6
+pycrypto==2.6.1
+python-ldap==2.4.15
+restkit==4.2.2
\ No newline at end of file

Added: labs/panopticon/pan-utils/src/asf/data/releases.py
URL: http://svn.apache.org/viewvc/labs/panopticon/pan-utils/src/asf/data/releases.py?rev=1604694&view=auto
==============================================================================
--- labs/panopticon/pan-utils/src/asf/data/releases.py (added)
+++ labs/panopticon/pan-utils/src/asf/data/releases.py Mon Jun 23 05:17:16 2014
@@ -0,0 +1,176 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+import collections
+import hashlib
+import logging
+import os
+import urlparse
+
+import bs4
+import gnupg
+import restkit
+
+from asf.utils.file import temp_directory
+
+
+log = logging.getLogger(__name__)
+
+
+def scrape_release_url(release_url, ignore=None):
+    """ Recursively scrape file listing HTML pages for resources that are part of a release
+    :param str release_url: the root URL from which to start recursively scraping
+    :param set ignore: set of URLs to ignore when scraping
+    :return: a dictionary of paths and files within those paths
+
+    .. code-block:: json
+
+        {
+          'path': {
+            'filename' : 'url'
+            ...
+          }
+        }
+    """
+    original_len = len(release_url) - 1
+
+    ignore = (ignore or set()) | set(['../'])
+
+    def _scrape(scanning_url):
+        log.debug('scraping %s', scanning_url)
+        request = restkit.request(scanning_url, follow_redirect=True)
+        soup = bs4.BeautifulSoup(request.body_string())
+        resources = collections.defaultdict(dict)
+
+        for link in soup.find_all('a'):
+            href = link.get('href')
+            if href in ignore:
+                log.debug('ignored href %s in %s', href, scanning_url)
+                continue
+
+            if href.endswith('/'):
+                resources.update(_scrape(link.get('href')))
+            else:
+                text = link.get_text()
+                resources[href[original_len:-len(text) - 1]][text] = urlparse.urljoin(scanning_url, href)
+
+        return resources
+
+    return _scrape(release_url)
+
+
+BLOCK_SIZE = 65536
+
+
+def verify_hash(resource_path, hash_path, algorithm):
+    """ Verify that the hash of a resource matches the hash stored in the hash file
+    :param str resource_path: the location of the resource file to hash
+    :param str hash_path: the location of the hash file
+    :param str algorithm: the algorithm to use to hash the resource file
+    :return: True if the hash of the resource matches the hash stored in the hash file
+    """
+    resource_file = resource_path.split('/')[-1:][0]
+    with open(hash_path, 'r') as f:
+        reported_hash = f.readline()
+        if reported_hash.startswith(resource_file + ':'):
+            # sometimes hash files contain the name of the file suffixed with a colon
+            # we need to remove that bit and coalesce the hex digits in the file
+            reported_hash = reported_hash[len(resource_file) + 1:].strip()
+            line = f.readline()
+            while line:
+                reported_hash = reported_hash + line.strip()
+                line = f.readline()
+            reported_hash = reported_hash.replace(' ', '')
+
+    hasher = hashlib.new(algorithm)
+    with open(resource_path, 'rb') as f:
+        buf = f.read(BLOCK_SIZE)
+        while len(buf) > 0:
+            hasher.update(buf)
+            buf = f.read(BLOCK_SIZE)
+
+    return reported_hash.strip().lower() == hasher.hexdigest().strip().lower()
+
+
+def verify_signature(resource_file, signature_file, gpg):
+    """ Verify signed file
+    :param unicode resource_file: the location of the signed resource file
+    :param unicode signature_file: the location of the signature file
+    :param gpg: GPG instance used to verify signature
+    :return: True if signature is valid and False otherwise
+    """
+    with open(signature_file, 'r') as f:
+        result = gpg.verify_file(f, resource_file)
+        return result.valid
+
+
+def verify_hashes(resources_url, keys_url, allowed_algorithms=None):
+    """ Verify the declared hashes of resources that are stored at a particular URL
+
+    The declared hashes of the resources will be in the same "directory" but have the
+    algorithm suffixed at the end.
+
+    :param str resources_url: the URL used to locate the resources
+    :param str keys_url: the URL used to locate the KEYS file that is used to verify signatures
+    :param set allowed_algorithms: the set of allowed algorithms to use to hash, default: sha1 and md5
+    :return bool: True if the files have the same hashes and False otherwise
+    """
+    resources = scrape_release_url(resources_url, ignore=set(['http://subversion.apache.org/']))
+
+    if not resources:
+        return False
+
+    keys_data = ''
+    for chunk in restkit.request(keys_url, follow_redirect=True).tee():
+        keys_data += chunk
+
+    with temp_directory() as temp_gpg_dir:
+        gpg = gnupg.GPG(gnupghome=temp_gpg_dir)
+        gpg.import_keys(keys_data)
+
+        allowed_algorithms = allowed_algorithms or set(['sha1', 'md5'])
+        for path, files in resources.iteritems():
+            with temp_directory() as temp_dir:
+
+                # download resources
+                for resource, resources_url in files.iteritems():
+                    with open(os.path.join(temp_dir, resource), 'wb') as f:
+                        for chunk in restkit.request(resources_url, follow_redirect=True).tee():
+                            f.write(chunk)
+
+                # verify hashes of resources
+                for resource in files.keys():
+                    if resource.split('.')[-1:][0] not in allowed_algorithms:
+                        resource_file = os.path.join(temp_dir, resource)
+                        for algorithm in allowed_algorithms:
+                            hash_file = resource_file + '.' + algorithm
+                            if os.path.exists(hash_file):
+                                if not verify_hash(resource_file, hash_file, algorithm):
+                                    log.warning('Resource %s does not match reported %s hash', path + '/' + resource, algorithm)
+                                    return False
+
+                # verify signatures
+                for resource in files.keys():
+                    resource_file = os.path.join(temp_dir, resource)
+                    signature_file = resource_file + '.asc'
+                    if os.path.exists(signature_file):
+                        if not verify_signature(resource_file, signature_file, gpg):
+                            log.warning('Resource %s signature does not verify correctly', path + '/' + resource)
+                            return False
+
+    return True

Modified: labs/panopticon/pan-utils/src/asf/utils/test.py
URL: http://svn.apache.org/viewvc/labs/panopticon/pan-utils/src/asf/utils/test.py?rev=1604694&r1=1604693&r2=1604694&view=diff
==============================================================================
--- labs/panopticon/pan-utils/src/asf/utils/test.py (original)
+++ labs/panopticon/pan-utils/src/asf/utils/test.py Mon Jun 23 05:17:16 2014
@@ -16,11 +16,12 @@
 # specific language governing permissions and limitations
 # under the License.
 #
-import pytest
+import gnupg
 
+import pytest
 from asf.data import ldap
-
 from asf.utils.auth import get_stored_credentials
+from asf.utils.file import temp_directory
 
 
 def test_credentials_stored():
@@ -42,3 +43,16 @@ def test_ldap():
 
 ensure_ldap = pytest.mark.skipif(test_ldap(),
                                  reason="Need to ensure that LDAP is available")
+
+
+def test_gpg():
+    try:
+        with temp_directory() as temp_gpg_dir:
+            gnupg.GPG(gnupghome=temp_gpg_dir)
+        return False
+    except Exception:
+        return True
+
+
+ensure_gpg = pytest.mark.skipif(test_gpg(),
+                                reason="Need to ensure that gpg is available")

Added: labs/panopticon/pan-utils/tests/test_releases.py
URL: http://svn.apache.org/viewvc/labs/panopticon/pan-utils/tests/test_releases.py?rev=1604694&view=auto
==============================================================================
--- labs/panopticon/pan-utils/tests/test_releases.py (added)
+++ labs/panopticon/pan-utils/tests/test_releases.py Mon Jun 23 05:17:16 2014
@@ -0,0 +1,30 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+from asf.data import releases
+from asf.utils.test import ensure_gpg
+
+
+@ensure_gpg
+def test_scrape_release_url():
+    assert releases.verify_hashes('https://repository.apache.org/content/repositories/orgapachemrql-1001/', 'http://www.apache.org/dist/incubator/mrql/KEYS')
+    assert releases.verify_hashes('https://dist.apache.org/repos/dist/dev/incubator/mrql/0.9.2-incubating-RC2/', 'http://www.apache.org/dist/incubator/mrql/KEYS')
+
+    assert releases.verify_hashes('https://people.apache.org/~ptgoetz/storm-0.9.2-incubating/', 'https://git-wip-us.apache.org/repos/asf?p=incubator-storm.git;a=blob_plain;f=KEYS;hb=22b832708295fa2c15c4f3c70ac0d2bc6fded4bd')
+    assert releases.verify_hashes('https://repository.apache.org/content/repositories/orgapachestorm-1008/', 'https://git-wip-us.apache.org/repos/asf?p=incubator-storm.git;a=blob_plain;f=KEYS;hb=22b832708295fa2c15c4f3c70ac0d2bc6fded4bd')
+



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@labs.apache.org
For additional commands, e-mail: commits-help@labs.apache.org


Mime
View raw message