community-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From s...@apache.org
Subject svn commit: r1716949 - /comdev/projects.apache.org/scripts/cronjobs/parsereleases.py
Date Sat, 28 Nov 2015 12:07:21 GMT
Author: sebb
Date: Sat Nov 28 12:07:20 2015
New Revision: 1716949

URL: http://svn.apache.org/viewvc?rev=1716949&view=rev
Log:
Docs

Modified:
    comdev/projects.apache.org/scripts/cronjobs/parsereleases.py

Modified: comdev/projects.apache.org/scripts/cronjobs/parsereleases.py
URL: http://svn.apache.org/viewvc/comdev/projects.apache.org/scripts/cronjobs/parsereleases.py?rev=1716949&r1=1716948&r2=1716949&view=diff
==============================================================================
--- comdev/projects.apache.org/scripts/cronjobs/parsereleases.py (original)
+++ comdev/projects.apache.org/scripts/cronjobs/parsereleases.py Sat Nov 28 12:07:20 2015
@@ -8,12 +8,23 @@ Reads the list of files in http://www.ap
 
 Creates:
 ../../site/json/foundation/releases.json
+Format:
+{ top-level dir: { release-id: date}, ... }
+
+The release id is derived from the filename by removing common suffixes etc.; see cleanFilename().
+The date comes from the first entry
+
 ../../site/json/foundation/releases-files.json
+Format:
+{ top-level dir: { release-id: [list of files for that release-id]}, ... }
 
 TODO: it would probably be more efficient to parse the output of
 svn ls -R https://dist.apache.org/repos/dist/release/
 Could cache the output based on the last changed date
 
+Or use an rsync listing:
+rsync --list-only -r rsync.apache.org::apache-dist
+
 """
 
 releases = {}
@@ -39,6 +50,12 @@ def getDirList(url):
         pass
 
 def cleanFilename(filename):
+    """
+        Attempts to determine the release id to which a file belongs
+        Strips extensions such as .tgz etc, then suffixes such as -sources
+        Replaces qualifiers such as -assembly-, -parent- by '-'
+        Returns the simplified filename.
+    """
     for suffix in ['.tgz', '.gz', '.bz2', '.xz', '.zip', '.rar', '.tar', 'tar', '.deb', '.rpm',
'.dmg', '.egg', '.gem', '.pom', '.war', '.exe',
                    '-scala2.11', '-cdh4', '-hadoop1', '-hadoop2', '-hadoop2.3', '-hadoop2.4',
'-all',
                    '-src', '_src', '.src', '-sources', '_sources', '-source', '-bin', '-dist',



Mime
View raw message