community-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From s...@apache.org
Subject svn commit: r1704378 - in /comdev/projects.apache.org: STRUCTURE.txt scripts/README.txt scripts/committee_info.py scripts/cronjobs/parsechairs.py
Date Mon, 21 Sep 2015 18:16:20 GMT
Author: sebb
Date: Mon Sep 21 18:16:09 2015
New Revision: 1704378

URL: http://svn.apache.org/viewvc?rev=1704378&view=rev
Log:
Restore parsechairs.py but use whimsy json data instead

Added:
    comdev/projects.apache.org/scripts/committee_info.py   (with props)
    comdev/projects.apache.org/scripts/cronjobs/parsechairs.py
      - copied, changed from r1703928, comdev/projects.apache.org/scripts/cronjobs/parsechairs.py
Modified:
    comdev/projects.apache.org/STRUCTURE.txt
    comdev/projects.apache.org/scripts/README.txt

Modified: comdev/projects.apache.org/STRUCTURE.txt
URL: http://svn.apache.org/viewvc/comdev/projects.apache.org/STRUCTURE.txt?rev=1704378&r1=1704377&r2=1704378&view=diff
==============================================================================
--- comdev/projects.apache.org/STRUCTURE.txt (original)
+++ comdev/projects.apache.org/STRUCTURE.txt Mon Sep 21 18:16:09 2015
@@ -55,7 +55,7 @@ crontab -l -u www-data:
 # m h  dom mon dow   command
 00 00 * * * cd /var/www/projects.apache.org/scripts/cronjobs && python3.4 podlings.py
 00 00 * * * cd /var/www/projects.apache.org/scripts/cronjobs && python3.4 parsecommitters.py
-##00 00 * * * cd /var/www/projects.apache.org/scripts/cronjobs && python3.4 parsechairs.py
+00 00 * * * cd /var/www/projects.apache.org/scripts/cronjobs && python3.4 parsechairs.py
 00 00 * * * cd /var/www/projects.apache.org/scripts/cronjobs && python3.4 countaccounts.py
 00 00 * * * cd /var/www/projects.apache.org/scripts/cronjobs && python3.4 parsereleases.py
 

Modified: comdev/projects.apache.org/scripts/README.txt
URL: http://svn.apache.org/viewvc/comdev/projects.apache.org/scripts/README.txt?rev=1704378&r1=1704377&r2=1704378&view=diff
==============================================================================
--- comdev/projects.apache.org/scripts/README.txt (original)
+++ comdev/projects.apache.org/scripts/README.txt Mon Sep 21 18:16:09 2015
@@ -7,6 +7,10 @@ various sources:
   in:  site/json/foundation/accounts-evolution.json + ldapsearch
   out: site/json/foundation/accounts-evolution.json (updated)
 
+- parsechairs.py
+  in:  data/cache/committee-info.json (from whimsy, via committee_info module)
+  out: site/json/foundation/chairs.json - (used by reporter.a.o only)
+
 - parsecommitters.py: Fetches and parses the committer (LDAP) list via
   people.apache.org.
   in: http://people.apache.org/committer-index.html

Added: comdev/projects.apache.org/scripts/committee_info.py
URL: http://svn.apache.org/viewvc/comdev/projects.apache.org/scripts/committee_info.py?rev=1704378&view=auto
==============================================================================
--- comdev/projects.apache.org/scripts/committee_info.py (added)
+++ comdev/projects.apache.org/scripts/committee_info.py Mon Sep 21 18:16:09 2015
@@ -0,0 +1,115 @@
+"""
+
+Module to give access to data from committee-info.json
+
+This module acts as the gatekeeper for all access to committee-info.json
+which is cached from https://whimsy.apache.org/public/committee-info.json
+
+"""
+
+import sys
+if sys.hexversion < 0x03000000:
+    raise ImportError("This script requires Python 3")
+import os
+from os.path import dirname, abspath, join
+from inspect import getsourcefile
+import urllib.request
+import time
+import calendar
+import json
+
+MYHOME = dirname(abspath(getsourcefile(lambda:0))) # automatically work out home location so can run the code anywhere
+print(MYHOME)
+# we assume that this script is located one level below the top
+COMDEV_HOME=dirname(MYHOME)
+print(COMDEV_HOME)
+CACHE_DIR=join(COMDEV_HOME,'data','cache')
+print(CACHE_DIR)
+URL='https://whimsy.apache.org/public/committee-info.json'
+NAME='committee-info.json'
+FILE=CACHE_DIR+NAME
+INTERVAL=300 # code won't recheck for updated HTTP file until this number of seconds has elapsed
+
+# time format used in Last-Modified/If-Modified-Since HTTP headers
+HTTP_TIME_FORMAT = '%a, %d %b %Y %H:%M:%S GMT'
+
+# get file mod date in suitable format for If-Modified-Since
+def mod_date(t):
+    return time.strftime(HTTP_TIME_FORMAT, time.gmtime(t))
+
+# get file mod_date
+def file_mtime(filename):
+    try:
+        t = os.path.getmtime(filename)
+    except FileNotFoundError:
+        t = 0
+    return t
+
+# download url as file if the cached copy is too old
+def get_url_if_newer(url, dir, name):
+    path=join(dir,name)
+    fileTime = file_mtime(path)
+    check = dir+".checked_"+name
+    if fileTime > 0:
+        checkTime = file_mtime(check)
+        now = time.time()
+        if checkTime > (now - INTERVAL):
+            print("Recently checked %d\n%d\n%d, skip check" % (INTERVAL, checkTime, now))
+            return
+        else:
+            print("Not recently checked\n%d\n%d" % (checkTime, now))
+    else:
+        print("Not found")
+
+    sinceTime = mod_date(fileTime)
+    headers = {"If-Modified-Since" : sinceTime}
+    
+    req = urllib.request.Request(URL, headers=headers)
+    try:
+        response = urllib.request.urlopen(req)
+        lastMod = response.headers['Last-Modified']
+        lastModT = calendar.timegm(time.strptime(lastMod, HTTP_TIME_FORMAT))
+        outFile = path + ".tmp"
+        with open(outFile,'wb') as f:
+            f.write(response.read())
+            f.close()
+        
+        # store the last mod time as the time of the file
+        os.utime(outFile, times=(lastModT, lastModT))
+        os.rename(outFile, path) # seems to preserve file mod time
+        print("Downloaded new version of %s " % path)
+    except urllib.error.HTTPError as err:
+        if not err.code == 304:
+            raise
+        else:
+            print("Cached copy of %s is up to date" % path)
+
+    with open(check,'a'):
+        os.utime(check, None) # touch the marker file
+
+def update_cache():
+    get_url_if_newer(URL, CACHE_DIR, NAME)
+
+def chairs():
+
+    update_cache()
+
+    with open(FILE, "r", encoding='utf-8') as f:
+        cidata = json.loads(f.read())
+        f.close()
+
+    committees = cidata['committees']
+
+    chairjson={}
+    for ctte in committees:    
+        c = committees[ctte]
+        if not c['pmc']:
+            continue
+        chs = c['chair']
+        ch = None
+        for ch in chs: # allow for multiple chairs
+            break
+        name = 'Apache %s' % c['display_name']
+        chairjson[name] = chs[ch]['name']
+
+    return chairjson
\ No newline at end of file

Propchange: comdev/projects.apache.org/scripts/committee_info.py
------------------------------------------------------------------------------
    svn:eol-style = native

Copied: comdev/projects.apache.org/scripts/cronjobs/parsechairs.py (from r1703928, comdev/projects.apache.org/scripts/cronjobs/parsechairs.py)
URL: http://svn.apache.org/viewvc/comdev/projects.apache.org/scripts/cronjobs/parsechairs.py?p2=comdev/projects.apache.org/scripts/cronjobs/parsechairs.py&p1=comdev/projects.apache.org/scripts/cronjobs/parsechairs.py&r1=1703928&r2=1704378&rev=1704378&view=diff
==============================================================================
--- comdev/projects.apache.org/scripts/cronjobs/parsechairs.py (original)
+++ comdev/projects.apache.org/scripts/cronjobs/parsechairs.py Mon Sep 21 18:16:09 2015
@@ -1,41 +1,21 @@
-import re, urllib.request
-import csv
+import sys
+sys.path.append("..") # module committee_info is in parent directory
+import committee_info
 import json
-import os
 
 """
-Reads http://www.apache.org/foundation/
+Reads committee-info.json via committee_info module
+
 Creates:
 ../../site/json/foundation/chairs.json
 
-TODO replace this by parsing committee-info.txt when access is granted
-
 """
 
-chairs = {}
-
-renames = {
-    'Apache APR': 'Apache Portable Runtime'
-}
-
-data = urllib.request.urlopen("http://www.apache.org/foundation/").read().decode('utf-8')
-x = 0
-
-for committer in re.findall(r"<tr>[\s\S]+?V\.P\., Apache [\s\S]+?</tr>", data, re.MULTILINE | re.UNICODE):
-    x += 1
-    #print(committer)
-    m = re.search(r"<td>V.P., (Apache [\s\S]+?)</td>[\s\S]*?<td>([\s\S]+?)</td>", committer, re.MULTILINE | re.UNICODE)
-    if m:
-        project = m.group(1)
-        if project in renames:
-            project = renames[project]
-        person = m.group(2)
-        chairs[project] = person
-
+chairs = committee_info.chairs()
 
 print("Writing chairs.json")
-with open("../../site/json/foundation/chairs.json", "w") as f:
-    f.write(json.dumps(chairs, sort_keys=True, indent=0))
+with open("../../site/json/foundation/chairs.json", "w", encoding='utf-8') as f:
+    json.dump(chairs, f, sort_keys = True, indent=0, ensure_ascii=False)
     f.close()
 
 print("All done!")
\ No newline at end of file



Mime
View raw message