community-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From s...@apache.org
Subject svn commit: r1726997 - in /comdev/projects.apache.org: scripts/README.txt scripts/cronjobs/parsecommitters.py site/json/foundation/HEADER.html
Date Wed, 27 Jan 2016 11:20:47 GMT
Author: sebb
Date: Wed Jan 27 11:20:47 2016
New Revision: 1726997

URL: http://svn.apache.org/viewvc?rev=1726997&view=rev
Log:
Finally abandon parsing http://people.apache.org/committer-index.html

Modified:
    comdev/projects.apache.org/scripts/README.txt
    comdev/projects.apache.org/scripts/cronjobs/parsecommitters.py
    comdev/projects.apache.org/site/json/foundation/HEADER.html

Modified: comdev/projects.apache.org/scripts/README.txt
URL: http://svn.apache.org/viewvc/comdev/projects.apache.org/scripts/README.txt?rev=1726997&r1=1726996&r2=1726997&view=diff
==============================================================================
--- comdev/projects.apache.org/scripts/README.txt (original)
+++ comdev/projects.apache.org/scripts/README.txt Wed Jan 27 11:20:47 2016
@@ -22,9 +22,13 @@ various sources:
      + site/json/foundation/committees-retired.json (updated)
      + site/json/foundation/pmcs.json - (used by reporter.a.o only)
 
-- parsecommitters.py: Fetches and parses the committer (LDAP) list via
-  people.apache.org.
-  in: http://people.apache.org/committer-index.html
+- parsecommitters.py: extracts the committer & group details as follows:
+  in: https://whimsy.apache.org/public/member-info.json
+    + https://whimsy.apache.org/public/public_ldap_committees.json
+    + https://whimsy.apache.org/public/public_ldap_groups.json
+    + https://whimsy.apache.org/public/public_ldap_people.json
+    + https://whimsy.apache.org/public/public_nonldap_groups.json
+
   out: site/json/foundation/people.json - committers with reference to groups
      + site/json/foundation/people_name.json - converts availid to Public Name
      + site/json/foundation/groups.json - groups with corresponding committers

Modified: comdev/projects.apache.org/scripts/cronjobs/parsecommitters.py
URL: http://svn.apache.org/viewvc/comdev/projects.apache.org/scripts/cronjobs/parsecommitters.py?rev=1726997&r1=1726996&r2=1726997&view=diff
==============================================================================
--- comdev/projects.apache.org/scripts/cronjobs/parsecommitters.py (original)
+++ comdev/projects.apache.org/scripts/cronjobs/parsecommitters.py Wed Jan 27 11:20:47 2016
@@ -1,48 +1,83 @@
 import errtee
 """
 
-Reads http://people.apache.org/committer-index.html and creates
+Reads:
+https://whimsy.apache.org/public/member-info.json
+https://whimsy.apache.org/public/public_ldap_committees.json
+https://whimsy.apache.org/public/public_ldap_groups.json
+https://whimsy.apache.org/public/public_ldap_people.json
+https://whimsy.apache.org/public/public_nonldap_groups.json
+
+Creates:
+
 ../../site/json/foundation/people.json
+- key: availid
+  value: dict => {'groups': array, 'member': true/false, 'name': public name }
+
 ../../site/json/foundation/people_name.json
+
 ../../site/json/foundation/groups.json
+- key: group name (-pmc suffix if relevant), value: array of availids
+(partial inverse of people.json)
 
 """
 
-import re
-import urllib.request
+import io
 import json
-import urlutils
-fc = urlutils.UrlCache(interval=600)
+from urllib.request import urlopen, Request
+from urllib.error import HTTPError
 
-people = {}
-people_name = {}
-groups = {}
-data = fc.get("http://people.apache.org/committer-index.html","committer-index.html", encoding='utf-8').read()
-#data = urllib.request.urlopen("http://people.apache.org/committer-index.html").read().decode('utf-8')
-
-for committer in re.findall(r"<tr>([\S\s]+?)</tr>", data, re.MULTILINE | re.UNICODE):
-##    print(committer)
-    m = re.search(r"<a id='(.+?)'>[\s\S]+?<td.+?>\s*(.+?)</td>[\s\S]+?>(.+)</td>", committer, re.MULTILINE | re.UNICODE)
-    if m:
-        cid = m.group(1)
-        cname = re.sub(r"<.+?>", "", m.group(2), 4)
-        if '|' in cname:
-            cname = cname[:cname.index('|')]
-        groupsString = m.group(3)
-        cgroups = []
-        isMember = False
-        if re.search(r"<b", committer, re.MULTILINE | re.UNICODE):
-            isMember = True
-        for group in re.findall(r"#([-a-z0-9._]+)", groupsString):
-            cgroups.append(group)
-            groups[group] = groups[group] if group in groups else []
-            groups[group].append(cid)
-        people[cid] = {
-            'name':  cname,
-            'member': isMember,
-            'groups': cgroups
-        }
-        people_name[cid] = cname
+def loadJson(url):
+    print("Reading " +url)
+    resp = io.TextIOWrapper(urlopen(url), encoding='utf-8', errors=None)
+    j = json.load(resp)
+    resp.close()
+    return j
+
+people = {} # key: availid, value: dict with 'groups' (array of group names), 'member' (true/false) and 'name' (public name)
+groups = {} # key: group name (-pmc suffix if relevant), value: array of availids
+people_name = {} # key: id, value: public name
+
+def addPersonGroup(p):
+    # only add people to the name list if they are referenced
+    if not p in people_name:
+        people_name[p] = ldappeople[p]['name']
+    if not p in people:
+        people[p] = {'groups':[],
+                     'member' : p in memberinfo,
+                     'name': ldappeople[p]['name'] # use full list as disabled entries may be referenced
+                     }
+    return people[p]['groups']
+
+# must be done first so the name can be used
+ldappeople = loadJson('https://whimsy.apache.org/public/public_ldap_people.json')['people']
+# Membership details also needed above
+memberinfo = loadJson('https://whimsy-test.apache.org/public/member-info.json')['members']
+
+# load the other required files
+ldapgroups = loadJson('https://whimsy.apache.org/public/public_ldap_groups.json')['groups']
+ldapcttees = loadJson('https://whimsy.apache.org/public/public_ldap_committees.json')['committees']
+nonldapgroups = loadJson('https://whimsy.apache.org/public/public_nonldap_groups.json')['groups']
+
+for g in nonldapgroups:
+    groups[g] = nonldapgroups[g]
+    for p in nonldapgroups[g]:
+        addPersonGroup(p).append(g)
+
+for g in ldapgroups:
+    if not g == 'committers':
+        groups[g] = ldapgroups[g]['roster']
+        for p in ldapgroups[g]['roster']:
+            addPersonGroup(p).append(g)
+
+for g in ldapcttees:
+    groups[g+'-pmc'] = ldapcttees[g]['roster']
+    for p in ldapcttees[g]['roster']:
+        addPersonGroup(p).append(g+'-pmc')
+    
+# Now sort the groups arrays
+for p in people:
+    people[p]['groups'].sort()
 
 # Use utf-8 encoding for the file contents
 print("Writing people.json")
@@ -56,6 +91,8 @@ with open("../../site/json/foundation/pe
     f.close()
 
 print("Writing groups.json")
+for g in groups:
+    groups[g] = sorted(groups[g])
 with open("../../site/json/foundation/groups.json", "w", encoding='utf-8') as f:
     json.dump(groups, f, sort_keys=True, indent=0, ensure_ascii=False)
     f.close()

Modified: comdev/projects.apache.org/site/json/foundation/HEADER.html
URL: http://svn.apache.org/viewvc/comdev/projects.apache.org/site/json/foundation/HEADER.html?rev=1726997&r1=1726996&r2=1726997&view=diff
==============================================================================
--- comdev/projects.apache.org/site/json/foundation/HEADER.html (original)
+++ comdev/projects.apache.org/site/json/foundation/HEADER.html Wed Jan 27 11:20:47 2016
@@ -4,7 +4,7 @@ see <code><a href="http://svn.apache.org
 for more information.
 <ul>
 <li><a href="accounts-evolution.json"><code>accounts-evolution.json</code></a> generated by <code>cronjobs/countaccounts.py</code> from LDAP</li>
-<li><a href="groups.json"><code>groups.json</code></a>, <a href="people.json"><code>people.json</code></a> and <a href="people_name.json"><code>people_name.json</code></a> generated by <code>cronjobs/parsecommitters.py</code> from <a href="http://people.apache.org/committer-index.html">http://people.apache.org/committer-index.html</a></li>
+<li><a href="groups.json"><code>groups.json</code></a>, <a href="people.json"><code>people.json</code></a> and <a href="people_name.json"><code>people_name.json</code></a> generated by <code>cronjobs/parsecommitters.py</code> from <a href="https://whimsy.apache.org/public/">https://whimsy.apache.org/public/</a></li>
 <li><a href="podlings.json"><code>podlings.json</code></a> and <a href="podlings-history.json"><code>podlings-history.json</code></a> generated by <code>cronjobs/podlings.py</code> from <a href="http://incubator.apache.org/podlings.xml">http://incubator.apache.org/podlings.xml</a></li>
 <li><a href="projects.json"><code>projects.json</code></a> imported with <code>import/rdfparse.py</code> from <a href='https://svn.apache.org/repos/asf/infrastructure/site-tools/trunk/projects/files.xml'>projects' DOAP files</a></li>
 <li><a href="releases.json"><code>releases.json</code></a> generated by <code>cronjobs/parsereleases.py</code> from <a href="http://www.apache.org/dist/">http://www.apache.org/dist/</a></li>



Mime
View raw message