community-commits mailing list archives

From s...@apache.org
Subject svn commit: r1690547 - in /comdev/projects.apache.org/scripts/import: parsecommittees.py parseprojects.py
Date Sun, 12 Jul 2015 22:58:51 GMT
Author: sebb
Date: Sun Jul 12 22:58:51 2015
New Revision: 1690547

URL: http://svn.apache.org/r1690547
Log:
EOL

Modified:
    comdev/projects.apache.org/scripts/import/parsecommittees.py   (contents, props changed)
    comdev/projects.apache.org/scripts/import/parseprojects.py   (contents, props changed)
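
Setting svn:eol-style=native (presumably applied with svn propset svn:eol-style native) is content-neutral: Subversion stores such a file with LF line endings in the repository and translates them to the client platform's convention on checkout, which is why every line of both scripts appears changed in the diff below even though only the end-of-line characters differ. A minimal Python sketch of the normalization Subversion applies when storing an eol-style file (illustrative only, not part of this commit):

    def normalize_eol(data: bytes) -> bytes:
        # Collapse CRLF and bare CR to LF, as Subversion does when it
        # stores an eol-style file in the repository.
        return data.replace(b"\r\n", b"\n").replace(b"\r", b"\n")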

Modified: comdev/projects.apache.org/scripts/import/parsecommittees.py
URL: http://svn.apache.org/viewvc/comdev/projects.apache.org/scripts/import/parsecommittees.py?rev=1690547&r1=1690546&r2=1690547&view=diff
==============================================================================
--- comdev/projects.apache.org/scripts/import/parsecommittees.py (original)
+++ comdev/projects.apache.org/scripts/import/parsecommittees.py Sun Jul 12 22:58:51 2015
@@ -1,342 +1,342 @@
-import re
-import json
-import sys
-import io
-import os
-import urllib.request
-import xml.etree.ElementTree as ET
-import xml.dom.minidom as minidom
-import datetime
-
-"""
-Reads:
-../../site/json/foundation/people.json
-../../data/committees.xml
-../../data/board/committee-info.txt
-../../site/json/foundation/chairs.json (cross-check)
-
-Updates:
-../../site/json/foundation/committees.json
-../../site/json/foundation/committees-retired.json
-
-Writes:
-../../site/json/foundation/pmcs.json
-"""
-
-# Committee names from committee-info.txt that do not match committees-evolution.json
-renamesCommittee2Json = {
-    'Apache APR': 'Apache Portable Runtime',
-    'Apache Perl': 'Apache mod_perl'
-}
-# Committee names from http://www.apache.org/foundation/ that do not match committees-evolution.json
-renamesChairs2Json = {
-    'Apache Logging Services': 'Apache Logging',
-    'Apache Perl': 'Apache mod_perl'
-}
-# committee ids not matching committee name in lowercase
-committeeIds = {
-    'Community Development': 'comdev',
-    'HTTP Server': 'httpd',
-    'Lucene.Net': 'lucenenet',
-    'Open Climate Workbench': 'climate'
-}
-# LDAP group ids not matching committee id
-group_ids = {
-    'webservices': 'ws'
-}
-# homepages not matching http://<committee id>.apache.org/
-homepages = {
-    'comdev': 'http://community.apache.org/',
-    'httpcomponents': 'http://hc.apache.org/',
-    'whimsy': 'http://whimsical.apache.org'
-}
-# short descriptions for non-classical committees that are not listed in http://www.apache.org/#projects-list
-shortdescs = {
-    'attic': 'A home for dormant projects',
-    'comdev': 'Resources to help people become involved with Apache projects',
-    'incubator': "Entry path into The Apache Software Foundation (ASF) for projects and codebases wishing to become part of the Foundation's efforts",
-    'labs': 'A place for innovation where committers of the foundation can experiment with new ideas'
-}
-
-with open("../../site/json/foundation/people.json", "r") as f:
-    people = json.loads(f.read())
-    f.close()
-
-def handleChild(el):
-    retval = None
-    hasKids = False
-    for child in list(el):
-        hasKids = True
-    attribs = {}
-    for key in el.attrib:
-        xkey = re.sub(r"\{.+\}", "", key)
-        attribs[xkey] = el.attrib[key]
-    tag = re.sub(r"\{.+\}", "", el.tag)
-    value = attribs['resource'] if 'resource' in attribs else el.text
-    if not hasKids:
-        retval = value
-    else:
-        retval = {}
-        for child in list(el):
-            k, v = handleChild(child)
-            retval[k] = v
-    return tag, retval
-
-# get PMC Data from /data/committees.xml
-print("reading PMC Data (/data/committees.xml)")
-pmcs = {}
-pmcDataUrls = {} # id -> url
-with open("../../data/committees.xml", "r") as f:
-    xmldoc = minidom.parseString(f.read())
-    f.close()
-itemlist = xmldoc.getElementsByTagName('location')
-for s in itemlist :
-    url = s.childNodes[0].data
-    try:
-        if url.startswith('http'):
-            print(url)
-            rdf = urllib.request.urlopen(url).read()
-        else:
-            rdf = open("../../data/%s" % url, 'r').read()
-            url = "https://svn.apache.org/repos/asf/comdev/projects.apache.org/data/%s" % url
-        rdfxml = ET.fromstring(rdf)
-        data = rdfxml[0]
-        committeeId = data.attrib['{http://www.w3.org/1999/02/22-rdf-syntax-ns#}about']
-        pmcDataUrls[committeeId] = url
-
-        # transform PMC data RDF to json
-        pmcjson = {
-            'rdf': url
-        }
-        pmcname = None
-        for el in data:
-            k, v = handleChild(el)
-            if k in pmcjson:
-                # merge multiple values
-                if type(pmcjson[k]) is str:
-                    pmcjson[k] = "%s, %s" % (pmcjson[k], v)
-                else:
-                    for xk in v:
-                        pmcjson[k][xk] = v[xk]
-            else:
-                pmcjson[k] = v
-
-        pmcs[committeeId] = pmcjson
-
-        # copy PMC RDF data to /doap/{committeeId}/pmc-doap.rdf
-        if type(rdf) is str:
-            mode = "w"
-        else:
-            mode = "wb"
-        with open("../../site/doap/%s/pmc-doap.rdf" % committeeId, mode) as f:
-            f.write(rdf)
-            f.close()
-
-    except Exception as err:
-        print(err)
-
-with open("../../data/board/committee-info.txt", "rb") as f:
-    data = f.read().decode('utf-8')
-    f.close()
-
-# extract reporting cycles information
-cycles = { 'Incubator': 0 } # Incubator reports each month
-current = 1
-reports = data[data.index("January, April, July, October"):data.index('Next month')]
-buf = io.StringIO(reports)
-newCycle = True
-for p in buf.readlines():
-    if p.startswith('  '):
-        cycles[p.strip()] = current
-        newCycle = False
-    elif len(p.strip()) == 0:
-        if not newCycle:
-            newCycle = True
-            current += 1
-# extract committees composition
-data = data[data.index('Hint: '):]
-data = data[data.index('* '):]
-buf = io.StringIO(data)
-curCommittee = ''
-committees = {}
-c = {}
-newCommittee = True
-for l in buf.readlines():
-    if l.startswith('* '):
-        curCommittee = l[2:l.index('  (')]
-        newCommittee = True
-        c['members'] = {}
-    elif len(l.strip()) == 0:
-        if newCommittee:
-            committees[curCommittee] = c
-            c = {}
-            newCommittee = False
-    elif not l.startswith('==='):
-        m = re.search(r"(.+?)\s+<([^@]+)@apache.org", l.strip())
-        if not m:
-            print("unexpected line format: %s" % l.strip())
-        fullname = m.group(1)
-        uid = m.group(2)
-        isChair = fullname.endswith('(chair)')
-        if isChair:
-            fullname = fullname[0:fullname.index('(')].strip()
-            c['chair'] = uid
-        c['members'][uid] = fullname
-
-# This only appears to be used for checking links
-www = urllib.request.urlopen("http://www.apache.org/").read().decode('utf-8')
-
-committeeCount = 0
-committeesList = []
-committeesMap = {}
-addedCommittees = []
-c = {}
-
-for pmc in re.findall(r"\* .+?\s+\(est\. [0-9/]+[^\r\n]+", data):
-
-    #print(pmc)
-    m = re.search(r"\* (.+?)\s+\(est\. ([0-9]+)/([0-9]+)", pmc)
-    if m:
-        committeeShortName = m.group(1)
-        month = m.group(2)
-        year = m.group(3)
-        if not re.search(r"Committee", pmc):
-            if committeeShortName in committeeIds:
-                committeeId = committeeIds[committeeShortName]
-            else:
-                committeeId = committeeShortName.lower().replace(' ', '').replace('.', '')
-            # Classical committee
-            committeeName = "Apache %s" % committeeShortName
-            if committeeName in renamesCommittee2Json:
-                committeeName = renamesCommittee2Json[committeeName]
-            #print(committeeShortName)
-            committeeCount += 1
-
-            # add committee to committees
-            committee = {}
-            committee['id'] = committeeId
-            if committeeId in group_ids:
-                group = group_ids[committeeId]
-            else:
-                group = committeeId
-            committee['group'] = group
-            committee['name'] = committeeName
-            committee['established'] = "%s-%s" % (year, month)
-            if group in homepages:
-                homepage = homepages[group]
-            else:
-                homepage = 'http://%s.apache.org/' % group
-            committee['homepage'] = homepage
-            # committee committers and PMC members
-            pmcgroup = "%s-pmc" % group
-            committers = [] # [ 'login' ]
-            pmc = [] # [ 'login' ]
-            for login in people:
-                p = people[login]
-                if p['groups']:
-                    if group in p['groups']:
-                        committers.append(login)
-                    if pmcgroup in p['groups']:
-                        pmc.append(login)
-                else:
-                    print("user %s has no groups" % login)
-            committers.sort()
-            pmc.sort()
-            # don't store committers and PMC members arrays in committee: it's easy to get from groups.json
-            #committee['pmcs'] = pmc
-            #committee['committers'] = committers
-            if len(pmc) == 0:
-                print('WARN: %s (%s established in %s) has no PMC members LDAP group (id=%s)' % (committeeId, committeeName, committee['established'], pmcgroup))
-            if committeeShortName in committees:
-                committee['chair'] = committees[committeeShortName]['chair']
-            if committeeShortName in cycles:
-                committee['reporting'] = cycles[committeeShortName]
-            else:
-                print('WARN: %s not found in reporting cycles' % committeeShortName)
-
-            link = '<a href="%s" title="' % homepage
-            if committeeId in shortdescs:
-                committee['shortdesc'] = shortdescs[committeeId]
-            elif link in www:
-                shortdesc = www[(www.index(link) + len(link)):]
-                shortdesc = shortdesc[:shortdesc.index('">')]
-                committee['shortdesc'] = shortdesc
-            else:
-                print("WARN: %s (%s) missing from http://www.apache.org/#projects-list" %
(committeeShortName, homepage))
-            # TODO committee['description'] (or charter) not in committee-info.txt
-            # TODO committee['retired'] not in committee-info.txt
-            if committeeId in pmcDataUrls:
-                committee['rdf'] = pmcDataUrls[committeeId]
-            else:
-                print("WARN: %s (%s) missing from committees.xml" % (committeeShortName,
committeeId))
-            committeesList.append(committee)
-            committeesMap[committeeId] = committee
-
-            # generate TLP PMC DOAP file at http://projects-new.apache.org/doap/{committeeId}/pmc.rdf
-            doap = ET.Element('rdf:RDF', attrib= { 'xml:lang': 'en',
-                                                   'xmlns': 'http://usefulinc.com/ns/doap#',
-                                                   'xmlns:rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
-                                                   'xmlns:asfext': 'http://projects.apache.org/ns/asfext#',
-                                                   'xmlns:foaf': 'http://xmlns.com/foaf/0.1/'
-                                                   })
-            doap_pmc = ET.SubElement(doap, 'asfext:pmc')
-            ET.SubElement(doap_pmc, 'asfext:name').text = committeeName
-            ET.SubElement(doap_pmc, 'homepage', attrib = { 'rdf:resource': homepage })
-            doap_chair = ET.SubElement(doap_pmc, 'asfext:chair')
-            doap_chair_person = ET.SubElement(doap_chair, 'foaf:Person')
-            ET.SubElement(doap_chair_person, 'foaf:nick').text = committee['chair']
-            ET.SubElement(doap_chair_person, 'foaf:name').text = people[committee['chair']]['name']
-            directory = "../../site/doap/%s" % committeeId
-            if not os.path.exists(directory):
-                os.makedirs(directory)
-            with open("%s/pmc.rdf" % directory, "w") as f:
-                f.write(minidom.parseString(ET.tostring(doap, encoding="utf-8")).toprettyxml(indent="\t"))
-                f.close()
-
-        else:
-            # Special Committee (Officer's, President's or Board)
-            print("INFO: %s ignored %s" % (committeeShortName, pmc[pmc.rfind('('):]))
-        c[committeeName] = True
-
-print("found %s new committees from %s committees in committee_info.txt" % (len(addedCommittees),
committeeCount))
-addedCommittees.sort()
-for added in addedCommittees:
-    print("- %s" % added)
-
-# detect retired committees to add to committees-retired.json
-with open("../../site/json/foundation/committees.json", "r") as f:
-    committeesPrevious = json.loads(f.read())
-    f.close()
-with open("../../site/json/foundation/committees-retired.json", "r") as f:
-    committeesRetired = json.loads(f.read())
-    f.close()
-for previous in committeesPrevious:
-    if not previous['id'] in committeesMap:
-        print("found retired committee: %s" % previous['name'])
-        previous['retired'] = datetime.date.today().strftime('%Y-%m')
-        # remove data that is not useful in a retired committee
-        previous.pop('chair', None)
-        previous.pop('group', None)
-        previous.pop('rdf', None)
-        previous.pop('reporting', None)
-        committeesRetired.append(previous)
-
-with open("../../site/json/foundation/committees.json", "w") as f:
-    f.write(json.dumps(committeesList, sort_keys=True, indent=0))
-    f.close()
-
-with open("../../site/json/foundation/committees-retired.json", "w") as f:
-    f.write(json.dumps(committeesRetired, sort_keys=True, indent=0))
-    f.close()
-
-with open ("../../site/json/foundation/pmcs.json", "w") as f:
-    f.write(json.dumps(pmcs, sort_keys=True, indent=0))
-    f.close()
-
-# compare with chairs, for consistency checking
-chairs = json.load(open("../../site/json/foundation/chairs.json"))
-for chair in chairs:
-    if chair in renamesChairs2Json:
-        chair = renamesChairs2Json[chair]
-    if not chair in c:
-        print("WARN: %s is in http://www.apache.org/foundation/ but not in committee-info.txt:
typo somewhere or retirement in progress?" % chair)
+import re
+import json
+import sys
+import io
+import os
+import urllib.request
+import xml.etree.ElementTree as ET
+import xml.dom.minidom as minidom
+import datetime
+
+"""
+Reads:
+../../site/json/foundation/people.json
+../../data/committees.xml
+../../data/board/committee-info.txt
+../../site/json/foundation/chairs.json (cross-check)
+
+Updates:
+../../site/json/foundation/committees.json
+../../site/json/foundation/committees-retired.json
+
+Writes:
+../../site/json/foundation/pmcs.json
+"""
+
+# Committee names from committee-info.txt that do not match committees-evolution.json
+renamesCommittee2Json = {
+    'Apache APR': 'Apache Portable Runtime',
+    'Apache Perl': 'Apache mod_perl'
+}
+# Committee names from http://www.apache.org/foundation/ that do not match committees-evolution.json
+renamesChairs2Json = {
+    'Apache Logging Services': 'Apache Logging',
+    'Apache Perl': 'Apache mod_perl'
+}
+# committee ids not matching committee name in lowercase
+committeeIds = {
+    'Community Development': 'comdev',
+    'HTTP Server': 'httpd',
+    'Lucene.Net': 'lucenenet',
+    'Open Climate Workbench': 'climate'
+}
+# LDAP group ids not matching committee id
+group_ids = {
+    'webservices': 'ws'
+}
+# homepages not matching http://<committee id>.apache.org/
+homepages = {
+    'comdev': 'http://community.apache.org/',
+    'httpcomponents': 'http://hc.apache.org/',
+    'whimsy': 'http://whimsical.apache.org'
+}
+# short descriptions for non-classical committees that are not listed in http://www.apache.org/#projects-list
+shortdescs = {
+    'attic': 'A home for dormant projects',
+    'comdev': 'Resources to help people become involved with Apache projects',
+    'incubator': "Entry path into The Apache Software Foundation (ASF) for projects and codebases wishing to become part of the Foundation's efforts",
+    'labs': 'A place for innovation where committers of the foundation can experiment with new ideas'
+}
+
+with open("../../site/json/foundation/people.json", "r") as f:
+    people = json.loads(f.read())
+    f.close()
+
+def handleChild(el):
+    retval = None
+    hasKids = False
+    for child in list(el):
+        hasKids = True
+    attribs = {}
+    for key in el.attrib:
+        xkey = re.sub(r"\{.+\}", "", key)
+        attribs[xkey] = el.attrib[key]
+    tag = re.sub(r"\{.+\}", "", el.tag)
+    value = attribs['resource'] if 'resource' in attribs else el.text
+    if not hasKids:
+        retval = value
+    else:
+        retval = {}
+        for child in list(el):
+            k, v = handleChild(child)
+            retval[k] = v
+    return tag, retval
+
+# get PMC Data from /data/committees.xml
+print("reading PMC Data (/data/committees.xml)")
+pmcs = {}
+pmcDataUrls = {} # id -> url
+with open("../../data/committees.xml", "r") as f:
+    xmldoc = minidom.parseString(f.read())
+    f.close()
+itemlist = xmldoc.getElementsByTagName('location')
+for s in itemlist :
+    url = s.childNodes[0].data
+    try:
+        if url.startswith('http'):
+            print(url)
+            rdf = urllib.request.urlopen(url).read()
+        else:
+            rdf = open("../../data/%s" % url, 'r').read()
+            url = "https://svn.apache.org/repos/asf/comdev/projects.apache.org/data/%s" % url
+        rdfxml = ET.fromstring(rdf)
+        data = rdfxml[0]
+        committeeId = data.attrib['{http://www.w3.org/1999/02/22-rdf-syntax-ns#}about']
+        pmcDataUrls[committeeId] = url
+
+        # transform PMC data RDF to json
+        pmcjson = {
+            'rdf': url
+        }
+        pmcname = None
+        for el in data:
+            k, v = handleChild(el)
+            if k in pmcjson:
+                # merge multiple values
+                if type(pmcjson[k]) is str:
+                    pmcjson[k] = "%s, %s" % (pmcjson[k], v)
+                else:
+                    for xk in v:
+                        pmcjson[k][xk] = v[xk]
+            else:
+                pmcjson[k] = v
+
+        pmcs[committeeId] = pmcjson
+
+        # copy PMC RDF data to /doap/{committeeId}/pmc-doap.rdf
+        if type(rdf) is str:
+            mode = "w"
+        else:
+            mode = "wb"
+        with open("../../site/doap/%s/pmc-doap.rdf" % committeeId, mode) as f:
+            f.write(rdf)
+            f.close()
+
+    except Exception as err:
+        print(err)
+
+with open("../../data/board/committee-info.txt", "rb") as f:
+    data = f.read().decode('utf-8')
+    f.close()
+
+# extract reporting cycles information
+cycles = { 'Incubator': 0 } # Incubator reports each month
+current = 1
+reports = data[data.index("January, April, July, October"):data.index('Next month')]
+buf = io.StringIO(reports)
+newCycle = True
+for p in buf.readlines():
+    if p.startswith('  '):
+        cycles[p.strip()] = current
+        newCycle = False
+    elif len(p.strip()) == 0:
+        if not newCycle:
+            newCycle = True
+            current += 1
+# extract committees composition
+data = data[data.index('Hint: '):]
+data = data[data.index('* '):]
+buf = io.StringIO(data)
+curCommittee = ''
+committees = {}
+c = {}
+newCommittee = True
+for l in buf.readlines():
+    if l.startswith('* '):
+        curCommittee = l[2:l.index('  (')]
+        newCommittee = True
+        c['members'] = {}
+    elif len(l.strip()) == 0:
+        if newCommittee:
+            committees[curCommittee] = c
+            c = {}
+            newCommittee = False
+    elif not l.startswith('==='):
+        m = re.search(r"(.+?)\s+<([^@]+)@apache.org", l.strip())
+        if not m:
+            print("unexpected line format: %s" % l.strip())
+        fullname = m.group(1)
+        uid = m.group(2)
+        isChair = fullname.endswith('(chair)')
+        if isChair:
+            fullname = fullname[0:fullname.index('(')].strip()
+            c['chair'] = uid
+        c['members'][uid] = fullname
+
+# This only appears to be used for checking links
+www = urllib.request.urlopen("http://www.apache.org/").read().decode('utf-8')
+
+committeeCount = 0
+committeesList = []
+committeesMap = {}
+addedCommittees = []
+c = {}
+
+for pmc in re.findall(r"\* .+?\s+\(est\. [0-9/]+[^\r\n]+", data):
+
+    #print(pmc)
+    m = re.search(r"\* (.+?)\s+\(est\. ([0-9]+)/([0-9]+)", pmc)
+    if m:
+        committeeShortName = m.group(1)
+        month = m.group(2)
+        year = m.group(3)
+        if not re.search(r"Committee", pmc):
+            if committeeShortName in committeeIds:
+                committeeId = committeeIds[committeeShortName]
+            else:
+                committeeId = committeeShortName.lower().replace(' ', '').replace('.', '')
+            # Classical committee
+            committeeName = "Apache %s" % committeeShortName
+            if committeeName in renamesCommittee2Json:
+                committeeName = renamesCommittee2Json[committeeName]
+            #print(committeeShortName)
+            committeeCount += 1
+
+            # add committee to committees
+            committee = {}
+            committee['id'] = committeeId
+            if committeeId in group_ids:
+                group = group_ids[committeeId]
+            else:
+                group = committeeId
+            committee['group'] = group
+            committee['name'] = committeeName
+            committee['established'] = "%s-%s" % (year, month)
+            if group in homepages:
+                homepage = homepages[group]
+            else:
+                homepage = 'http://%s.apache.org/' % group
+            committee['homepage'] = homepage
+            # committee committers and PMC members
+            pmcgroup = "%s-pmc" % group
+            committers = [] # [ 'login' ]
+            pmc = [] # [ 'login' ]
+            for login in people:
+                p = people[login]
+                if p['groups']:
+                    if group in p['groups']:
+                        committers.append(login)
+                    if pmcgroup in p['groups']:
+                        pmc.append(login)
+                else:
+                    print("user %s has no groups" % login)
+            committers.sort()
+            pmc.sort()
+            # don't store committers and PMC members arrays in committee: it's easy to get from groups.json
+            #committee['pmcs'] = pmc
+            #committee['committers'] = committers
+            if len(pmc) == 0:
+                print('WARN: %s (%s established in %s) has no PMC members LDAP group (id=%s)' % (committeeId, committeeName, committee['established'], pmcgroup))
+            if committeeShortName in committees:
+                committee['chair'] = committees[committeeShortName]['chair']
+            if committeeShortName in cycles:
+                committee['reporting'] = cycles[committeeShortName]
+            else:
+                print('WARN: %s not found in reporting cycles' % committeeShortName)
+
+            link = '<a href="%s" title="' % homepage
+            if committeeId in shortdescs:
+                committee['shortdesc'] = shortdescs[committeeId]
+            elif link in www:
+                shortdesc = www[(www.index(link) + len(link)):]
+                shortdesc = shortdesc[:shortdesc.index('">')]
+                committee['shortdesc'] = shortdesc
+            else:
+                print("WARN: %s (%s) missing from http://www.apache.org/#projects-list" %
(committeeShortName, homepage))
+            # TODO committee['description'] (or charter) not in committee-info.txt
+            # TODO committee['retired'] not in committee-info.txt
+            if committeeId in pmcDataUrls:
+                committee['rdf'] = pmcDataUrls[committeeId]
+            else:
+                print("WARN: %s (%s) missing from committees.xml" % (committeeShortName,
committeeId))
+            committeesList.append(committee)
+            committeesMap[committeeId] = committee
+
+            # generate TLP PMC DOAP file at http://projects-new.apache.org/doap/{committeeId}/pmc.rdf
+            doap = ET.Element('rdf:RDF', attrib= { 'xml:lang': 'en',
+                                                   'xmlns': 'http://usefulinc.com/ns/doap#',
+                                                   'xmlns:rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
+                                                   'xmlns:asfext': 'http://projects.apache.org/ns/asfext#',
+                                                   'xmlns:foaf': 'http://xmlns.com/foaf/0.1/'
+                                                   })
+            doap_pmc = ET.SubElement(doap, 'asfext:pmc')
+            ET.SubElement(doap_pmc, 'asfext:name').text = committeeName
+            ET.SubElement(doap_pmc, 'homepage', attrib = { 'rdf:resource': homepage })
+            doap_chair = ET.SubElement(doap_pmc, 'asfext:chair')
+            doap_chair_person = ET.SubElement(doap_chair, 'foaf:Person')
+            ET.SubElement(doap_chair_person, 'foaf:nick').text = committee['chair']
+            ET.SubElement(doap_chair_person, 'foaf:name').text = people[committee['chair']]['name']
+            directory = "../../site/doap/%s" % committeeId
+            if not os.path.exists(directory):
+                os.makedirs(directory)
+            with open("%s/pmc.rdf" % directory, "w") as f:
+                f.write(minidom.parseString(ET.tostring(doap, encoding="utf-8")).toprettyxml(indent="\t"))
+                f.close()
+
+        else:
+            # Special Committee (Officer's, President's or Board)
+            print("INFO: %s ignored %s" % (committeeShortName, pmc[pmc.rfind('('):]))
+        c[committeeName] = True
+
+print("found %s new committees from %s committees in committee_info.txt" % (len(addedCommittees),
committeeCount))
+addedCommittees.sort()
+for added in addedCommittees:
+    print("- %s" % added)
+
+# detect retired committees to add to committees-retired.json
+with open("../../site/json/foundation/committees.json", "r") as f:
+    committeesPrevious = json.loads(f.read())
+    f.close()
+with open("../../site/json/foundation/committees-retired.json", "r") as f:
+    committeesRetired = json.loads(f.read())
+    f.close()
+for previous in committeesPrevious:
+    if not previous['id'] in committeesMap:
+        print("found retired committee: %s" % previous['name'])
+        previous['retired'] = datetime.date.today().strftime('%Y-%m')
+        # remove data that is not useful in a retired committee
+        previous.pop('chair', None)
+        previous.pop('group', None)
+        previous.pop('rdf', None)
+        previous.pop('reporting', None)
+        committeesRetired.append(previous)
+
+with open("../../site/json/foundation/committees.json", "w") as f:
+    f.write(json.dumps(committeesList, sort_keys=True, indent=0))
+    f.close()
+
+with open("../../site/json/foundation/committees-retired.json", "w") as f:
+    f.write(json.dumps(committeesRetired, sort_keys=True, indent=0))
+    f.close()
+
+with open ("../../site/json/foundation/pmcs.json", "w") as f:
+    f.write(json.dumps(pmcs, sort_keys=True, indent=0))
+    f.close()
+
+# compare with chairs, for consistency checking
+chairs = json.load(open("../../site/json/foundation/chairs.json"))
+for chair in chairs:
+    if chair in renamesChairs2Json:
+        chair = renamesChairs2Json[chair]
+    if not chair in c:
+        print("WARN: %s is in http://www.apache.org/foundation/ but not in committee-info.txt:
typo somewhere or retirement in progress?" % chair)

Propchange: comdev/projects.apache.org/scripts/import/parsecommittees.py
------------------------------------------------------------------------------
    svn:eol-style = native
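
The handleChild helper defined in parsecommittees.py above (and shared, with a small variation, by parseprojects.py below) recursively flattens an RDF element into a (tag, value) pair, stripping namespace braces, preferring an rdf:resource attribute over element text, and recursing into child elements. A hypothetical round trip on a tiny PMC snippet, assuming handleChild is in scope (the names are invented for illustration):

    import xml.etree.ElementTree as ET

    snippet = ('<asfext:pmc xmlns:asfext="http://projects.apache.org/ns/asfext#" '
               'xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">'
               '<asfext:name>Apache Example</asfext:name>'
               '<homepage rdf:resource="http://example.apache.org/"/>'
               '</asfext:pmc>')
    tag, value = handleChild(ET.fromstring(snippet))
    # tag   == 'pmc'   (namespace prefix stripped from the element tag)
    # value == {'name': 'Apache Example', 'homepage': 'http://example.apache.org/'}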

Modified: comdev/projects.apache.org/scripts/import/parseprojects.py
URL: http://svn.apache.org/viewvc/comdev/projects.apache.org/scripts/import/parseprojects.py?rev=1690547&r1=1690546&r2=1690547&view=diff
==============================================================================
--- comdev/projects.apache.org/scripts/import/parseprojects.py (original)
+++ comdev/projects.apache.org/scripts/import/parseprojects.py Sun Jul 12 22:58:51 2015
@@ -1,189 +1,189 @@
-from xml.dom import minidom
-import xml.etree.ElementTree as ET
-import re, urllib.request
-import json
-import os
-import traceback
-
-"""
-
-Reads:
-../../data/projects.xml
-parseprojects-failures.xml (if exists)
-../../site/json/foundation/committees-retired.json
-
-Writes:
-../../site/json/foundation/projects.json
-../../site/json/projects/%s.json
-../../site/doap/%s/%s.rdf
-parseprojects-failures.xml (if failures occurred)
-
-"""
-
-projectsList = "../../data/projects.xml"
-save = True
-if os.path.exists("parseprojects-failures.xml"):
-    projectsList = "parseprojects-failures.xml"
-    save = False
-with open(projectsList, "r") as f:
-    data  = f.read()
-    f.close()
-xmldoc = minidom.parseString(data)
-itemlist = xmldoc.getElementsByTagName('location') 
-
-siteMap = {
-    'hc': 'httpcomponents',
-    'ws':'webservices'
-}
-
-def site2committee(siteId):
-    if siteId in siteMap:
-        return siteMap[siteId]
-    return siteId
-
-with open("../../site/json/foundation/committees-retired.json", "r") as f:
-    committeesRetired = json.loads(f.read())
-    f.close()
-retired = []
-for r in committeesRetired:
-    retired.append(r['id'])
-
-projects = {}
-failures = []
-
-def handleChild(el):
-    retval = None
-    hasKids = False
-    for child in list(el):
-        hasKids = True
-    attribs = {}
-    for key in el.attrib:
-        xkey = re.sub(r"\{.+\}", "", key)
-        attribs[xkey] = el.attrib[key]
-    tag = re.sub(r"\{.+\}", "", el.tag)
-    value = attribs['resource'] if 'resource' in attribs else el.text
-    if not hasKids:
-        retval = value
-    else:
-        retval = {}
-        for child in list(el):
-            k, v = handleChild(child)
-            retval[k] = v
-            if k == "location":
-                retval = v
-                break
-    return tag, retval
-
-for s in itemlist :
-    url = s.childNodes[0].data
-    try:
-        rdf = urllib.request.urlopen(url).read()
-        rdfxml = ET.fromstring(rdf)
-        project = rdfxml[0]
-        pjson = {
-            'doap': url
-        }
-        prname = None
-        committeeId = None
-        projectJsonFilename = None
-        for el in project:
-            k, v = handleChild(el)
-            if not save:
-                print("+ %s" % k);
-            if k in pjson and not k in ['name','homepage']:
-                if type(pjson[k]) is str:
-                    pjson[k] = "%s, %s" % (pjson[k], v)
-                else:
-                    for xk in v:
-                        pjson[k].append(v[xk])
-            else:
-                if k not in ['release', 'implements', 'repository', 'developer', 'maintainer', 'member', 'helper']:
-                    pjson[k] = v
-                else:
-                    pjson[k] = []
-                    for xk in v:
-                        pjson[k].append(v[xk])
-
-        if pjson['homepage']:
-            homepage = pjson['homepage']
-            m = re.match(r"https?://([^.]+)\.", homepage, re.IGNORECASE)
-            if m:
-                siteId = site2committee(m.group(1))
-            nn = re.sub("http.+\.apache\.org/?", "", homepage)
-            if (nn == ""):
-                projectJsonFilename = siteId
-            else:
-                nn = nn.replace('/', ' ').strip().split().pop().replace('-project', '')
-                if nn.startswith("%s-" % siteId):
-                    projectJsonFilename = nn
-                else:
-                    projectJsonFilename = "%s-%s" % (siteId, nn)
-        else:
-            print("WARN: no homepage defined in %s, pmc = %s" % (url, pjson['pmc']))
-
-        if pjson['pmc'].startswith('http://attic.apache.org'):
-            committeeId = 'attic'
-        elif '.incubator.' in homepage:
-            committeeId = 'incubator'
-        else:
-            committeeId = siteId
-        if committeeId in retired:
-            print("WARN: project from a retired committee but PMC not changed to Attic in
%s" % url)
-            committeeId = 'attic'
-        pjson['pmc'] = committeeId
-
-        # replace category url with id, by removing http://projects.apache.org/category/
-        if 'category' in pjson:
-            pjson['category'] = pjson['category'].replace("http://projects.apache.org/category/", "")
-            if committeeId == 'attic' and not 'retired' in pjson['category']:
-                print("WARN: project in Attic but not in 'retired' category: %s" % url)
-                pjson['category'] = "%s, retired" % pjson['category']
-        elif committeeId == 'attic':
-            print("WARN: project in Attic but not in 'retired' category: %s" % url)
-            pjson['category'] = "retired"
-        if projectJsonFilename:
-            #add = {}
-            #for k in pjson:
-            #    if pjson[k] != None and type(pjson[k]) is not str:
-            #        for e in pjson[k]:
-            #            add[e] = pjson[k][e]
-            #        pjson[k] = None
-
-            projects[projectJsonFilename] = pjson
-            #for e in add:
-            #    pjson[e] = add[e]
-            print("Writing projects/%s.json..." % projectJsonFilename)
-            with open ("../../site/json/projects/%s.json" % projectJsonFilename, "w") as
f:
-                f.write(json.dumps(pjson, sort_keys=True, indent=0))
-                f.close()
-            # copy project DOAP to /doap/<committee id>/<project id>.rdf
-            with open ("../../site/doap/%s/%s.rdf" % (committeeId, projectJsonFilename),
"wb") as f:
-                f.write(rdf)
-                f.close()
-        else:
-            print("WARN: project ignored since unable to extract project json filename from
%s" % url)
-    except Exception as err:
-        print("Error when reading %s's doap file %s:" % (prname, url))
-        print("-"*60)
-        traceback.print_exc()
-        print("-"*60)
-        failures.append(url)
-        with open (url.split('/')[-1], "wb") as f:
-            f.write(rdf)
-            f.close()
-
-if save:
-    print("Writing foundation/projects.json...")
-    with open ("../../site/json/foundation/projects.json", "w") as f:
-        f.write(json.dumps(projects, sort_keys=True, indent=0))
-        f.close()
-
-if len(failures) > 0:
-    with open ("parseprojects-failures.xml", "w") as f:
-        f.write("<doapFiles>\n")
-        for fail in failures:
-            f.write("<location>%s</location>\n" % fail)
-        f.write("</doapFiles>\n")
-        f.close()
-
-print("Done!")
+from xml.dom import minidom
+import xml.etree.ElementTree as ET
+import re, urllib.request
+import json
+import os
+import traceback
+
+"""
+
+Reads:
+../../data/projects.xml
+parseprojects-failures.xml (if exists)
+../../site/json/foundation/committees-retired.json
+
+Writes:
+../../site/json/foundation/projects.json
+../../site/json/projects/%s.json
+../../site/doap/%s/%s.rdf
+parseprojects-failures.xml (if failures occurred)
+
+"""
+
+projectsList = "../../data/projects.xml"
+save = True
+if os.path.exists("parseprojects-failures.xml"):
+    projectsList = "parseprojects-failures.xml"
+    save = False
+with open(projectsList, "r") as f:
+    data  = f.read()
+    f.close()
+xmldoc = minidom.parseString(data)
+itemlist = xmldoc.getElementsByTagName('location') 
+
+siteMap = {
+    'hc': 'httpcomponents',
+    'ws':'webservices'
+}
+
+def site2committee(siteId):
+    if siteId in siteMap:
+        return siteMap[siteId]
+    return siteId
+
+with open("../../site/json/foundation/committees-retired.json", "r") as f:
+    committeesRetired = json.loads(f.read())
+    f.close()
+retired = []
+for r in committeesRetired:
+    retired.append(r['id'])
+
+projects = {}
+failures = []
+
+def handleChild(el):
+    retval = None
+    hasKids = False
+    for child in list(el):
+        hasKids = True
+    attribs = {}
+    for key in el.attrib:
+        xkey = re.sub(r"\{.+\}", "", key)
+        attribs[xkey] = el.attrib[key]
+    tag = re.sub(r"\{.+\}", "", el.tag)
+    value = attribs['resource'] if 'resource' in attribs else el.text
+    if not hasKids:
+        retval = value
+    else:
+        retval = {}
+        for child in list(el):
+            k, v = handleChild(child)
+            retval[k] = v
+            if k == "location":
+                retval = v
+                break
+    return tag, retval
+
+for s in itemlist :
+    url = s.childNodes[0].data
+    try:
+        rdf = urllib.request.urlopen(url).read()
+        rdfxml = ET.fromstring(rdf)
+        project = rdfxml[0]
+        pjson = {
+            'doap': url
+        }
+        prname = None
+        committeeId = None
+        projectJsonFilename = None
+        for el in project:
+            k, v = handleChild(el)
+            if not save:
+                print("+ %s" % k);
+            if k in pjson and not k in ['name','homepage']:
+                if type(pjson[k]) is str:
+                    pjson[k] = "%s, %s" % (pjson[k], v)
+                else:
+                    for xk in v:
+                        pjson[k].append(v[xk])
+            else:
+                if k not in ['release', 'implements', 'repository', 'developer', 'maintainer', 'member', 'helper']:
+                    pjson[k] = v
+                else:
+                    pjson[k] = []
+                    for xk in v:
+                        pjson[k].append(v[xk])
+
+        if pjson['homepage']:
+            homepage = pjson['homepage']
+            m = re.match(r"https?://([^.]+)\.", homepage, re.IGNORECASE)
+            if m:
+                siteId = site2committee(m.group(1))
+            nn = re.sub("http.+\.apache\.org/?", "", homepage)
+            if (nn == ""):
+                projectJsonFilename = siteId
+            else:
+                nn = nn.replace('/', ' ').strip().split().pop().replace('-project', '')
+                if nn.startswith("%s-" % siteId):
+                    projectJsonFilename = nn
+                else:
+                    projectJsonFilename = "%s-%s" % (siteId, nn)
+        else:
+            print("WARN: no homepage defined in %s, pmc = %s" % (url, pjson['pmc']))
+
+        if pjson['pmc'].startswith('http://attic.apache.org'):
+            committeeId = 'attic'
+        elif '.incubator.' in homepage:
+            committeeId = 'incubator'
+        else:
+            committeeId = siteId
+        if committeeId in retired:
+            print("WARN: project from a retired committee but PMC not changed to Attic in
%s" % url)
+            committeeId = 'attic'
+        pjson['pmc'] = committeeId
+
+        # replace category url with id, by removing http://projects.apache.org/category/
+        if 'category' in pjson:
+            pjson['category'] = pjson['category'].replace("http://projects.apache.org/category/", "")
+            if committeeId == 'attic' and not 'retired' in pjson['category']:
+                print("WARN: project in Attic but not in 'retired' category: %s" % url)
+                pjson['category'] = "%s, retired" % pjson['category']
+        elif committeeId == 'attic':
+            print("WARN: project in Attic but not in 'retired' category: %s" % url)
+            pjson['category'] = "retired"
+        if projectJsonFilename:
+            #add = {}
+            #for k in pjson:
+            #    if pjson[k] != None and type(pjson[k]) is not str:
+            #        for e in pjson[k]:
+            #            add[e] = pjson[k][e]
+            #        pjson[k] = None
+
+            projects[projectJsonFilename] = pjson
+            #for e in add:
+            #    pjson[e] = add[e]
+            print("Writing projects/%s.json..." % projectJsonFilename)
+            with open ("../../site/json/projects/%s.json" % projectJsonFilename, "w") as
f:
+                f.write(json.dumps(pjson, sort_keys=True, indent=0))
+                f.close()
+            # copy project DOAP to /doap/<committee id>/<project id>.rdf
+            with open ("../../site/doap/%s/%s.rdf" % (committeeId, projectJsonFilename),
"wb") as f:
+                f.write(rdf)
+                f.close()
+        else:
+            print("WARN: project ignored since unable to extract project json filename from
%s" % url)
+    except Exception as err:
+        print("Error when reading %s's doap file %s:" % (prname, url))
+        print("-"*60)
+        traceback.print_exc()
+        print("-"*60)
+        failures.append(url)
+        with open (url.split('/')[-1], "wb") as f:
+            f.write(rdf)
+            f.close()
+
+if save:
+    print("Writing foundation/projects.json...")
+    with open ("../../site/json/foundation/projects.json", "w") as f:
+        f.write(json.dumps(projects, sort_keys=True, indent=0))
+        f.close()
+
+if len(failures) > 0:
+    with open ("parseprojects-failures.xml", "w") as f:
+        f.write("<doapFiles>\n")
+        for fail in failures:
+            f.write("<location>%s</location>\n" % fail)
+        f.write("</doapFiles>\n")
+        f.close()
+
+print("Done!")

Propchange: comdev/projects.apache.org/scripts/import/parseprojects.py
------------------------------------------------------------------------------
    svn:eol-style = native
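
The project-id derivation in parseprojects.py above can be traced by hand on a hypothetical homepage, assuming the usual TLP URL layout:

    homepage = 'http://hc.apache.org/httpcomponents-client/'  # hypothetical input
    # re.match captures 'hc'          -> site2committee('hc') == 'httpcomponents'
    # re.sub strips the host part     -> nn == 'httpcomponents-client/'
    # cleanup of '/' and '-project'   -> nn == 'httpcomponents-client'
    # nn starts with 'httpcomponents-', so projectJsonFilename == 'httpcomponents-client'
    # and the script writes ../../site/json/projects/httpcomponents-client.json
    # plus ../../site/doap/httpcomponents/httpcomponents-client.rdf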


