incubator-cvs mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From s...@apache.org
Subject svn commit: r1717353 - in /incubator/public/trunk: clutch.pkl clutch.py content/clutcht.ent
Date Mon, 30 Nov 2015 23:52:27 GMT
Author: sebb
Date: Mon Nov 30 23:52:27 2015
New Revision: 1717353

URL: http://svn.apache.org/viewvc?rev=1717353&view=rev
Log:
Replace parse of http://people.apache.org/committers-by-project.html
by parsing asf-authorization-template from the Git deployment branch

Modified:
    incubator/public/trunk/clutch.pkl
    incubator/public/trunk/clutch.py
    incubator/public/trunk/content/clutcht.ent

Modified: incubator/public/trunk/clutch.pkl
URL: http://svn.apache.org/viewvc/incubator/public/trunk/clutch.pkl?rev=1717353&r1=1717352&r2=1717353&view=diff
==============================================================================
Binary files - no diff available.

Modified: incubator/public/trunk/clutch.py
URL: http://svn.apache.org/viewvc/incubator/public/trunk/clutch.py?rev=1717353&r1=1717352&r2=1717353&view=diff
==============================================================================
--- incubator/public/trunk/clutch.py (original)
+++ incubator/public/trunk/clutch.py Mon Nov 30 23:52:27 2015
@@ -65,7 +65,7 @@ find /www/www.apache.org/dist/incubator
 The above has now been replaced by parsing the output of
 'svn', 'ls', '-R', 'https://dist.apache.org/repos/dist/release/incubator'
 
-http://people.apache.org/committers-by-project.html
+asf-authorization-template from Git deployment branch
 http://mail-archives.apache.org/mod_mbox/
 http://www.apache.org/dist/incubator/<resource>
 http://svn.apache.org/repos/asf/incubator
@@ -136,10 +136,13 @@ import re
 import urllib.request, urllib.error, urllib.parse
 import xml.dom.minidom
 import argparse
+import io
 
 # constants for external data ---
+GIT='https://git-wip-us.apache.org/repos/asf?p=infrastructure-puppet.git;hb=refs/heads/deployment;a=blob_plain;f=modules/subversion_server/files/authorization/%s'
+ASF='asf-authorization-template'
+# PIT='pit-authorization-template'
 
-COMMITTERS_BY_PROJECT = "http://people.apache.org/committers-by-project.html"
 MAIL_LIST_URL = "http://mail-archives.apache.org/mod_mbox/"
 
 # Constant for site content location ---
@@ -175,9 +178,13 @@ def logexternal(string):
   if optionExternal:
     print("External: " + string)
 
-def getUrl(url):
+def getUrl(url, encoding=None, errors=None):
   logexternal(url)
-  return urllib.request.urlopen(url, timeout=5) # ensure invalid URLs don't cause long wait
+  resp = urllib.request.urlopen(url, timeout=5) # ensure invalid URLs don't cause long wait
+  if encoding:
+    return io.TextIOWrapper(resp, encoding=encoding, errors=errors)
+  else:
+    return resp
 
 def osExec(list):
   logexternal(" ".join(list))
@@ -462,55 +469,28 @@ for k in sorted(projectNames, key=str.lo
 # Gather committers data ---
 
 print("Gather committers data ...")
-# Using the generated p.a.o/committers-by-project.html page is the easiest way.
-# However it has a very flat structure.
-# Need to process the "table" which follows each "h2" element.
-class CommittersParser(HTMLParser):
-
-  def __init__(self):
-    self.strict = True
-    self.projects = {}
-    self.projectId = "default"
-    self.projects['default'] = []
-    self.rowCount = 0
-    self.cellCount = 0
-    self.committerNameRE = re.compile("([a-z0-9_]+)")
-    self.convert_charrefs = False
-    self.reset()
+# Parse the locally defined groups directly
+committers_projects = {}
+with getUrl(GIT % ASF, encoding='UTF-8') as f:
+    for line in f: # skip the header
+        if line.startswith('[groups]'):
+            break
 
-  def handle_starttag(self, tag, attrs):
-    if tag == "h2":
-      for key, value in attrs:
-        if key == "id":
-          self.projectId = value
-          try:
-            self.projects[value]
-          except KeyError:
-            self.projects[value] = []
-          break
-    if tag == "table":
-      self.rowCount = 0
-    if tag == "tr":
-      self.rowCount += 1
-      self.cellCount = 0
-    if tag == "td":
-      self.cellCount += 1
-
-  def handle_data(self, data):
-    if self.cellCount == 1:
-      match = re.search(self.committerNameRE, data)
-      if match:
-        name = match.group(1)
-        if self.rowCount > 1: # The first row is the column headers.
-          self.projects[self.projectId].append(name)
-
-committersInput = getUrl(COMMITTERS_BY_PROJECT)
-dataCommitters = committersInput.read().decode('utf-8')
-#print("dataCommitters=" + dataCommitters)
-committers = CommittersParser()
-committers.feed(dataCommitters)
-committers.close()
-#pprint.pprint(committers.projects)
+    for line in f: # read the defs section
+        line = line.rstrip()
+        if re.match(r"^(#|\s*$)", line) :# comment or blanks
+            continue
+        if re.match(r"^\[/\]", line):# end of definition section
+            break
+        m = re.match(r"^\s*(\w\S+?)\s*=\s*(\S+)?$", line)
+        if m:
+            entry = m.group(1)
+            value = m.group(2)
+            if value: # ignore empty groups
+                if value.startswith('{'):
+                    continue
+                committers_projects[entry]=value.split(',')
+#pprint.pprint(committers_projects)
 
 # Gather incubator group mail list data ---
 
@@ -747,8 +727,8 @@ for k in sorted(projectNames, key=str.lo
   for svnGroup in svnGroups:
     if optionVerbose:
       print("DEBUG: Trying committers group '{0}'".format(svnGroup))
-    if svnGroup in committers.projects:
-      projects[k]['numberCommitters'] = len(committers.projects[svnGroup])
+    if svnGroup in committers_projects:
+      projects[k]['numberCommitters'] = len(committers_projects[svnGroup])
       projects[k]['committersSvn'] = svnGroup
       break
     else:

Modified: incubator/public/trunk/content/clutcht.ent
URL: http://svn.apache.org/viewvc/incubator/public/trunk/content/clutcht.ent?rev=1717353&r1=1717352&r2=1717353&view=diff
==============================================================================
--- incubator/public/trunk/content/clutcht.ent (original)
+++ incubator/public/trunk/content/clutcht.ent Mon Nov 30 23:52:27 2015
@@ -1,4 +1,4 @@
 <!-- generated by clutch; do not edit -->
 
-        Clutch last gathered: Mon Nov 30 23:17:33 2015 UTC.<br />
+        Clutch last gathered: Mon Nov 30 23:46:13 2015 UTC.<br />
         Number of podlings in incubation: 47



---------------------------------------------------------------------
To unsubscribe, e-mail: cvs-unsubscribe@incubator.apache.org
For additional commands, e-mail: cvs-help@incubator.apache.org


Mime
View raw message