community-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From s...@apache.org
Subject svn commit: r1714280 - /comdev/projects.apache.org/scripts/cronjobs/readldap.py
Date Sat, 14 Nov 2015 00:27:16 GMT
Author: sebb
Date: Sat Nov 14 00:27:16 2015
New Revision: 1714280

URL: http://svn.apache.org/viewvc?rev=1714280&view=rev
Log:
Updated copy for use with pao
TODO not yet ready for use

Added:
    comdev/projects.apache.org/scripts/cronjobs/readldap.py   (with props)

Added: comdev/projects.apache.org/scripts/cronjobs/readldap.py
URL: http://svn.apache.org/viewvc/comdev/projects.apache.org/scripts/cronjobs/readldap.py?rev=1714280&view=auto
==============================================================================
--- comdev/projects.apache.org/scripts/cronjobs/readldap.py (added)
+++ comdev/projects.apache.org/scripts/cronjobs/readldap.py Sat Nov 14 00:27:16 2015
@@ -0,0 +1,291 @@
+"""
+                          *** DRAFT - NOT READY FOR USE ***
+   Read auth groups from LDAP
+   
+"""
# Python 2/3 compatibility shim: on Python 2, replace the builtin open()
# with io.open, which supports the Python 3 signature (encoding= keyword)
# used by the cache read/write code below.
import sys
if sys.hexversion < 0x03000000:
    _PY3 = False
    from io import open
else:
    _PY3 = True
import os
from os.path import dirname, abspath, join, getmtime
import json
import time, calendar
import re
+
+import ldap3
+from ldap3 import Server, ServerPool, Connection, LEVEL, POOLING_STRATEGY_RANDOM
+
+import urlutils
+
# The three US-west ASF LDAP servers: LDAPS on port 636 with a short
# connect timeout so a dead server is skipped quickly.
server1 = Server('ldap1-us-west.apache.org', port=636, use_ssl=True, connect_timeout=5)#, get_info=ALL)
server2 = Server('ldap2-us-west.apache.org', port=636, use_ssl=True, connect_timeout=5)#, get_info=ALL)
server3 = Server('ldap3-us-west.apache.org', port=636, use_ssl=True, connect_timeout=5)#, get_info=ALL)

# Random pooling strategy; active+exhaust means unreachable servers are
# retried/removed rather than failing the whole pool immediately.
server_pool = ServerPool([server1, server2, server3], POOLING_STRATEGY_RANDOM, active=True, exhaust=True)

# Module-level anonymous connection shared by all the query helpers below.
conn = Connection(server_pool, auto_bind=True)
+
+
+"""
+    LDAP filters do not support > or <, so we have to negate <= and >= respectively
+    So (a>b) becomes (!(a<=b))
+"""
+
def _getPMC(cn, ts=None):
    """Fetch the roster of a PMC committee group from LDAP.

    @param cn: the committee common name (e.g. 'httpd')
    @param ts: optional modifyTimestamp; if provided, only match the entry
        when it was modified after ts
    @return: (True, data) where data has name/type/roster/created/modified,
        or (False, query-details) when nothing (newer than ts) was found
    @raise ValueError: if the search returns more than one entry
    """
    print('getPMC', cn, ts)
    base = 'ou=committees,ou=groups,dc=apache,dc=org'
    # LDAP filters do not support '>' or '<', so (a>b) is written as (!(a<=b));
    # renamed from 'filter' to avoid shadowing the builtin
    ldap_filter = '(&(cn=%s)(!(modifyTimestamp<=%s)))' % (cn, ts) if ts else '(cn=%s)' % cn
    attributes = ['member', 'createTimestamp', 'modifyTimestamp', 'cn']
    success = conn.search(base, ldap_filter, attributes=attributes)
    if not success:
        return success, {'base': base, 'filter': ldap_filter, 'attributes': attributes}
    response = conn.response
    if len(response) != 1:
        # BUG FIX: the original raised the undefined name 'Error' (NameError);
        # ValueError is the closest standard exception
        raise ValueError("Invalid Response - only expecting a single entry")
    members = []
    for entry in response:
        att = entry['attributes']
        created = att['createTimestamp'][0] # returned as an array of one (!?)
        modified = att['modifyTimestamp'][0]
        for m in att['member']:
            # member DNs look like 'uid=xyz,ou=people,...'; extract the uid
            mat = re.search(r"^uid=(.+),ou=people", m)
            if mat:
                members.append(mat.group(1))
    return success, {'name': cn,
            'type': 'pmc',
            'roster': sorted(members), # These appear to be listed in order of addition
            'created': created,
            'modified': modified
            }
+
+
def _getUnix(cn, ts=None):
    """Fetch the roster of a unix group from LDAP.

    @param cn: the group common name (e.g. 'committers')
    @param ts: optional modifyTimestamp; if provided, only match the entry
        when it was modified after ts (LDAP has no '>', hence the !(<=) form)
    @return: (True, data) where data has name/type/roster/created/modified,
        or (False, query-details) when nothing (newer than ts) was found
    @raise ValueError: if the search returns more than one entry
    """
    base = 'ou=groups,dc=apache,dc=org'
    # renamed from 'filter' to avoid shadowing the builtin
    ldap_filter = '(&(cn=%s)(!(modifyTimestamp<=%s)))' % (cn, ts) if ts else '(cn=%s)' % cn
    attributes = ['memberUid', 'createTimestamp', 'modifyTimestamp', 'cn']
    success = conn.search(base, ldap_filter, attributes=attributes, search_scope=LEVEL)
    if not success:
        return success, {'base': base, 'filter': ldap_filter, 'attributes': attributes}
    response = conn.response
    if len(response) != 1:
        # BUG FIX: the original raised the undefined name 'Error' (NameError)
        raise ValueError("Invalid Response - only expecting a single entry")
    members = []
    for entry in response:
        att = entry['attributes']
        created = att['createTimestamp'][0] # returned as an array of one (!?)
        modified = att['modifyTimestamp'][0]
        members.extend(att['memberUid'])
    return success, {'name': cn,
            'type': 'unix',
            'roster': sorted(members),
            'created': created,
            'modified': modified
            }
+
def _getPerson(uid, ts=None):
    """Fetch the details of a single person entry from LDAP.

    @param uid: the user id to look up
    @param ts: optional modifyTimestamp; if provided, only match the entry
        when it was modified after ts
    @return: (True, attribute dict) on success,
        or (False, query-details) when nothing (newer than ts) was found
    @raise ValueError: if the search returns more than one entry
    """
    base = 'ou=people,dc=apache,dc=org'
    # renamed from 'filter' to avoid shadowing the builtin
    ldap_filter = '(&(uid=%s)(!(modifyTimestamp<=%s)))' % (uid, ts) if ts else '(uid=%s)' % uid
    attributes = ['uid', 'createTimestamp', 'modifyTimestamp', 'cn', 'loginShell']
    success = conn.search(base, ldap_filter, attributes=attributes, search_scope=LEVEL)
    if not success:
        return success, {'base': base, 'filter': ldap_filter, 'attributes': attributes}
    response = conn.response
    if len(response) != 1:
        # BUG FIX: the original raised the undefined name 'Error' (NameError)
        raise ValueError("Invalid Response - only expecting a single entry")
    data = {}
    for entry in response:
        att = entry['attributes']
        # each attribute value is returned as a 1-element list; unwrap it
        data = {a: att[a][0] for a in att}
    return success, data
+
+# The search returns multiple entries, each with their own timeStamp.
+# Only recent entries will be listed if a timestamp is provided.
+# Although one could merge the two sets of entries, this would not allow for deletions
+# So the timestamp is not stored at file level, and so is not used when checking for updates.
+#
+# TODO consider whether it would be cheaper to request recent changes
+# and then refetch the whole list if there has been a change.
+
def _getPeople(ignored, ts=None):
    """Fetch details of all people entries from LDAP.

    @param ignored: unused; present so the signature matches the other getters
    @param ts: optional modifyTimestamp; if provided, only entries modified
        after ts are returned
    @return: (True, dict keyed by uid) on success,
        or (False, query-details) when nothing (newer than ts) was found
    """
    base = 'ou=people,dc=apache,dc=org'
    if ts:
        filter = '(&(uid=*)(!(modifyTimestamp<=%s)))' % (ts)
    else:
        filter = '(uid=*)'
    attributes = ['uid', 'createTimestamp', 'modifyTimestamp', 'cn', 'loginShell']
    success = conn.search(base, filter, attributes=attributes, search_scope=LEVEL)
    if not success:
        return success, {'base': base, 'filter': filter, 'attributes': attributes}
    data = {}
    for entry in conn.response:# there will be many
        attrs = entry['attributes']
        uid = attrs['uid'][0]
        record = {}
        for attname in attrs:
            values = attrs[attname]
            # values come back as lists; keep only the first element
            record[attname] = values[0]
            if len(values) != 1:
                print("WARN: only expected a single entry for the attribute %s with uid %s, found: %d" % (attname, uid, len(values)))
        data[uid] = record
    return success, data
+
class LdapCache(object):
    """
        Creates a cache for LDAP requests
        @param cachedir: the cache directory to use 
            (default data/cache; this is assumed to be at the current directory, its parent or grandparent)
        @param interval: minimum interval between checks for updates (default 600 secs)
            if set to -1, never checks (intended for testing only)  
            if set to 0, always checks (primarily intended for testing)
        @return: the instance to use with the get() method
    """

    def __getUnixname(self, name=None):
        # Path of the unix-groups cache directory, or of a file within it.
        if name:
            return join(self.__cachedir, 'ldap_unix', name)
        else:
            return join(self.__cachedir, 'ldap_unix')

    def __getPMCname(self, name=None):
        # Path of the PMC cache directory, or of a file within it.
        if name:
            return join(self.__cachedir, 'ldap_pmc', name)
        else:
            return join(self.__cachedir, 'ldap_pmc')

    def __getname(self, name):
        # Path of a file directly under the cache directory.
        return join(self.__cachedir, name)

    def __checkCacheDir(self, dirpath, dirtype):
        # Fail fast if a required cache subdirectory is missing.
        # BUG FIX: the original formatted a single-%s string with a 2-tuple,
        # which raised TypeError instead of the intended OSError, and the
        # message hard-coded 'unix' regardless of the type being checked.
        if not os.path.isdir(dirpath):
            raise OSError("Could not find %s cache directory '%s'" % (dirtype, dirpath))

    def __init__(self, cachedir=None, interval=600):
        __CACHE = 'data/cache'
        self.__interval = interval
        if cachedir:
            self.__cachedir = cachedir
        else:
            self.__cachedir = __CACHE # will be overwritten if actually found
            for d in ['./', '../', '../../']: # we may be located at same level or 1 or 2 below
                dir = d + __CACHE
                if os.path.isdir(dir):
                    self.__cachedir = dir
                    break

        if os.path.isdir(self.__cachedir):
            print("Cachedir: %s" % self.__cachedir)
        else:
            raise OSError("Could not find cache directory '%s'" % self.__cachedir)

        # ensure the expected sub-caches exist before any lookups
        self.__checkCacheDir(self.__getUnixname(), 'unix')
        self.__checkCacheDir(self.__getPMCname(), 'PMC')

    def __getLDAPjson(self, getter, filename, key):
        """
            LDAP caching:

            Read the json file.
            If the file exists:
                - if the interval check is < 0, return the file as json
                - if the time since the last check is less than the interval, return the file as json
                - otherwise request the LDAP data, and return as json (update time of last check)
            else:
                request the LDAP data.
                If data was returned, cache it in a file as json

            @param getter: one of the module-level _get* functions
            @param filename: the json cache file to read/write
            @param key: the key passed through to the getter
            @return: the (possibly cached) data dict
        """
        modified = None
        try:
            with open(filename, 'r', encoding='utf-8') as f:
                old = json.load(f)
                if self.__interval < 0:
                    print("File %s exists and time check has been disabled" % filename)
                    return old
                elif self.__interval == 0:
                    print("File %s exists and check interval is zero" % filename)
                else:
                    diff = int(time.time() - getmtime(filename))
                    if diff < self.__interval:
                        print("Recently checked: %d < %d, skip download" % (diff, self.__interval))
                        return old
                    else:
                        print("Not recently checked: %d > %d" % (diff, self.__interval))
            try:
                # pass the cached timestamp so the getter can skip unchanged data
                modified = old['modified']
            except KeyError:
                print("No modified key found for %s" % filename)
        except IOError:
            print("No file found " + filename)
            old = {}
        except ValueError as e:
            print("Could not load " + filename + " " + str(e))
            old = {}
        success, new = getter(key, modified)
        if success: # either new or updated
            print("Saving %s (modified: %s)" % (filename, modified))
            with open(filename, 'w', encoding='utf-8') as f:
                json.dump(new, f, indent=1, sort_keys=True)
        else:
            if modified is None: # we have new data or there was none
                print("No data for: %s" % (new))
            else: # old data was OK
                print("Touching %s (modified: %s)" % (filename, modified))
                urlutils.touchFile(filename, time.time())
            new = old
        return new

    def getPMC(self, key):
        """Return the (cached) PMC group data for key."""
        filename = self.__getPMCname(key + ".json")
        return self.__getLDAPjson(_getPMC, filename, key)

    def getUnix(self, key):
        """Return the (cached) unix group data for key."""
        filename = self.__getUnixname(key + ".json")
        return self.__getLDAPjson(_getUnix, filename, key)

    def getPeople(self):
        """Return the (cached) details of all people."""
        filename = self.__getname('people' + ".json")
        return self.__getLDAPjson(_getPeople, filename, '*')

    def getPerson(self, key):
        """Return details for one person, or {} if not found.
        Results are not cached (too many)."""
        success, data = _getPerson(key)
        if success:
            return data
        else:
            return {}
+
if __name__ == '__main__':
    # Self-test: cross-check LDAP group membership for consistency.
    lc = LdapCache(None)
    members = lc.getUnix('member')['roster']
    committers = lc.getUnix('committers')['roster']
    lc = LdapCache(None, interval=1200)
    people = lc.getPeople()
    try: # Not yet available from Whimsy
        with open('../../data/cache/member_info.json', 'r', encoding='utf-8') as f:
            member_info = json.load(f)
    except IOError:
        # BUG FIX: the original branch was Ruby syntax
        # (member_info[:members] = Array.new) and raised NameError;
        # fall back to empty membership data instead.
        member_info = {'members': [], 'ex_members': {}}
    for c in committers:
        if c not in people:
            print("Committer %s not in people" % c)
    for c in people:
        if c not in committers:
            shell = people[c]['loginShell']
            if shell != '/usr/bin/false':
                print("Person %s not in committers %s" % (c, shell))
    for c in members:
        if c not in people:
            print("Member %s not in people" % c)
        if c not in committers:
            # BUG FIX: default the status so the print below cannot raise
            # NameError when c is in neither members nor ex_members
            status = 'unknown'
            if c in member_info['members']:
                status = 'current'
            elif c in member_info['ex_members']:
                status = member_info['ex_members'][c]
            print("Member %s not in committers: %s" % (c, status))
            if c in people:
                print(people[c]['loginShell'])
    print("Done")
\ No newline at end of file

Propchange: comdev/projects.apache.org/scripts/cronjobs/readldap.py
------------------------------------------------------------------------------
    svn:eol-style = native



Mime
View raw message