gump-commits mailing list archives

From leosim...@apache.org
Subject svn commit: r124482 - /gump/branches/Dec04MajorCleanup/pygump/python/gump/engine/__init__.py /gump/branches/Dec04MajorCleanup/pygump/python/gump/engine/modeller.py
Date Fri, 07 Jan 2005 11:54:20 GMT
Author: leosimons
Date: Fri Jan  7 03:54:18 2005
New Revision: 124482

URL: http://svn.apache.org/viewcvs?view=rev&rev=124482
Log:
Lot of work on the modeller that I missed in my last commit. The visitor is wrong and this
needs to be rewritten, but I wanted it archived before deletion.
Modified:
   gump/branches/Dec04MajorCleanup/pygump/python/gump/engine/__init__.py
   gump/branches/Dec04MajorCleanup/pygump/python/gump/engine/modeller.py

Modified: gump/branches/Dec04MajorCleanup/pygump/python/gump/engine/__init__.py
Url: http://svn.apache.org/viewcvs/gump/branches/Dec04MajorCleanup/pygump/python/gump/engine/__init__.py?view=diff&rev=124482&p1=gump/branches/Dec04MajorCleanup/pygump/python/gump/engine/__init__.py&r1=124481&p2=gump/branches/Dec04MajorCleanup/pygump/python/gump/engine/__init__.py&r2=124482
==============================================================================
--- gump/branches/Dec04MajorCleanup/pygump/python/gump/engine/__init__.py	(original)
+++ gump/branches/Dec04MajorCleanup/pygump/python/gump/engine/__init__.py	Fri Jan  7 03:54:18 2005
@@ -77,13 +77,17 @@
     
     modeller_log = _get_logger(config.log_level, "modeller")
     modeller_loader = _get_modeller_loader(modeller_log, vfs)
+    modeller_normalizer = _get_modeller_normalizer()
     modeller_objectifier = _get_modeller_objectifier(modeller_log)
+    modeller_verifier = _get_modeller_verifier()
     
     mergefile = os.path.join(config.paths_work, "merge.xml")
     dropfile = os.path.join(config.paths_work, "dropped.xml")
 
     # create engine
-    engine = _Engine(log, db, modeller_loader, modeller_objectifier, config.paths_workspace, mergefile, dropfile)
+    engine = _Engine(log, db, modeller_loader, modeller_normalizer,
+                     modeller_objectifier, modeller_verifier,
+                     config.paths_workspace, mergefile, dropfile)
     
     # run it
     engine.run()
@@ -173,11 +177,21 @@
     from gump.engine.modeller import Loader
     return Loader(log, vfs, mergefile, dropfile)
 
+def _get_modeller_normalizer():
+    """Provide a Normalizer implementation."""
+    from gump.engine.modeller import Normalizer
+    return Normalizer()
+
 def _get_modeller_objectifier(log):
     """Provide a Objectifier implementation."""
     from gump.engine.modeller import Objectifier
     return Objectifier(log)
 
+def _get_modeller_verifier():
+    """Provide a Verifier implementation."""
+    from gump.engine.modeller import Verifier
+    return Verifier()
+
 ###
 ### Classes
 ###
@@ -200,15 +214,22 @@
 class _Engine:
     """This is the core of the core of the pygump application."""
     
-    def __init__(self, log, db, workspace_loader, workspace_objectifier, workspace, merge_to=None, drop_to=None):
+    def __init__(self, log, db, workspace_loader, workspace_normalizer,
+                 workspace_objectifier, workspace_verifier,
+                 workspace, merge_to=None, drop_to=None):
         """Store all config and dependencies as properties.
         
-        Arguments
+        Arguments:
+            
             - log -- the log to write debug and error messages to.
             - db -- the database to store all activity in.
             - workspace_loader -- the component providing the dom tree.
+            - workspace_normalizer -- the component transforming the dom tree
+                into a standard format
             - workspace_objectifier -- the component transforming the dom into
-                                       object form
+                object form
+            - workspace_verifier -- the component making sure the object model
+                is correct
 
             - workspace -- the resource containing the workspace xml.
             - merge_to -- the resource to write the merged workspace xml to.
@@ -217,7 +238,9 @@
         self.log = log
         self.db = db
         self.workspace_loader = workspace_loader
+        self.workspace_normalizer = workspace_normalizer
         self.workspace_objectifier = workspace_objectifier
+        self.workspace_verifier = workspace_verifier
 
         self.workspace = open_file_or_stream(workspace,'r')
         self.merge_to = open_file_or_stream(merge_to,'w')
@@ -226,20 +249,31 @@
     def run(self):
         """Perform a run."""
         try:
-            # 1) merge workspace into big DOM tree
-            (dom, dropped_nodes) = self.workspace_loader.get_workspace_tree(self.workspace)
-            
-            # 2) write the merged tree out to a new xml file
-            self._write_merge_files(dom, dropped_nodes)
+            # * merge workspace into big DOM tree
+            (domtree, dropped_nodes) = self.workspace_loader.get_workspace_tree(self.workspace)
+            # * clean it up and structure it properly
+            domtree = self.workspace_normalizer.normalize(domtree)
+            # * write the merged, normalized tree out to a new xml file
+            self._write_merge_files(domtree, dropped_nodes)
+            # * convert that DOM tree into python objects
+            workspace = self.workspace_objectifier.get_workspace(domtree)
+            # * we're done with the xml stuff, allow GC
+            domtree.unlink()
+            for node in dropped_nodes:
+                node.unlink()
+            # * verify that our model is correct (for example, that it has
+            #   no circular dependencies)
+            self.workspace_verifier.verify(workspace)
+            # * store those objects in the database
+
+            raise RuntimeError, "Not Implemented!"
             
-            # 2) convert that DOM tree into python objects
-            workspace = self.workspace_objectifier.get_workspace(dom.documentElement)
-            # 3) store those objects in the database
             self.store_workspace(self.workspace) #TODO
-            # 4) determine the tasks to perform
+
+            # * determine the tasks to perform
             self.tasks = self.create_ordered_tasklist() #TODO
             
-            # 5) now make the workers perform those tasks
+            # * now make the workers perform those tasks
             self.create_workers() #TODO
             self.start_workers() #TODO
             self.wait_for_workers() #TODO

Modified: gump/branches/Dec04MajorCleanup/pygump/python/gump/engine/modeller.py
Url: http://svn.apache.org/viewcvs/gump/branches/Dec04MajorCleanup/pygump/python/gump/engine/modeller.py?view=diff&rev=124482&p1=gump/branches/Dec04MajorCleanup/pygump/python/gump/engine/modeller.py&r1=124481&p2=gump/branches/Dec04MajorCleanup/pygump/python/gump/engine/modeller.py&r2=124482
==============================================================================
--- gump/branches/Dec04MajorCleanup/pygump/python/gump/engine/modeller.py	(original)
+++ gump/branches/Dec04MajorCleanup/pygump/python/gump/engine/modeller.py	Fri Jan  7 03:54:18 2005
@@ -27,10 +27,121 @@
 
 from gump.model import *
 
+###
+### Utility methods shared between classes
+###
+def _find_element_text(parent, element_name):
+    """Retrieves the text contents of an element like <blah>text</blah>."""
+    return parent.getElementsByTagName(element_name).item(0).firstChild.data
+
+
+def _do_drop(to_remove, dropped_nodes=None):
+    """Remove node from its parent and add to dropped_nodes list."""
+    
+    node_to_remove_element_from = to_remove.parentNode
+    node_to_remove_element_from.removeChild(to_remove)
+    if dropped_nodes is not None:
+        dropped_nodes.append(to_remove)
+
+
+def _find_ancestor_by_tag(node, tagName):
+    """Walk up the DOM hierarchy to locate an element of the specified tag."""
+    parent = node
+    while parent.nodeType == dom.Node.ELEMENT_NODE:
+        if parent.tagName == tagName:
+            return parent
+        parent = parent.parentNode
+        if not parent:
+            return None
+
+
+def _find_document_containing_node(node):
+    """Walk up the DOM hierarchy to locate a Document node."""
+    parent = node
+    while not parent.nodeType == dom.Node.DOCUMENT_NODE:
+        parent = parent.parentNode
+        if not parent: # really ought not happen I think...
+            raise ModellerError, "Cannot find document containing this node!"
+    
+    return parent
+
+
+def _find_project_containing_node(node):
+    """Walk up the DOM hierarchy to locate a <project> Element."""
+    return _find_ancestor_by_tag(node, "project")
+
+
+def _find_module_containing_node(node):
+    """Walk up the DOM hierarchy to locate a <module> Element."""
+    return _find_ancestor_by_tag(node, "module")
+
+
+def _find_repository_containing_node(node):
+    """Walk up the DOM hierarchy to locate a <repository> Element."""
+    return _find_ancestor_by_tag(node, "repository")
+
+
+def _import_node(target_node, new_node):
+    """Combines two DOM trees together.
+
+    The second argument is merged into the first argument, which is then
+    returned.
+    """
+    _import_attributes(target_node, new_node)
+    _import_children(target_node, new_node)
+
+    
+def _import_attributes(target_node, new_node):
+    """Copy all attributes from the new node to the target node."""
+    
+    new_attributes = new_node.attributes
+    if new_attributes:
+        #if new_attributes.length > 0:
+        i = 0
+        while i < new_attributes.length: # for loops gives a KeyError,
+            att = new_attributes.item(i) #   seems like a minidom bug!
+            i = i + 1
+            if not att: continue
+
+            name = att.name.__str__()
+            value = new_node.getAttribute(name).__str__()
+            target_node.setAttribute(name, value)
+
+
+def _import_children(target_node, new_node, filter=None):
+    """Copy all children from the new node to the target node."""
+    new_elements = new_node.childNodes
+    if new_elements and new_elements.length > 0:
+        for child in new_elements:
+            if filter:
+                if filter.exclude(child):
+                    continue # skip this one
+            clone = child.cloneNode(True)
+            target_node.appendChild(clone)
+
+###
+### Classes
+###
+
+class _TagNameFilter:
+    """Filter for use with _import_children."""
+    def __init__(self, excludedTags):
+        self.excludedTags = excludedTags
+
+    def exclude(self, node):
+        if not node.nodeType == dom.Node.ELEMENT_NODE:
+            return False
+        if node.tagName in self.excludedTags:
+            return True
+        
+        return False
+
+
 class ModellerError(Exception):
     """Generic error thrown for all internal Modeller module exceptions."""
     pass
 
+
 class Loader:
     """Parses and resolves Gump XML metadata.
 
@@ -159,47 +270,16 @@
         stream.close() # close file immediately, we're done!
         new_root = new_dom.documentElement
         
-        # we succeeded loading the new document, get rid of the href
+        # we succeeded loading the new document, get rid of the href, save it
+        # as "resolved"
         node.removeAttribute('href')
+        node.setAttribute('resolved-from-href', href)
         
-        self._import_node(node, new_root)
+        _import_node(node, new_root)
         
         # we're done with the file now, allow GC
         new_root.unlink()
     
-    def _import_node(self, target_node, new_node):
-        """Combines two DOM trees together.
-
-        The second argument is merged into the first argument, which is then
-        returned.
-        """
-        self._import_attributes(target_node, new_node)
-        self._import_children(target_node, new_node)
-    
-    def _import_attributes(self, target_node, new_node):
-        """Copy all attributes from the new node to the target node."""
-        
-        new_attributes = new_node.attributes
-        if new_attributes:
-            #if new_attributes.length > 0:
-            i = 0
-            while i < new_attributes.length: # for loops gives a KeyError,
-                att = new_attributes.item(i) #   seems like a minidom bug!
-                i = i + 1
-                if not att: continue
-
-                name = att.name.__str__()
-                value = new_node.getAttribute(name).__str__()
-                target_node.setAttribute(name, value)
-    
-    def _import_children(self, target_node, new_node):
-        """Copy all children from the new node to the target node."""
-        new_elements = new_node.childNodes
-        if new_elements and new_elements.length > 0:
-            for child in new_elements:
-                clone = child.cloneNode(True)
-                target_node.appendChild( clone )
-    
     def _drop_module_or_project(self, node, dropped_nodes):
         """Finds the project associated with this node and removes it.
         
@@ -207,126 +287,439 @@
         instead. If there is no module either, an exception is raised.
         """
         
-        project = self._find_project_containing_node(node)
+        project = _find_project_containing_node(node)
         if project:
-            doc = self._find_document_containing_node(project)
-            module = self._find_module_containing_node(project)
+            doc = _find_document_containing_node(project)
+            module = _find_module_containing_node(project)
             modulename = module.getAttribute("name")
             comment = doc.createComment(" Part of module: %s " % modulename)
             project.appendChild(comment)
             name = project.getAttribute("name")
             self.log.warning("Dropping project '%s' from module '%s' because of href resolution
error!" % (name , modulename))
 
-            self._do_drop(project, dropped_nodes)
+            _do_drop(project, dropped_nodes)
         else:
-            module = self._find_module_containing_node(node)
+            module = _find_module_containing_node(node)
             if module:
-                doc = self._find_document_containing_node(module)
+                doc = _find_document_containing_node(module)
                 name = module.getAttribute("name")
                 self.log.warning("Dropping module '%s' because of href resolution error!"
% name)
 
-                self._do_drop(project, dropped_nodes)
+                _do_drop(module, dropped_nodes)
             else:
                 raise ModellerError, \
                       "Problematic node has no parent <project/> or " + \
                       "<module/>, unable to recover! Node:\n%s" \
                       % node.toprettyxml()
+
+
+class Normalizer:
+    """Turns a messy gump DOM workspace into a simplified and normalized form.
     
-    def _do_drop(self, to_remove, dropped_nodes):
-        """Remove node from its parent and add to dropped_nodes list."""
+    The normalized form is as follows:
         
-        node_to_remove_element_from = to_remove.parentNode
-        node_to_remove_element_from.removeChild(to_remove)
-        dropped_nodes.append(to_remove)
+        <workspace name="1">
+          <other-stuff .../>
+          
+          <repositories>
+            <repository name="1">
+              <full-repo-definition-without-modules-or-projects/>
+            </repository>
+            <repository name="2" ...>
+            <repository name="..." ...>
+            <repository name="N" ...>
+          </repositories>
+          
+          <modules>
+            <module name="1">
+              <repository name=""/><!-- not "url"... -->
+              <full-module-definition-without-repositories-or-projects/>
+            </module>
+            <module name="2" ...>
+            <module name="..." ...>
+            <module name="N" ...>
+          </modules>
+          
+          <projects>
+            <project name="1">
+              <module name=""/>
+              <full-project-definition-without-repositories-or-modules/>
+              
+              <depend name="1" id="one-id-only"/>
+              <depend name="1" id="one-id-only-per-tag"/>
+              <depend name="2" optional="true"/>
+              <depend name="..." ...>
+              <depend name="N" ...>
+            </project>
+            <project name="2" ...>
+            <project name="..." ...>
+            <project name="N" ...>
+          </projects>
+          
+        </workspace>
     
-    def _find_document_containing_node(self, node):
-        """Walk up the DOM hierarchy to locate a Document node."""
-        parent = node
-        while not parent.nodeType == dom.Node.DOCUMENT_NODE:
-            parent = parent.parentNode
-            if not parent: # really ought not happen I think...
-                raise ModellerError, "Cannot find document containing this node!"
-        
-        return parent
-    
-    def _find_project_containing_node(self, node):
-        """Walk up the DOM hierarchy to locate a <project> Element."""
-        
-        parent = node
-        while parent.nodeType == dom.Node.ELEMENT_NODE:
-            if parent.tagName == "project":
-                return parent
-            parent = parent.parentNode
-            if not parent:
-                return None
-    
-    def _find_module_containing_node(self, node):
-        """Walk up the DOM hierarchy to locate a <module> Element."""
-        parent = node
-        while parent.nodeType == dom.Node.ELEMENT_NODE:
-            if parent.tagName == "module":
-                return parent
-            parent = parent.parentNode
-            if not parent:
-                return None
+    Note that the introduction of the Normalizer makes pygump more flexible than
+    the current gump model spec in several ways:
 
-class Objectifier:
-    """Turns DOM workspace into Pythonified workspace."""
+        * All named elements (repositories, modules, projects) can appear
+        anywhere within the graph. This allows, for example, repositories defined
+        within projects or modules.
+        
+        * Relationships between named elements can be established "by inverse".
+        The "child" (module to repository, project to module, ...) element can
+        appear outside the parent, then reference its parent by name. For example,
+        
+          <project name="bootstrap-ant">
+            <module name="ant"/>
+          </project>
+        
+        is valid. Of course, the reverse
+        
+          <module name="ant>
+            <project name="bootstrap-ant"/>
+          </module>
+        
+        is also still valid.
     
+    TODO: support for 0.4 model and earlier...
+    """
     def __init__(self, log):
-        """Store all settings and dependencies as properties."""
         self.log = log
-
-    def get_workspace(self, dom):
-        """Transforms a workspace xml document into object form.
-        
-        Travels the entire document tree, converting everything it finds."""
+    
+    def normalize(self, olddoc):
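+        """Build and return a new, normalized workspace document from olddoc.
+        
+        The original document is unlinked along the way, so callers should
+        use the returned document instead.
+        """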
+        self.olddoc = olddoc
+        self.oldroot = olddoc.documentElement
+        self.impl = dom.getDOMImplementation()
+        self.newdoc = self.impl.createDocument(None, "workspace", None)
+        self.newroot = self.newdoc.documentElement
+        
+        self._copy_workspace_root_stuff()
+        self._populate_newroot()
+        self._parse_maven_projects()
+
+        self._normalize_repositories()
+        self._normalize_modules()
+        self._normalize_projects()
+        
+        self._normalize_dependencies()
+        
+        doc = self.newdoc
+        # allow GC
+        self.repositories = None
+        self.modules = None
+        self.projects = None
+        self.olddoc.unlink()
+        self.olddoc = None
+        self.oldroot = None
+        self.newroot = None
+        self.newdoc = None
         
-        workspace = self._create_workspace(dom)
-        self._create_repositories(workspace, dom)
+        return doc
+    
+    def _copy_workspace_root_stuff(self):
+        """Copies over the unnamed config bits and properties."""
+        # copy all ws attributes
+        _import_attributes(self.newroot, self.oldroot)
+        
+        # these elements are to be filtered out completely
+        # at all levels
+        exclude = ["repositories",
+                   "repository",
+                   "modules",
+                   "module",
+                   "projects",
+                   "project"]
+        
+        # try to avoid cloning most of them
+        filter = _TagNameFilter(exclude)
+        _import_children(self.newroot, self.oldroot, filter)
+        
+        # now get rid of the excluded tags that were lower down the tree
+        # (for example in a <profile/> or somethin')
+        self._clean_out_by_tag( self.newroot, exclude )
+        
+    def _populate_newroot(self):
+        """Creates the main containers like <repositories/>."""
+        self.repositories = self.newdoc.createElement("repositories")
+        self.newroot.appendChild(self.repositories)
 
-        raise RuntimeError, "not implemented!" # TODO
-        #self._create_modules(workspace.repositories, dom)
-        #self._create_projects(workspace, dom)
+        self.modules = self.newdoc.createElement("modules")
+        self.newroot.appendChild(self.modules)
         
-        return workspace
+        self.projects = self.newdoc.createElement("projects")
+        self.newroot.appendChild(self.projects)
     
-    def _create_workspace(self, root):
-        return Workspace(root.getAttribute('name'))
+    def _parse_maven_projects(self):
+        """Looks for <project type="maven"> and converts those."""
         
-    def _create_repositories(self, workspace, root):
-        """Creates all repositories and adds them to the workspace."""
+        projects = self.oldroot.getElementsByTagName("project")
+        for project in projects:
+            if not project.getAttribute("type") == "maven":
+                continue
+            
+            self._parse_maven_project(project)
+    
+    def _parse_maven_project(self, project):
+        if True: return
+
+        self._resolve_maven_imports(project)
         
-        repository_definitions = self._find_repository_definitions(root)
-        undefined = [] # store repositories that are referenced by name
+        id = _find_element_text(project, "id")
+        groupid = _find_element_text(project, "groupId")
+        name = "%s-%s" % (groupid,id)
+
+        title = _find_element_text(project, "title")
+        url = _find_element_text(project, "url")
+        cvsweb = _find_element_text(project.getElementsByTagName("repository").item(0), "url")
+        description = _find_element_text(project, "description")
+        
+        repository = _find_element_text(project, "gumpRepositoryId")
+        if not repository:
+            # create repository and module
+            connection = _find_element_text(project.getElementsByTagName("repository").item(0),
"connection")
+            connection = connection[4:] # get rid of "scm:"
+            provider = connection[:connection.index(':')] # "cvs" or "svn" or "starteam"
+            if provider.upper() == "cvs".upper():
+                repository = connection[connection.index(':')+1:]
+                parts = repository.split(':')
+                user = parts[1][:parts[1].index('@')]
+                host = parts[1][parts[1].index('@')+1:]
+                path = parts[2]
+                module = parts[3]
+
+                            
+        #new_project = self.newdoc.createElement("project")
+        #new_project.setAttribute("name", name)
+        #new_command = self.newdoc.createElement("maven")
+        #new_project.appendChild(new_command)
+    
+    def _resolve_maven_imports(self, project):
+        pass #TODO
+    
+    def _normalize_repositories(self):
+        repos = self._get_list_merged_by_name("repository")
+        exclude = ["project", "module", "repository"];
+        for repo in repos:
+            clone = repo.cloneNode(True)
+            self._clean_out_by_tag(clone, exclude)
+            self.repositories.appendChild(clone)
+    
+    def _normalize_modules(self):
+        modules = self._get_list_merged_by_name("module")
         
-        for repository_definition in repository_definitions:
-            if not repository_definition.hasChildNodes():
-                # we hope this repository gets fully defined later, skip for now
-                if not repository.getAttribute("name"):
-                    #TODO: implement recovery
-                    raise ModellerError, "Encountered a repository without a name!"
-                undefined.append(repository_definition)
+        exclude = ["project", "module", "repository"];
+        for module in modules:
+            repository = self._find_repository_for_module(module)
+            if not repository:
+                name = module.getAttribute("name")
+                self.log.warn("Dropping module '%s' because no corresponding repository could
be found!" % name)
                 continue
             
-            repository = self._create_repository(workspace, repository_definition)
-            # TODO: detect overrides here
-            workspace.repositories[repository.name] = repository
-        
-        # TODO: add support for maven repository definitions here as found
-        # inside maven project.xml files...
-        
-        # walk the undefined repository list to make sure they're all defined
-        # now. If that's not the case, we'll complain about it right here.
-        for repository_definition in undefined:
-            name = repository_definition.getAttribute("name")
-            if not workspace.repositories.has_key(name):
-                # TODO: drop associated modules and projects instead
-                raise ModellerError, "Repository '%s' is referenced but never defined!" %
name
+            clone = module.cloneNode(True)
+            self._clean_out_by_tag( clone, exclude )
+            reporef = self.newdoc.createElement("repository")
+            reporef.setAttribute("name", repository.getAttribute("name") )
+            clone.appendChild(reporef)
+            
+            self.modules.appendChild(clone)
+
+    def _find_repository_for_module(self, module):
+        repo = None
+
+        # look within module
+        repos = module.getElementsByTagName("repository")
+        if repos.length > 0:
+            return repos.item(0)
+        
+        # look upward
+        return _find_repository_containing_node(module)
+
+    def _normalize_projects(self):
+        projects = self._get_list_merged_by_name("project")
+        exclude = ["project", "module", "repository"];
+        for project in projects:
+            module = self._find_module_for_project(project)
+            if not module:
+                name = project.getAttribute("name")
+                self.log.warn("Dropping project '%s' because no corresponding module could
be found!" % name)
+                continue
+            
+            clone = project.cloneNode(True)
+            self._clean_out_by_tag( clone, exclude )
+            moduleref = self.newdoc.createElement("module")
+            moduleref.setAttribute("name", module.getAttribute("name") )
+            clone.appendChild(moduleref)
+            
+            self.projects.appendChild(clone)
+    
+    def _find_module_for_project(self, project):
+        repo = None
+
+        # look within project
+        modules = project.getElementsByTagName("module")
+        if modules.length > 0:
+            return modules.item(0)
+        
+        # look upward
+        return _find_module_containing_node(project)
+
+    def _normalize_dependencies(self):
+        """Converts <depend/> and <option/> elements into normalized form."""
+        for project in self.projects.childNodes:
+            self._normalize_optional_depend(project)
+            dependencies = project.getElementsByTagName("depend")
+            if dependencies.length > 0:
+                self._normalize_depend_inside_ant(project, dependencies)
+                self._normalize_depend_on_multiple_ids(project, dependencies)
+
+    def _normalize_optional_depend(self, project):
+        """Replace an <option/> with a <depend optional=""/>."""
+        options = project.getElementsByTagName("option")
+        if options.length > 0:
+            for option in options:
+                depend = self.newdoc.createElement("depend")
+                _import_attributes(depend, option)
+                depend.setAttribute("optional", "true")
+                project.appendChild(depend)
+                _do_drop(option)
+
+    def _normalize_depend_inside_ant(self, project, dependencies):
+        """Split <depend/> inside <ant/> out into a <depend/> and a <property/>."""
+        for dependency in dependencies:
+            if dependency.parentNode.tagName in ["ant","maven"]:
+                new_dependency = dependency.cloneNode(True)
+                new_dependency.removeAttribute("property")
+                project.appendChild(new_dependency)
+                
+                new_property = self.newdoc.createElement("property")
+                new_property.setAttribute("name", dependency.getAttribute("property"))
+                new_property.setAttribute("project", dependency.getAttribute("project"))
+                new_property.setAttribute("reference", "jarpath")
+                if dependency.getAttribute("id"):
+                    new_property.setAttribute("id", dependency.getAttribute("id"))
+                
+                dependency.parentNode.appendChild(new_property)
+                _do_drop(dependency)
+
+    def _normalize_depend_on_multiple_ids(self, project, dependencies):
+        """Split one <depend/> out into multiple, one for each id."""
+        for dependency in dependencies:
+            ids = dependency.getAttribute("name")
+            if not ids: continue
+            if ids.find(",") == -1: continue
+            
+            project.removeChild(dependency)
+            list = ids.split(",")
+            for id in list:
+                new_dependency = dependency.cloneNode(True)
+                new_dependency.setAttribute("ids",id)
+                project.appendChild(new_dependency)
+
+    def _clean_out_by_tag(self, root, exclude):
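+        """Remove all elements with any of the excluded tag names from the given subtree."""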
+        for tagname in exclude:
+            elems_to_remove = root.getElementsByTagName(tagname)
+            if elems_to_remove.length > 0:
+                for to_remove in elems_to_remove:
+                    _do_drop(to_remove)
+        
+    def _get_list_merged_by_name(self, tagName):
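+        """Collect all elements with this tag from the old tree, merging elements that share a name."""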
+        list = self.oldroot.getElementsByTagName(tagName)
+        newlist = {}
+        for elem in list:
+            name = elem.getAttribute('name')
+            if not name:
+                self.log.warning( "Dropping a %s because it has no name!" % tagName )
+                continue
+            
+            if newlist.has_key(name):
+                _import_node(newlist[name], elem)
+            else:
+                clone = elem.cloneNode(True)
+                newlist[name] = clone
         
-        undefined = None # clean up just to be sure...
+        return newlist.values()
+
+
+class Objectifier:
+    """Turns a *normalized* gump DOM workspace into a pythonified workspace.
+
+    The input for the objectifier is a (potentially rather big) DOM tree that
+    contains normalized gump project definitions. From this tree, it starts
+    building a python object model graph consisting of instances of the
+    classes found in the gump.model package.
+
+    Also note that the Objectifier is purely single-threaded, since it stores
+    intermediate results during parsing as properties for convenience.
+    """
     
-    def _create_repository(self, workspace, repository_definition):
+    def __init__(self, log):
+        """Store all settings and dependencies as properties."""
+        self.log = log
+
+    def get_workspace(self, domtree):
+        """Transforms a workspace xml document into object form."""
+        
+        self.root = domtree.documentElement
+
+        self._find_repository_definitions()
+        self._find_module_definitions()
+        self._find_project_definitions()
+        
+        self._create_workspace()
+        self._create_repositories()
+        self._create_modules()
+        self._create_projects()
+
+        workspace = self.workspace
+        self.workspace = None
+        self.root = None
+        return workspace
+    
+    def _create_workspace(self):
+        self.workspace = Workspace(self.root.getAttribute('name'))
+    
+    ###
+    ### Searching
+    ###
+    def _find_repository_definitions(self):
+        """Retrieves a list of <repository/> elements."""
+        children = self.root.childNodes
+        for child in children:
+            if not child.nodeType == dom.Node.ELEMENT_NODE: continue
+            if child.tagName == "repositories":
+                self.repository_definitions = child.childNodes
+                break
+
+    def _find_module_definitions(self):
+        """Retrieve a list of <module/> elements."""
+        children = self.root.childNodes
+        for child in children:
+            if not child.nodeType == dom.Node.ELEMENT_NODE: continue
+            if child.tagName == "modules":
+                self.module_definitions = child.childNodes
+                break
+
+    def _find_project_definitions(self):
+        """Retrieve a list of <project/> elements."""
+        children = self.root.childNodes
+        for child in children:
+            if not child.nodeType == dom.Node.ELEMENT_NODE: continue
+            if child.tagName == "projects":
+                self.project_definitions = child.childNodes
+                break
+
+    ###
+    ### Repository parsing
+    ###
+    def _create_repositories(self):
+        """Creates all repositories and adds them to the workspace."""
+
+        for repository_definition in self.repository_definitions:
+            if not repository_definition.nodeType == dom.Node.ELEMENT_NODE: continue
+            repository = self._create_repository(repository_definition)
+            self.workspace.repositories[repository.name] = repository
+    
+    def _create_repository(self, repository_definition):
         name = repository_definition.getAttribute("name")
         self.log.debug("Converting repository definition '%s' into object form." % name)
         
@@ -353,32 +746,32 @@
         repository = None
         type = repository_definition.getAttribute("type").upper()
         if type == "CVS":
-            repository = self._create_cvs_repository(workspace, name, title, home_page, cvsweb, redistributable, repository_definition)
+            repository = self._create_cvs_repository(name, title, home_page, cvsweb, redistributable, repository_definition)
         elif type == "SVN":
-            repository = self._create_svn_repository(workspace, name, title, home_page, cvsweb, redistributable, repository_definition)
+            repository = self._create_svn_repository(name, title, home_page, cvsweb, redistributable, repository_definition)
         else:
             raise ModellerError, "Unknown repository type '%s' for repository '%s'" % (type,
name)
+        #TODO perforce support
         
         return repository
-
     
-    def _create_cvs_repository(self, workspace, name, title, home_page, cvsweb, redistributable, repository_definition):
-        hostname = self._find_element_text(repository_definition, "hostname")
-        path = self._find_element_text(repository_definition, "path")
+    def _create_cvs_repository(self, name, title, home_page, cvsweb, redistributable, repository_definition):
+        hostname = _find_element_text(repository_definition, "hostname")
+        path = _find_element_text(repository_definition, "path")
 
         method = CVS_METHOD_PSERVER
-        try: method = self._find_element_text(repository_definition, "method")
+        try: method = _find_element_text(repository_definition, "method")
         except: pass
         
         user = None
-        try: user = self._find_element_text(repository_definition, "user")
+        try: user = _find_element_text(repository_definition, "user")
         except: pass
 
         password = None
-        try: password = self._find_element_text(repository_definition, "password")
+        try: password = _find_element_text(repository_definition, "password")
         except: pass
         
-        repository = CvsRepository(workspace,
+        repository = CvsRepository(self.workspace,
                                    name,
                                    hostname,
                                    path,
@@ -391,18 +784,18 @@
                                    password = password)
         return repository
 
-    def _create_svn_repository(self, workspace, name, title, home_page, cvsweb, redistributable, repository_definition):
-        url = self._find_element_text(repository_definition, "url")
+    def _create_svn_repository(self, name, title, home_page, cvsweb, redistributable, repository_definition):
+        url = _find_element_text(repository_definition, "url")
 
         user = None
-        try: user = self._find_element_text(repository_definition, "user")
+        try: user = _find_element_text(repository_definition, "user")
         except: pass
 
         password = None
-        try: password = self._find_element_text(repository_definition, "password")
+        try: password = _find_element_text(repository_definition, "password")
         except: pass
         
-        repository = SvnRepository(workspace,
+        repository = SvnRepository(self.workspace,
                                    name,
                                    url,
                                    title = title,
@@ -412,29 +805,258 @@
                                    user = user,
                                    password = password)
         return repository
+    
+    ###
+    ### Module parsing
+    ###
+
+    def _find_repository_for_module(self, module_definition):
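+        """Look up the Repository object named by this module's <repository name=""/> child."""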
+        name = module_definition.getAttribute("name")
+        repo_name = module_definition.getElementsByTagName("repository").item(0).getAttribute("name")
+        repo = self.workspace.repositories[repo_name]
+        return repo
+
+    def _create_modules(self):
+        for module_definition in self.module_definitions:
+            if not module_definition.nodeType == dom.Node.ELEMENT_NODE: continue
+            module = self._create_module(module_definition)
+            module.repository.modules[module.name] = module
+            self.workspace.modules[module.name] = module
+        
+    def _create_module(self, module_definition):
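+        """Convert a single <module/> element into a CvsModule or SvnModule, based on its repository type."""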
+        name = module_definition.getAttribute("name")
+        repository = self._find_repository_for_module(module_definition)
+        
+        # parse the attributes and elements common to all modules
+        url = None
+        try: url = _find_element_text(module_definition, "url")
+        except: pass
+        
+        description = None
+        try: description = _find_element_text(module_definition, "description")
+        except: pass
+        
+        # now delegate to _create methods for specific modules to do the rest
+        module = None
+        if isinstance(repository, CvsRepository):
+            module = self._create_cvs_module(repository, name, url, description, module_definition)
+        elif isinstance(repository, SvnRepository):
+            module = self._create_svn_module(repository, name, url, description, module_definition)
+        else:
+            raise ModellerError, "Unknown repository type '%s' referenced by module '%s'"
% (repository.__class__,name)
+        #TODO perforce support
+        return module
+    
+    def _create_cvs_module(self, repository, name, url, description, module_definition):
+        tag = module_definition.getAttribute("tag")
+        return CvsModule(repository, name, tag, url, description)
+    
+    def _create_svn_module(self, repository, name, url, description, module_definition):
+        path = module_definition.getAttribute("path")
+        return SvnModule(repository, name, path, url, description)
+    
+    ###
+    ### Project parsing
+    ###
+    
+    def _find_module_for_project(self, project_definition):
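+        """Look up the Module object named by this project's <module name=""/> child."""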
+        name = project_definition.getAttribute("name")
+        module_name = project_definition.getElementsByTagName("module").item(0).getAttribute("name")
+        module = self.workspace.modules[module_name]
+        return module
+
+    def _create_projects(self):
+        for project_definition in self.project_definitions:
+            if not project_definition.nodeType == dom.Node.ELEMENT_NODE: continue
+            project = self._create_project(project_definition)
+            project.module.projects[project.name] = project
+            self.workspace.projects[project.name] = project
+
+            self._create_commands(project,project_definition)
+            self._create_outputs(project,project_definition)
+
+        # wire up dependencies only after projects have been created
+        for project_definition in self.project_definitions:
+            if not project_definition.nodeType == dom.Node.ELEMENT_NODE: continue
+            self._create_dependencies(project_definition)
+        
+    def _create_project(self, project_definition):
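+        """Convert a single <project/> element into a Project tied to its containing module."""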
+        name = project_definition.getAttribute("name")
+        module = self._find_module_for_project(project_definition)
+        
+        project = Project(module, name)
+        return project
+
+    def _create_commands(self, project, project_definition):
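+        """Convert <delete/>, <mkdir/> and <script/> elements into command objects on the project."""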
+        rmdirs = project_definition.getElementsByTagName("delete")
+        for cmd in rmdirs:
+            dir = cmd.getAttribute("dir")
+            project.add_command(Rmdir(project, dir))
+            
+        mkdirs = project_definition.getElementsByTagName("mkdir")
+        for cmd in mkdirs:
+            dir = cmd.getAttribute("dir")
+            project.add_command(Mkdir(project, dir))
+        
+        scripts = project_definition.getElementsByTagName("script")
+        for cmd in scripts:
+            name = cmd.getAttribute("name")
+            args = []
+            for arg in cmd.getElementsByTagName("arg"):
+                name = arg.getAttribute("name")
+                value = arg.getAttribute("value")
+                args.append((name, value))
+                
+            project.add_command(Script(project, name, args))
+        
+        #TODO more commands
+    
+    def _create_outputs(self, project, project_definition):
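+        """Convert <home/> and <jar/> elements into output objects on the project."""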
+        homes = project_definition.getElementsByTagName("home")
+        if homes.length > 0:
+            home = homes.item(0).getAttribute("directory")
+            project.add_output(Homedir(project,home))
+        
+        jars = project_definition.getElementsByTagName("jar")
+        for jar in jars:
+            name = jar.getAttribute("name")
+            id = jar.getAttribute("id")
+            add_to_bootclass_path = jar.getAttribute("type") == "boot"
+            project.add_output(Jar(project,name,id,add_to_bootclass_path))
+            
+        #TODO more outputs
+    
+    def _create_dependencies(self, project_definition):
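+        """Convert the <depend/> elements of a project definition into Dependency objects."""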
+        name = project_definition.getAttribute("name")
+        project = self.workspace.projects[name]
+            
+        dependencies = project_definition.getElementsByTagName("depend")
+        for dependency in dependencies:
+            self._add_dependency(project, dependency)
+    
+    def _add_dependency(self, project, dependency):
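+        """Create a single Dependency from a <depend/> element; unresolved targets are stored by name for the Verifier."""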
+        dependency_name = dependency.getAttribute("project")
+        runtime = dependency.getAttribute("runtime") == "true"
+        inherit = dependency.getAttribute("inherit")
+        optional = dependency.getAttribute("optional") == "true"
+        
+        dependency_project = None
+        try:
+            dependency_project = self.workspace.projects[dependency_name]
+        except KeyError:
+            # we store the name instead. a Verifier should be used later to
+            # fix this error.
+            dependency_project = dependency_name
+        
+        id = dependency.getAttribute("id")
+        project.add_dependency(Dependency(dependency_project,project,optional,runtime,inherit,id))
+
+
+class Visitor:
+    def __init__(self):
+        # we keep a stack of the dependencies of a particular project,
+        # adding an item as we traverse the graph. In the case of a cycle,
+        # we track back through that stack to find it completely
+        self.groups = []
+        
+        # we keep a flat list of all the projects we visit. IF we visit
+        # a project twice, that indicates a cycle, since the topological
+        # sort must have failed
+        self.visited = []
+        
+        # when we find cycles, we store all the projects involved in this
+        # array
+        self.cycles = []
+        
+    def visit(self, project):
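+        """Record a visit; seeing the same project twice means the topological sort hit a cycle."""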
+        if project in self.visited:
+            self._find_cycle(project, [project], project)
+        else:
+            self.visited.append(project)
+            self.groups.append(project.dependencies)
+    
+    def done(self, numberOfProjects):
+        # check whether we visited all projects. If not,
+        # the stack in the Verifier was empty before its
+        # time, hence there were projects lying around
+        # with dependencies that weren't satisfied, hence
+        # we must have found a cycle!
+        assert (numberOfProjects > len(self.visited)) == \
+               (len(self.cycles) > 0)
+        
+        if len(self.cycles) > 0:
+            self._handle_cycles()
+    
+    def _find_cycle(self, project, cycle, first):
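+        """Walk back through the stored dependency groups to reconstruct the cycle ending at 'first'."""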
+        group = self.groups.pop()
+        for dependency in group:
+            if dependency.dependee == first:
+                # that completes the cycle
+                self.cycles.append(cycle)
+                break
+            if dependency.dependee == project:
+                # this is the project that references us
+                project_in_cycle = dependency.dependency
+                cycle.append(project_in_cycle)
+                self._find_cycle(project_in_cycle, cycle, first)
+    
+    def _handle_cycles(self):
+        pass # TODO: remove these projects and their dependees
 
-    def _find_repository_definitions(self, root):
-        return root.getElementsByTagName("repository")
+class Verifier:
+    """Verifies an objectified gump workspace."""
 
-    def _find_element_text(self, parent, element_name):
-        return parent.getElementsByTagName(element_name).item(0).firstChild.data
+    def verify(self, workspace):
+        if True: return # TODO
+
+        visitor = Visitor()
+        self.topsortedTraversal(workspace, visitor)
+    
+    def topsortedTraversal(self, workspace, visitor):
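+        """Visit projects in topological order, lowering each dependency's in-degree as it is handled."""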
+        self._set_indegrees(workspace)
+        # using a stack *should* ensure depth-first
+        stack = self._get_initial_stack(workspace)
+
+        while len(stack) > 0:
+            project = stack.pop()
+            visitor.visit(project)
+            
+            for dependency in project.dependencies:
+                dependency.dependency.indegree -= 1
+                if dependency.dependency.indegree == 0:
+                    stack.append(dependency.dependency)
+        
+        visitor.done(len(workspace.projects))
+        self._clear_indegrees(workspace)
     
-    def _create_modules(self, repositories, root):
-        module_definitions = self._find_module_definitions(root)
+    def _set_indegrees(self, workspace):
+        """Set the number of in-degrees for each project.
         
-        for module_definition in module_definitions:
-            name = module_definition.getAttribute("name")
-            repository = self._find_repository_for_module(repositories, module_definition)
-            module = Module(repository)
-            if repository:
-                repository.modules[name] = module
-    
-    def _create_projects(self, modules, root):
-        project_definitions = self._find_project_definitions(root)
-        
-        for project_definition in project_definitions:
-            name = project_definition.getAttribute("name")
-            module = self._find_module_for_project(modules, project_definition)
-            project = Project(module)
-            if module:
-                module.projects[name] = project
\ No newline at end of file
+        The number of in-degrees is a measure of how many
+        dependees a project has. The key bit is that the
+        verifier decreases the number of in-degrees for each
+        project as a dependency is handled.
+        """
+        for project in workspace.projects:
+            project.indegree = 0
+        
+        for dependency in workspace.dependencies:
+            dependency.dependency.indegree += 1
+    
+    def _clear_indegrees(self, workspace):
+        """Removes the in-degrees property from each project."""
+        
+        for project in workspace.projects:
+            del project.indegree
+
+    def _get_initial_stack(self, workspace):
+        """Get the projects with an in-degree of 0.
+        
+        In other words, get the projects without dependees.
+        """
+        stack = []
+        for project in workspace.projects:
+            if project.indegree == 0:
+                stack.append(project) 
+        
+        return stack
\ No newline at end of file
