ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tm...@apache.org
Subject svn commit: r1539040 - /ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/util/AnnotationDepUtils.java
Date Tue, 05 Nov 2013 15:57:23 GMT
Author: tmill
Date: Tue Nov  5 15:57:22 2013
New Revision: 1539040

URL: http://svn.apache.org/r1539040
Log:
Fixes CTAKES-257. Implements two different forms - Dependency Word (DW) and Grammatical Relation
and Word (GRW) from Nguyen et al. (2009) emnlp paper.

Added:
    ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/util/AnnotationDepUtils.java

Added: ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/util/AnnotationDepUtils.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/util/AnnotationDepUtils.java?rev=1539040&view=auto
==============================================================================
--- ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/util/AnnotationDepUtils.java
(added)
+++ ctakes/trunk/ctakes-dependency-parser/src/main/java/org/apache/ctakes/dependency/parser/util/AnnotationDepUtils.java
Tue Nov  5 15:57:22 2013
@@ -0,0 +1,331 @@
+package org.apache.ctakes.dependency.parser.util;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
+import org.apache.ctakes.utils.tree.SimpleTree;
+import org.apache.log4j.Logger;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.uimafit.util.JCasUtil;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import com.google.common.collect.Sets;
+
+public class AnnotationDepUtils {
+  private static Logger logger = Logger.getLogger(AnnotationDepUtils.class);
+
+  public static String getTokenRelTreeString(JCas jCas, List<ConllDependencyNode> nodes,
Annotation[] annotations, String[] labels){
+    return getTokenRelTreeString(jCas, nodes, annotations, labels, false);
+  }
+  
+  public static String getTokenRelTreeString(JCas jCas, List<ConllDependencyNode> nodes,
Annotation[] annotations, String[] labels, boolean getParent){
+    Map<ConllDependencyNode, SimpleTree> node2tree = new HashMap<ConllDependencyNode,
SimpleTree>();
+    ConllDependencyNode topNode = null;
+    
+    // create a SimpleTree object that corresponds to this dependency tree, where the
+    // root is the head of the sentence and the children are all the words such that the
parent
+    // is their head. In this case every word is represented by its relationship as well
as
+    // its word
+    for(ConllDependencyNode node : nodes){
+      if(node.getHead() == null){
+        topNode = node;
+        continue;
+        // do absolutely nothing with this -- it covers the whole sentence and has no useful
info
+//        continue;
+//      }else if(node.getHead().getHead() == null){
+//        topNode = node;
+      }
+           
+      SimpleTree curTree = null;
+      SimpleTree headTree = null;
+      if(!node2tree.containsKey(node)){
+        curTree = SimpleTree.fromString(String.format("(%s %s)", node.getDeprel(), node.getCoveredText()));
+        node2tree.put(node, curTree);
+      }else{
+        curTree = node2tree.get(node);
+      }
+
+
+      if(curTree.parent == null && node.getHead() != null){
+        if(node2tree.containsKey(node.getHead())){
+          headTree = node2tree.get(node.getHead());
+        }else{
+          String token = node.getHead().getHead() == null ? "TOP" : node.getHead().getCoveredText();
+          headTree = SimpleTree.fromString(String.format("(%s %s)", node.getHead().getDeprel(),
SimpleTree.escapeCat(token)));
+          node2tree.put(node.getHead(), headTree);
+        }
+
+        curTree.parent = headTree.children.get(0);
+        headTree.children.get(0).addChild(curTree);
+      }
+    }
+    
+    ConllDependencyNode highestHead = null;
+    ConllDependencyNode leftmostHead = null;
+    ConllDependencyNode rightmostHead = null;
+    List<SimpleTree> annotationNodes = Lists.newArrayList();
+    
+    // take the set of input annotations and the corresponding labels and insert them into
the SimpleTree
+    for(int i = 0; i < annotations.length; i++){
+      // get the node representing the head of this annotation
+      List<ConllDependencyNode> coveredNodes = JCasUtil.selectCovered(jCas, ConllDependencyNode.class,
annotations[i]);
+      if(coveredNodes == null || coveredNodes.size() == 0) continue;
+      ConllDependencyNode headNode = DependencyUtility.getNominalHeadNode(coveredNodes);
+      
+      // is this the highest node of all the annotations we're looking at?
+      if(highestHead == null || (distanceFromRoot(headNode) < distanceFromRoot(highestHead))){
+        highestHead = headNode;
+      }
+      if(leftmostHead == null || headNode.getBegin() < leftmostHead.getBegin()){
+        leftmostHead = headNode;
+      }
+      if(rightmostHead == null || headNode.getEnd() > rightmostHead.getEnd()){
+        rightmostHead = headNode;
+      }
+      
+      SimpleTree insertionPoint = node2tree.get(headNode);
+      SimpleTree insertingTree = new SimpleTree(insertionPoint.cat);
+      insertionPoint.cat = labels[i];
+      insertingTree.children = insertionPoint.children;
+      insertingTree.children.get(0).parent = insertingTree;
+      insertionPoint.children = new ArrayList<SimpleTree>();
+      insertionPoint.addChild(insertingTree);
+      insertingTree.parent = insertionPoint;
+      annotationNodes.add(insertionPoint);
+    }
+    if(highestHead == null) return null;
+    
+    SimpleTree root = node2tree.get(topNode);
+    SimpleTree leftmostNode = getLeftmostNode(root, Sets.newHashSet(labels));
+    SimpleTree rightmostNode = getRightmostNode(root, Sets.newHashSet(labels));
+    SimpleTree pet = getPathEnclosedTree(root, annotationNodes, leftmostNode, rightmostNode);
+    if(getParent && pet.parent != null) pet = pet.parent;
+    
+    String treeStr = pet.toString();
+    treeStr = treeStr.replaceAll("\\(([^\\(]+) \\)", "($1 nil)").toLowerCase();
+    
+    return treeStr;
+  }
+  
+  private static SimpleTree getRightmostNode(SimpleTree root, Set<String> labels) {
+    SimpleTree node = null;
+    
+    for(int i = root.children.size()-1; i >= 0; i--){
+      if(labels.contains(root.children.get(i).cat)){
+        node = root.children.get(i);
+        break;
+      }
+      node = getRightmostNode(root.children.get(i), labels);
+      if(node != null) break;
+    }
+    
+    return node;
+  }
+
+  private static SimpleTree getLeftmostNode(SimpleTree root, Set<String> labels) {
+    SimpleTree node = null;
+    
+    for(int i = 0; i < root.children.size(); i++){
+      if(labels.contains(root.children.get(i).cat)){
+        node = root.children.get(i);
+        break;
+      }
+      node = getLeftmostNode(root.children.get(i), labels);
+      if(node != null) break;
+    }
+    
+    return node;
+  }
+  
+  public static String getTokenTreeString(JCas jCas, List<ConllDependencyNode> nodes,
Annotation[] annotations, String[] labels, boolean getParent){
+    Map<ConllDependencyNode, SimpleTree> node2tree = getNodeTreeMap(nodes);
+    ConllDependencyNode topNode = getTopNode(nodes);
+    
+    ConllDependencyNode highestHead = null;
+    ConllDependencyNode leftmostHead = null;
+    ConllDependencyNode rightmostHead = null;
+    List<SimpleTree> annotationNodes = Lists.newArrayList();
+    // take the set of input annotations and the corresponding labels and insert them into
the SimpleTree
+    for(int i = 0; i < annotations.length; i++){
+      // get the node representing the head of this annotation
+      List<ConllDependencyNode> coveredNodes = JCasUtil.selectCovered(jCas, ConllDependencyNode.class,
annotations[i]);
+      if(coveredNodes == null || coveredNodes.size() == 0) continue;
+      ConllDependencyNode headNode = DependencyUtility.getNominalHeadNode(coveredNodes);
+      
+      // is this the highest node of all the annotations we're looking at?
+      if(highestHead == null || (distanceFromRoot(headNode) < distanceFromRoot(highestHead))){
+        highestHead = headNode;
+      }
+      if(leftmostHead == null || headNode.getBegin() < leftmostHead.getBegin()){
+        leftmostHead = headNode;
+      }
+      if(rightmostHead == null || headNode.getEnd() > rightmostHead.getEnd()){
+        rightmostHead = headNode;
+      }
+      
+      SimpleTree insertionPoint = node2tree.get(headNode);
+      SimpleTree insertingTree = new SimpleTree(insertionPoint.cat);
+      insertionPoint.cat = labels[i];
+      insertingTree.children = insertionPoint.children;
+      insertingTree.children.get(0).parent = insertingTree;
+      insertionPoint.children = new ArrayList<SimpleTree>();
+      insertionPoint.addChild(insertingTree);
+      insertingTree.parent = insertionPoint;
+      annotationNodes.add(insertionPoint);
+    }
+    if(highestHead == null) return null;
+    
+    SimpleTree root = node2tree.get(topNode);
+    SimpleTree leftmostNode = getLeftmostNode(root, Sets.newHashSet(labels));
+    SimpleTree rightmostNode = getRightmostNode(root, Sets.newHashSet(labels));
+    SimpleTree pet = getPathEnclosedTree(root, annotationNodes, leftmostNode, rightmostNode);
+    if(getParent && pet.parent != null) pet = pet.parent;
+    
+    String treeStr = pet.toString();
+    treeStr = treeStr.replaceAll("\\(([^\\(]+) \\)", "($1 nil)").toLowerCase();
+    
+    return treeStr;
+
+  }
+  
+  public static SimpleTree getPathEnclosedTree(SimpleTree root, List<SimpleTree> annotationNodes,
SimpleTree leftmostTree, SimpleTree rightmostTree){
+    SimpleTree pet = null;
+    // for the general case (>= 1 annotations) we need to first find the common set of
ancestors
+    Set<SimpleTree> commonAncestors = getAncestors(annotationNodes.get(0));
+    for(int i = 1; i < annotationNodes.size(); i++){
+      Set<SimpleTree> nodeAncestors = getAncestors(annotationNodes.get(i));
+      commonAncestors = Sets.intersection(commonAncestors, nodeAncestors);
+    }
+    // of the common set, which is the lowest?
+    SimpleTree lowestAncestor = null;
+    for(SimpleTree ancestor : commonAncestors){
+      if(lowestAncestor == null || distanceFromRoot(ancestor) > distanceFromRoot(lowestAncestor)){
+        lowestAncestor = ancestor;
+      }
+    }
+    // of the children of the lowest ancestors, which do not contain any of the annotations
we are
+    // interested in?
+    root = lowestAncestor;
+    SimpleTree curNode = leftmostTree;
+    SimpleTree lastNode = null;
+    while(curNode != root){
+      lastNode = curNode;
+      if(curNode == null || curNode.parent == null){
+        logger.error("Something weird.");
+      }
+      curNode = curNode.parent;
+      if(curNode == null || curNode.children == null){
+        logger.error("Something is null on the left side of the tree in PET!");
+        break;
+      }
+      while(curNode.children.get(0) != lastNode){
+        curNode.children.remove(0);
+      }
+    }
+    
+    curNode = rightmostTree;
+    lastNode = null;
+    while(curNode != root){
+      lastNode = curNode;
+      curNode = curNode.parent;
+      if(curNode == null){
+        logger.error("Something is null on the right side of the tree in PET!");
+        break;
+      }
+      if(curNode.children == null || curNode.children.size() == 0){
+        System.err.println("Help");
+      }
+      while(curNode.children.get(curNode.children.size()-1) != lastNode){
+        curNode.children.remove(curNode.children.size()-1);
+        if(curNode.children == null || curNode.children.size() == 0){
+          System.err.println("Help");
+        }
+      }
+    }
+    pet = lowestAncestor;
+    return pet;
+  }
+  
+  private static ConllDependencyNode getTopNode(List<ConllDependencyNode> nodes){
+    ConllDependencyNode topNode = null;
+    for(ConllDependencyNode node : nodes){
+      if(node.getHead() == null){
+        topNode = node;
+        break;
+      }
+    }
+    
+    return topNode;
+  }
+  
+  private static Map<ConllDependencyNode, SimpleTree> getNodeTreeMap(List<ConllDependencyNode>
nodes){
+    Map<ConllDependencyNode, SimpleTree> node2tree = Maps.newHashMap();
+    for(ConllDependencyNode node : nodes){
+      if(node.getHead() == null){
+//        topNode = node;
+        continue;
+        // do absolutely nothing with this -- it covers the whole sentence and has no useful
info
+//        continue;
+//      }else if(node.getHead().getHead() == null){
+//        topNode = node;
+      }
+           
+      SimpleTree curTree = null;
+      SimpleTree headTree = null;
+      if(!node2tree.containsKey(node)){
+        curTree = SimpleTree.fromString(String.format("(%s %s)", node.getDeprel(), node.getCoveredText()));
+        node2tree.put(node, curTree);
+      }else{
+        curTree = node2tree.get(node);
+      }
+
+
+      if(curTree.parent == null && node.getHead() != null){
+        if(node2tree.containsKey(node.getHead())){
+          headTree = node2tree.get(node.getHead());
+        }else{
+          String token = node.getHead().getHead() == null ? "TOP" : node.getHead().getCoveredText();
+          headTree = SimpleTree.fromString(String.format("(%s %s)", node.getHead().getDeprel(),
SimpleTree.escapeCat(token)));
+          node2tree.put(node.getHead(), headTree);
+        }
+
+        curTree.parent = headTree.children.get(0);
+        headTree.children.get(0).addChild(curTree);
+      }
+    }
+    return node2tree;
+  }
+  
+  private static Set<SimpleTree> getAncestors(SimpleTree tree){
+    Set<SimpleTree> ancestors = Sets.newHashSet();
+    while(tree != null){
+      ancestors.add(tree);
+      tree = tree.parent;
+    }
+    return ancestors;
+  }
+  
+  public static int distanceFromRoot(ConllDependencyNode node){
+    int dist = 0;
+    while(node.getHead() != null){
+      dist++;
+      node = node.getHead();
+    }
+    return dist;
+  }
+  
+  public static int distanceFromRoot(SimpleTree tree){
+    int dist = 0;
+    while(tree.parent != null){
+      dist++;
+      tree = tree.parent;
+    }
+    return dist;
+  }
+}



Mime
View raw message