ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tm...@apache.org
Subject svn commit: r1586665 - /ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/
Date Fri, 11 Apr 2014 14:47:54 GMT
Author: tmill
Date: Fri Apr 11 14:47:54 2014
New Revision: 1586665

URL: http://svn.apache.org/r1586665
Log:
CTAKES-82: A variety of high-level syntactic and semantic features.

Added:
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/EventTimeRelationsFeaturesExtractor.java
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/ListFeaturesExtractor.java
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/ParseFeatureExtractor.java
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SRLRelationFeaturesExtractor.java
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SyntacticParentFeatureExtractor.java

Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/EventTimeRelationsFeaturesExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/EventTimeRelationsFeaturesExtractor.java?rev=1586665&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/EventTimeRelationsFeaturesExtractor.java
(added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/EventTimeRelationsFeaturesExtractor.java
Fri Apr 11 14:47:54 2014
@@ -0,0 +1,51 @@
+package org.apache.ctakes.temporal.ae.feature;
+
+import java.util.List;
+
+import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
+import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.TimeMention;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.cleartk.classifier.Feature;
+import org.uimafit.util.JCasUtil;
+
+import com.google.common.collect.Lists;
+
+/**
+ * Relation feature extractor that uses event-time relations already present
+ * in the CAS as features, e.g., for an event-event classifier downstream.
+ *
+ * <p>At this point it only produces narrative container-based features:
+ * arg1 is in a narrative container, arg2 is in a narrative container.
+ */
+public class EventTimeRelationsFeaturesExtractor implements
+    RelationFeaturesExtractor {
+
+  /** Category of the relations treated as narrative containers. */
+  private static final String CONTAINS_CATEGORY = "CONTAINS";
+
+  /**
+   * Scans every {@link BinaryTextRelation} in the CAS and, for each one that
+   * has a {@link TimeMention} argument and category "CONTAINS" (compared
+   * case-insensitively), adds "ARG1_IN_NC" when either relation argument has
+   * exactly the span of {@code arg1}, and "ARG2_IN_NC" likewise for
+   * {@code arg2}. A feature is added once per matching relation, so the same
+   * feature can occur several times in the result.
+   *
+   * @param jCas the CAS holding the existing relations
+   * @param arg1 first argument of the candidate relation
+   * @param arg2 second argument of the candidate relation
+   * @return the narrative container features, possibly empty
+   * @see RelationFeaturesExtractor
+   */
+  @Override
+  public List<Feature> extract(JCas jCas, IdentifiedAnnotation arg1,
+      IdentifiedAnnotation arg2) throws AnalysisEngineProcessException {
+    List<Feature> feats = Lists.newArrayList();
+
+    for(BinaryTextRelation rel : JCasUtil.select(jCas, BinaryTextRelation.class)){
+      Annotation relArg1 = rel.getArg1().getArgument();
+      Annotation relArg2 = rel.getArg2().getArgument();
+      if(!(relArg1 instanceof TimeMention || relArg2 instanceof TimeMention)){
+        continue;
+      }
+      // constant-first comparison: a relation without a category is skipped
+      // instead of raising a NullPointerException
+      if(!CONTAINS_CATEGORY.equalsIgnoreCase(rel.getCategory())){
+        continue;
+      }
+      if(sameSpan(relArg1, arg1) || sameSpan(relArg2, arg1)){
+        feats.add(new Feature("ARG1_IN_NC"));
+      }
+      if(sameSpan(relArg1, arg2) || sameSpan(relArg2, arg2)){
+        feats.add(new Feature("ARG2_IN_NC"));
+      }
+    }
+
+    return feats;
+  }
+
+  /** Returns true when both annotations have identical begin and end offsets. */
+  private static boolean sameSpan(Annotation a, Annotation b){
+    return a.getBegin() == b.getBegin() && a.getEnd() == b.getEnd();
+  }
+
+}

Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/ListFeaturesExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/ListFeaturesExtractor.java?rev=1586665&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/ListFeaturesExtractor.java
(added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/ListFeaturesExtractor.java
Fri Apr 11 14:47:54 2014
@@ -0,0 +1,130 @@
+package org.apache.ctakes.temporal.ae.feature;
+
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.List;
+
+import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
+import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
+import org.apache.ctakes.typesystem.type.syntax.TreebankNode;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.TimeMention;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.cleartk.classifier.Feature;
+import org.uimafit.util.JCasUtil;
+
+import com.google.common.collect.Lists;
+
+/**
+ * Relation feature extractor that checks if each argument is part of a list,
+ * by looking in a parse tree at sibling categories for commas and
+ * coordinators given an NP parent.
+ *
+ * <p>Features include whether either arg is in a list and at what list
+ * position (start, middle, end), whether a left sibling in the list is part
+ * of an existing relation, and if so, whether that relation has the same
+ * other argument as the current proposed relation.
+ */
+public class ListFeaturesExtractor implements RelationFeaturesExtractor {
+
+  /**
+   * Collects the list features for both arguments, prefixed "Arg1" and "Arg2".
+   *
+   * @param jCas the CAS holding the parse trees and existing relations
+   * @param arg1 first argument of the candidate relation
+   * @param arg2 second argument of the candidate relation
+   * @return the list features of arg1 followed by those of arg2
+   * @see RelationFeaturesExtractor
+   */
+  @Override
+  public List<Feature> extract(JCas jCas, IdentifiedAnnotation arg1,
+      IdentifiedAnnotation arg2) throws AnalysisEngineProcessException {
+    List<Feature> feats = Lists.newArrayList();
+
+    feats.addAll(getArgFeats(jCas, arg1, arg2, "Arg1"));
+    // the "other" argument of arg2 is arg1; passing arg2 twice compared the
+    // argument with itself in the WithCurArg check
+    feats.addAll(getArgFeats(jCas, arg2, arg1, "Arg2"));
+    return feats;
+  }
+
+  /**
+   * Computes the list features for one argument.
+   *
+   * @param jCas the CAS holding the existing relations
+   * @param primeArg the argument whose list membership is examined
+   * @param secondArg the other argument of the candidate relation
+   * @param prefix feature name prefix identifying {@code primeArg}
+   * @return the features for {@code primeArg}; empty when the first covered
+   *         parse node does not have exactly the argument span or has no
+   *         parent
+   */
+  private static Collection<? extends Feature> getArgFeats(JCas jCas,
+      IdentifiedAnnotation primeArg, IdentifiedAnnotation secondArg, String prefix) {
+    List<Feature> feats = Lists.newArrayList();
+
+    List<TreebankNode> nodes = JCasUtil.selectCovered(TreebankNode.class, primeArg);
+    if(nodes.isEmpty()) return feats;
+
+    TreebankNode node = nodes.get(0);
+    if(node.getBegin() != primeArg.getBegin() || node.getEnd() != primeArg.getEnd()){
+      // only a node with the exact span of the argument is considered
+      return feats;
+    }
+
+    // first move NNs up to their constituent
+    if(node.getNodeType().startsWith("NN")){
+      node = node.getParent();
+      if(node == null) return feats;
+    }
+    TreebankNode parent = node.getParent();
+    if(parent == null) return feats;
+
+    // find the node among its siblings, recording the spans to its left
+    // NOTE(review): if the identity comparison never matches, childIndex
+    // stays -1 and every sibling ends up in priorNPs -- confirm this cannot
+    // happen for nodes obtained through getParent()/getChildren()
+    HashSet<String> priorNPs = new HashSet<String>();
+    int numChildren = parent.getChildren().size();
+    int childIndex = -1;
+    for(int i = 0; i < numChildren; i++){
+      if(parent.getChildren(i) == node){
+        childIndex = i;
+        break;
+      }
+      priorNPs.add(getKey(parent.getChildren(i)));
+    }
+
+    // conditions for this arg being an element of a list:
+    // 1) is NP
+    // 2) parent is NP
+    // 3) left neighbor is "," or right neighbor is "," or both neighbors are ","
+    //    (or it is the last child and its left neighbor is a CC)
+    boolean lcComma = false, rcComma = false, lcAnd = false;
+    if(node.getNodeType().equals("NP") && parent.getNodeType().equals("NP")){
+      boolean hasLeft = childIndex > 0;
+      boolean hasRight = childIndex + 1 < numChildren;
+      if(hasLeft && parent.getChildren(childIndex - 1).getNodeType().equals(",")){
+        lcComma = true;
+      }
+      if(hasRight && parent.getChildren(childIndex + 1).getNodeType().equals(",")){
+        rcComma = true;
+      }
+      if(childIndex + 1 == numChildren && hasLeft
+          && parent.getChildren(childIndex - 1).getNodeType().equals("CC")){
+        lcAnd = true;
+      }
+    }
+
+    if(lcComma && rcComma){
+      feats.add(new Feature(prefix + "_midlist", true));
+    }else if(childIndex == 0 && rcComma){
+      feats.add(new Feature(prefix + "_startlist", true));
+    }else if(lcAnd){
+      feats.add(new Feature(prefix + "_endlist", true));
+    }
+
+    if(lcComma || rcComma || lcAnd){
+      // somehow in a list
+      // check to see if any earlier element of the list is already part of a relation
+      for(BinaryTextRelation otherRel : JCasUtil.select(jCas, BinaryTextRelation.class)){
+        Annotation a1 = otherRel.getArg1().getArgument();
+        Annotation a2 = otherRel.getArg2().getArgument();
+        // relations with a time argument are covered by another feature
+        if(a1 instanceof TimeMention || a2 instanceof TimeMention) continue;
+        addSiblingRelationFeats(feats, prefix, priorNPs, a1, a2, secondArg);
+        addSiblingRelationFeats(feats, prefix, priorNPs, a2, a1, secondArg);
+      }
+    }
+
+    return feats;
+  }
+
+  /**
+   * Adds the left-sibling features for one side of an existing relation:
+   * "_leftSiblingInRelation" when {@code listSide} has the span of a left
+   * sibling, plus "_leftSiblingInRelationWithCurArg" when the relation's
+   * other side has exactly the span of {@code secondArg}.
+   */
+  private static void addSiblingRelationFeats(List<Feature> feats, String prefix,
+      HashSet<String> priorNPs, Annotation listSide, Annotation otherSide,
+      IdentifiedAnnotation secondArg){
+    if(!priorNPs.contains(getKey(listSide))) return;
+
+    // one of the left children is already in another relation!
+    feats.add(new Feature(prefix + "_leftSiblingInRelation", true));
+
+    if(secondArg.getBegin() == otherSide.getBegin()
+        && secondArg.getEnd() == otherSide.getEnd()){
+      // the other proposed arg of this relation is already in a relation
+      // with another element of this list!
+      feats.add(new Feature(prefix + "_leftSiblingInRelationWithCurArg"));
+    }
+  }
+
+  /** Builds a "begin-end" key identifying the span of an annotation. */
+  private static String getKey(Annotation annot){
+    return annot.getBegin() + "-" + annot.getEnd();
+  }
+}

Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/ParseFeatureExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/ParseFeatureExtractor.java?rev=1586665&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/ParseFeatureExtractor.java
(added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/ParseFeatureExtractor.java
Fri Apr 11 14:47:54 2014
@@ -0,0 +1,54 @@
+package org.apache.ctakes.temporal.ae.feature;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.ctakes.constituency.parser.util.AnnotationTreeUtils;
+import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
+import org.apache.ctakes.typesystem.type.syntax.TreebankNode;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.JCas;
+import org.cleartk.classifier.Feature;
+
+/**
+ * Relation feature extractor that finds a dominating phrase node for each
+ * argument, then specifies if one dominates t'other.
+ */
+public class ParseFeatureExtractor implements RelationFeaturesExtractor {
+
+	/**
+	 * Looks up the parse node of each argument, climbs from it to the nearest
+	 * ancestor whose node type ends with "P", and compares the spans of the two
+	 * resulting nodes. Adds "Arg1DominatesArg2" when the first span contains the
+	 * second, otherwise "Arg2DominatesArg1" when the second contains the first.
+	 *
+	 * @param jCas the CAS holding the parse trees
+	 * @param arg1 first argument of the candidate relation
+	 * @param arg2 second argument of the candidate relation
+	 * @return at most one dominance feature; empty when neither span contains
+	 *         the other or when no parse node is available for an argument
+	 * @see RelationFeaturesExtractor
+	 */
+	@Override
+	public List<Feature> extract(JCas jCas, IdentifiedAnnotation arg1,
+			IdentifiedAnnotation arg2) throws AnalysisEngineProcessException {
+		List<Feature> features = new ArrayList<Feature>();
+
+		TreebankNode tree1 = AnnotationTreeUtils.annotationNode(jCas, arg1);
+		TreebankNode tree2 = AnnotationTreeUtils.annotationNode(jCas, arg2);
+		if(tree1 == null || tree2 == null){
+			// NOTE(review): guards against a missing parse node -- confirm
+			// whether annotationNode can actually return null
+			return features;
+		}
+
+		TreebankNode phrase1 = enclosingPhrase(tree1);
+		TreebankNode phrase2 = enclosingPhrase(tree2);
+
+		// dominance is decided on character offsets, not on tree ancestry
+		if(phrase1.getBegin() <= phrase2.getBegin() && phrase1.getEnd() >= phrase2.getEnd()){
+			features.add(new Feature("Arg1DominatesArg2"));
+		}else if(phrase2.getBegin() <= phrase1.getBegin()
+				&& phrase2.getEnd() >= phrase1.getEnd()){
+			features.add(new Feature("Arg2DominatesArg1"));
+		}
+
+		// TODO: consider an "LCA" feature built from
+		// AnnotationTreeUtils.getCommonAncestor(tree1, tree2)
+
+		return features;
+	}
+
+	/**
+	 * Climbs from {@code node} to the first strict ancestor whose node type ends
+	 * with "P". Returns the topmost ancestor when no such node exists, and
+	 * {@code node} itself when it has no parent.
+	 */
+	private static TreebankNode enclosingPhrase(TreebankNode node){
+		TreebankNode phrase = node;
+		while(phrase.getParent() != null){
+			phrase = phrase.getParent();
+			if(phrase.getNodeType().endsWith("P")) break;
+		}
+		return phrase;
+	}
+
+}

Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SRLRelationFeaturesExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SRLRelationFeaturesExtractor.java?rev=1586665&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SRLRelationFeaturesExtractor.java
(added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SRLRelationFeaturesExtractor.java
Fri Apr 11 14:47:54 2014
@@ -0,0 +1,84 @@
+package org.apache.ctakes.temporal.ae.feature;
+
+import java.util.List;
+
+import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.Predicate;
+import org.apache.ctakes.typesystem.type.textsem.SemanticArgument;
+import org.apache.ctakes.typesystem.type.textsem.SemanticRoleRelation;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.cas.FSList;
+import org.apache.uima.jcas.cas.NonEmptyFSList;
+import org.cleartk.classifier.Feature;
+import org.uimafit.util.JCasUtil;
+
+import com.google.common.collect.Lists;
+
+/**
+ * Relation feature extractor that uses semantic role labeling features -- is
+ * either argument a predicate, if so what frame, and if so is the other
+ * argument a semantic arg to that predicate. There are also features for
+ * whether individual args are a semantic arg to any semantic relation, and
+ * what arg type.
+ */
+public class SRLRelationFeaturesExtractor implements RelationFeaturesExtractor {
+
+  /**
+   * Collects the SRL features of arg1 (prefixed "Arg1") followed by those of
+   * arg2 (prefixed "Arg2"). An argument that is not an {@link EventMention}
+   * contributes no features.
+   *
+   * @param jCas the CAS being processed
+   * @param arg1 first argument of the candidate relation
+   * @param arg2 second argument of the candidate relation
+   * @return the SRL features, possibly empty
+   * @see RelationFeaturesExtractor
+   */
+  @Override
+  public List<Feature> extract(JCas jCas, IdentifiedAnnotation arg1,
+      IdentifiedAnnotation arg2) throws AnalysisEngineProcessException {
+    List<Feature> feats = Lists.newArrayList();
+
+    addSrlFeats(feats, arg1, "Arg1", arg2, "Arg2");
+    addSrlFeats(feats, arg2, "Arg2", arg1, "Arg1");
+
+    return feats;
+  }
+
+  /**
+   * Appends the SRL features of one argument to {@code feats}.
+   *
+   * @param feats list the features are appended to
+   * @param arg the argument being described
+   * @param prefix feature name prefix identifying {@code arg}
+   * @param other the other argument of the candidate relation
+   * @param otherPrefix feature name prefix identifying {@code other}
+   */
+  private static void addSrlFeats(List<Feature> feats, IdentifiedAnnotation arg,
+      String prefix, IdentifiedAnnotation other, String otherPrefix){
+    if(!(arg instanceof EventMention)) return;
+
+    List<Predicate> preds = JCasUtil.selectCovered(Predicate.class, arg);
+    if(preds.size() > 0){
+      // only the first covered predicate is used
+      Predicate pred = preds.get(0);
+      feats.add(new Feature(prefix + "_Pred", true));
+      feats.add(new Feature(prefix + "_Frame", pred.getFrameSet()));
+
+      // check if the other argument is one of this predicate's SRL args
+      FSList relList = pred.getRelations();
+      while(relList instanceof NonEmptyFSList){
+        NonEmptyFSList cell = (NonEmptyFSList) relList;
+        SemanticRoleRelation rel = (SemanticRoleRelation) cell.getHead();
+        SemanticArgument semArg = rel.getArgument();
+        if(semArg.getBegin() == other.getBegin() && semArg.getEnd() == other.getEnd()){
+          feats.add(new Feature(prefix + "_Pred_" + otherPrefix + "_Role", true));
+          break;
+        }
+        relList = cell.getTail();
+      }
+    }
+
+    // independent of the predicate check: is this arg a semantic argument?
+    List<SemanticArgument> semArgs = JCasUtil.selectCovered(SemanticArgument.class, arg);
+    if(semArgs.size() > 0){
+      feats.add(new Feature(prefix + "_SemArg", true));
+      feats.add(new Feature(prefix + "_SemArgType", semArgs.get(0).getLabel()));
+    }
+  }
+
+}

Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SyntacticParentFeatureExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SyntacticParentFeatureExtractor.java?rev=1586665&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SyntacticParentFeatureExtractor.java
(added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/SyntacticParentFeatureExtractor.java
Fri Apr 11 14:47:54 2014
@@ -0,0 +1,35 @@
+package org.apache.ctakes.temporal.ae.feature;
+
+import java.util.List;
+
+import org.apache.ctakes.constituency.parser.util.AnnotationTreeUtils;
+import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
+import org.apache.ctakes.typesystem.type.syntax.TreebankNode;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.JCas;
+import org.cleartk.classifier.Feature;
+
+import com.google.common.collect.Lists;
+
+/**
+ * Relation feature extractor that reports a syntactic category for each
+ * argument, taken from the parse node that
+ * {@code AnnotationTreeUtils.annotationNode} returns for it.
+ */
+public class SyntacticParentFeatureExtractor implements RelationFeaturesExtractor {
+
+  /**
+   * Produces exactly two features, "Arg1Parent" and "Arg2Parent", whose values
+   * are the node types of the parse nodes looked up for arg1 and arg2.
+   *
+   * @param jcas the CAS holding the parse trees
+   * @param arg1 first argument of the candidate relation
+   * @param arg2 second argument of the candidate relation
+   * @return the two category features, arg1 first
+   * @see RelationFeaturesExtractor
+   */
+  public List<Feature> extract(JCas jcas, IdentifiedAnnotation arg1,
+      IdentifiedAnnotation arg2) throws AnalysisEngineProcessException {
+    List<Feature> feats = Lists.newArrayList();
+    feats.add(nodeTypeFeature(jcas, "Arg1Parent", arg1));
+    feats.add(nodeTypeFeature(jcas, "Arg2Parent", arg2));
+    return feats;
+  }
+
+  /**
+   * Builds a feature called {@code name} from the node type of the parse node
+   * found for {@code arg}.
+   */
+  private static Feature nodeTypeFeature(JCas jcas, String name,
+      IdentifiedAnnotation arg){
+    // NOTE(review): the feature names say "Parent" but the type is read from
+    // the returned node itself -- confirm which node annotationNode yields
+    TreebankNode node = AnnotationTreeUtils.annotationNode(jcas, arg);
+    return new Feature(name, node.getNodeType());
+  }
+
+}



Mime
View raw message