ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tm...@apache.org
Subject svn commit: r1481794 - /ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/features/TokenFeaturesExtractor.java
Date Mon, 13 May 2013 11:33:24 GMT
Author: tmill
Date: Mon May 13 11:33:23 2013
New Revision: 1481794

URL: http://svn.apache.org/r1481794
Log:
Expand noun event mentions to NPs for token feature extraction.

Modified:
    ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/features/TokenFeaturesExtractor.java

Modified: ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/features/TokenFeaturesExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/features/TokenFeaturesExtractor.java?rev=1481794&r1=1481793&r2=1481794&view=diff
==============================================================================
--- ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/features/TokenFeaturesExtractor.java (original)
+++ ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/features/TokenFeaturesExtractor.java Mon May 13 11:33:23 2013
@@ -21,8 +21,15 @@ package org.apache.ctakes.relationextrac
 import java.util.ArrayList;
 import java.util.List;
 
+import org.apache.ctakes.constituency.parser.util.AnnotationTreeUtils;
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.syntax.TerminalTreebankNode;
+import org.apache.ctakes.typesystem.type.syntax.TreebankNode;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
 import org.cleartk.classifier.Feature;
 import org.cleartk.classifier.feature.extractor.CleartkExtractor;
 import org.cleartk.classifier.feature.extractor.CleartkExtractor.Bag;
@@ -32,12 +39,10 @@ import org.cleartk.classifier.feature.ex
 import org.cleartk.classifier.feature.extractor.CleartkExtractor.LastCovered;
 import org.cleartk.classifier.feature.extractor.CleartkExtractor.Preceding;
 import org.cleartk.classifier.feature.extractor.annotationpair.DistanceExtractor;
+import org.cleartk.classifier.feature.extractor.simple.CoveredTextExtractor;
 import org.cleartk.classifier.feature.extractor.simple.NamingExtractor;
 import org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor;
-import org.cleartk.classifier.feature.extractor.simple.CoveredTextExtractor;
-
-import org.apache.ctakes.typesystem.type.syntax.BaseToken;
-import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.uimafit.util.JCasUtil;
 
 public class TokenFeaturesExtractor implements RelationFeaturesExtractor {
 
@@ -88,9 +93,22 @@ public class TokenFeaturesExtractor impl
   private DistanceExtractor nTokensBetween = new DistanceExtractor(null, BaseToken.class);
 
   @Override
-  public List<Feature> extract(JCas jCas, IdentifiedAnnotation arg1, IdentifiedAnnotation arg2)
+  public List<Feature> extract(JCas jCas, IdentifiedAnnotation mention1, IdentifiedAnnotation mention2)
       throws AnalysisEngineProcessException {
     List<Feature> features = new ArrayList<Feature>();
+    Annotation arg1 = mention1;
+    Annotation arg2 = mention2;
+    
+    if(arg1 instanceof EventMention){
+      arg1 = getExpandedEvent(jCas, mention1);
+      if(arg1 == null) arg1 = mention1;
+    }
+    
+    if(arg2 instanceof EventMention){
+      arg2 = getExpandedEvent(jCas, mention2);
+      if(arg2 == null) arg2 = mention2;
+    }
+    
     features.addAll(this.mention1FeaturesExtractor.extract(jCas, arg1));
     features.addAll(this.mention2FeaturesExtractor.extract(jCas, arg2));
     features.addAll(this.tokensBetween.extractBetween(jCas, arg1, arg2));
@@ -98,4 +116,31 @@ public class TokenFeaturesExtractor impl
     return features;
   }
 
+  private static TreebankNode getExpandedEvent(JCas jCas, IdentifiedAnnotation mention){
+    // since events are single words, we are at a terminal node:
+    List<TerminalTreebankNode> terms = JCasUtil.selectCovered(TerminalTreebankNode.class, mention);
+    if(terms == null || terms.size() == 0){
+      return null;
+    }
+    
+    TreebankNode coveringNode = AnnotationTreeUtils.annotationNode(jCas, mention);
+    if(coveringNode == null) return terms.get(0);
+    
+    String pos =terms.get(0).getNodeType(); 
+    // do not expand Verbs
+    if(pos.startsWith("V")) return coveringNode;
+    
+    if(pos.startsWith("N")){
+      // get first NP node:
+      while(coveringNode != null && !coveringNode.getNodeType().equals("NP")){
+        coveringNode = coveringNode.getParent();
+      }
+    }else if(pos.startsWith("J")){
+      while(coveringNode != null && !coveringNode.getNodeType().equals("ADJP")){
+        coveringNode = coveringNode.getParent();
+      }
+    }
+    if(coveringNode == null) coveringNode = terms.get(0);
+    return coveringNode;    
+  }
 }



Mime
View raw message