ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tm...@apache.org
Subject svn commit: r1575422 - /ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/UmlsFeatureExtractor.java
Date Fri, 07 Mar 2014 21:50:03 GMT
Author: tmill
Date: Fri Mar  7 21:50:02 2014
New Revision: 1575422

URL: http://svn.apache.org/r1575422
Log:
Fix for feature extractor using umls features to look at events and entities.

Modified:
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/UmlsFeatureExtractor.java

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/UmlsFeatureExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/UmlsFeatureExtractor.java?rev=1575422&r1=1575421&r2=1575422&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/UmlsFeatureExtractor.java	(original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/UmlsFeatureExtractor.java	Fri Mar  7 21:50:02 2014
@@ -22,6 +22,7 @@ import java.util.ArrayList;
 import java.util.List;
 
 import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
+import org.apache.ctakes.typesystem.type.constants.CONST;
 import org.apache.ctakes.typesystem.type.textsem.EntityMention;
 import org.apache.ctakes.typesystem.type.textsem.EventMention;
 import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
@@ -32,6 +33,8 @@ import org.apache.uima.jcas.JCas;
 import org.cleartk.classifier.Feature;
 import org.uimafit.util.JCasUtil;
 
+import com.google.common.collect.Lists;
+
 public class UmlsFeatureExtractor implements RelationFeaturesExtractor {
 
   @Override
@@ -40,22 +43,26 @@ public class UmlsFeatureExtractor implem
 
     List<Feature> features = new ArrayList<Feature>();
     
-    JCas systemView;
-    try {
-      systemView = jCas.getView("_InitialView");
-    } catch (CASException e) {
-      throw new AnalysisEngineProcessException(e);
-    }
+    JCas systemView = jCas;
+//    try {
+//      systemView = jCas.getView("_InitialView");
+//    } catch (CASException e) {
+//      throw new AnalysisEngineProcessException(e);
+//    }
+    List<String> arg1Types = Lists.newArrayList();
+    List<String> arg2Types = Lists.newArrayList();
     
     if(arg1 instanceof EventMention) {
 //      List<EntityMention> entityMentions = JCasUtil.selectCovering(systemView, EntityMention.class, arg1.getBegin(), arg1.getEnd());
 
       CounterMap<String> typeCounts = 
-          getMentionTypes(JCasUtil.selectCovering(systemView, EntityMention.class, arg1.getBegin(), arg1.getEnd()));
+          getMentionTypes(JCasUtil.selectCovering(systemView, EventMention.class, arg1.getBegin(), arg1.getEnd()));
       
       // print out totals:
       for(String typeId : typeCounts.keySet()){
-        features.add(new Feature("arg1EntityTypeID_"+typeId, typeCounts.get(typeId)));
+        String featName = "arg1EntityTypeID_"+typeId;
+        arg1Types.add(featName);
+        features.add(new Feature(featName, typeCounts.get(typeId)));        
       }
       
       // TO print out just the types without counts:
@@ -72,20 +79,31 @@ public class UmlsFeatureExtractor implem
 
     if(arg2 instanceof EventMention){
       CounterMap<String> typeCounts = 
-          getMentionTypes(JCasUtil.selectCovering(systemView, EntityMention.class, arg2.getBegin(), arg2.getEnd()));
+          getMentionTypes(JCasUtil.selectCovering(systemView, EventMention.class, arg2.getBegin(), arg2.getEnd()));
       
       // print out totals:
       for(String typeId : typeCounts.keySet()){
-        features.add(new Feature("arg2EntityTypeID_"+typeId, typeCounts.get(typeId)));
+        String featName = "arg2EntityTypeID_"+typeId;
+        arg2Types.add(featName);
+        features.add(new Feature(featName, typeCounts.get(typeId)));        
       }      
     }
+    
+    if(arg1Types.size() == 0) arg1Types.add("arg1NotUMLS");
+    if(arg2Types.size() == 0) arg2Types.add("arg2NotUMLS");
+    for(String arg1Type : arg1Types){
+      for(String arg2Type : arg2Types){
+        features.add(new Feature("ArgPair-" + arg1Type + "_" + arg2Type));
+      }
+    }
     return features;
   }
   
-  private static CounterMap<String> getMentionTypes(List<EntityMention> entities){
+  private static CounterMap<String> getMentionTypes(List<EventMention> entities){
     CounterMap<String> typeCounts = new CounterMap<String>();
-    for(EntityMention entityMention : entities) {
-      typeCounts.add(String.valueOf(entityMention.getTypeID()));
+    for(EventMention entityMention : entities) {
+      if(entityMention.getDiscoveryTechnique() == CONST.NE_DISCOVERY_TECH_DICT_LOOKUP)
+        typeCounts.add(String.valueOf(entityMention.getTypeID()));
     }
     return typeCounts;
     



Mime
View raw message