ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dlig...@apache.org
Subject svn commit: r1584351 - in /ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal: data/analysis/SignSymptomDurations.java duration/PreserveCertainEventEventRelationsInGold.java duration/Utils.java
Date Thu, 03 Apr 2014 19:44:54 GMT
Author: dligach
Date: Thu Apr  3 19:44:53 2014
New Revision: 1584351

URL: http://svn.apache.org/r1584351
Log:
added code needed to filter out d/d, s/s, drugs, procedures, general events that have no duration
data

Removed:
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/SignSymptomDurations.java
Modified:
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/PreserveCertainEventEventRelationsInGold.java
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/Utils.java

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/PreserveCertainEventEventRelationsInGold.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/PreserveCertainEventEventRelationsInGold.java?rev=1584351&r1=1584350&r2=1584351&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/PreserveCertainEventEventRelationsInGold.java
(original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/PreserveCertainEventEventRelationsInGold.java
Thu Apr  3 19:44:53 2014
@@ -4,13 +4,13 @@ import java.io.File;
 import java.io.IOException;
 import java.util.Map;
 
-import org.apache.ctakes.temporal.duration.Utils.Callback;
 import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
 import org.apache.ctakes.typesystem.type.relation.RelationArgument;
 import org.apache.ctakes.typesystem.type.textsem.EventMention;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.cas.CASException;
 import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
 import org.uimafit.component.JCasAnnotator_ImplBase;
 import org.uimafit.util.JCasUtil;
 
@@ -22,7 +22,7 @@ import com.google.common.io.Files;
  * Preserve only those event-event relations whose both event arguments have duration data.
  */
 public class PreserveCertainEventEventRelationsInGold extends JCasAnnotator_ImplBase {  
                                            
-  
+
   public static final String GOLD_VIEW_NAME = "GoldView";
 
   @Override                                                                             
                                    
@@ -30,34 +30,34 @@ public class PreserveCertainEventEventRe
 
     Map<String, Map<String, Float>> textToDistribution = null;              
                                                  
     try {                                                                               
                                      
-      textToDistribution = Files.readLines(new File(Utils.durationDistributionPath), Charsets.UTF_8,
new Callback());                                    
+      textToDistribution = Files.readLines(new File(Utils.durationDistributionPath), Charsets.UTF_8,
new Utils.Callback());                                    
     } catch(IOException e) {                                                            
                                      
       e.printStackTrace();                                                              
                                      
       return;                                                                           
                                      
     }  
-    
+
     JCas goldView;                                                                      
                                    
     try {                                                                               
                                    
       goldView = jCas.getView(GOLD_VIEW_NAME);                                          
                                    
     } catch (CASException e) {                                                          
                                    
       throw new AnalysisEngineProcessException(e);                                      
                                    
     }                                                                                   
                                                                                         
                                                           
-    
+
     // remove relations where one or both arguments have no duration data
     for(BinaryTextRelation relation : Lists.newArrayList(JCasUtil.select(goldView, BinaryTextRelation.class)))
{            
       RelationArgument arg1 = relation.getArg1();                                       
                                     
       RelationArgument arg2 = relation.getArg2(); 
 
-      String event2Text;
       String event1Text;
+      String event2Text;
       if(arg1.getArgument() instanceof EventMention && arg2.getArgument() instanceof
EventMention) {
-        event1Text = arg1.getArgument().getCoveredText().toLowerCase();
-        event2Text = arg2.getArgument().getCoveredText().toLowerCase();
+        event1Text = getText(jCas, arg1.getArgument());
+        event2Text = getText(jCas, arg2.getArgument());
       } else {
         // this is not an event-event relation
         continue;
       }
-      
+
       if(textToDistribution.containsKey(event1Text) && textToDistribution.containsKey(event2Text))
{
         // we have duration distributions for both arguments, so keep it
         continue;
@@ -67,15 +67,53 @@ public class PreserveCertainEventEventRe
       arg2.removeFromIndexes();                                                         
                                  
       relation.removeFromIndexes();
     }
-    
+
     // remove events (that didn't participate in relations) that have no data
     for(EventMention mention : Lists.newArrayList(JCasUtil.select(goldView, EventMention.class)))
{
-      if(textToDistribution.containsKey(mention.getCoveredText().toLowerCase())) {
+      String mentionText = getText(jCas, mention);
+      if(textToDistribution.containsKey(mentionText)) {
         // these are the kind we keep
         continue;
       } 
-      
+
       mention.removeFromIndexes();
     }
-  }                                                                                     
                                    
+  }
+
+  /**
+   * Return the lowercased text of an annotation, lemmatizing it first when
+   * its POS tag starts with "V" (i.e. it is treated as a verb). Callers in
+   * this class use the result as the key into the duration distribution map.
+   *
+   * The POS tag is obtained via Utils.getPosTag from the "_InitialView" view
+   * of jCas. If no tag is available (null), the covered text is lowercased
+   * and returned without lemmatization. If the tag starts with "V", the
+   * covered text is passed to Utils.lemmatize together with the tag; for any
+   * other tag the covered text is used as is.
+   *
+   * If lemmatization throws an IOException, a message is printed to standard
+   * output, the stack trace is printed, and the lowercased covered text is
+   * returned instead; the exception is not propagated.
+   *
+   * TODO: check if there's a covering UMLS concept before lemmatizing
+   *
+   * @param jCas CAS whose "_InitialView" view is used for the POS lookup
+   * @param annotation annotation whose covered text is returned
+   * @return the lowercased lemma for verb-tagged annotations, otherwise the
+   *         lowercased covered text
+   * @throws AnalysisEngineProcessException if the "_InitialView" view cannot
+   *         be obtained from jCas
+   */
+  public static String getText(JCas jCas, Annotation annotation) 
+      throws AnalysisEngineProcessException {
+
+    JCas systemView;
+    try {
+      systemView = jCas.getView("_InitialView");
+    } catch (CASException e) {
+      throw new AnalysisEngineProcessException(e);
+    }
+
+    String pos = Utils.getPosTag(systemView, annotation);
+    if(pos == null) {
+      return annotation.getCoveredText().toLowerCase();
+    }
+
+    String text;
+    if(pos.startsWith("V")) {
+      try {
+        text = Utils.lemmatize(annotation.getCoveredText(), pos);
+      } catch (IOException e) {
+        System.out.println("couldn't lemmatize: " + annotation.getCoveredText());
+        e.printStackTrace();
+        return annotation.getCoveredText().toLowerCase();
+      }
+    } else {
+      text = annotation.getCoveredText();
+    }
+
+    return text.toLowerCase();
+  }
 }                                                                                       
                                    
\ No newline at end of file

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/Utils.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/Utils.java?rev=1584351&r1=1584350&r2=1584351&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/Utils.java
(original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/Utils.java
Thu Apr  3 19:44:53 2014
@@ -19,8 +19,12 @@ import java.util.Map;
 
 import org.apache.ctakes.core.resource.FileLocator;
 import org.apache.ctakes.temporal.ae.feature.duration.DurationEventTimeFeatureExtractor;
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
 import org.threeten.bp.temporal.TemporalField;
 import org.threeten.bp.temporal.TemporalUnit;
+import org.uimafit.util.JCasUtil;
 
 import scala.collection.immutable.Set;
 import scala.util.Try;
@@ -39,7 +43,7 @@ import com.googlecode.clearnlp.reader.Ab
 public class Utils {
 
   // events and their duration distributions
-  public static final String durationDistributionPath = "/Users/dima/Boston/Thyme/Duration/Data/Combined/Distribution/mimic.txt";
+  public static final String durationDistributionPath = "/Users/dima/Boston/Thyme/Duration/Data/Combined/Distribution/all.txt";
   
   // eight bins over which we define a duration distribution
   public static final String[] bins = {"second", "minute", "hour", "day", "week", "month",
"year", "decade"};
@@ -230,6 +234,24 @@ public class Utils {
   }
   
   /**
+   * Return system generated POS tag or null if none available.
+   *
+   * Base tokens are selected from systemView with JCasUtil.selectCovered,
+   * using annotation.getBegin() and annotation.getEnd() as the span. Only the
+   * first selected token's part of speech is returned, so for an annotation
+   * spanning several tokens the remaining tokens' tags are ignored.
+   *
+   * NOTE(review): the value of getPartOfSpeech() is returned as is; whether
+   * it can itself be null for an existing token is not visible here -- confirm.
+   *
+   * @param systemView view that is searched for BaseToken annotations
+   * @param annotation annotation whose begin/end offsets bound the lookup
+   * @return part of speech of the first selected BaseToken, or null if no
+   *         BaseToken is selected
+   */
+  public static String getPosTag(JCas systemView, Annotation annotation) {
+    
+    List<BaseToken> coveringBaseTokens = JCasUtil.selectCovered(
+        systemView,
+        BaseToken.class,
+        annotation.getBegin(),
+        annotation.getEnd());
+    
+    if(coveringBaseTokens.size() < 1) {
+      return null;
+    }
+    
+    return coveringBaseTokens.get(0).getPartOfSpeech();
+  }
+  
+  /**
    * Read event duration distributions from file.
    */
   public static class Callback implements LineProcessor <Map<String, Map<String,
Float>>> {



Mime
View raw message