ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dlig...@apache.org
Subject svn commit: r1584759 - in /ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal: ae/feature/duration/ duration/
Date Fri, 04 Apr 2014 15:45:35 GMT
Author: dligach
Date: Fri Apr  4 15:45:34 2014
New Revision: 1584759

URL: http://svn.apache.org/r1584759
Log:
Updates due to changes in time normalization logic

Modified:
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/DurationEventEventFeatureExtractor.java
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/DurationEventTimeFeatureExtractor.java
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/PreserveCertainEventEventRelationsInGold.java
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/PreserveCertainEventTimeRelationsInGold.java
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/Utils.java

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/DurationEventEventFeatureExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/DurationEventEventFeatureExtractor.java?rev=1584759&r1=1584758&r2=1584759&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/DurationEventEventFeatureExtractor.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/DurationEventEventFeatureExtractor.java Fri Apr  4 15:45:34 2014
@@ -42,8 +42,8 @@ public class DurationEventEventFeatureEx
 
     List<Feature> features = new ArrayList<Feature>();
 
-    String arg1text = Utils.getText(jCas, arg1);
-    String arg2text = Utils.getText(jCas, arg2);
+    String arg1text = Utils.normalizeEventText(jCas, arg1);
+    String arg2text = Utils.normalizeEventText(jCas, arg2);
     
     Float expectedDuration1;
     Float expectedDuration2;

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/DurationEventTimeFeatureExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/DurationEventTimeFeatureExtractor.java?rev=1584759&r1=1584758&r2=1584759&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/DurationEventTimeFeatureExtractor.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/DurationEventTimeFeatureExtractor.java Fri Apr  4 15:45:34 2014
@@ -21,6 +21,7 @@ package org.apache.ctakes.temporal.ae.fe
 import java.io.File;
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 
@@ -30,9 +31,6 @@ import org.apache.ctakes.typesystem.type
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.jcas.JCas;
 import org.cleartk.classifier.Feature;
-import org.threeten.bp.temporal.TemporalUnit;
-
-import scala.collection.immutable.Set;
 
 import com.google.common.base.Charsets;
 import com.google.common.io.Files;
@@ -48,7 +46,7 @@ public class DurationEventTimeFeatureExt
     
     List<Feature> features = new ArrayList<Feature>();
     
-    String eventText = arg1.getCoveredText().toLowerCase(); // arg1 is an event
+    String eventText = Utils.normalizeEventText(jCas, arg1); // arg1 is an event
     String timeText = arg2.getCoveredText().toLowerCase();  // arg2 is a time mention
 
     File durationLookup = new File(Utils.durationDistributionPath);
@@ -63,15 +61,13 @@ public class DurationEventTimeFeatureExt
     Map<String, Float> eventDistribution = textToDistribution.get(eventText);
     float eventExpectedDuration = Utils.expectedDuration(eventDistribution);
 
-    Set<TemporalUnit> units = Utils.runTimexParser(timeText);
-    scala.collection.Iterator<TemporalUnit> iterator = units.iterator();
-    while(iterator.hasNext()) {
-      TemporalUnit unit = iterator.next();
-      Map<String, Float> distribution = Utils.convertToDistribution(unit.getName());
-      float timeExpectedDuration = Utils.expectedDuration(distribution);
-      features.add(new Feature("expected_duration_difference", timeExpectedDuration - eventExpectedDuration));
-      continue; // ignore multiple time units (almost never happens)
-    } 
+    HashSet<String> timeUnits = Utils.getTimeUnits(timeText);
+    for(String timeUnit : timeUnits) {
+      Map<String, Float> timeDistribution = Utils.convertToDistribution(timeUnit);
+      float timeExpectedDuration = Utils.expectedDuration(timeDistribution);
+      features.add(new Feature("duration_difference", timeExpectedDuration - eventExpectedDuration));
+      break; // for now only use first time unit
+    }
 
     return features; 
   }

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/PreserveCertainEventEventRelationsInGold.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/PreserveCertainEventEventRelationsInGold.java?rev=1584759&r1=1584758&r2=1584759&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/PreserveCertainEventEventRelationsInGold.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/PreserveCertainEventEventRelationsInGold.java Fri Apr  4 15:45:34 2014
@@ -50,8 +50,8 @@ public class PreserveCertainEventEventRe
       String event1Text;
       String event2Text;
       if(arg1.getArgument() instanceof EventMention && arg2.getArgument() instanceof EventMention) {
-        event1Text = Utils.getText(jCas, arg1.getArgument());
-        event2Text = Utils.getText(jCas, arg2.getArgument());
+        event1Text = Utils.normalizeEventText(jCas, arg1.getArgument());
+        event2Text = Utils.normalizeEventText(jCas, arg2.getArgument());
       } else {
         // this is not an event-event relation
         continue;
@@ -69,7 +69,7 @@ public class PreserveCertainEventEventRe
 
     // remove events (that didn't participate in relations) that have no data
     for(EventMention mention : Lists.newArrayList(JCasUtil.select(goldView, EventMention.class))) {
-      String mentionText = Utils.getText(jCas, mention);
+      String mentionText = Utils.normalizeEventText(jCas, mention);
       if(textToDistribution.containsKey(mentionText)) {
         // these are the kind we keep
         continue;

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/PreserveCertainEventTimeRelationsInGold.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/PreserveCertainEventTimeRelationsInGold.java?rev=1584759&r1=1584758&r2=1584759&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/PreserveCertainEventTimeRelationsInGold.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/PreserveCertainEventTimeRelationsInGold.java Fri Apr  4 15:45:34 2014
@@ -2,9 +2,9 @@ package org.apache.ctakes.temporal.durat
 
 import java.io.File;
 import java.io.IOException;
+import java.util.HashSet;
 import java.util.Map;
 
-import org.apache.ctakes.temporal.duration.Utils.Callback;
 import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
 import org.apache.ctakes.typesystem.type.relation.RelationArgument;
 import org.apache.ctakes.typesystem.type.textsem.EventMention;
@@ -12,12 +12,9 @@ import org.apache.ctakes.typesystem.type
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.cas.CASException;
 import org.apache.uima.jcas.JCas;
-import org.threeten.bp.temporal.TemporalUnit;
 import org.uimafit.component.JCasAnnotator_ImplBase;
 import org.uimafit.util.JCasUtil;
 
-import scala.collection.immutable.Set;
-
 import com.google.common.base.Charsets;
 import com.google.common.collect.Lists;
 import com.google.common.io.Files;
@@ -36,7 +33,7 @@ public class PreserveCertainEventTimeRel
     File durationLookup = new File(Utils.durationDistributionPath);
     Map<String, Map<String, Float>> textToDistribution = null;
     try {
-      textToDistribution = Files.readLines(durationLookup, Charsets.UTF_8, new Callback());
+      textToDistribution = Files.readLines(durationLookup, Charsets.UTF_8, new Utils.Callback());
     } catch(IOException e) {
       e.printStackTrace();
       return;
@@ -58,21 +55,21 @@ public class PreserveCertainEventTimeRel
       String timeText;
       if(arg1.getArgument() instanceof TimeMention && arg2.getArgument() instanceof EventMention) {
         timeText = arg1.getArgument().getCoveredText().toLowerCase(); 
-        eventText = arg2.getArgument().getCoveredText().toLowerCase();  
+        eventText = Utils.normalizeEventText(jCas, arg2.getArgument());
       } else if(arg1.getArgument() instanceof EventMention && arg2.getArgument() instanceof TimeMention) {
-        eventText = arg1.getArgument().getCoveredText().toLowerCase(); 
+        eventText = Utils.normalizeEventText(jCas, arg1.getArgument());
         timeText = arg2.getArgument().getCoveredText().toLowerCase();  
       } else {
         // this is not a event-time relation
         continue;
       }    
 
-      Set<TemporalUnit> units = Utils.runTimexParser(timeText);
-      if(textToDistribution.containsKey(eventText) && units != null) {
+      HashSet<String> timeUnits = Utils.getTimeUnits(timeText);
+      if(textToDistribution.containsKey(eventText) && timeUnits.size() > 0) {
         // there is duration information and we are able to get time units, so keep this
         continue;
       }
-
+      
       arg1.removeFromIndexes();
       arg2.removeFromIndexes();
       relation.removeFromIndexes();
@@ -80,7 +77,8 @@ public class PreserveCertainEventTimeRel
     
     // remove events (that didn't participate in relations) that have no data
     for(EventMention mention : Lists.newArrayList(JCasUtil.select(goldView, EventMention.class))) {
-      if(textToDistribution.containsKey(mention.getCoveredText().toLowerCase())) {
+      String eventText = Utils.normalizeEventText(jCas, mention);
+      if(textToDistribution.containsKey(eventText)) {
         // these are the kind we keep
         continue;
       } 
@@ -89,10 +87,8 @@ public class PreserveCertainEventTimeRel
     
     // finally remove time expressions (that didn't participate in relations) that have no data
     for(TimeMention mention : Lists.newArrayList(JCasUtil.select(goldView, TimeMention.class))) {
-      String timeText = mention.getCoveredText().toLowerCase();
-      Set<TemporalUnit> units = Utils.runTimexParser(timeText);
-      if(units != null) {
-        // these are the kind we keep
+      HashSet<String> timeUnits = Utils.getTimeUnits(mention.getCoveredText().toLowerCase());
+      if(timeUnits.size() > 0) {
         continue;
       }
       mention.removeFromIndexes();

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/Utils.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/Utils.java?rev=1584759&r1=1584758&r2=1584759&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/Utils.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/Utils.java Fri Apr  4 15:45:34 2014
@@ -52,9 +52,9 @@ public class Utils {
   public static final String[] bins = {"second", "minute", "hour", "day", "week", "month", "year", "decade"};
   
   /**
-   * Extract time unit(s) from a temporal expression.
-   * Extracted time units should be a subset of the bins above.
-   * Return empty set if time units couldnot be extracted.
+   * Extract time unit(s) from a temporal expression 
+   * and put in one of the eight bins above.
+   * Return empty set if time units could not be extracted.
    * E.g. July 5, 1984 -> day
    */
   public static HashSet<String> getTimeUnits(String timex) {
@@ -258,12 +258,10 @@ public class Utils {
   }
   
   /**
-   * Check if the annotation is a UMLS concept. If it is, return as is.
-   * Otherwise, lemmatize this annotation if this is a verb. 
-   * Return as is if not verb.
+   * Keep UMLS concepts and non-verbs intact. Lemmatize verbs.
    * Lowercase before returning.
    */
-  public static String getText(JCas jCas, Annotation annotation) 
+  public static String normalizeEventText(JCas jCas, Annotation annotation) 
       throws AnalysisEngineProcessException {
 
     JCas systemView;



Mime
View raw message