ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dlig...@apache.org
Subject svn commit: r1581335 - /ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/Utils.java
Date Tue, 25 Mar 2014 14:14:12 GMT
Author: dligach
Date: Tue Mar 25 14:14:12 2014
New Revision: 1581335

URL: http://svn.apache.org/r1581335
Log:
Added support for a new bin ("decade")

Modified:
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/Utils.java

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/Utils.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/Utils.java?rev=1581335&r1=1581334&r2=1581335&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/Utils.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/Utils.java Tue Mar 25 14:14:12 2014
@@ -12,6 +12,8 @@ import java.net.URL;
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.List;
 import java.util.Map;
 
 import org.threeten.bp.temporal.TemporalField;
@@ -20,7 +22,9 @@ import org.threeten.bp.temporal.Temporal
 import scala.collection.immutable.Set;
 import scala.util.Try;
 
+import com.google.common.base.Joiner;
 import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Multiset;
 import com.google.common.io.LineProcessor;
 
 /**
@@ -31,49 +35,54 @@ public class Utils {
   // events and their duration distributions
   public static final String durationDistributionPath = "/Users/dima/Boston/Thyme/Duration/Data/Combined/Distribution/mimic.txt";
   
-  // time units over which we define a duration distribution
-  public static final String[] bins = {"second", "minute", "hour", "day", "week", "month", "year"};
+  // eight bins over which we define a duration distribution
+  public static final String[] bins = {"second", "minute", "hour", "day", "week", "month", "year", "decade"};
   
   /**
-   * Take the time unit from Steven Bethard's noramlizer
-   * and output a coarser time unit: {"second", "minute", "hour", "day", "week", "month", "year"}.
+   * Take the time unit from Bethard's noramlizer
+   * and output a coarser time unit, i.e. one of the eight bins
    */
   public static String makeCoarse(String timeUnit) {
     
     HashSet<String> allowableTimeUnits = new HashSet<String>(Arrays.asList(bins));
     
-    // map output of Steven Behard's normalizer to coarser time units
+    // map output of Behard's normalizer to coarser time units
     Map<String, String> mapping = ImmutableMap.<String, String>builder()
         .put("afternoon", "hour")
-        .put("decade", "year")
         .put("evening", "hour")
         .put("fall", "month")
         .put("winter", "month")
         .put("morning", "hour")
         .put("night", "hour")
-        .put("quarteryear", "year")
+        .put("quarteryear", "month")
         .put("spring", "month")
         .put("summer", "month")
         .build(); 
-    
+
+    // e.g. Years -> year
     String singularAndLowercased = timeUnit.substring(0, timeUnit.length() - 1).toLowerCase();
+
+    // is this one of the bins?
     if(allowableTimeUnits.contains(singularAndLowercased)) {
       return singularAndLowercased;
     } 
+    
+    // it's not one of the bins; can we map to to a bin?
     if(mapping.get(singularAndLowercased) != null) {
       return mapping.get(singularAndLowercased);
     }
-    
+
+    // we couldn't map it to a bin
     return null;
   }
   
   /**
-   * Compute expected duration in seconds. Normalize by number of seconds in a year.
+   * Compute expected duration in seconds. Normalize by number of seconds in a decade.
    */
   public static float expectedDuration(Map<String, Float> distribution) {
     
     // unit of time -> duration in seconds
-    final Map<String, Integer> converter = ImmutableMap.<String, Integer>builder()
+    final Map<String, Integer> timeUnitInSeconds = ImmutableMap.<String, Integer>builder()
         .put("second", 1)
         .put("minute", 60)
         .put("hour", 60 * 60)
@@ -81,16 +90,20 @@ public class Utils {
         .put("week", 60 * 60 * 24 * 7)
         .put("month", 60 * 60 * 24 * 30)
         .put("year", 60 * 60 * 24 * 365)
+        .put("decade", 60 * 60 * 24 * 365 * 10)
         .build();
 
     float expectation = 0f;
     for(String unit : distribution.keySet()) {
-      expectation = expectation + (converter.get(unit) * distribution.get(unit));
+      expectation = expectation + (timeUnitInSeconds.get(unit) * distribution.get(unit));
     }
   
-    return expectation / converter.get("year");
+    return expectation / timeUnitInSeconds.get("decade");
   }
-  
+
+  /*
+   * Use Bethard normalizer to map a temporal expression to a time unit.
+   */
   public static Set<TemporalUnit> normalize(String timex) {
 
     URL grammarURL = DurationTimeUnitFeatureExtractor.class.getResource("/info/bethard/timenorm/en.grammar");
@@ -139,6 +152,41 @@ public class Utils {
   }
   
   /**
+   * Convert duration distribution multiset to a format that's easy to parse automatically.
+   * Format: <sign/symptom>, <time bin>:<count>, ...
+   * Example: apnea, second:5, minute:1, hour:5, day:10, week:1, month:0, year:0
+   */
+  public static String formatDistribution(
+      String mentionText, 
+      Multiset<String> durationDistribution, 
+      String separator,
+      boolean normalize) {
+    
+    List<String> distribution = new LinkedList<String>();
+    distribution.add(mentionText);
+
+    double total = 0;
+    if(normalize) {
+      for(String bin : bins) {
+        total += durationDistribution.count(bin);
+      }
+    }
+    
+    for(String bin : bins) {
+      if(normalize) {
+        distribution.add(String.format("%s:%.3f", bin, durationDistribution.count(bin) / total));  
+      } else {
+        distribution.add(String.format("%s:%d", bin, durationDistribution.count(bin)));
+      }
+      
+    }
+    
+    Joiner joiner = Joiner.on(separator);
+    return joiner.join(distribution);
+  }
+
+  
+  /**
    * Read event duration distributions from file.
    */
   public static class Callback implements LineProcessor <Map<String, Map<String, Float>>> {



Mime
View raw message