ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dlig...@apache.org
Subject svn commit: r1577196 - in /ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal: ae/ ae/feature/duration/ data/analysis/
Date Thu, 13 Mar 2014 15:23:38 GMT
Author: dligach
Date: Thu Mar 13 15:23:37 2014
New Revision: 1577196

URL: http://svn.apache.org/r1577196
Log:
Moved various utility methods used by duration-related classes to one place (feature.duration.Utils)

Added:
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/Utils.java
  (with props)
Modified:
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/PreserveCertainEventTimeRelationsInGold.java
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/PreserveCertainEventsInGold.java
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/DurationDistributionFeatureExtractor.java
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/DurationEventEventFeatureExtractor.java
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/DurationEventTimeFeatureExtractor.java
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/DurationExpectationFeatureExtractor.java
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/DurationTimeUnitFeatureExtractor.java
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/PrintNormalizedTemporalExpressions.java

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/PreserveCertainEventTimeRelationsInGold.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/PreserveCertainEventTimeRelationsInGold.java?rev=1577196&r1=1577195&r2=1577196&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/PreserveCertainEventTimeRelationsInGold.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/PreserveCertainEventTimeRelationsInGold.java Thu Mar 13 15:23:37 2014
@@ -4,8 +4,8 @@ import java.io.File;
 import java.io.IOException;
 import java.util.Map;
 
-import org.apache.ctakes.temporal.ae.feature.duration.DurationDistributionFeatureExtractor.Callback;
-import org.apache.ctakes.temporal.ae.feature.duration.DurationTimeUnitFeatureExtractor;
+import org.apache.ctakes.temporal.ae.feature.duration.Utils;
+import org.apache.ctakes.temporal.ae.feature.duration.Utils.Callback;
 import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
 import org.apache.ctakes.typesystem.type.relation.RelationArgument;
 import org.apache.ctakes.typesystem.type.textsem.EventMention;
@@ -68,7 +68,7 @@ public class PreserveCertainEventTimeRel
         continue;
       }    
 
-      Set<TemporalUnit> units = DurationTimeUnitFeatureExtractor.normalize(timeText);
+      Set<TemporalUnit> units = Utils.normalize(timeText);
       if(textToDistribution.containsKey(eventText) && units != null) {
         // there is duration information and we are able to get time units, so keep this
         continue;
@@ -91,7 +91,7 @@ public class PreserveCertainEventTimeRel
     // finally remove time expressions (that didn't participate in relations) that have no data
     for(TimeMention mention : Lists.newArrayList(JCasUtil.select(goldView, TimeMention.class))) {
       String timeText = mention.getCoveredText().toLowerCase();
-      Set<TemporalUnit> units = DurationTimeUnitFeatureExtractor.normalize(timeText);
+      Set<TemporalUnit> units = Utils.normalize(timeText);
       if(units != null) {
         // these are the kind we keep
         continue;

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/PreserveCertainEventsInGold.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/PreserveCertainEventsInGold.java?rev=1577196&r1=1577195&r2=1577196&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/PreserveCertainEventsInGold.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/PreserveCertainEventsInGold.java Thu Mar 13 15:23:37 2014
@@ -4,7 +4,7 @@ import java.io.File;
 import java.io.IOException;
 import java.util.Map;
 
-import org.apache.ctakes.temporal.ae.feature.duration.DurationDistributionFeatureExtractor.Callback;
+import org.apache.ctakes.temporal.ae.feature.duration.Utils;
 import org.apache.ctakes.typesystem.type.textsem.EventMention;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.cas.CASException;
@@ -29,7 +29,7 @@ public class PreserveCertainEventsInGold
     File durationLookup = new File("/Users/Dima/Boston/Thyme/Duration/Output/Duration/distribution.txt");
                     
     Map<String, Map<String, Float>> textToDistribution = null;              
                                                  
     try {                                                                               
                                      
-      textToDistribution = Files.readLines(durationLookup, Charsets.UTF_8, new Callback());
                                   
+      textToDistribution = Files.readLines(durationLookup, Charsets.UTF_8, new Utils.Callback());
                                   
     } catch(IOException e) {                                                            
                                      
       e.printStackTrace();                                                              
                                      
       return;                                                                           
                                      

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/DurationDistributionFeatureExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/DurationDistributionFeatureExtractor.java?rev=1577196&r1=1577195&r2=1577196&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/DurationDistributionFeatureExtractor.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/DurationDistributionFeatureExtractor.java Thu Mar 13 15:23:37 2014
@@ -21,7 +21,6 @@ package org.apache.ctakes.temporal.ae.fe
 import java.io.File;
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
@@ -33,7 +32,6 @@ import org.cleartk.classifier.feature.ex
 
 import com.google.common.base.Charsets;
 import com.google.common.io.Files;
-import com.google.common.io.LineProcessor;
 
 public class DurationDistributionFeatureExtractor implements SimpleFeatureExtractor {
 
@@ -46,7 +44,7 @@ public class DurationDistributionFeature
     
     Map<String, Map<String, Float>> textToDistribution = null;
     try {
-      textToDistribution = Files.readLines(durationLookup, Charsets.UTF_8, new Callback());
+      textToDistribution = Files.readLines(durationLookup, Charsets.UTF_8, new Utils.Callback());
     } catch(IOException e) {
       e.printStackTrace();
       return features;
@@ -63,33 +61,4 @@ public class DurationDistributionFeature
     
     return features;
   }
-
-  public static class Callback implements LineProcessor <Map<String, Map<String, Float>>> {
-
-    // map event text to its duration distribution
-    private Map<String, Map<String, Float>> textToDistribution;
-    
-    public Callback() {
-      textToDistribution = new HashMap<String, Map<String, Float>>();
-    }
-    
-    public boolean processLine(String line) throws IOException {
-
-      String[] elements = line.split(", "); // e.g. pain, second:0.000, minute:0.005, hour:0.099, ...
-      Map<String, Float> distribution = new HashMap<String, Float>();
-      
-      for(int durationBinNumber = 1; durationBinNumber < elements.length; durationBinNumber++) {
-        String[] durationAndValue = elements[durationBinNumber].split(":"); // e.g. "day:0.475"
-        distribution.put(durationAndValue[0], Float.parseFloat(durationAndValue[1]));
-      }
-      
-      textToDistribution.put(elements[0], distribution);
-      return true;
-    }
-
-    public Map<String, Map<String, Float>> getResult() {
-
-      return textToDistribution;
-    }
-  }
 }
\ No newline at end of file

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/DurationEventEventFeatureExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/DurationEventEventFeatureExtractor.java?rev=1577196&r1=1577195&r2=1577196&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/DurationEventEventFeatureExtractor.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/DurationEventEventFeatureExtractor.java Thu Mar 13 15:23:37 2014
@@ -25,7 +25,6 @@ import java.util.List;
 import java.util.Map;
 
 import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
-import org.apache.ctakes.temporal.ae.feature.duration.DurationDistributionFeatureExtractor.Callback;
 import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.jcas.JCas;
@@ -47,7 +46,7 @@ public class DurationEventEventFeatureEx
     
     Map<String, Map<String, Float>> textToDistribution = null;
     try {
-      textToDistribution = Files.readLines(durationLookup, Charsets.UTF_8, new Callback());
+      textToDistribution = Files.readLines(durationLookup, Charsets.UTF_8, new Utils.Callback());
     } catch(IOException e) {
       e.printStackTrace();
       return features;

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/DurationEventTimeFeatureExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/DurationEventTimeFeatureExtractor.java?rev=1577196&r1=1577195&r2=1577196&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/DurationEventTimeFeatureExtractor.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/DurationEventTimeFeatureExtractor.java Thu Mar 13 15:23:37 2014
@@ -25,7 +25,6 @@ import java.util.List;
 import java.util.Map;
 
 import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
-import org.apache.ctakes.temporal.ae.feature.duration.DurationDistributionFeatureExtractor.Callback;
 import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.jcas.JCas;
@@ -47,7 +46,7 @@ public class DurationEventTimeFeatureExt
     
     Map<String, Map<String, Float>> textToDistribution = null;
     try {
-      textToDistribution = Files.readLines(durationLookup, Charsets.UTF_8, new Callback());
+      textToDistribution = Files.readLines(durationLookup, Charsets.UTF_8, new Utils.Callback());
     } catch(IOException e) {
       e.printStackTrace();
       return features;

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/DurationExpectationFeatureExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/DurationExpectationFeatureExtractor.java?rev=1577196&r1=1577195&r2=1577196&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/DurationExpectationFeatureExtractor.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/DurationExpectationFeatureExtractor.java Thu Mar 13 15:23:37 2014
@@ -24,7 +24,6 @@ import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
 
-import org.apache.ctakes.temporal.ae.feature.duration.DurationDistributionFeatureExtractor.Callback;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.tcas.Annotation;
 import org.cleartk.classifier.Feature;
@@ -32,7 +31,6 @@ import org.cleartk.classifier.feature.ex
 import org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor;
 
 import com.google.common.base.Charsets;
-import com.google.common.collect.ImmutableMap;
 import com.google.common.io.Files;
 
 public class DurationExpectationFeatureExtractor implements SimpleFeatureExtractor {
@@ -46,7 +44,7 @@ public class DurationExpectationFeatureE
     
     Map<String, Map<String, Float>> textToDistribution = null;
     try {
-      textToDistribution = Files.readLines(durationLookup, Charsets.UTF_8, new Callback());
+      textToDistribution = Files.readLines(durationLookup, Charsets.UTF_8, new Utils.Callback());
     } catch(IOException e) {
       e.printStackTrace();
       return features;
@@ -56,34 +54,10 @@ public class DurationExpectationFeatureE
     if(eventDistribution == null) {
       features.add(new Feature("no_duration_info"));
     } else {
-      float expectation = expectedDuration(eventDistribution);
+      float expectation = Utils.expectedDuration(eventDistribution);
       features.add(new Feature("expected_duration", expectation));
     }
     
     return features;
   }
-
-  /**
-   * Compute expected duration in seconds. Normalize by number of seconds in a year.
-   */
-  public static float expectedDuration(Map<String, Float> distribution) {
-    
-    // unit of time -> duration in seconds
-    final Map<String, Integer> converter = ImmutableMap.<String, Integer>builder()
-        .put("second", 1)
-        .put("minute", 60)
-        .put("hour", 60 * 60)
-        .put("day", 60 * 60 * 24)
-        .put("week", 60 * 60 * 24 * 7)
-        .put("month", 60 * 60 * 24 * 30)
-        .put("year", 60 * 60 * 24 * 365)
-        .build();
-
-    float expectation = 0f;
-    for(String unit : distribution.keySet()) {
-      expectation = expectation + (converter.get(unit) * distribution.get(unit));
-    }
-  
-    return expectation / converter.get("year");
-  }
 }
\ No newline at end of file

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/DurationTimeUnitFeatureExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/DurationTimeUnitFeatureExtractor.java?rev=1577196&r1=1577195&r2=1577196&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/DurationTimeUnitFeatureExtractor.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/DurationTimeUnitFeatureExtractor.java Thu Mar 13 15:23:37 2014
@@ -18,32 +18,20 @@
  */
 package org.apache.ctakes.temporal.ae.feature.duration;
 
-import info.bethard.timenorm.Period;
-import info.bethard.timenorm.PeriodSet;
-import info.bethard.timenorm.Temporal;
-import info.bethard.timenorm.TemporalExpressionParser;
-import info.bethard.timenorm.TimeSpan;
-import info.bethard.timenorm.TimeSpanSet;
-
 import java.io.File;
 import java.io.IOException;
-import java.net.URL;
 import java.util.ArrayList;
-import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
 import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
-import org.apache.ctakes.temporal.ae.feature.duration.DurationDistributionFeatureExtractor.Callback;
 import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.jcas.JCas;
 import org.cleartk.classifier.Feature;
-import org.threeten.bp.temporal.TemporalField;
 import org.threeten.bp.temporal.TemporalUnit;
 
 import scala.collection.immutable.Set;
-import scala.util.Try;
 
 import com.google.common.base.Charsets;
 import com.google.common.io.Files;
@@ -65,73 +53,25 @@ public class DurationTimeUnitFeatureExtr
     File durationLookup = new File("/Users/dima/Boston/Thyme/Duration/Output/Duration/distribution.txt");
     Map<String, Map<String, Float>> textToDistribution = null;
     try {
-      textToDistribution = Files.readLines(durationLookup, Charsets.UTF_8, new Callback());
+      textToDistribution = Files.readLines(durationLookup, Charsets.UTF_8, new Utils.Callback());
     } catch(IOException e) {
       e.printStackTrace();
       return features;
     }
 
     Map<String, Float> eventDistribution = textToDistribution.get(eventText);
-    float eventExpectedDuration = DurationExpectationFeatureExtractor.expectedDuration(eventDistribution);
+    float eventExpectedDuration = Utils.expectedDuration(eventDistribution);
 
-    Set<TemporalUnit> units = normalize(timeText);
+    Set<TemporalUnit> units = Utils.normalize(timeText);
     scala.collection.Iterator<TemporalUnit> iterator = units.iterator();
     while(iterator.hasNext()) {
       TemporalUnit unit = iterator.next();
-      Map<String, Float> distribution = convertToDistribution(unit.getName());
-      float timeExpectedDuration = DurationExpectationFeatureExtractor.expectedDuration(distribution);
+      Map<String, Float> distribution = Utils.convertToDistribution(unit.getName());
+      float timeExpectedDuration = Utils.expectedDuration(distribution);
       features.add(new Feature("expected_duration_difference", timeExpectedDuration - eventExpectedDuration));
       continue; // ignore multiple time units (almost never happens)
     } 
 
     return features; 
   }
-  
-  public static Set<TemporalUnit> normalize(String timex) {
-
-    URL grammarURL = DurationTimeUnitFeatureExtractor.class.getResource("/info/bethard/timenorm/en.grammar");
-    TemporalExpressionParser parser = new TemporalExpressionParser(grammarURL);
-    TimeSpan anchor = TimeSpan.of(2013, 12, 16);
-    Try<Temporal> result = parser.parse(timex, anchor);
-
-    Set<TemporalUnit> units = null;
-    if (result.isSuccess()) {
-      Temporal temporal = result.get();
-
-      if (temporal instanceof Period) {
-        units = ((Period) temporal).unitAmounts().keySet();
-      } else if (temporal instanceof PeriodSet) {
-        units = ((PeriodSet) temporal).period().unitAmounts().keySet();
-      } else if (temporal instanceof TimeSpan) {
-        units = ((TimeSpan) temporal).period().unitAmounts().keySet();
-      } else if (temporal instanceof TimeSpanSet) {
-        Set<TemporalField> fields = ((TimeSpanSet) temporal).fields().keySet();
-        units = null; // fill units by calling .getBaseUnit() on each field
-      }
-    }
-    
-    return units;
-  }
-  
-  /**
-   * Take a time unit and return a probability distribution
-   * in which p(this time unit) = 1 and all others are zero.
-   */
-  public static Map<String, Float> convertToDistribution(String timeUnit) {
-    
-    String[] bins = {"second", "minute", "hour", "day", "week", "month", "year"};
-    Map<String, Float> distribution = new HashMap<String, Float>();
-    
-    for(String bin: bins) {
-      // convert things like "Hours" to "hour"
-      String normalized = timeUnit.substring(0, timeUnit.length() - 1).toLowerCase(); 
-      if(bin.equals(normalized)) {
-        distribution.put(bin, 1.0f);
-      } else {
-        distribution.put(bin, 0.0f);
-      }
-    }
-    
-    return distribution;
-  }
 }

Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/Utils.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/Utils.java?rev=1577196&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/Utils.java (added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/Utils.java Thu Mar 13 15:23:37 2014
@@ -0,0 +1,132 @@
+package org.apache.ctakes.temporal.ae.feature.duration;
+
+import info.bethard.timenorm.Period;
+import info.bethard.timenorm.PeriodSet;
+import info.bethard.timenorm.Temporal;
+import info.bethard.timenorm.TemporalExpressionParser;
+import info.bethard.timenorm.TimeSpan;
+import info.bethard.timenorm.TimeSpanSet;
+
+import java.io.IOException;
+import java.net.URL;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.threeten.bp.temporal.TemporalField;
+import org.threeten.bp.temporal.TemporalUnit;
+
+import scala.collection.immutable.Set;
+import scala.util.Try;
+
+import com.google.common.collect.ImmutableMap;
+import com.google.common.io.LineProcessor;
+
+/**
+ * Various useful classes and methods for evaluating event duration data.
+ */
+public class Utils {
+
+  /**
+   * Compute expected duration in seconds. Normalize by number of seconds in a year.
+   */
+  public static float expectedDuration(Map<String, Float> distribution) {
+    
+    // unit of time -> duration in seconds
+    final Map<String, Integer> converter = ImmutableMap.<String, Integer>builder()
+        .put("second", 1)
+        .put("minute", 60)
+        .put("hour", 60 * 60)
+        .put("day", 60 * 60 * 24)
+        .put("week", 60 * 60 * 24 * 7)
+        .put("month", 60 * 60 * 24 * 30)
+        .put("year", 60 * 60 * 24 * 365)
+        .build();
+
+    float expectation = 0f;
+    for(String unit : distribution.keySet()) {
+      expectation = expectation + (converter.get(unit) * distribution.get(unit));
+    }
+  
+    return expectation / converter.get("year");
+  }
+  
+  public static Set<TemporalUnit> normalize(String timex) {
+
+    URL grammarURL = DurationTimeUnitFeatureExtractor.class.getResource("/info/bethard/timenorm/en.grammar");
+    TemporalExpressionParser parser = new TemporalExpressionParser(grammarURL);
+    TimeSpan anchor = TimeSpan.of(2013, 12, 16);
+    Try<Temporal> result = parser.parse(timex, anchor);
+
+    Set<TemporalUnit> units = null;
+    if (result.isSuccess()) {
+      Temporal temporal = result.get();
+
+      if (temporal instanceof Period) {
+        units = ((Period) temporal).unitAmounts().keySet();
+      } else if (temporal instanceof PeriodSet) {
+        units = ((PeriodSet) temporal).period().unitAmounts().keySet();
+      } else if (temporal instanceof TimeSpan) {
+        units = ((TimeSpan) temporal).period().unitAmounts().keySet();
+      } else if (temporal instanceof TimeSpanSet) {
+        Set<TemporalField> fields = ((TimeSpanSet) temporal).fields().keySet();
+        units = null; // fill units by calling .getBaseUnit() on each field
+      }
+    }
+    
+    return units;
+  }
+  
+  /**
+   * Take a time unit and return a probability distribution
+   * in which p(this time unit) = 1 and all others are zero.
+   */
+  public static Map<String, Float> convertToDistribution(String timeUnit) {
+    
+    String[] bins = {"second", "minute", "hour", "day", "week", "month", "year"};
+    Map<String, Float> distribution = new HashMap<String, Float>();
+    
+    for(String bin: bins) {
+      // convert things like "Hours" to "hour"
+      String normalized = timeUnit.substring(0, timeUnit.length() - 1).toLowerCase(); 
+      if(bin.equals(normalized)) {
+        distribution.put(bin, 1.0f);
+      } else {
+        distribution.put(bin, 0.0f);
+      }
+    }
+    
+    return distribution;
+  }
+  
+  /**
+   * Read event duration distributions from file.
+   */
+  public static class Callback implements LineProcessor <Map<String, Map<String, Float>>> {
+
+    // map event text to its duration distribution
+    private Map<String, Map<String, Float>> textToDistribution;
+
+    public Callback() {
+      textToDistribution = new HashMap<String, Map<String, Float>>();
+    }
+
+    public boolean processLine(String line) throws IOException {
+
+      String[] elements = line.split(", "); // e.g. pain, second:0.000, minute:0.005, hour:0.099, ...
+      Map<String, Float> distribution = new HashMap<String, Float>();
+
+      for(int durationBinNumber = 1; durationBinNumber < elements.length; durationBinNumber++) {
+        String[] durationAndValue = elements[durationBinNumber].split(":"); // e.g. "day:0.475"
+        distribution.put(durationAndValue[0], Float.parseFloat(durationAndValue[1]));
+      }
+
+      textToDistribution.put(elements[0], distribution);
+      return true;
+    }
+
+    public Map<String, Map<String, Float>> getResult() {
+
+      return textToDistribution;
+    }
+  }
+}

Propchange: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/duration/Utils.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/PrintNormalizedTemporalExpressions.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/PrintNormalizedTemporalExpressions.java?rev=1577196&r1=1577195&r2=1577196&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/PrintNormalizedTemporalExpressions.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/PrintNormalizedTemporalExpressions.java Thu Mar 13 15:23:37 2014
@@ -1,6 +1,6 @@
 package org.apache.ctakes.temporal.data.analysis;
 
-import org.apache.ctakes.temporal.ae.feature.duration.DurationTimeUnitFeatureExtractor;
+import org.apache.ctakes.temporal.ae.feature.duration.Utils;
 import org.apache.ctakes.typesystem.type.textsem.TimeMention;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.cas.CASException;
@@ -33,7 +33,7 @@ public class PrintNormalizedTemporalExpr
     
     for(TimeMention mention : Lists.newArrayList(JCasUtil.select(goldView, TimeMention.class))) {
       String timex = mention.getCoveredText().toLowerCase();
-      Set<TemporalUnit> units = DurationTimeUnitFeatureExtractor.normalize(timex);
+      Set<TemporalUnit> units = Utils.normalize(timex);
 
       if(units == null) {
         System.out.println(timex + "|" + "n/a");



Mime
View raw message