ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dlig...@apache.org
Subject svn commit: r1524207 - /ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/SignSymptomDurations.java
Date Tue, 17 Sep 2013 20:17:42 GMT
Author: dligach
Date: Tue Sep 17 20:17:41 2013
New Revision: 1524207

URL: http://svn.apache.org/r1524207
Log:
now capturing abbreviations like hr, min, etc.

Modified:
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/SignSymptomDurations.java

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/SignSymptomDurations.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/SignSymptomDurations.java?rev=1524207&r1=1524206&r2=1524207&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/SignSymptomDurations.java
(original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/SignSymptomDurations.java
Tue Sep 17 20:17:41 2013
@@ -33,6 +33,7 @@ import com.google.common.base.Function;
 import com.google.common.base.Functions;
 import com.google.common.base.Joiner;
 import com.google.common.collect.HashMultiset;
+import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.Multiset;
 import com.google.common.collect.Ordering;
 
@@ -40,12 +41,28 @@ import com.google.common.collect.Orderin
  * Extract durations of signs/symptoms.
  * 
  * TODO: check drinking.txt; fewer day durations are captured than exist in data.
- * TODO: need to take care of abbreviations (e.g. wk, yr, etc.)
  * 
  * @author dmitriy dligach
  */
 public class SignSymptomDurations {
 
+  // regular expression to match temporal durations
+  public final static String REGEX = "(sec|min|hour|hr|day|week|wk|mo|year|yr)";
+  
+  // mapping between temporal durations and their normal forms
+  public final static Map<String, String> MAPPING = ImmutableMap.<String, String>builder()
+      .put("sec", "second")
+      .put("min", "minute")
+      .put("hour", "hour")
+      .put("hr", "hour")
+      .put("day", "day")
+      .put("week", "week")
+      .put("wk", "week")
+      .put("mo", "month")
+      .put("year", "year")
+      .put("yr", "year")
+      .build(); 
+
   public static class Options extends Options_ImplBase {
 
     @Option(
@@ -72,10 +89,10 @@ public class SignSymptomDurations {
   public static class DurationPrinter extends JCasAnnotator_ImplBase {
 
     // max distance between a time and an event
-    final int maxDistance = 2;
+    final int MAXDISTANCE = 2;
 
     // regex to match different time granularities
-    Pattern pattern = Pattern.compile("(second|minute|hour|day|week|month|year)", Pattern.CASE_INSENSITIVE);
+    Pattern pattern = Pattern.compile(REGEX, Pattern.CASE_INSENSITIVE);
     
     @Override
     public void process(JCas jCas) throws AnalysisEngineProcessException {
@@ -96,15 +113,17 @@ public class SignSymptomDurations {
             Matcher matcher = pattern.matcher(nearestTimeMention.getCoveredText());
 
             while(matcher.find()) {
-              durationDistribution.add(matcher.group());
+              String matchedDuration = matcher.group(); // e.g. "wk" (the part of "wks" that REGEX matches)
+              String normalizedDuration = MAPPING.get(matchedDuration);
+              durationDistribution.add(normalizedDuration);
             }
           }
         }
       }
 
       if(durationDistribution.size() > 0) { 
-        System.out.println(signSymptomText + "," + convertToString(durationDistribution));
-        // System.out.println(signSymptomText + ": " + durationDistribution);
+        // System.out.println(signSymptomText + "," + convertToString(durationDistribution));
+        System.out.println(signSymptomText + ": " + durationDistribution);
       }
     }
     



Mime
View raw message