Return-Path: X-Original-To: apmail-ctakes-commits-archive@www.apache.org Delivered-To: apmail-ctakes-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 09A641004B for ; Tue, 25 Feb 2014 16:14:14 +0000 (UTC) Received: (qmail 71558 invoked by uid 500); 25 Feb 2014 16:14:13 -0000 Delivered-To: apmail-ctakes-commits-archive@ctakes.apache.org Received: (qmail 71519 invoked by uid 500); 25 Feb 2014 16:14:10 -0000 Mailing-List: contact commits-help@ctakes.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@ctakes.apache.org Delivered-To: mailing list commits@ctakes.apache.org Received: (qmail 71512 invoked by uid 99); 25 Feb 2014 16:14:08 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 25 Feb 2014 16:14:08 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 25 Feb 2014 16:14:05 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id 6F8EA23888FE; Tue, 25 Feb 2014 16:13:22 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1571722 - /ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/EventDurationDistribution.java Date: Tue, 25 Feb 2014 16:13:22 -0000 To: commits@ctakes.apache.org From: dligach@apache.org X-Mailer: svnmailer-1.0.9 Message-Id: <20140225161322.6F8EA23888FE@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: dligach Date: Tue Feb 25 16:13:22 2014 New Revision: 1571722 URL: http://svn.apache.org/r1571722 Log: refactored so that it can be now used for sign/symptoms as well as disease/disorders (and other event mentions) Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/EventDurationDistribution.java - copied, changed from r1571720, ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/SignSymptomDurations.java Copied: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/EventDurationDistribution.java (from r1571720, ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/SignSymptomDurations.java) URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/EventDurationDistribution.java?p2=ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/EventDurationDistribution.java&p1=ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/SignSymptomDurations.java&r1=1571720&r2=1571722&rev=1571722&view=diff ============================================================================== --- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/SignSymptomDurations.java (original) +++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/EventDurationDistribution.java Tue Feb 25 16:13:22 2014 @@ -14,6 +14,7 @@ import java.util.regex.Pattern; import org.apache.ctakes.relationextractor.eval.XMIReader; import org.apache.ctakes.typesystem.type.structured.DocumentID; import org.apache.ctakes.typesystem.type.syntax.BaseToken; +import org.apache.ctakes.typesystem.type.textsem.EventMention; import org.apache.ctakes.typesystem.type.textsem.SignSymptomMention; import org.apache.ctakes.typesystem.type.textsem.TimeMention; import org.apache.uima.analysis_engine.AnalysisEngine; @@ -35,12 +36,14 @@ import com.google.common.collect.Immutab import com.google.common.collect.Multiset; /** - * Extract durations of signs/symptoms. + * Extract durations of event mentions (e.g. sign/symptom or disease/disorder). * * @author dmitriy dligach */ -public class SignSymptomDurations { +public class EventDurationDistribution { + private static Class targetClass = SignSymptomMention.class; + public static class Options { @Option( @@ -108,18 +111,18 @@ public class SignSymptomDurations { Collection ids = JCasUtil.select(jCas, DocumentID.class); String fileName = ids.iterator().next().getDocumentID(); - String signSymptomText = fileName.split("\\.")[0]; // e.g. "smoker.txt" + String mentionText = fileName.split("\\.")[0]; // e.g. "smoker.txt" // counts of different time granularities for this sign/symptom Multiset durationDistribution = HashMultiset.create(); - for(SignSymptomMention signSymptomMention : JCasUtil.select(jCas, SignSymptomMention.class)) { - if(signSymptomMention.getCoveredText().equals(signSymptomText)) { - if(isNegated(jCas, signSymptomMention) || isMedicationPattern(jCas, signSymptomMention)) { + for(EventMention mention : JCasUtil.select(jCas, targetClass)) { + if(mention.getCoveredText().equals(mentionText)) { + if(isNegated(jCas, mention) || isMedicationPattern(jCas, mention)) { continue; } - TimeMention nearestTimeMention = getNearestTimeMention(jCas, signSymptomMention); + TimeMention nearestTimeMention = getNearestTimeMention(jCas, mention); if(nearestTimeMention != null) { Matcher matcher = pattern.matcher(nearestTimeMention.getCoveredText()); @@ -134,9 +137,9 @@ public class SignSymptomDurations { } if(durationDistribution.size() > 0) { - System.out.println(formatDistribution(signSymptomText, durationDistribution, ", ", true) + "[" + durationDistribution.size() + " instances]"); + System.out.println(formatDistribution(mentionText, durationDistribution, ", ", true) + "[" + durationDistribution.size() + " instances]"); }else{ - System.out.println(signSymptomText + ": No duration information found."); + System.out.println(mentionText + ": No duration information found."); } } @@ -144,9 +147,9 @@ public class SignSymptomDurations { * Return true if sign/symptom is negated. * TODO: using rules for now; switch to using a negation module */ - private static boolean isNegated(JCas jCas, SignSymptomMention signSymptomMention) { + private static boolean isNegated(JCas jCas, EventMention mention) { - for(BaseToken token : JCasUtil.selectPreceding(jCas, BaseToken.class, signSymptomMention, 3)) { + for(BaseToken token : JCasUtil.selectPreceding(jCas, BaseToken.class, mention, 3)) { if(token.getCoveredText().equals("no")) { return true; } @@ -159,9 +162,9 @@ public class SignSymptomDurations { * Return true of this is a medication pattern. * E.g. five (5) ml po qid (4 times a day) as needed for heartburn for 2 weeks. */ - private static boolean isMedicationPattern(JCas jCas, SignSymptomMention signSymptomMention) { + private static boolean isMedicationPattern(JCas jCas, EventMention mention) { - for(BaseToken token : JCasUtil.selectPreceding(jCas, BaseToken.class, signSymptomMention, 1)) { + for(BaseToken token : JCasUtil.selectPreceding(jCas, BaseToken.class, mention, 1)) { if(token.getCoveredText().equals("for")) { return true; } @@ -174,9 +177,9 @@ public class SignSymptomDurations { * Find nearest time mention that is within allowable distance. * Return null if none found. */ - private static TimeMention getNearestTimeMention(JCas jCas, SignSymptomMention signSymptomMention) { + private static TimeMention getNearestTimeMention(JCas jCas, EventMention mention) { - List timeMentions = JCasUtil.selectFollowing(jCas, TimeMention.class, signSymptomMention, 1); + List timeMentions = JCasUtil.selectFollowing(jCas, TimeMention.class, mention, 1); if(timeMentions.size() < 1) { return null; } @@ -184,7 +187,7 @@ public class SignSymptomDurations { assert timeMentions.size() == 1; TimeMention nearestTimeMention = timeMentions.get(0); - int distance = JCasUtil.selectBetween(jCas, BaseToken.class, signSymptomMention, nearestTimeMention).size(); + int distance = JCasUtil.selectBetween(jCas, BaseToken.class, mention, nearestTimeMention).size(); if(distance > MAXDISTANCE) { return null; } @@ -222,13 +225,13 @@ public class SignSymptomDurations { * Example: apnea, second:5, minute:1, hour:5, day:10, week:1, month:0, year:0 */ private static String formatDistribution( - String signSymptomText, + String mentionText, Multiset durationDistribution, String separator, boolean normalize) { List distribution = new LinkedList(); - distribution.add(signSymptomText); + distribution.add(mentionText); double total = 0; if(normalize) {