Return-Path: X-Original-To: apmail-ctakes-commits-archive@www.apache.org Delivered-To: apmail-ctakes-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 99DD0109F7 for ; Wed, 2 Apr 2014 15:21:20 +0000 (UTC) Received: (qmail 60667 invoked by uid 500); 2 Apr 2014 15:21:15 -0000 Delivered-To: apmail-ctakes-commits-archive@ctakes.apache.org Received: (qmail 60282 invoked by uid 500); 2 Apr 2014 15:21:06 -0000 Mailing-List: contact commits-help@ctakes.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@ctakes.apache.org Delivered-To: mailing list commits@ctakes.apache.org Received: (qmail 60232 invoked by uid 99); 2 Apr 2014 15:21:01 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 02 Apr 2014 15:21:01 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 02 Apr 2014 15:20:59 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id 53BDA238883D; Wed, 2 Apr 2014 15:20:39 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1584068 - in /ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal: data/analysis/EventDurationDistribution.java duration/EventDurationDistribution.java Date: Wed, 02 Apr 2014 15:20:39 -0000 To: commits@ctakes.apache.org From: dligach@apache.org X-Mailer: svnmailer-1.0.9 Message-Id: <20140402152039.53BDA238883D@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: dligach Date: Wed Apr 2 15:20:38 2014 New Revision: 1584068 URL: http://svn.apache.org/r1584068 Log: now outputting event duration distributions to a file Removed: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/EventDurationDistribution.java Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/EventDurationDistribution.java Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/EventDurationDistribution.java URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/EventDurationDistribution.java?rev=1584068&r1=1584067&r2=1584068&view=diff ============================================================================== --- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/EventDurationDistribution.java (original) +++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/duration/EventDurationDistribution.java Wed Apr 2 15:20:38 2014 @@ -1,6 +1,7 @@ package org.apache.ctakes.temporal.duration; import java.io.File; +import java.io.IOException; import java.util.Arrays; import java.util.Collection; import java.util.Collections; @@ -18,23 +19,28 @@ import org.apache.ctakes.typesystem.type import org.apache.ctakes.typesystem.type.textsem.EventMention; import org.apache.ctakes.typesystem.type.textsem.MedicationMention; import org.apache.ctakes.typesystem.type.textsem.TimeMention; +import org.apache.uima.UimaContext; import org.apache.uima.analysis_engine.AnalysisEngine; import org.apache.uima.analysis_engine.AnalysisEngineProcessException; import org.apache.uima.collection.CollectionReader; import org.apache.uima.jcas.JCas; import org.apache.uima.jcas.tcas.Annotation; +import org.apache.uima.resource.ResourceInitializationException; import org.kohsuke.args4j.CmdLineParser; import org.kohsuke.args4j.Option; import org.uimafit.component.JCasAnnotator_ImplBase; +import org.uimafit.descriptor.ConfigurationParameter; import org.uimafit.factory.AnalysisEngineFactory; import org.uimafit.factory.CollectionReaderFactory; import org.uimafit.pipeline.SimplePipeline; import org.uimafit.util.JCasUtil; +import com.google.common.base.Charsets; import com.google.common.base.Joiner; import com.google.common.collect.HashMultiset; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Multiset; +import com.google.common.io.Files; /** * Extract durations of event mentions (e.g. sign/symptom or disease/disorder). @@ -46,12 +52,17 @@ public class EventDurationDistribution { private static Class targetClass = MedicationMention.class; public static class Options { - @Option( name = "--input-dir", usage = "specify the path to the directory containing the xmi files", required = true) public File inputDirectory; + + @Option( + name = "--output-file", + usage = "specify the path to the output file", + required = true) + public String outputFile; } public static void main(String[] args) throws Exception { @@ -65,13 +76,22 @@ public class EventDurationDistribution { CollectionReader collectionReader = getCollectionReader(trainFiles); AnalysisEngine temporalDurationExtractor = AnalysisEngineFactory.createPrimitive( - TemporalDurationExtractor.class); + TemporalDurationExtractor.class, + "OutputFile", + options.outputFile); SimplePipeline.runPipeline(collectionReader, temporalDurationExtractor); } public static class TemporalDurationExtractor extends JCasAnnotator_ImplBase { + @ConfigurationParameter( + name = "OutputFile", + mandatory = true, + description = "path to the output file that will store the distributions") + private String outputFilePath; + private File outputFile; + // regular expression to match temporal durations in time mention annotations private final static String regex = "(sec|min|hour|hrs|day|week|wk|month|year|yr|decade)"; @@ -97,6 +117,17 @@ public class EventDurationDistribution { Pattern pattern = Pattern.compile(regex, Pattern.CASE_INSENSITIVE); @Override + public void initialize(UimaContext context) throws ResourceInitializationException { + super.initialize(context); + outputFile = new File(outputFilePath); + if(outputFile.exists()) { + System.out.println(outputFile + " exists... deleting..."); + outputFile.delete(); + } + } + + + @Override public void process(JCas jCas) throws AnalysisEngineProcessException { Collection ids = JCasUtil.select(jCas, DocumentID.class); @@ -137,7 +168,11 @@ public class EventDurationDistribution { } if(durationDistribution.size() > 0) { - System.out.println(Utils.formatDistribution(mentionText, durationDistribution, ", ", false)); + try { + Files.append(Utils.formatDistribution(mentionText, durationDistribution, ", ", false) + "\n", outputFile, Charsets.UTF_8); + } catch (IOException e) { + System.out.println("Could not open output file: " + outputFile); + } } }