Return-Path: X-Original-To: apmail-incubator-ctakes-commits-archive@minotaur.apache.org Delivered-To: apmail-incubator-ctakes-commits-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 23199D177 for ; Tue, 29 Jan 2013 19:23:13 +0000 (UTC) Received: (qmail 24230 invoked by uid 500); 29 Jan 2013 19:23:13 -0000 Delivered-To: apmail-incubator-ctakes-commits-archive@incubator.apache.org Received: (qmail 24199 invoked by uid 500); 29 Jan 2013 19:23:13 -0000 Mailing-List: contact ctakes-commits-help@incubator.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: ctakes-dev@incubator.apache.org Delivered-To: mailing list ctakes-commits@incubator.apache.org Received: (qmail 24171 invoked by uid 99); 29 Jan 2013 19:23:13 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 29 Jan 2013 19:23:13 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 29 Jan 2013 19:23:11 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id 43E332388978; Tue, 29 Jan 2013 19:22:52 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1440068 - in /incubator/ctakes/trunk/ctakes-temporal: pom.xml src/main/java/org/apache/ctakes/temporal/ae/EventTimeRelationAnnotator.java src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTemporalRelations.java Date: Tue, 29 Jan 2013 19:22:52 -0000 To: ctakes-commits@incubator.apache.org From: stevenbethard@apache.org X-Mailer: svnmailer-1.0.8-patched Message-Id: <20130129192252.43E332388978@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: stevenbethard Date: Tue Jan 29 19:22:51 2013 New Revision: 1440068 URL: http://svn.apache.org/viewvc?rev=1440068&view=rev Log: Adds initial draft of event-time temporal relation annotator, based on relation extraction framework Added: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventTimeRelationAnnotator.java (with props) incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTemporalRelations.java (with props) Modified: incubator/ctakes/trunk/ctakes-temporal/pom.xml Modified: incubator/ctakes/trunk/ctakes-temporal/pom.xml URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/pom.xml?rev=1440068&r1=1440067&r2=1440068&view=diff ============================================================================== --- incubator/ctakes/trunk/ctakes-temporal/pom.xml (original) +++ incubator/ctakes/trunk/ctakes-temporal/pom.xml Tue Jan 29 19:22:51 2013 @@ -96,6 +96,10 @@ ctakes-dependency-parser + org.apache.ctakes + ctakes-relation-extractor + + net.sourceforge.ctakesresources ctakes-resources-umls2011ab 3.1.0 Added: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventTimeRelationAnnotator.java URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventTimeRelationAnnotator.java?rev=1440068&view=auto ============================================================================== --- incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventTimeRelationAnnotator.java (added) +++ incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventTimeRelationAnnotator.java Tue Jan 29 19:22:51 2013 @@ -0,0 +1,72 @@ +package org.apache.ctakes.temporal.ae; + +import java.io.File; +import java.util.List; + +import org.apache.ctakes.relationextractor.ae.RelationExtractorAnnotator; +import org.apache.ctakes.relationextractor.ae.features.PartOfSpeechFeaturesExtractor; +import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor; +import org.apache.ctakes.relationextractor.ae.features.TokenFeaturesExtractor; +import org.apache.ctakes.typesystem.type.textsem.EventMention; +import org.apache.ctakes.typesystem.type.textsem.TimeMention; +import org.apache.ctakes.typesystem.type.textspan.Sentence; +import org.apache.uima.analysis_engine.AnalysisEngineDescription; +import org.apache.uima.jcas.JCas; +import org.apache.uima.resource.ResourceInitializationException; +import org.cleartk.classifier.CleartkAnnotator; +import org.cleartk.classifier.DataWriter; +import org.cleartk.classifier.jar.DefaultDataWriterFactory; +import org.cleartk.classifier.jar.DirectoryDataWriterFactory; +import org.cleartk.classifier.jar.GenericJarClassifierFactory; +import org.uimafit.factory.AnalysisEngineFactory; +import org.uimafit.util.JCasUtil; + +import com.google.common.collect.Lists; + +public class EventTimeRelationAnnotator extends RelationExtractorAnnotator { + + public static AnalysisEngineDescription createDataWriterDescription( + Class> dataWriterClass, + File outputDirectory, + double probabilityOfKeepingANegativeExample) throws ResourceInitializationException { + return AnalysisEngineFactory.createPrimitiveDescription( + EventTimeRelationAnnotator.class, + CleartkAnnotator.PARAM_IS_TRAINING, + true, + DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME, + dataWriterClass, + DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY, + outputDirectory, + RelationExtractorAnnotator.PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE, + // not sure why this has to be cast; something funny going on in uimaFIT maybe? + (float)probabilityOfKeepingANegativeExample); + } + + public static AnalysisEngineDescription createAnnotatorDescription(File modelDirectory) + throws ResourceInitializationException { + return AnalysisEngineFactory.createPrimitiveDescription( + EventTimeRelationAnnotator.class, + CleartkAnnotator.PARAM_IS_TRAINING, + false, + GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH, + new File(modelDirectory, "model.jar")); + } + + @Override + protected List getFeatureExtractors() { + return Lists.newArrayList(new TokenFeaturesExtractor(), new PartOfSpeechFeaturesExtractor()); + } + + @Override + public List getCandidateRelationArgumentPairs( + JCas jCas, + Sentence sentence) { + List pairs = Lists.newArrayList(); + for (EventMention event : JCasUtil.selectCovered(jCas, EventMention.class, sentence)) { + for (TimeMention time : JCasUtil.selectCovered(jCas, TimeMention.class, sentence)) { + pairs.add(new IdentifiedAnnotationPair(event, time)); + } + } + return pairs; + } +} Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventTimeRelationAnnotator.java ------------------------------------------------------------------------------ svn:eol-style = native Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventTimeRelationAnnotator.java ------------------------------------------------------------------------------ svn:mime-type = text/plain Added: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTemporalRelations.java URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTemporalRelations.java?rev=1440068&view=auto ============================================================================== --- incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTemporalRelations.java (added) +++ incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTemporalRelations.java Tue Jan 29 19:22:51 2013 @@ -0,0 +1,159 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.ctakes.temporal.eval; + +import java.io.File; +import java.util.Collection; +import java.util.EnumSet; +import java.util.List; + +import org.apache.ctakes.relationextractor.eval.RelationExtractorEvaluation.HashableArguments; +import org.apache.ctakes.temporal.ae.EventTimeRelationAnnotator; +import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation; +import org.apache.ctakes.typesystem.type.textsem.EventMention; +import org.apache.ctakes.typesystem.type.textsem.TimeMention; +import org.apache.uima.analysis_engine.AnalysisEngineProcessException; +import org.apache.uima.cas.CAS; +import org.apache.uima.collection.CollectionReader; +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.cas.TOP; +import org.cleartk.classifier.jar.JarClassifierBuilder; +import org.cleartk.classifier.libsvm.LIBSVMStringOutcomeDataWriter; +import org.cleartk.eval.AnnotationStatistics; +import org.uimafit.component.JCasAnnotator_ImplBase; +import org.uimafit.factory.AggregateBuilder; +import org.uimafit.factory.AnalysisEngineFactory; +import org.uimafit.pipeline.JCasIterable; +import org.uimafit.pipeline.SimplePipeline; +import org.uimafit.util.JCasUtil; + +import com.google.common.base.Function; +import com.google.common.collect.Lists; +import com.lexicalscope.jewel.cli.CliFactory; + +public class EvaluationOfTemporalRelations extends + Evaluation_ImplBase> { + + public static void main(String[] args) throws Exception { + Options options = CliFactory.parseArguments(Options.class, args); + List patientSets = options.getPatients().getList(); + List trainItems = THYMEData.getTrainPatientSets(patientSets); + List devItems = THYMEData.getDevPatientSets(patientSets); + EvaluationOfTemporalRelations evaluation = new EvaluationOfTemporalRelations( + new File("target/eval/temporal-relations"), + options.getRawTextDirectory(), + options.getKnowtatorXMLDirectory()); + AnnotationStatistics stats = evaluation.trainAndTest(trainItems, devItems); + System.err.println(stats); + } + + public EvaluationOfTemporalRelations( + File baseDirectory, + File rawTextDirectory, + File knowtatorXMLDirectory) { + super( + baseDirectory, + rawTextDirectory, + knowtatorXMLDirectory, + EnumSet.of(AnnotatorType.PART_OF_SPEECH_TAGS)); + } + + @Override + protected List> getAnnotationClassesThatShouldBeGoldAtTestTime() { + List> result = super.getAnnotationClassesThatShouldBeGoldAtTestTime(); + result.add(EventMention.class); + result.add(TimeMention.class); + return result; + } + + @Override + protected void train(CollectionReader collectionReader, File directory) throws Exception { + AggregateBuilder aggregateBuilder = new AggregateBuilder(); + aggregateBuilder.add(this.getPreprocessorTrainDescription()); + aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(RemoveNonTLINKRelations.class)); + aggregateBuilder.add(EventTimeRelationAnnotator.createDataWriterDescription( + LIBSVMStringOutcomeDataWriter.class, + directory, + 1.0)); + SimplePipeline.runPipeline(collectionReader, aggregateBuilder.createAggregate()); + JarClassifierBuilder.trainAndPackage(directory, "-c", "1000"); + } + + @Override + protected AnnotationStatistics test(CollectionReader collectionReader, File directory) + throws Exception { + AggregateBuilder aggregateBuilder = new AggregateBuilder(); + aggregateBuilder.add(this.getPreprocessorTestDescription()); + aggregateBuilder.add( + AnalysisEngineFactory.createPrimitiveDescription(RemoveNonTLINKRelations.class), + CAS.NAME_DEFAULT_SOFA, + GOLD_VIEW_NAME); + aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(RemoveRelations.class)); + aggregateBuilder.add(EventTimeRelationAnnotator.createAnnotatorDescription(directory)); + + Function getSpan = new Function() { + public HashableArguments apply(BinaryTextRelation relation) { + return new HashableArguments(relation); + } + }; + Function getOutcome = AnnotationStatistics.annotationToFeatureValue("category"); + + AnnotationStatistics stats = new AnnotationStatistics(); + for (JCas jCas : new JCasIterable(collectionReader, aggregateBuilder.createAggregate())) { + JCas goldView = jCas.getView(GOLD_VIEW_NAME); + JCas systemView = jCas.getView(CAS.NAME_DEFAULT_SOFA); + Collection goldRelations = JCasUtil.select( + goldView, + BinaryTextRelation.class); + Collection systemRelations = JCasUtil.select( + systemView, + BinaryTextRelation.class); + stats.add(goldRelations, systemRelations, getSpan, getOutcome); + } + return stats; + } + + public static class RemoveNonTLINKRelations extends JCasAnnotator_ImplBase { + @Override + public void process(JCas jCas) throws AnalysisEngineProcessException { + for (BinaryTextRelation relation : Lists.newArrayList(JCasUtil.select( + jCas, + BinaryTextRelation.class))) { + if (!relation.getCategory().startsWith("TLINK")) { + relation.getArg1().removeFromIndexes(); + relation.getArg2().removeFromIndexes(); + relation.removeFromIndexes(); + } + } + } + } + + public static class RemoveRelations extends JCasAnnotator_ImplBase { + @Override + public void process(JCas jCas) throws AnalysisEngineProcessException { + for (BinaryTextRelation relation : Lists.newArrayList(JCasUtil.select( + jCas, + BinaryTextRelation.class))) { + relation.getArg1().removeFromIndexes(); + relation.getArg2().removeFromIndexes(); + relation.removeFromIndexes(); + } + } + } +} Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTemporalRelations.java ------------------------------------------------------------------------------ svn:eol-style = native Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTemporalRelations.java ------------------------------------------------------------------------------ svn:mime-type = text/plain