Return-Path: X-Original-To: apmail-ctakes-commits-archive@www.apache.org Delivered-To: apmail-ctakes-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id AA54BC994 for ; Thu, 27 Jun 2013 15:29:26 +0000 (UTC) Received: (qmail 32369 invoked by uid 500); 27 Jun 2013 15:29:26 -0000 Delivered-To: apmail-ctakes-commits-archive@ctakes.apache.org Received: (qmail 32308 invoked by uid 500); 27 Jun 2013 15:29:24 -0000 Mailing-List: contact commits-help@ctakes.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@ctakes.apache.org Delivered-To: mailing list commits@ctakes.apache.org Received: (qmail 31817 invoked by uid 99); 27 Jun 2013 15:29:22 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 27 Jun 2013 15:29:22 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 27 Jun 2013 15:29:20 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id F333F23889EC; Thu, 27 Jun 2013 15:29:00 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1497397 - in /ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal: ae/ eval/ Date: Thu, 27 Jun 2013 15:29:00 -0000 To: commits@ctakes.apache.org From: tmill@apache.org X-Mailer: svnmailer-1.0.8-patched Message-Id: <20130627152900.F333F23889EC@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: tmill Date: Thu Jun 27 15:29:00 2013 New Revision: 1497397 URL: http://svn.apache.org/r1497397 Log: Checked in changes to allow for meta-timex classifier. 
Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/BackwardsTimeAnnotator.java ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/CRFTimeAnnotator.java ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ConstituencyBasedTimeAnnotator.java ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TimeAnnotator.java ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfAnnotationSpans_ImplBase.java ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTimeSpans.java ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/BackwardsTimeAnnotator.java URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/BackwardsTimeAnnotator.java?rev=1497397&r1=1497396&r2=1497397&view=diff ============================================================================== --- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/BackwardsTimeAnnotator.java (original) +++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/BackwardsTimeAnnotator.java Thu Jun 27 15:29:00 2013 @@ -1,5 +1,6 @@ package org.apache.ctakes.temporal.ae; +import java.io.File; import java.util.ArrayList; import java.util.List; @@ -10,9 +11,13 @@ import org.apache.ctakes.typesystem.type import org.apache.ctakes.typesystem.type.textspan.Segment; import org.apache.ctakes.typesystem.type.textspan.Sentence; import org.apache.uima.UimaContext; +import org.apache.uima.analysis_engine.AnalysisEngineDescription; import org.apache.uima.analysis_engine.AnalysisEngineProcessException; +import org.apache.uima.cas.CASException; import org.apache.uima.jcas.JCas; import org.apache.uima.resource.ResourceInitializationException; +import 
org.cleartk.classifier.CleartkAnnotator; +import org.cleartk.classifier.DataWriter; import org.cleartk.classifier.Feature; import org.cleartk.classifier.Instance; import org.cleartk.classifier.chunking.BIOChunking; @@ -25,12 +30,40 @@ import org.cleartk.classifier.feature.ex import org.cleartk.classifier.feature.extractor.simple.CoveredTextExtractor; import org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor; import org.cleartk.classifier.feature.extractor.simple.TypePathExtractor; +import org.cleartk.classifier.jar.DefaultDataWriterFactory; +import org.cleartk.classifier.jar.DirectoryDataWriterFactory; +import org.cleartk.classifier.jar.GenericJarClassifierFactory; +import org.uimafit.factory.AnalysisEngineFactory; import org.uimafit.util.JCasUtil; import com.google.common.collect.Lists; -public class BackwardsTimeAnnotator extends TemporalEntityAnnotator_ImplBase{ +public class BackwardsTimeAnnotator extends TemporalEntityAnnotator_ImplBase { + + public static AnalysisEngineDescription createDataWriterDescription( + Class> dataWriterClass, File outputDirectory) + throws ResourceInitializationException { + return AnalysisEngineFactory.createPrimitiveDescription( + BackwardsTimeAnnotator.class, + CleartkAnnotator.PARAM_IS_TRAINING, + true, + DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME, + dataWriterClass, + DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY, + outputDirectory); + } + + public static AnalysisEngineDescription createAnnotatorDescription( + File modelDirectory) throws ResourceInitializationException { + return AnalysisEngineFactory.createPrimitiveDescription( + BackwardsTimeAnnotator.class, + CleartkAnnotator.PARAM_IS_TRAINING, + false, + GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH, + new File(modelDirectory, "model.jar")); + } + protected List tokenFeatureExtractors; @@ -141,9 +174,14 @@ public class BackwardsTimeAnnotator exte if (!this.isTraining()) { tokens = Lists.reverse(tokens); outcomes = 
Lists.reverse(outcomes); - this.timeChunking.createChunks(jCas, tokens, outcomes); + JCas timexCas; + try{ + timexCas = jCas.getView(TimeAnnotator.TIMEX_VIEW); + }catch(CASException e){ + throw new AnalysisEngineProcessException(e); + } + this.timeChunking.createChunks(timexCas, tokens, outcomes); } } } - } Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/CRFTimeAnnotator.java URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/CRFTimeAnnotator.java?rev=1497397&r1=1497396&r2=1497397&view=diff ============================================================================== --- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/CRFTimeAnnotator.java (original) +++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/CRFTimeAnnotator.java Thu Jun 27 15:29:00 2013 @@ -4,17 +4,16 @@ import java.io.File; import java.util.ArrayList; import java.util.List; -import org.apache.ctakes.constituency.parser.util.AnnotationTreeUtils; import org.apache.ctakes.temporal.ae.feature.ParseSpanFeatureExtractor; import org.apache.ctakes.temporal.ae.feature.TimeWordTypeExtractor; import org.apache.ctakes.typesystem.type.syntax.BaseToken; -import org.apache.ctakes.typesystem.type.syntax.TreebankNode; import org.apache.ctakes.typesystem.type.textsem.TimeMention; import org.apache.ctakes.typesystem.type.textspan.Segment; import org.apache.ctakes.typesystem.type.textspan.Sentence; import org.apache.uima.UimaContext; import org.apache.uima.analysis_engine.AnalysisEngineDescription; import org.apache.uima.analysis_engine.AnalysisEngineProcessException; +import org.apache.uima.cas.CASException; import org.apache.uima.jcas.JCas; import org.apache.uima.resource.ResourceInitializationException; import org.cleartk.classifier.CleartkAnnotator; @@ -25,12 +24,10 @@ import org.cleartk.classifier.chunking.B import org.cleartk.classifier.feature.extractor.CleartkExtractor; 
import org.cleartk.classifier.feature.extractor.CleartkExtractor.Following; import org.cleartk.classifier.feature.extractor.CleartkExtractor.Preceding; -import org.cleartk.classifier.feature.extractor.simple.CharacterCategoryPatternExtractor; import org.cleartk.classifier.feature.extractor.simple.CombinedExtractor; import org.cleartk.classifier.feature.extractor.simple.CoveredTextExtractor; import org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor; import org.cleartk.classifier.feature.extractor.simple.TypePathExtractor; -import org.cleartk.classifier.feature.extractor.simple.CharacterCategoryPatternExtractor.PatternType; import org.cleartk.classifier.jar.DefaultDataWriterFactory; import org.cleartk.classifier.jar.DirectoryDataWriterFactory; import org.cleartk.classifier.jar.GenericJarClassifierFactory; @@ -119,10 +116,10 @@ public class CRFTimeAnnotator extends Te } // extract features for all tokens - int tokenIndex = -1; +// int tokenIndex = -1; List> allFeatures = new ArrayList>(); for (BaseToken token : tokens) { - ++tokenIndex; +// ++tokenIndex; List features = new ArrayList(); // features from token attributes @@ -153,7 +150,7 @@ public class CRFTimeAnnotator extends Te // } // startToken = tokens.get(i); // } - TreebankNode preTerm = AnnotationTreeUtils.annotationNode(jCas, token); +// TreebankNode preTerm = AnnotationTreeUtils.annotationNode(jCas, token); features.addAll(parseExtractor.extract(jCas, token.getBegin(), token.getEnd())); //if(preTerm != null && preTerm.getParent() != null){ // features.addAll(parseExtractor.extract(jCas, preTerm.getParent().getBegin(), preTerm.getParent().getEnd())); @@ -176,7 +173,13 @@ public class CRFTimeAnnotator extends Te }else{ // outcomes.add(this.classifier.classify(features)); outcomes = this.classifier.classify(allFeatures); - this.timeChunking.createChunks(jCas, tokens, outcomes); + JCas timexCas; + try { + timexCas = jCas.getView(TimeAnnotator.TIMEX_VIEW); + } catch (CASException e) { + throw 
new AnalysisEngineProcessException(e); + } + this.timeChunking.createChunks(timexCas, tokens, outcomes); } } } Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ConstituencyBasedTimeAnnotator.java URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ConstituencyBasedTimeAnnotator.java?rev=1497397&r1=1497396&r2=1497397&view=diff ============================================================================== --- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ConstituencyBasedTimeAnnotator.java (original) +++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ConstituencyBasedTimeAnnotator.java Thu Jun 27 15:29:00 2013 @@ -15,9 +15,11 @@ import org.apache.ctakes.typesystem.type import org.apache.ctakes.typesystem.type.syntax.TreebankNode; import org.apache.ctakes.typesystem.type.textsem.TimeMention; import org.apache.ctakes.typesystem.type.textspan.Segment; +import org.apache.log4j.Logger; import org.apache.uima.UimaContext; import org.apache.uima.analysis_engine.AnalysisEngineDescription; import org.apache.uima.analysis_engine.AnalysisEngineProcessException; +import org.apache.uima.cas.CASException; import org.apache.uima.jcas.JCas; import org.apache.uima.resource.ResourceInitializationException; import org.cleartk.classifier.CleartkAnnotator; @@ -25,7 +27,8 @@ import org.cleartk.classifier.DataWriter import org.cleartk.classifier.Feature; import org.cleartk.classifier.Instance; import org.cleartk.classifier.feature.extractor.CleartkExtractor; -import static org.cleartk.classifier.feature.extractor.CleartkExtractor.*; +import org.cleartk.classifier.feature.extractor.CleartkExtractor.Bag; +import org.cleartk.classifier.feature.extractor.CleartkExtractor.Covered; import org.cleartk.classifier.feature.extractor.simple.CharacterCategoryPatternExtractor; import 
org.cleartk.classifier.feature.extractor.simple.CharacterCategoryPatternExtractor.PatternType; import org.cleartk.classifier.feature.extractor.simple.CombinedExtractor; @@ -48,9 +51,9 @@ TemporalEntityAnnotator_ImplBase { private static final String NON_MENTION = "NON_TIME_MENTION"; private static final String MENTION = "TIME_MENTION"; + private static Logger logger = Logger.getLogger(ConstituencyBasedTimeAnnotator.class); private static final int SPAN_LIMIT = 12; - public static AnalysisEngineDescription createDataWriterDescription( Class> dataWriterClass, File outputDirectory) throws ResourceInitializationException { @@ -82,13 +85,14 @@ TemporalEntityAnnotator_ImplBase { private Map wordTypes; - private Set timeWords; - @Override public void initialize(UimaContext context) throws ResourceInitializationException { super.initialize(context); + CombinedExtractor charExtractors = new CombinedExtractor(new CharacterCategoryPatternExtractor(PatternType.REPEATS_MERGED), + new CharacterCategoryPatternExtractor(PatternType.ONE_PER_CHAR)); + this.wordTypes = Maps.newHashMap(); URL url = TimeWordsExtractor.class.getResource(LOOKUP_PATH); try { @@ -102,18 +106,17 @@ TemporalEntityAnnotator_ImplBase { } catch (IOException e) { throw new ResourceInitializationException(e); } - this.timeWords = this.wordTypes.keySet(); CombinedExtractor allExtractors = new CombinedExtractor( new CoveredTextExtractor(), // new TimeWordTypeExtractor(), - new CharacterCategoryPatternExtractor(PatternType.REPEATS_MERGED), - new CharacterCategoryPatternExtractor(PatternType.ONE_PER_CHAR), + charExtractors, new TypePathExtractor(BaseToken.class, "partOfSpeech")); - + featureExtractors = new ArrayList(); // featureExtractors.add(new CleartkExtractor(BaseToken.class, new CoveredTextExtractor(), new Bag(new Covered()))); featureExtractors.add(new CleartkExtractor(BaseToken.class, allExtractors, new Bag(new Covered()))); +// featureExtractors.add(charExtractors); wordTypeExtractor = new 
CleartkExtractor(BaseToken.class, new TimeWordTypeExtractor(), new Bag(new Covered())); // featureExtractors.add(new CleartkExtractor(BaseToken.class, new CoveredTextExtractor(), new Bag(new Preceding(1)))); // featureExtractors.add(new CleartkExtractor(BaseToken.class, new CoveredTextExtractor(), new Bag(new Following(1)))); @@ -137,13 +140,14 @@ TemporalEntityAnnotator_ImplBase { // } for(TopTreebankNode root : JCasUtil.selectCovered(TopTreebankNode.class, segment)){ - recursivelyProcessNode(jCas, root.getChildren(0), NON_MENTION, mentions); + recursivelyProcessNode(jCas, root.getChildren(0), mentions, 0.0); } } - private void recursivelyProcessNode(JCas jCas, TreebankNode node, String parentCategory, Set mentions) throws AnalysisEngineProcessException { + private double recursivelyProcessNode(JCas jCas, TreebankNode node, Set mentions, double parentScore) throws AnalysisEngineProcessException { // accumulate features: - double score=0.0; + double score=0.0; + parentScore = 0.0; ArrayList features = new ArrayList(); String category = NON_MENTION; @@ -151,21 +155,10 @@ TemporalEntityAnnotator_ImplBase { if(node.getParent().getParent() == null) features.add(new Feature("IS_ROOT")); features.add(new Feature("NODE_LABEL", node.getNodeType())); features.add(new Feature("PARENT_LABEL", node.getParent().getNodeType())); - features.add(new Feature("PARENT_CAT", parentCategory)); + List coveredTokens = JCasUtil.selectCovered(BaseToken.class, node); //check span length, check if a small node contains any time word - int numTokens = JCasUtil.selectCovered(BaseToken.class, node).size(); - boolean containTimeWord = false; - boolean containGoldTime = false; - //if (numTokens < SPAN_LIMIT){//check if it contains time word - for(BaseToken bt : JCasUtil.selectCovered(BaseToken.class, node)){ - String btword = bt.getCoveredText().toLowerCase(); - if(this.timeWords.contains(btword)){ - containTimeWord = true; - break; - } - } - //} + int numTokens = coveredTokens.size(); 
if(node.getLeaf()){ features.add(new Feature("IS_LEAF")); @@ -190,72 +183,65 @@ TemporalEntityAnnotator_ImplBase { if(this.isTraining()){ List goldMentions = JCasUtil.selectCovered(TimeMention.class, node); - if( goldMentions != null){ - containGoldTime = true; - - for(TimeMention mention : goldMentions){ - if(mention.getBegin() == node.getBegin() && mention.getEnd() == node.getEnd()){ - category = MENTION; - score=1.0; - mentions.remove(mention); - } - } + for(TimeMention mention : goldMentions){ + if(mention.getBegin() == node.getBegin() && mention.getEnd() == node.getEnd()){ + category = MENTION; + score=1.0; + mentions.remove(mention); + if(node.getCoveredText().contains("postoperative")){ + System.out.println("*** Positive Example: ***"); + System.out.println("*** Parent: " + node.getParent().getCoveredText()); + printFeatures(node, features); + } + } } if(numTokens < SPAN_LIMIT){ - this.dataWriter.write(new Instance(category, features)); + this.dataWriter.write(new Instance(category, features)); } }else{ score = this.classifier.score(features, 1).get(0).getScore(); category = this.classifier.classify(features); if(category.equals(MENTION)){ // add to cas - TimeMention mention = new TimeMention(jCas, node.getBegin(), node.getEnd()); + JCas timexCas; + try { + timexCas = jCas.getView(TimeAnnotator.TIMEX_VIEW); + } catch (CASException e) { + throw new AnalysisEngineProcessException(e); + } + + TimeMention mention = new TimeMention(timexCas, node.getBegin(), node.getEnd()); mention.setConfidence((float)score); mention.addToIndexes(); + }else{ + score = 1 - score; } } // now do children if not a leaf & not a mention - if(node.getLeaf() || MENTION.equals(category)) return; + if(node.getLeaf() || MENTION.equals(category)) return score; - //double highestScoringChild = 0.0; - if(!containGoldTime && !containTimeWord && numTokens >= SPAN_LIMIT) return; + double highestScore = 0.5; + TreebankNode highestScoringChild = null; - if(!node.getLeaf()){ - for(int i = 0; i < 
node.getChildren().size(); i++){ - TreebankNode child = node.getChildren(i); - recursivelyProcessNode(jCas, child, category, mentions); - } + for(int i = 0; i < node.getChildren().size(); i++){ + TreebankNode child = node.getChildren(i); + double childScore = recursivelyProcessNode(jCas, child, mentions, Math.max(score, parentScore)); + if(childScore > highestScore){ + highestScoringChild = child; + highestScore = childScore; + } } - - -// if(MENTION.equals(category) && score > highestScoringChild && score > parentScore){ - -// } + if(!this.isTraining() && MENTION.equals(category)){ + logger.info(String.format("\nFound mention (%s) with score %f\n\tParent (%s) : %f\n\tBest child (%s) : %f\n", node.getCoveredText(), score, node.getParent().getCoveredText(), parentScore, highestScoringChild == null ? "(none)" : highestScoringChild.getCoveredText(), highestScore)); + } + return score; } -// private static String getSiblingCategory(TreebankNode node, int offset) throws AnalysisEngineProcessException{ -// String cat = null; -// -// TreebankNode parent = node.getParent(); -// int nodeIndex = -1; -// for(int i = 0; i < parent.getChildren().size(); i++){ -// if(parent.getChildren(i) == node){ -// nodeIndex = i; -// break; -// } -// } -// -// if(nodeIndex == -1){ -// throw new AnalysisEngineProcessException(); -// }else if(nodeIndex + offset < 0){ -// cat = "<"; -// }else if(nodeIndex + offset >= parent.getChildren().size()){ -// cat = ">"; -// }else{ -// cat = parent.getChildren(nodeIndex+offset).getNodeType(); -// } -// -// return cat; -// } + private static void printFeatures(TreebankNode node, List features) { + System.out.println(node.getCoveredText()); + for(Feature feat : features){ + System.out.printf("%s => %s\n", feat.getName(), feat.getValue()); + } + } } Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TimeAnnotator.java URL: 
http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TimeAnnotator.java?rev=1497397&r1=1497396&r2=1497397&view=diff ============================================================================== --- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TimeAnnotator.java (original) +++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TimeAnnotator.java Thu Jun 27 15:29:00 2013 @@ -31,6 +31,7 @@ import org.apache.ctakes.typesystem.type import org.apache.uima.UimaContext; import org.apache.uima.analysis_engine.AnalysisEngineDescription; import org.apache.uima.analysis_engine.AnalysisEngineProcessException; +import org.apache.uima.cas.CASException; import org.apache.uima.jcas.JCas; import org.apache.uima.resource.ResourceInitializationException; import org.cleartk.classifier.CleartkAnnotator; @@ -55,6 +56,8 @@ import org.uimafit.util.JCasUtil; public class TimeAnnotator extends TemporalEntityAnnotator_ImplBase { + public static final String TIMEX_VIEW = "TimexView"; + public static AnalysisEngineDescription createDataWriterDescription( Class> dataWriterClass, File outputDirectory) throws ResourceInitializationException { @@ -185,7 +188,13 @@ public class TimeAnnotator extends Tempo // during prediction, convert chunk labels to times and add them to the CAS if (!this.isTraining()) { - this.timeChunking.createChunks(jCas, tokens, outcomes); + JCas timexCas; + try { + timexCas = jCas.getView(TIMEX_VIEW); + } catch (CASException e) { + throw new AnalysisEngineProcessException(e); + } + this.timeChunking.createChunks(timexCas, tokens, outcomes); } } } Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfAnnotationSpans_ImplBase.java URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfAnnotationSpans_ImplBase.java?rev=1497397&r1=1497396&r2=1497397&view=diff 
============================================================================== --- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfAnnotationSpans_ImplBase.java (original) +++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfAnnotationSpans_ImplBase.java Thu Jun 27 15:29:00 2013 @@ -22,6 +22,7 @@ import java.io.File; import java.io.IOException; import java.util.Arrays; import java.util.Collection; +import java.util.HashSet; import java.util.List; import java.util.Set; import java.util.TreeSet; @@ -98,7 +99,7 @@ protected abstract AnalysisEngineDescrip protected void train(CollectionReader collectionReader, File directory) throws Exception { AggregateBuilder aggregateBuilder = this.getPreprocessorAggregateBuilder(); aggregateBuilder.add(CopyFromGold.getDescription(this.annotationClass)); - aggregateBuilder.add(this.getDataWriterDescription(directory)); + aggregateBuilder.add(this.getDataWriterDescription(directory), "TimexView", CAS.NAME_DEFAULT_SOFA); SimplePipeline.runPipeline(collectionReader, aggregateBuilder.createAggregate()); this.trainAndPackage(directory); } @@ -114,7 +115,7 @@ protected abstract AnalysisEngineDescrip protected AnnotationStatistics test(CollectionReader collectionReader, File directory) throws Exception { AggregateBuilder aggregateBuilder = this.getPreprocessorAggregateBuilder(); - aggregateBuilder.add(this.getAnnotatorDescription(directory)); + aggregateBuilder.add(this.getAnnotatorDescription(directory), "TimexView", CAS.NAME_DEFAULT_SOFA); AnnotationStatistics stats = new AnnotationStatistics(); Ordering bySpans = Ordering. 
natural().lexicographical().onResultOf( @@ -174,6 +175,45 @@ protected abstract AnalysisEngineDescrip text.substring(end, windowEnd))); } } + Set partialGold = new HashSet(); + Set partialSystem = new HashSet(); + + // get overlapping spans + if(this.printOverlapping){ + // iterate over all remaining gold annotations + for(Annotation gold : goldOnly){ + Annotation bestSystem = null; + int bestOverlap = 0; + for(Annotation system : systemOnly){ + if(system.getBegin() >= gold.getBegin() && system.getEnd() <= gold.getEnd()){ + // system completely contained by gold + int overlap = system.getEnd() - system.getBegin(); + if(overlap > bestOverlap){ + bestOverlap = overlap; + bestSystem = system; + } + }else if(gold.getBegin() >= system.getBegin() && gold.getEnd() <= system.getEnd()){ + // gold completely contained by system + int overlap = gold.getEnd() - gold.getBegin(); + if(overlap > bestOverlap){ + bestOverlap = overlap; + bestSystem = system; + } + } + } + if(bestSystem != null){ + this.logger.info(String.format("Allowed overlapping annotation: Gold(%s) => System(%s)\n", gold.getCoveredText(), bestSystem.getCoveredText())); + partialGold.add(gold); + partialSystem.add(bestSystem); + } + } + if(partialGold.size() > 0){ + goldOnly.removeAll(partialGold); + systemOnly.removeAll(partialSystem); + assert partialGold.size() == partialSystem.size(); + this.logger.info(String.format("Found %d overlapping spans and removed from gold/system errors\n", partialGold.size())); + } + } } } } Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTimeSpans.java URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTimeSpans.java?rev=1497397&r1=1497396&r2=1497397&view=diff ============================================================================== --- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTimeSpans.java (original) +++ 
ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTimeSpans.java Thu Jun 27 15:29:00 2013 @@ -24,8 +24,10 @@ import java.util.List; import java.util.Map; import java.util.logging.Level; +import org.apache.ctakes.temporal.ae.BackwardsTimeAnnotator; import org.apache.ctakes.temporal.ae.CRFTimeAnnotator; import org.apache.ctakes.temporal.ae.ConstituencyBasedTimeAnnotator; +import org.apache.ctakes.temporal.ae.MetaTimeAnnotator; import org.apache.ctakes.temporal.ae.TimeAnnotator; import org.apache.ctakes.typesystem.type.textsem.TimeMention; import org.apache.ctakes.typesystem.type.textspan.Segment; @@ -62,13 +64,17 @@ public class EvaluationOfTimeSpans exten // specify the annotator classes to use List> annotatorClasses = Lists.newArrayList(); + annotatorClasses.add(BackwardsTimeAnnotator.class); annotatorClasses.add(TimeAnnotator.class); annotatorClasses.add(ConstituencyBasedTimeAnnotator.class); annotatorClasses.add(CRFTimeAnnotator.class); + annotatorClasses.add(MetaTimeAnnotator.class); Map, String[]> annotatorTrainingArguments = Maps.newHashMap(); + annotatorTrainingArguments.put(BackwardsTimeAnnotator.class, new String[]{"-c", "0.1"}); annotatorTrainingArguments.put(TimeAnnotator.class, new String[]{"-c", "0.1"}); annotatorTrainingArguments.put(ConstituencyBasedTimeAnnotator.class, new String[]{"-c", "0.1"}); annotatorTrainingArguments.put(CRFTimeAnnotator.class, new String[]{"-p", "c2=0.1"}); + annotatorTrainingArguments.put(MetaTimeAnnotator.class, new String[]{"-c", "1.0"}); // run one evaluation per annotator class final Map, AnnotationStatistics> annotatorStats = Maps.newHashMap(); @@ -80,6 +86,7 @@ public class EvaluationOfTimeSpans exten options.getXMIDirectory(), options.getTreebankDirectory(), annotatorClass, + options.getPrintOverlappingSpans(), annotatorTrainingArguments.get(annotatorClass)); evaluation.prepareXMIsFor(patientSets); String name = String.format("%s.errors", annotatorClass.getSimpleName()); @@ -116,16 
+123,20 @@ public class EvaluationOfTimeSpans exten File xmiDirectory, File treebankDirectory, Class annotatorClass, + boolean printOverlapping, String[] trainingArguments) { super(baseDirectory, rawTextDirectory, knowtatorXMLDirectory, xmiDirectory, treebankDirectory, TimeMention.class); this.annotatorClass = annotatorClass; this.trainingArguments = trainingArguments; + this.printOverlapping = printOverlapping; } @Override protected AnalysisEngineDescription getDataWriterDescription(File directory) throws ResourceInitializationException { - if(CleartkAnnotator.class.isAssignableFrom(this.annotatorClass)){ + if(MetaTimeAnnotator.class.isAssignableFrom(this.annotatorClass)){ + return MetaTimeAnnotator.getDataWriterDescription(LIBLINEARStringOutcomeDataWriter.class, directory); + }else if(CleartkAnnotator.class.isAssignableFrom(this.annotatorClass)){ return AnalysisEngineFactory.createPrimitiveDescription( this.annotatorClass, CleartkAnnotator.PARAM_IS_TRAINING, @@ -156,6 +167,9 @@ public class EvaluationOfTimeSpans exten @Override protected AnalysisEngineDescription getAnnotatorDescription(File directory) throws ResourceInitializationException { + if(MetaTimeAnnotator.class.isAssignableFrom(this.annotatorClass)){ + return MetaTimeAnnotator.getAnnotatorDescription(directory); + } return AnalysisEngineFactory.createPrimitiveDescription( this.annotatorClass, CleartkAnnotator.PARAM_IS_TRAINING, Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java?rev=1497397&r1=1497396&r2=1497397&view=diff ============================================================================== --- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java (original) +++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java 
Thu Jun 27 15:29:00 2013 @@ -123,6 +123,9 @@ public abstract class Evaluation_ImplBas @Option public boolean getPrintErrors(); + @Option + public boolean getPrintOverlappingSpans(); + @Option(longName = "kernelParams", defaultToNull=true) public String getKernelParams(); } @@ -139,6 +142,8 @@ public abstract class Evaluation_ImplBas protected boolean printErrors = false; + protected boolean printOverlapping = false; + protected String[] kernelParams; public Evaluation_ImplBase(