ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tm...@apache.org
Subject svn commit: r1497397 - in /ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal: ae/ eval/
Date Thu, 27 Jun 2013 15:29:00 GMT
Author: tmill
Date: Thu Jun 27 15:29:00 2013
New Revision: 1497397

URL: http://svn.apache.org/r1497397
Log:
Checked in changes to allow for meta-timex classifier.

Modified:
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/BackwardsTimeAnnotator.java
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/CRFTimeAnnotator.java
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ConstituencyBasedTimeAnnotator.java
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TimeAnnotator.java
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfAnnotationSpans_ImplBase.java
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTimeSpans.java
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/BackwardsTimeAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/BackwardsTimeAnnotator.java?rev=1497397&r1=1497396&r2=1497397&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/BackwardsTimeAnnotator.java
(original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/BackwardsTimeAnnotator.java
Thu Jun 27 15:29:00 2013
@@ -1,5 +1,6 @@
 package org.apache.ctakes.temporal.ae;
 
+import java.io.File;
 import java.util.ArrayList;
 import java.util.List;
 
@@ -10,9 +11,13 @@ import org.apache.ctakes.typesystem.type
 import org.apache.ctakes.typesystem.type.textspan.Segment;
 import org.apache.ctakes.typesystem.type.textspan.Sentence;
 import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.CASException;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.resource.ResourceInitializationException;
+import org.cleartk.classifier.CleartkAnnotator;
+import org.cleartk.classifier.DataWriter;
 import org.cleartk.classifier.Feature;
 import org.cleartk.classifier.Instance;
 import org.cleartk.classifier.chunking.BIOChunking;
@@ -25,12 +30,40 @@ import org.cleartk.classifier.feature.ex
 import org.cleartk.classifier.feature.extractor.simple.CoveredTextExtractor;
 import org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor;
 import org.cleartk.classifier.feature.extractor.simple.TypePathExtractor;
+import org.cleartk.classifier.jar.DefaultDataWriterFactory;
+import org.cleartk.classifier.jar.DirectoryDataWriterFactory;
+import org.cleartk.classifier.jar.GenericJarClassifierFactory;
+import org.uimafit.factory.AnalysisEngineFactory;
 import org.uimafit.util.JCasUtil;
 
 import com.google.common.collect.Lists;
 
 
-public class BackwardsTimeAnnotator extends TemporalEntityAnnotator_ImplBase{
+public class BackwardsTimeAnnotator extends TemporalEntityAnnotator_ImplBase {
+
+  public static AnalysisEngineDescription createDataWriterDescription(
+      Class<? extends DataWriter<String>> dataWriterClass, File outputDirectory)
+      throws ResourceInitializationException {
+    return AnalysisEngineFactory.createPrimitiveDescription(
+        BackwardsTimeAnnotator.class,
+        CleartkAnnotator.PARAM_IS_TRAINING,
+        true,
+        DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
+        dataWriterClass,
+        DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
+        outputDirectory);
+  }
+
+  public static AnalysisEngineDescription createAnnotatorDescription(
+      File modelDirectory) throws ResourceInitializationException {
+    return AnalysisEngineFactory.createPrimitiveDescription(
+        BackwardsTimeAnnotator.class,
+        CleartkAnnotator.PARAM_IS_TRAINING,
+        false,
+        GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
+        new File(modelDirectory, "model.jar"));
+  }
+
 
   protected List<SimpleFeatureExtractor> tokenFeatureExtractors;
 
@@ -141,9 +174,14 @@ public class BackwardsTimeAnnotator exte
       if (!this.isTraining()) {
         tokens = Lists.reverse(tokens);
         outcomes = Lists.reverse(outcomes);
-        this.timeChunking.createChunks(jCas, tokens, outcomes);
+        JCas timexCas;
+        try{
+          timexCas = jCas.getView(TimeAnnotator.TIMEX_VIEW);
+        }catch(CASException e){
+          throw new AnalysisEngineProcessException(e);
+        }
+        this.timeChunking.createChunks(timexCas, tokens, outcomes);
       }
     }
   }
-
 }

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/CRFTimeAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/CRFTimeAnnotator.java?rev=1497397&r1=1497396&r2=1497397&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/CRFTimeAnnotator.java
(original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/CRFTimeAnnotator.java
Thu Jun 27 15:29:00 2013
@@ -4,17 +4,16 @@ import java.io.File;
 import java.util.ArrayList;
 import java.util.List;
 
-import org.apache.ctakes.constituency.parser.util.AnnotationTreeUtils;
 import org.apache.ctakes.temporal.ae.feature.ParseSpanFeatureExtractor;
 import org.apache.ctakes.temporal.ae.feature.TimeWordTypeExtractor;
 import org.apache.ctakes.typesystem.type.syntax.BaseToken;
-import org.apache.ctakes.typesystem.type.syntax.TreebankNode;
 import org.apache.ctakes.typesystem.type.textsem.TimeMention;
 import org.apache.ctakes.typesystem.type.textspan.Segment;
 import org.apache.ctakes.typesystem.type.textspan.Sentence;
 import org.apache.uima.UimaContext;
 import org.apache.uima.analysis_engine.AnalysisEngineDescription;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.CASException;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.resource.ResourceInitializationException;
 import org.cleartk.classifier.CleartkAnnotator;
@@ -25,12 +24,10 @@ import org.cleartk.classifier.chunking.B
 import org.cleartk.classifier.feature.extractor.CleartkExtractor;
 import org.cleartk.classifier.feature.extractor.CleartkExtractor.Following;
 import org.cleartk.classifier.feature.extractor.CleartkExtractor.Preceding;
-import org.cleartk.classifier.feature.extractor.simple.CharacterCategoryPatternExtractor;
 import org.cleartk.classifier.feature.extractor.simple.CombinedExtractor;
 import org.cleartk.classifier.feature.extractor.simple.CoveredTextExtractor;
 import org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor;
 import org.cleartk.classifier.feature.extractor.simple.TypePathExtractor;
-import org.cleartk.classifier.feature.extractor.simple.CharacterCategoryPatternExtractor.PatternType;
 import org.cleartk.classifier.jar.DefaultDataWriterFactory;
 import org.cleartk.classifier.jar.DirectoryDataWriterFactory;
 import org.cleartk.classifier.jar.GenericJarClassifierFactory;
@@ -119,10 +116,10 @@ public class CRFTimeAnnotator extends Te
       }
 
       // extract features for all tokens
-      int tokenIndex = -1;
+//      int tokenIndex = -1;
       List<List<Feature>> allFeatures = new ArrayList<List<Feature>>();
       for (BaseToken token : tokens) {
-        ++tokenIndex;
+//        ++tokenIndex;
 
         List<Feature> features = new ArrayList<Feature>();
         // features from token attributes
@@ -153,7 +150,7 @@ public class CRFTimeAnnotator extends Te
 //          }
 //          startToken = tokens.get(i);
 //        }
-        TreebankNode preTerm = AnnotationTreeUtils.annotationNode(jCas, token);
+//        TreebankNode preTerm = AnnotationTreeUtils.annotationNode(jCas, token);
         features.addAll(parseExtractor.extract(jCas, token.getBegin(), token.getEnd()));
         //if(preTerm != null && preTerm.getParent() != null){
         //  features.addAll(parseExtractor.extract(jCas, preTerm.getParent().getBegin(), preTerm.getParent().getEnd()));
@@ -176,7 +173,13 @@ public class CRFTimeAnnotator extends Te
       }else{
 //        outcomes.add(this.classifier.classify(features));
         outcomes = this.classifier.classify(allFeatures);
-        this.timeChunking.createChunks(jCas, tokens, outcomes);
+        JCas timexCas;
+        try {
+          timexCas = jCas.getView(TimeAnnotator.TIMEX_VIEW);
+        } catch (CASException e) {
+          throw new AnalysisEngineProcessException(e);
+        }
+        this.timeChunking.createChunks(timexCas, tokens, outcomes);
       }
     }
   }

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ConstituencyBasedTimeAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ConstituencyBasedTimeAnnotator.java?rev=1497397&r1=1497396&r2=1497397&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ConstituencyBasedTimeAnnotator.java
(original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/ConstituencyBasedTimeAnnotator.java
Thu Jun 27 15:29:00 2013
@@ -15,9 +15,11 @@ import org.apache.ctakes.typesystem.type
 import org.apache.ctakes.typesystem.type.syntax.TreebankNode;
 import org.apache.ctakes.typesystem.type.textsem.TimeMention;
 import org.apache.ctakes.typesystem.type.textspan.Segment;
+import org.apache.log4j.Logger;
 import org.apache.uima.UimaContext;
 import org.apache.uima.analysis_engine.AnalysisEngineDescription;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.CASException;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.resource.ResourceInitializationException;
 import org.cleartk.classifier.CleartkAnnotator;
@@ -25,7 +27,8 @@ import org.cleartk.classifier.DataWriter
 import org.cleartk.classifier.Feature;
 import org.cleartk.classifier.Instance;
 import org.cleartk.classifier.feature.extractor.CleartkExtractor;
-import static org.cleartk.classifier.feature.extractor.CleartkExtractor.*;
+import org.cleartk.classifier.feature.extractor.CleartkExtractor.Bag;
+import org.cleartk.classifier.feature.extractor.CleartkExtractor.Covered;
 import org.cleartk.classifier.feature.extractor.simple.CharacterCategoryPatternExtractor;
 import org.cleartk.classifier.feature.extractor.simple.CharacterCategoryPatternExtractor.PatternType;
 import org.cleartk.classifier.feature.extractor.simple.CombinedExtractor;
@@ -48,9 +51,9 @@ TemporalEntityAnnotator_ImplBase {
 
   private static final String NON_MENTION = "NON_TIME_MENTION";
   private static final String MENTION = "TIME_MENTION";
+  private static Logger logger = Logger.getLogger(ConstituencyBasedTimeAnnotator.class);
   private static final int	SPAN_LIMIT = 12;
 
-
   public static AnalysisEngineDescription createDataWriterDescription(
       Class<? extends DataWriter<String>> dataWriterClass,
           File outputDirectory) throws ResourceInitializationException {
@@ -82,13 +85,14 @@ TemporalEntityAnnotator_ImplBase {
   
   private Map<String, String> wordTypes;
   
-  private Set<String> timeWords;
-
   @Override
   public void initialize(UimaContext context)
       throws ResourceInitializationException {
     super.initialize(context);
 
+    CombinedExtractor charExtractors = new CombinedExtractor(new CharacterCategoryPatternExtractor(PatternType.REPEATS_MERGED),
+            new CharacterCategoryPatternExtractor(PatternType.ONE_PER_CHAR));
+    
     this.wordTypes = Maps.newHashMap();
     URL url = TimeWordsExtractor.class.getResource(LOOKUP_PATH);
     try {
@@ -102,18 +106,17 @@ TemporalEntityAnnotator_ImplBase {
     } catch (IOException e) {
       throw new ResourceInitializationException(e);
     }
-    this.timeWords = this.wordTypes.keySet();
     
     CombinedExtractor allExtractors = new CombinedExtractor(
         new CoveredTextExtractor(),
 //        new TimeWordTypeExtractor(),
-        new CharacterCategoryPatternExtractor(PatternType.REPEATS_MERGED),
-        new CharacterCategoryPatternExtractor(PatternType.ONE_PER_CHAR),
+        charExtractors,
         new TypePathExtractor(BaseToken.class, "partOfSpeech"));
-
+    
     featureExtractors = new ArrayList<SimpleFeatureExtractor>();
 //    featureExtractors.add(new CleartkExtractor(BaseToken.class, new CoveredTextExtractor(), new Bag(new Covered())));
     featureExtractors.add(new CleartkExtractor(BaseToken.class, allExtractors, new Bag(new Covered())));
+//    featureExtractors.add(charExtractors);
     wordTypeExtractor = new CleartkExtractor(BaseToken.class, new TimeWordTypeExtractor(), new Bag(new Covered()));
 //    featureExtractors.add(new CleartkExtractor(BaseToken.class, new CoveredTextExtractor(), new Bag(new Preceding(1))));
  //   featureExtractors.add(new CleartkExtractor(BaseToken.class, new CoveredTextExtractor(), new Bag(new Following(1))));
@@ -137,13 +140,14 @@ TemporalEntityAnnotator_ImplBase {
 //    }
 	  
     for(TopTreebankNode root : JCasUtil.selectCovered(TopTreebankNode.class, segment)){
-      recursivelyProcessNode(jCas, root.getChildren(0), NON_MENTION, mentions);
+      recursivelyProcessNode(jCas, root.getChildren(0), mentions, 0.0);
     }
   }
 
-  private void recursivelyProcessNode(JCas jCas, TreebankNode node, String parentCategory, Set<TimeMention> mentions) throws AnalysisEngineProcessException {
+  private double recursivelyProcessNode(JCas jCas, TreebankNode node, Set<TimeMention> mentions, double parentScore) throws AnalysisEngineProcessException {
     // accumulate features:
-	double score=0.0;
+    double score=0.0;
+    parentScore = 0.0;
     ArrayList<Feature> features = new ArrayList<Feature>();
     String category = NON_MENTION;
 
@@ -151,21 +155,10 @@ TemporalEntityAnnotator_ImplBase {
     if(node.getParent().getParent() == null) features.add(new Feature("IS_ROOT"));
     features.add(new Feature("NODE_LABEL", node.getNodeType()));
     features.add(new Feature("PARENT_LABEL", node.getParent().getNodeType()));
-    features.add(new Feature("PARENT_CAT", parentCategory));
+    List<BaseToken> coveredTokens = JCasUtil.selectCovered(BaseToken.class, node);
     
     //check span length, check if a small node contains any time word
-    int numTokens = JCasUtil.selectCovered(BaseToken.class, node).size();
-    boolean containTimeWord = false;
-    boolean containGoldTime = false;
-    //if (numTokens < SPAN_LIMIT){//check if it contains time word
-    	for(BaseToken bt : JCasUtil.selectCovered(BaseToken.class, node)){
-    		String btword = bt.getCoveredText().toLowerCase();
-    		if(this.timeWords.contains(btword)){
-    			containTimeWord = true;
-    			break;
-    		}
-    	} 	
-    //}
+    int numTokens = coveredTokens.size();
     
     if(node.getLeaf()){
       features.add(new Feature("IS_LEAF"));
@@ -190,72 +183,65 @@ TemporalEntityAnnotator_ImplBase {
       
     if(this.isTraining()){
       List<TimeMention> goldMentions = JCasUtil.selectCovered(TimeMention.class, node);
-      if( goldMentions != null){
-    	  containGoldTime = true;
-
-    	  for(TimeMention mention : goldMentions){
-    		  if(mention.getBegin() == node.getBegin() && mention.getEnd() == node.getEnd()){
-    			  category = MENTION;
-    			  score=1.0;
-    			  mentions.remove(mention);
-    		  }
-    	  }
+      for(TimeMention mention : goldMentions){
+        if(mention.getBegin() == node.getBegin() && mention.getEnd() == node.getEnd()){
+          category = MENTION;
+          score=1.0;
+          mentions.remove(mention);
+          if(node.getCoveredText().contains("postoperative")){
+            System.out.println("*** Positive Example: ***");
+            System.out.println("*** Parent: " + node.getParent().getCoveredText());
+            printFeatures(node, features);
+          }
+        }
       }
       if(numTokens < SPAN_LIMIT){
-    	  this.dataWriter.write(new Instance<String>(category, features));
+        this.dataWriter.write(new Instance<String>(category, features));
       }
     }else{
       score = this.classifier.score(features, 1).get(0).getScore();
       category = this.classifier.classify(features);
       if(category.equals(MENTION)){
         // add to cas
-        TimeMention mention = new TimeMention(jCas, node.getBegin(), node.getEnd());
+        JCas timexCas;
+        try {
+          timexCas = jCas.getView(TimeAnnotator.TIMEX_VIEW);
+        } catch (CASException e) {
+          throw new AnalysisEngineProcessException(e);
+        }
+
+        TimeMention mention = new TimeMention(timexCas, node.getBegin(), node.getEnd());
         mention.setConfidence((float)score);
         mention.addToIndexes();
+      }else{
+        score = 1 - score;
       }
     }
 
     // now do children if not a leaf & not a mention
-    if(node.getLeaf() || MENTION.equals(category)) return;
+    if(node.getLeaf() || MENTION.equals(category)) return score;
     
-    //double highestScoringChild = 0.0;
-    if(!containGoldTime && !containTimeWord && numTokens >= SPAN_LIMIT) return;
+    double highestScore = 0.5;
+    TreebankNode highestScoringChild = null;
     
-    if(!node.getLeaf()){
-    	for(int i = 0; i < node.getChildren().size(); i++){
-    		TreebankNode child = node.getChildren(i);
-    		recursivelyProcessNode(jCas, child, category, mentions);
-    	}
+    for(int i = 0; i < node.getChildren().size(); i++){
+      TreebankNode child = node.getChildren(i);
+      double childScore = recursivelyProcessNode(jCas, child, mentions, Math.max(score, parentScore));
+      if(childScore > highestScore){
+        highestScoringChild = child;
+        highestScore = childScore;
+      }
     }
-    
-    
-//    if(MENTION.equals(category) && score > highestScoringChild && score > parentScore){
-    	
-//    }
+    if(!this.isTraining() && MENTION.equals(category)){
+      logger.info(String.format("\nFound mention (%s) with score %f\n\tParent (%s) : %f\n\tBest child (%s) : %f\n", node.getCoveredText(), score, node.getParent().getCoveredText(), parentScore, highestScoringChild == null ? "(none)" : highestScoringChild.getCoveredText(), highestScore));
+    }
+    return score;
   }
   
-//  private static String getSiblingCategory(TreebankNode node, int offset) throws AnalysisEngineProcessException{
-//	  String cat = null;
-//	  
-//	  TreebankNode parent = node.getParent();
-//	  int nodeIndex = -1;
-//	  for(int i = 0; i < parent.getChildren().size(); i++){
-//		  if(parent.getChildren(i) == node){
-//			  nodeIndex = i;
-//			  break;
-//		  }
-//	  }
-//	  
-//	  if(nodeIndex == -1){
-//		  throw new AnalysisEngineProcessException();
-//	  }else if(nodeIndex + offset < 0){
-//		  cat = "<";
-//	  }else if(nodeIndex + offset >= parent.getChildren().size()){
-//		  cat = ">";
-//	  }else{
-//		  cat = parent.getChildren(nodeIndex+offset).getNodeType();
-//	  }
-//	  
-//	  return cat;
-//  }
+  private static void printFeatures(TreebankNode node, List<Feature> features) {
+    System.out.println(node.getCoveredText());
+    for(Feature feat : features){
+      System.out.printf("%s => %s\n", feat.getName(), feat.getValue());
+    }    
+  }
 }

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TimeAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TimeAnnotator.java?rev=1497397&r1=1497396&r2=1497397&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TimeAnnotator.java
(original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TimeAnnotator.java
Thu Jun 27 15:29:00 2013
@@ -31,6 +31,7 @@ import org.apache.ctakes.typesystem.type
 import org.apache.uima.UimaContext;
 import org.apache.uima.analysis_engine.AnalysisEngineDescription;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.CASException;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.resource.ResourceInitializationException;
 import org.cleartk.classifier.CleartkAnnotator;
@@ -55,6 +56,8 @@ import org.uimafit.util.JCasUtil;
 
 public class TimeAnnotator extends TemporalEntityAnnotator_ImplBase {
 
+  public static final String TIMEX_VIEW = "TimexView";
+  
   public static AnalysisEngineDescription createDataWriterDescription(
       Class<? extends DataWriter<String>> dataWriterClass,
       File outputDirectory) throws ResourceInitializationException {
@@ -185,7 +188,13 @@ public class TimeAnnotator extends Tempo
 
       // during prediction, convert chunk labels to times and add them to the CAS
       if (!this.isTraining()) {
-        this.timeChunking.createChunks(jCas, tokens, outcomes);
+        JCas timexCas;
+        try {
+          timexCas = jCas.getView(TIMEX_VIEW);
+        } catch (CASException e) {
+          throw new AnalysisEngineProcessException(e);
+        }
+        this.timeChunking.createChunks(timexCas, tokens, outcomes);
       }
     }
   }

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfAnnotationSpans_ImplBase.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfAnnotationSpans_ImplBase.java?rev=1497397&r1=1497396&r2=1497397&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfAnnotationSpans_ImplBase.java
(original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfAnnotationSpans_ImplBase.java
Thu Jun 27 15:29:00 2013
@@ -22,6 +22,7 @@ import java.io.File;
 import java.io.IOException;
 import java.util.Arrays;
 import java.util.Collection;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
 import java.util.TreeSet;
@@ -98,7 +99,7 @@ protected abstract AnalysisEngineDescrip
   protected void train(CollectionReader collectionReader, File directory) throws Exception {
     AggregateBuilder aggregateBuilder = this.getPreprocessorAggregateBuilder();
     aggregateBuilder.add(CopyFromGold.getDescription(this.annotationClass));
-    aggregateBuilder.add(this.getDataWriterDescription(directory));
+    aggregateBuilder.add(this.getDataWriterDescription(directory), "TimexView", CAS.NAME_DEFAULT_SOFA);
     SimplePipeline.runPipeline(collectionReader, aggregateBuilder.createAggregate());
     this.trainAndPackage(directory);
   }
@@ -114,7 +115,7 @@ protected abstract AnalysisEngineDescrip
   protected AnnotationStatistics<String> test(CollectionReader collectionReader, File directory)
       throws Exception {
     AggregateBuilder aggregateBuilder = this.getPreprocessorAggregateBuilder();
-    aggregateBuilder.add(this.getAnnotatorDescription(directory));
+    aggregateBuilder.add(this.getAnnotatorDescription(directory), "TimexView", CAS.NAME_DEFAULT_SOFA);
 
     AnnotationStatistics<String> stats = new AnnotationStatistics<String>();
     Ordering<Annotation> bySpans = Ordering.<Integer> natural().lexicographical().onResultOf(
@@ -174,6 +175,45 @@ protected abstract AnalysisEngineDescrip
                   text.substring(end, windowEnd)));
             }
           }
+          Set<Annotation> partialGold = new HashSet<Annotation>();
+          Set<Annotation> partialSystem = new HashSet<Annotation>();
+
+          // get overlapping spans
+          if(this.printOverlapping){
+            // iterate over all remaining gold annotations
+            for(Annotation gold : goldOnly){
+              Annotation bestSystem = null;
+              int bestOverlap = 0;
+              for(Annotation system : systemOnly){
+                if(system.getBegin() >= gold.getBegin() && system.getEnd() <= gold.getEnd()){
+                  // system completely contained by gold
+                  int overlap = system.getEnd() - system.getBegin();
+                  if(overlap > bestOverlap){
+                    bestOverlap = overlap;
+                    bestSystem = system;
+                  }
+                }else if(gold.getBegin() >= system.getBegin() && gold.getEnd() <= system.getEnd()){
+                  // gold completely contained by system
+                  int overlap = gold.getEnd() - gold.getBegin();
+                  if(overlap > bestOverlap){
+                    bestOverlap = overlap;
+                    bestSystem = system;
+                  }
+                }
+              }
+              if(bestSystem != null){
+                this.logger.info(String.format("Allowed overlapping annotation: Gold(%s) => System(%s)\n", gold.getCoveredText(), bestSystem.getCoveredText()));
+                partialGold.add(gold);
+                partialSystem.add(bestSystem);
+              }
+            }
+            if(partialGold.size() > 0){
+              goldOnly.removeAll(partialGold);
+              systemOnly.removeAll(partialSystem);
+              assert partialGold.size() == partialSystem.size();
+              this.logger.info(String.format("Found %d overlapping spans and removed from gold/system errors\n", partialGold.size()));
+            }
+          }
         }
       }
     }

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTimeSpans.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTimeSpans.java?rev=1497397&r1=1497396&r2=1497397&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTimeSpans.java
(original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTimeSpans.java
Thu Jun 27 15:29:00 2013
@@ -24,8 +24,10 @@ import java.util.List;
 import java.util.Map;
 import java.util.logging.Level;
 
+import org.apache.ctakes.temporal.ae.BackwardsTimeAnnotator;
 import org.apache.ctakes.temporal.ae.CRFTimeAnnotator;
 import org.apache.ctakes.temporal.ae.ConstituencyBasedTimeAnnotator;
+import org.apache.ctakes.temporal.ae.MetaTimeAnnotator;
 import org.apache.ctakes.temporal.ae.TimeAnnotator;
 import org.apache.ctakes.typesystem.type.textsem.TimeMention;
 import org.apache.ctakes.typesystem.type.textspan.Segment;
@@ -62,13 +64,17 @@ public class EvaluationOfTimeSpans exten
     
     // specify the annotator classes to use
     List<Class<? extends JCasAnnotator_ImplBase>> annotatorClasses = Lists.newArrayList();
+    annotatorClasses.add(BackwardsTimeAnnotator.class);
     annotatorClasses.add(TimeAnnotator.class);
     annotatorClasses.add(ConstituencyBasedTimeAnnotator.class);
     annotatorClasses.add(CRFTimeAnnotator.class);
+    annotatorClasses.add(MetaTimeAnnotator.class);
     Map<Class<? extends JCasAnnotator_ImplBase>, String[]> annotatorTrainingArguments = Maps.newHashMap();
+    annotatorTrainingArguments.put(BackwardsTimeAnnotator.class, new String[]{"-c", "0.1"});
     annotatorTrainingArguments.put(TimeAnnotator.class, new String[]{"-c", "0.1"});
     annotatorTrainingArguments.put(ConstituencyBasedTimeAnnotator.class, new String[]{"-c", "0.1"});
     annotatorTrainingArguments.put(CRFTimeAnnotator.class, new String[]{"-p", "c2=0.1"});
+    annotatorTrainingArguments.put(MetaTimeAnnotator.class, new String[]{"-c", "1.0"});
     
     // run one evaluation per annotator class
     final Map<Class<?>, AnnotationStatistics<?>> annotatorStats = Maps.newHashMap();
@@ -80,6 +86,7 @@ public class EvaluationOfTimeSpans exten
           options.getXMIDirectory(),
           options.getTreebankDirectory(),
           annotatorClass,
+          options.getPrintOverlappingSpans(),
           annotatorTrainingArguments.get(annotatorClass));
       evaluation.prepareXMIsFor(patientSets);
       String name = String.format("%s.errors", annotatorClass.getSimpleName());
@@ -116,16 +123,20 @@ public class EvaluationOfTimeSpans exten
       File xmiDirectory,
       File treebankDirectory,
       Class<? extends JCasAnnotator_ImplBase> annotatorClass,
+      boolean printOverlapping,
       String[] trainingArguments) {
     super(baseDirectory, rawTextDirectory, knowtatorXMLDirectory, xmiDirectory, treebankDirectory, TimeMention.class);
     this.annotatorClass = annotatorClass;
     this.trainingArguments = trainingArguments;
+    this.printOverlapping = printOverlapping;
   }
 
   @Override
   protected AnalysisEngineDescription getDataWriterDescription(File directory)
       throws ResourceInitializationException {
-    if(CleartkAnnotator.class.isAssignableFrom(this.annotatorClass)){
+    if(MetaTimeAnnotator.class.isAssignableFrom(this.annotatorClass)){
+      return MetaTimeAnnotator.getDataWriterDescription(LIBLINEARStringOutcomeDataWriter.class, directory);          
+    }else if(CleartkAnnotator.class.isAssignableFrom(this.annotatorClass)){
       return AnalysisEngineFactory.createPrimitiveDescription(
           this.annotatorClass,
           CleartkAnnotator.PARAM_IS_TRAINING,
@@ -156,6 +167,9 @@ public class EvaluationOfTimeSpans exten
   @Override
   protected AnalysisEngineDescription getAnnotatorDescription(File directory)
       throws ResourceInitializationException {
+    if(MetaTimeAnnotator.class.isAssignableFrom(this.annotatorClass)){
+      return MetaTimeAnnotator.getAnnotatorDescription(directory);
+    }
     return AnalysisEngineFactory.createPrimitiveDescription(
         this.annotatorClass,
         CleartkAnnotator.PARAM_IS_TRAINING,

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java?rev=1497397&r1=1497396&r2=1497397&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java
(original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java
Thu Jun 27 15:29:00 2013
@@ -123,6 +123,9 @@ public abstract class Evaluation_ImplBas
     @Option
     public boolean getPrintErrors();
     
+    @Option
+    public boolean getPrintOverlappingSpans();
+    
     @Option(longName = "kernelParams", defaultToNull=true)
     public String getKernelParams();
   }
@@ -139,6 +142,8 @@ public abstract class Evaluation_ImplBas
   
   protected boolean printErrors = false;
   
+  protected boolean printOverlapping = false;
+  
   protected String[] kernelParams;
   
   public Evaluation_ImplBase(



Mime
View raw message