ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From stevenbeth...@apache.org
Subject svn commit: r1486365 - /ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTimeSpans.java
Date Sat, 25 May 2013 21:10:32 GMT
Author: stevenbethard
Date: Sat May 25 21:10:32 2013
New Revision: 1486365

URL: http://svn.apache.org/r1486365
Log:
Makes EvaluationOfTimeSpans evaluate both token-based and constituent-based time annotators

Modified:
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTimeSpans.java

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTimeSpans.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTimeSpans.java?rev=1486365&r1=1486364&r2=1486365&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTimeSpans.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTimeSpans.java Sat May 25 21:10:32 2013
@@ -21,19 +21,29 @@ package org.apache.ctakes.temporal.eval;
 import java.io.File;
 import java.util.Collection;
 import java.util.List;
+import java.util.Map;
 import java.util.logging.Level;
 
 import org.apache.ctakes.temporal.ae.ConstituencyBasedTimeAnnotator;
+import org.apache.ctakes.temporal.ae.TimeAnnotator;
 import org.apache.ctakes.typesystem.type.textsem.TimeMention;
 import org.apache.uima.analysis_engine.AnalysisEngineDescription;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.tcas.Annotation;
 import org.apache.uima.resource.ResourceInitializationException;
+import org.cleartk.classifier.CleartkAnnotator;
+import org.cleartk.classifier.jar.DefaultDataWriterFactory;
+import org.cleartk.classifier.jar.DirectoryDataWriterFactory;
+import org.cleartk.classifier.jar.GenericJarClassifierFactory;
 import org.cleartk.classifier.jar.JarClassifierBuilder;
 import org.cleartk.classifier.libsvm.LIBSVMStringOutcomeDataWriter;
 import org.cleartk.eval.AnnotationStatistics;
 import org.uimafit.factory.AnalysisEngineFactory;
 
+import com.google.common.base.Function;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import com.google.common.collect.Ordering;
 import com.lexicalscope.jewel.cli.CliFactory;
 
 public class EvaluationOfTimeSpans extends EvaluationOfAnnotationSpans_ImplBase {
@@ -43,50 +53,86 @@ public class EvaluationOfTimeSpans exten
     List<Integer> patientSets = options.getPatients().getList();
     List<Integer> trainItems = THYMEData.getTrainPatientSets(patientSets);
     List<Integer> devItems = THYMEData.getDevPatientSets(patientSets);
-    EvaluationOfTimeSpans evaluation = new EvaluationOfTimeSpans(
-        new File("target/eval/time-spans"),
-        options.getRawTextDirectory(),
-        options.getKnowtatorXMLDirectory(),
-        options.getXMIDirectory(),
-        options.getTreebankDirectory());
-    evaluation.prepareXMIsFor(patientSets);
-    evaluation.setLogging(Level.FINE, new File("target/eval/ctakes-time-errors.log"));
-    AnnotationStatistics<String> stats = evaluation.trainAndTest(trainItems, devItems);
-    System.err.println(stats);
+    
+    // specify the annotator classes to use
+    List<Class<? extends CleartkAnnotator<String>>> annotatorClasses = Lists.newArrayList();
+    annotatorClasses.add(TimeAnnotator.class);
+    annotatorClasses.add(ConstituencyBasedTimeAnnotator.class);
+    
+    // run one evaluation per annotator class
+    final Map<Class<?>, AnnotationStatistics<?>> annotatorStats = Maps.newHashMap();
+    for (Class<? extends CleartkAnnotator<String>> annotatorClass : annotatorClasses) {
+      EvaluationOfTimeSpans evaluation = new EvaluationOfTimeSpans(
+          new File("target/eval/time-spans"),
+          options.getRawTextDirectory(),
+          options.getKnowtatorXMLDirectory(),
+          options.getXMIDirectory(),
+          options.getTreebankDirectory(),
+          annotatorClass);
+      evaluation.prepareXMIsFor(patientSets);
+      String name = String.format("%s.errors", annotatorClass.getSimpleName());
+      evaluation.setLogging(Level.FINE, new File("target/eval", name));
+      AnnotationStatistics<String> stats = evaluation.trainAndTest(trainItems, devItems);
+      annotatorStats.put(annotatorClass, stats);
+    }
+
+    // allow ordering of models by F1
+    Ordering<Class<? extends CleartkAnnotator<String>>> byF1 = Ordering.natural().onResultOf(
+      new Function<Class<? extends CleartkAnnotator<String>>, Double>() {
+        @Override
+        public Double apply(
+            Class<? extends CleartkAnnotator<String>> annotatorClass) {
+          return annotatorStats.get(annotatorClass).f1();
+        }
+      });
+
+    // print out models, ordered by F1
+    for (Class<?> annotatorClass : byF1.sortedCopy(annotatorClasses)) {
+      System.err.printf("===== %s =====\n", annotatorClass.getSimpleName());
+      System.err.println(annotatorStats.get(annotatorClass));
+    }
   }
 
+  private Class<? extends CleartkAnnotator<String>> annotatorClass;
+
   public EvaluationOfTimeSpans(
       File baseDirectory,
       File rawTextDirectory,
       File knowtatorXMLDirectory,
       File xmiDirectory,
-      File treebankDirectory) {
+      File treebankDirectory,
+      Class<? extends CleartkAnnotator<String>> annotatorClass) {
     super(baseDirectory, rawTextDirectory, knowtatorXMLDirectory, xmiDirectory, treebankDirectory, TimeMention.class);
+    this.annotatorClass = annotatorClass;
   }
 
   @Override
   protected AnalysisEngineDescription getDataWriterDescription(File directory)
       throws ResourceInitializationException {
-    return AnalysisEngineFactory.createAggregateDescription(
-//        TimeAnnotator.createDataWriterDescription(LIBSVMStringOutcomeDataWriter.class, new File(directory, "/seq")),
-        ConstituencyBasedTimeAnnotator.createDataWriterDescription(LIBSVMStringOutcomeDataWriter.class, new File(directory, "/tree")));
-//    return ConstituencyBasedTimeAnnotator.createDataWriterDescription(LIBSVMStringOutcomeDataWriter.class, directory);
+    return AnalysisEngineFactory.createPrimitiveDescription(
+        this.annotatorClass,
+        CleartkAnnotator.PARAM_IS_TRAINING,
+        true,
+        DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
+        LIBSVMStringOutcomeDataWriter.class,
+        DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
+        this.getModelDirectory(directory));
   }
 
   @Override
   protected void trainAndPackage(File directory) throws Exception {
-//    JarClassifierBuilder.trainAndPackage(new File(directory, "/seq"), "-c", "10000");
-    JarClassifierBuilder.trainAndPackage(new File(directory, "/tree"), "-c", "10000");
+    JarClassifierBuilder.trainAndPackage(this.getModelDirectory(directory), "-c", "10000");
   }
 
   @Override
   protected AnalysisEngineDescription getAnnotatorDescription(File directory)
       throws ResourceInitializationException {
-    return AnalysisEngineFactory.createAggregateDescription(
-//        TimeAnnotator.createAnnotatorDescription(new File(directory, "/seq")),
-//        AnalysisEngineFactory.createPrimitiveDescription(RemoveTreeAlignedMentions.class, RemoveTreeAlignedMentions.PARAM_GOLDVIEW_NAME, GOLD_VIEW_NAME),
-        ConstituencyBasedTimeAnnotator.createAnnotatorDescription(new File(directory, "/tree")));
-//    return ConstituencyBasedTimeAnnotator.createAnnotatorDescription(directory);
+    return AnalysisEngineFactory.createPrimitiveDescription(
+        this.annotatorClass,
+        CleartkAnnotator.PARAM_IS_TRAINING,
+        false,
+        GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
+        new File(this.getModelDirectory(directory), "model.jar"));
   }
 
   @Override
@@ -98,4 +144,8 @@ public class EvaluationOfTimeSpans exten
   protected Collection<? extends Annotation> getSystemAnnotations(JCas jCas) {
     return selectExact(jCas, TimeMention.class);
   }
+  
+  private File getModelDirectory(File directory) {
+    return new File(directory, this.annotatorClass.getSimpleName());
+  }
 }



Mime
View raw message