incubator-ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From stevenbeth...@apache.org
Subject svn commit: r1445008 - in /incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal: ae/ eval/
Date Tue, 12 Feb 2013 00:52:23 GMT
Author: stevenbethard
Date: Tue Feb 12 00:52:23 2013
New Revision: 1445008

URL: http://svn.apache.org/r1445008
Log:
Allows ctakes-temporal evaluations to save and load preprocessed CASes as XMI. Should significantly
reduce the time to run experiments.

Removed:
    incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/AnnotationCopier.java
Modified:
    incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DocTimeRelAnnotator.java
    incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventTimeRelationAnnotator.java
    incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfAnnotationSpans_ImplBase.java
    incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfClearTKEventSpans.java
    incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfClearTKTimeSpans.java
    incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventProperties.java
    incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventSpans.java
    incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTemporalRelations.java
    incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTimeSpans.java
    incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java

Modified: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DocTimeRelAnnotator.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DocTimeRelAnnotator.java?rev=1445008&r1=1445007&r2=1445008&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DocTimeRelAnnotator.java
(original)
+++ incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DocTimeRelAnnotator.java
Tue Feb 12 00:52:23 2013
@@ -89,15 +89,15 @@ public class DocTimeRelAnnotator extends
   @Override
   public void process(JCas jCas) throws AnalysisEngineProcessException {
     for (EventMention eventMention : JCasUtil.select(jCas, EventMention.class)) {
-      List<Feature> features = this.contextExtractor.extract(jCas, eventMention);
-      if (this.isTraining()) {
-        String outcome = eventMention.getEvent().getProperties().getDocTimeRel();
-        this.dataWriter.write(new Instance<String>(outcome, features));
-      } else {
-    	if (eventMention.getEvent() != null){
-    		String outcome = this.classifier.classify(features);
-    		eventMention.getEvent().getProperties().setDocTimeRel(outcome);
-    	}
+      if (eventMention.getEvent() != null) {
+        List<Feature> features = this.contextExtractor.extract(jCas, eventMention);
+        if (this.isTraining()) {
+          String outcome = eventMention.getEvent().getProperties().getDocTimeRel();
+          this.dataWriter.write(new Instance<String>(outcome, features));
+        } else {
+          String outcome = this.classifier.classify(features);
+          eventMention.getEvent().getProperties().setDocTimeRel(outcome);
+        }
       }
     }
   }

Modified: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventTimeRelationAnnotator.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventTimeRelationAnnotator.java?rev=1445008&r1=1445007&r2=1445008&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventTimeRelationAnnotator.java
(original)
+++ incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventTimeRelationAnnotator.java
Tue Feb 12 00:52:23 2013
@@ -68,8 +68,11 @@ public class EventTimeRelationAnnotator 
       Sentence sentence) {
     List<IdentifiedAnnotationPair> pairs = Lists.newArrayList();
     for (EventMention event : JCasUtil.selectCovered(jCas, EventMention.class, sentence)) {
-      for (TimeMention time : JCasUtil.selectCovered(jCas, TimeMention.class, sentence)) {
-        pairs.add(new IdentifiedAnnotationPair(event, time));
+      // ignore subclasses like Procedure and Disease/Disorder
+      if (event.getClass().equals(EventMention.class)) {
+        for (TimeMention time : JCasUtil.selectCovered(jCas, TimeMention.class, sentence)) {
+          pairs.add(new IdentifiedAnnotationPair(event, time));
+        }
       }
     }
     return pairs;

Modified: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfAnnotationSpans_ImplBase.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfAnnotationSpans_ImplBase.java?rev=1445008&r1=1445007&r2=1445008&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfAnnotationSpans_ImplBase.java
(original)
+++ incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfAnnotationSpans_ImplBase.java
Tue Feb 12 00:52:23 2013
@@ -50,7 +50,6 @@ public abstract class EvaluationOfAnnota
     Evaluation_ImplBase<AnnotationStatistics<String>> {
 
   private final Logger logger = Logger.getLogger(this.getClass().getName());
-
   public void setLogging(Level level, File outputFile) throws IOException {
     if (!outputFile.getParentFile().exists()) {
       outputFile.getParentFile().mkdirs();
@@ -66,14 +65,18 @@ public abstract class EvaluationOfAnnota
     this.logger.addHandler(handler);
   }
 
+  private Class<? extends Annotation> annotationClass;
+
   public EvaluationOfAnnotationSpans_ImplBase(
       File baseDirectory,
       File rawTextDirectory,
       File knowtatorXMLDirectory,
-      Set<AnnotatorType> annotatorFlags) {
-    super(baseDirectory, rawTextDirectory, knowtatorXMLDirectory, annotatorFlags);
+      File xmiDirectory,
+      Class<? extends Annotation> annotationClass) {
+    super(baseDirectory, rawTextDirectory, knowtatorXMLDirectory, xmiDirectory);
+    this.annotationClass = annotationClass;
   }
-
+  
   protected abstract AnalysisEngineDescription getDataWriterDescription(File directory)
       throws ResourceInitializationException;
 
@@ -81,8 +84,8 @@ public abstract class EvaluationOfAnnota
 
   @Override
   protected void train(CollectionReader collectionReader, File directory) throws Exception {
-    AggregateBuilder aggregateBuilder = new AggregateBuilder();
-    aggregateBuilder.add(this.getPreprocessorTrainDescription());
+    AggregateBuilder aggregateBuilder = this.getPreprocessorAggregateBuilder();
+    aggregateBuilder.add(CopyFromGold.getDescription(this.annotationClass));
     aggregateBuilder.add(this.getDataWriterDescription(directory));
     SimplePipeline.runPipeline(collectionReader, aggregateBuilder.createAggregate());
     this.trainAndPackage(directory);
@@ -98,8 +101,7 @@ public abstract class EvaluationOfAnnota
   @Override
   protected AnnotationStatistics<String> test(CollectionReader collectionReader, File directory)
       throws Exception {
-    AggregateBuilder aggregateBuilder = new AggregateBuilder();
-    aggregateBuilder.add(this.getPreprocessorTestDescription());
+    AggregateBuilder aggregateBuilder = this.getPreprocessorAggregateBuilder();
     aggregateBuilder.add(this.getAnnotatorDescription(directory));
 
     AnnotationStatistics<String> stats = new AnnotationStatistics<String>();

Modified: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfClearTKEventSpans.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfClearTKEventSpans.java?rev=1445008&r1=1445007&r2=1445008&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfClearTKEventSpans.java
(original)
+++ incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfClearTKEventSpans.java
Tue Feb 12 00:52:23 2013
@@ -20,7 +20,6 @@ package org.apache.ctakes.temporal.eval;
 
 import java.io.File;
 import java.util.Collection;
-import java.util.EnumSet;
 import java.util.List;
 import java.util.logging.Level;
 
@@ -48,30 +47,25 @@ public class EvaluationOfClearTKEventSpa
 
   public static void main(String[] args) throws Exception {
     Options options = CliFactory.parseArguments(Options.class, args);
+    List<Integer> patientSets = options.getPatients().getList();
+    List<Integer> trainItems = THYMEData.getTrainPatientSets(patientSets);
+    List<Integer> devItems = THYMEData.getDevPatientSets(patientSets);
     EvaluationOfClearTKEventSpans evaluation = new EvaluationOfClearTKEventSpans(
         new File("target/eval/cleartk-event-spans"),
         options.getRawTextDirectory(),
-        options.getKnowtatorXMLDirectory());
+        options.getKnowtatorXMLDirectory(),
+        options.getXMIDirectory());
     evaluation.setLogging(Level.FINE, new File("target/eval/cleartk-event-errors.log"));
-    List<AnnotationStatistics<String>> foldStats = evaluation.crossValidation(
-        options.getPatients().getList(),
-        4);
-    for (AnnotationStatistics<String> stats : foldStats) {
-      System.err.println(stats);
-    }
-    System.err.println("OVERALL");
-    System.err.println(AnnotationStatistics.addAll(foldStats));
+    AnnotationStatistics<String> stats = evaluation.trainAndTest(trainItems, devItems);
+    System.err.println(stats);
   }
 
   public EvaluationOfClearTKEventSpans(
       File baseDirectory,
       File rawTextDirectory,
-      File knowtatorXMLDirectory) {
-    super(
-        baseDirectory,
-        rawTextDirectory,
-        knowtatorXMLDirectory,
-        EnumSet.noneOf(AnnotatorType.class));
+      File knowtatorXMLDirectory,
+      File xmiDirectory) {
+    super(baseDirectory, rawTextDirectory, knowtatorXMLDirectory, xmiDirectory, EventMention.class);
   }
 
   @Override
@@ -101,7 +95,7 @@ public class EvaluationOfClearTKEventSpa
 
   @Override
   protected Collection<? extends Annotation> getGoldAnnotations(JCas jCas) {
-    return JCasUtil.select(jCas, EventMention.class);
+    return selectExact(jCas, EventMention.class);
   }
 
   @Override

Modified: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfClearTKTimeSpans.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfClearTKTimeSpans.java?rev=1445008&r1=1445007&r2=1445008&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfClearTKTimeSpans.java
(original)
+++ incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfClearTKTimeSpans.java
Tue Feb 12 00:52:23 2013
@@ -20,7 +20,6 @@ package org.apache.ctakes.temporal.eval;
 
 import java.io.File;
 import java.util.Collection;
-import java.util.EnumSet;
 import java.util.List;
 import java.util.logging.Level;
 
@@ -47,30 +46,25 @@ public class EvaluationOfClearTKTimeSpan
 
   public static void main(String[] args) throws Exception {
     Options options = CliFactory.parseArguments(Options.class, args);
+    List<Integer> patientSets = options.getPatients().getList();
+    List<Integer> trainItems = THYMEData.getTrainPatientSets(patientSets);
+    List<Integer> devItems = THYMEData.getDevPatientSets(patientSets);
     EvaluationOfClearTKTimeSpans evaluation = new EvaluationOfClearTKTimeSpans(
         new File("target/eval/cleartk-time-spans"),
         options.getRawTextDirectory(),
-        options.getKnowtatorXMLDirectory());
+        options.getKnowtatorXMLDirectory(),
+        options.getXMIDirectory());
     evaluation.setLogging(Level.FINE, new File("target/eval/cleartk-time-errors.log"));
-    List<AnnotationStatistics<String>> foldStats = evaluation.crossValidation(
-        options.getPatients().getList(),
-        4);
-    for (AnnotationStatistics<String> stats : foldStats) {
-      System.err.println(stats);
-    }
-    System.err.println("OVERALL");
-    System.err.println(AnnotationStatistics.addAll(foldStats));
+    AnnotationStatistics<String> stats = evaluation.trainAndTest(trainItems, devItems);
+    System.err.println(stats);
   }
 
   public EvaluationOfClearTKTimeSpans(
       File baseDirectory,
       File rawTextDirectory,
-      File knowtatorXMLDirectory) {
-    super(
-        baseDirectory,
-        rawTextDirectory,
-        knowtatorXMLDirectory,
-        EnumSet.noneOf(AnnotatorType.class));
+      File knowtatorXMLDirectory,
+      File xmiDirectory) {
+    super(baseDirectory, rawTextDirectory, knowtatorXMLDirectory, xmiDirectory, TimeMention.class);
   }
 
   @Override

Modified: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventProperties.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventProperties.java?rev=1445008&r1=1445007&r2=1445008&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventProperties.java
(original)
+++ incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventProperties.java
Tue Feb 12 00:52:23 2013
@@ -21,7 +21,6 @@ package org.apache.ctakes.temporal.eval;
 import java.io.File;
 import java.util.Arrays;
 import java.util.Collection;
-import java.util.EnumSet;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -34,7 +33,6 @@ import org.apache.uima.cas.CAS;
 import org.apache.uima.cas.Feature;
 import org.apache.uima.collection.CollectionReader;
 import org.apache.uima.jcas.JCas;
-import org.apache.uima.jcas.cas.TOP;
 import org.cleartk.classifier.jar.JarClassifierBuilder;
 import org.cleartk.classifier.libsvm.LIBSVMStringOutcomeDataWriter;
 import org.cleartk.eval.AnnotationStatistics;
@@ -57,57 +55,35 @@ public class EvaluationOfEventProperties
 
   public static void main(String[] args) throws Exception {
     Options options = CliFactory.parseArguments(Options.class, args);
+    List<Integer> patientSets = options.getPatients().getList();
+    List<Integer> trainItems = THYMEData.getTrainPatientSets(patientSets);
+    List<Integer> devItems = THYMEData.getDevPatientSets(patientSets);
     EvaluationOfEventProperties evaluation = new EvaluationOfEventProperties(
         new File("target/eval/event-properties"),
         options.getRawTextDirectory(),
-        options.getKnowtatorXMLDirectory());
-    List<Map<String, AnnotationStatistics<String>>> foldStats = evaluation.crossValidation(
-        options.getPatients().getList(), 4);
-    Map<String, AnnotationStatistics<String>> overallStats = new HashMap<String, AnnotationStatistics<String>>();
-    for (String name : PROPERTY_NAMES) {
-      overallStats.put(name, new AnnotationStatistics<String>());
-    }
-    for (Map<String, AnnotationStatistics<String>> propertyStats : foldStats) {
-      for (String key : propertyStats.keySet()) {
-        overallStats.get(key).addAll(propertyStats.get(key));
-      }
-    }
+        options.getKnowtatorXMLDirectory(),
+        options.getXMIDirectory());
+    Map<String, AnnotationStatistics<String>> stats = evaluation.trainAndTest(trainItems, devItems);
     for (String name : PROPERTY_NAMES) {
       System.err.println("====================");
       System.err.println(name);
-      for (int i = 0; i < foldStats.size(); ++i) {
-        System.err.println("--------------------");
-        System.err.println("Fold " + i);
-        System.err.println(foldStats.get(i).get(name));
-      }
       System.err.println("--------------------");
-      System.err.println("Overall");
-      System.err.println(overallStats.get(name));
+      System.err.println(stats.get(name));
     }
   }
 
   public EvaluationOfEventProperties(
       File baseDirectory,
       File rawTextDirectory,
-      File knowtatorXMLDirectory) {
-    super(
-        baseDirectory,
-        rawTextDirectory,
-        knowtatorXMLDirectory,
-        EnumSet.of(AnnotatorType.PART_OF_SPEECH_TAGS));
-  }
-
-  @Override
-  protected List<Class<? extends TOP>> getAnnotationClassesThatShouldBeGoldAtTestTime() {
-    List<Class<? extends TOP>> result = super.getAnnotationClassesThatShouldBeGoldAtTestTime();
-    result.add(EventMention.class);
-    return result;
+      File knowtatorXMLDirectory,
+      File xmiDirectory) {
+    super(baseDirectory, rawTextDirectory, knowtatorXMLDirectory, xmiDirectory);
   }
 
   @Override
   protected void train(CollectionReader collectionReader, File directory) throws Exception {
-    AggregateBuilder aggregateBuilder = new AggregateBuilder();
-    aggregateBuilder.add(this.getPreprocessorTrainDescription());
+    AggregateBuilder aggregateBuilder = this.getPreprocessorAggregateBuilder();
+    aggregateBuilder.add(CopyFromGold.getDescription(EventMention.class));
     aggregateBuilder.add(DocTimeRelAnnotator.createDataWriterDescription(
         LIBSVMStringOutcomeDataWriter.class,
         directory));
@@ -119,8 +95,8 @@ public class EvaluationOfEventProperties
   protected Map<String, AnnotationStatistics<String>> test(
       CollectionReader collectionReader,
       File directory) throws Exception {
-    AggregateBuilder aggregateBuilder = new AggregateBuilder();
-    aggregateBuilder.add(this.getPreprocessorTestDescription());
+    AggregateBuilder aggregateBuilder = this.getPreprocessorAggregateBuilder();
+    aggregateBuilder.add(CopyFromGold.getDescription(EventMention.class));
     aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ClearEventProperties.class));
     aggregateBuilder.add(DocTimeRelAnnotator.createAnnotatorDescription(directory));
 
@@ -136,8 +112,8 @@ public class EvaluationOfEventProperties
     for (JCas jCas : new JCasIterable(collectionReader, aggregateBuilder.createAggregate())) {
       JCas goldView = jCas.getView(GOLD_VIEW_NAME);
       JCas systemView = jCas.getView(CAS.NAME_DEFAULT_SOFA);
-      Collection<EventMention> goldEvents = JCasUtil.select(goldView, EventMention.class);
-      Collection<EventMention> systemEvents = JCasUtil.select(systemView, EventMention.class);
+      Collection<EventMention> goldEvents = selectExact(goldView, EventMention.class);
+      Collection<EventMention> systemEvents = selectExact(systemView, EventMention.class);
       for (String name : PROPERTY_NAMES) {
         statsMap.get(name).add(
             goldEvents,

Modified: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventSpans.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventSpans.java?rev=1445008&r1=1445007&r2=1445008&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventSpans.java
(original)
+++ incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventSpans.java
Tue Feb 12 00:52:23 2013
@@ -20,18 +20,14 @@ package org.apache.ctakes.temporal.eval;
 
 import java.io.File;
 import java.util.Collection;
-import java.util.EnumSet;
 import java.util.List;
 import java.util.logging.Level;
 
 import org.apache.ctakes.temporal.ae.EventAnnotator;
 import org.apache.ctakes.temporal.ae.feature.selection.FeatureSelection;
-import org.apache.ctakes.typesystem.type.textsem.EntityMention;
 import org.apache.ctakes.typesystem.type.textsem.EventMention;
 import org.apache.uima.analysis_engine.AnalysisEngineDescription;
-import org.apache.uima.collection.CollectionReader;
 import org.apache.uima.jcas.JCas;
-import org.apache.uima.jcas.cas.TOP;
 import org.apache.uima.jcas.tcas.Annotation;
 import org.apache.uima.resource.ResourceInitializationException;
 import org.cleartk.classifier.Instance;
@@ -40,9 +36,6 @@ import org.cleartk.classifier.feature.tr
 import org.cleartk.classifier.jar.JarClassifierBuilder;
 import org.cleartk.classifier.libsvm.LIBSVMStringOutcomeDataWriter;
 import org.cleartk.eval.AnnotationStatistics;
-import org.uimafit.factory.AggregateBuilder;
-import org.uimafit.pipeline.SimplePipeline;
-import org.uimafit.util.JCasUtil;
 
 import com.lexicalscope.jewel.cli.CliFactory;
 import com.lexicalscope.jewel.cli.Option;
@@ -56,7 +49,7 @@ public class EvaluationOfEventSpans exte
 
     @Option(longName = "featureSelectionThreshold", defaultValue = "0")
     public float getFeatureSelectionThreshold();
-    
+
     @Option(longName = "SMOTENeighborNumber", defaultValue = "1")
     public float getSMOTENeighborNumber();
   }
@@ -70,6 +63,7 @@ public class EvaluationOfEventSpans exte
         new File("target/eval/event-spans"),
         options.getRawTextDirectory(),
         options.getKnowtatorXMLDirectory(),
+        options.getXMIDirectory(),
         options.getProbabilityOfKeepingANegativeExample(),
         options.getFeatureSelectionThreshold(),
         options.getSMOTENeighborNumber());
@@ -81,22 +75,18 @@ public class EvaluationOfEventSpans exte
   private float probabilityOfKeepingANegativeExample;
 
   private float featureSelectionThreshold;
-  
+
   private float smoteNeighborNumber;
 
   public EvaluationOfEventSpans(
       File baseDirectory,
       File rawTextDirectory,
       File knowtatorXMLDirectory,
+      File xmiDirectory,
       float probabilityOfKeepingANegativeExample,
-      float featureSelectionThreshold, float numOfSmoteNeighbors) {
-    super(baseDirectory, rawTextDirectory, knowtatorXMLDirectory, EnumSet.of(
-        AnnotatorType.PART_OF_SPEECH_TAGS,
-        AnnotatorType.CHUNKS,
-        AnnotatorType.UMLS_NAMED_ENTITIES,
-        AnnotatorType.DEPENDENCIES,
-        AnnotatorType.SEMANTIC_ROLES));
-        //AnnotatorType.LEXICAL_VARIANTS,
+      float featureSelectionThreshold,
+      float numOfSmoteNeighbors) {
+    super(baseDirectory, rawTextDirectory, knowtatorXMLDirectory, xmiDirectory, EventMention.class);
     this.probabilityOfKeepingANegativeExample = probabilityOfKeepingANegativeExample;
     this.featureSelectionThreshold = featureSelectionThreshold;
     this.smoteNeighborNumber = numOfSmoteNeighbors;
@@ -117,12 +107,7 @@ public class EvaluationOfEventSpans exte
   }
 
   @Override
-  protected void train(CollectionReader collectionReader, File directory) throws Exception {
-    AggregateBuilder aggregateBuilder = new AggregateBuilder();
-    aggregateBuilder.add(this.getPreprocessorTrainDescription());
-    aggregateBuilder.add(this.getDataWriterDescription(directory));
-    SimplePipeline.runPipeline(collectionReader, aggregateBuilder.createAggregate());
-
+  protected void trainAndPackage(File directory) throws Exception {
     if (this.featureSelectionThreshold > 0) {
       // Extracting features and writing instances
       Iterable<Instance<String>> instances = InstanceStream.loadFromDirectory(directory);
@@ -138,22 +123,10 @@ public class EvaluationOfEventSpans exte
       dataWriter.finish();
     }
 
-    this.trainAndPackage(directory);
-  }
-
-  @Override
-  protected void trainAndPackage(File directory) throws Exception {
     JarClassifierBuilder.trainAndPackage(directory, "-c", "10000");
   }
 
   @Override
-  protected List<Class<? extends TOP>> getAnnotationClassesThatShouldBeGoldAtTestTime() {
-    List<Class<? extends TOP>> result = super.getAnnotationClassesThatShouldBeGoldAtTestTime();
-    result.add(EntityMention.class);
-    return result;
-  }
-
-  @Override
   protected AnalysisEngineDescription getAnnotatorDescription(File directory)
       throws ResourceInitializationException {
     return EventAnnotator.createAnnotatorDescription(directory);
@@ -161,11 +134,11 @@ public class EvaluationOfEventSpans exte
 
   @Override
   protected Collection<? extends Annotation> getGoldAnnotations(JCas jCas) {
-    return JCasUtil.select(jCas, EventMention.class);
+    return selectExact(jCas, EventMention.class);
   }
 
   @Override
   protected Collection<? extends Annotation> getSystemAnnotations(JCas jCas) {
-    return JCasUtil.select(jCas, EventMention.class);
+    return selectExact(jCas, EventMention.class);
   }
 }

Modified: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTemporalRelations.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTemporalRelations.java?rev=1445008&r1=1445007&r2=1445008&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTemporalRelations.java
(original)
+++ incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTemporalRelations.java
Tue Feb 12 00:52:23 2013
@@ -20,7 +20,6 @@ package org.apache.ctakes.temporal.eval;
 
 import java.io.File;
 import java.util.Collection;
-import java.util.EnumSet;
 import java.util.List;
 import java.util.Map;
 
@@ -36,7 +35,6 @@ import org.apache.uima.cas.CAS;
 import org.apache.uima.cas.CASException;
 import org.apache.uima.collection.CollectionReader;
 import org.apache.uima.jcas.JCas;
-import org.apache.uima.jcas.cas.TOP;
 import org.cleartk.classifier.jar.JarClassifierBuilder;
 import org.cleartk.classifier.libsvm.LIBSVMStringOutcomeDataWriter;
 import org.cleartk.eval.AnnotationStatistics;
@@ -64,7 +62,8 @@ public class EvaluationOfTemporalRelatio
     EvaluationOfTemporalRelations evaluation = new EvaluationOfTemporalRelations(
         new File("target/eval/temporal-relations"),
         options.getRawTextDirectory(),
-        options.getKnowtatorXMLDirectory());
+        options.getKnowtatorXMLDirectory(),
+        options.getXMIDirectory());
     AnnotationStatistics<String> stats = evaluation.trainAndTest(trainItems, devItems);
     System.err.println(stats);
   }
@@ -72,26 +71,15 @@ public class EvaluationOfTemporalRelatio
   public EvaluationOfTemporalRelations(
       File baseDirectory,
       File rawTextDirectory,
-      File knowtatorXMLDirectory) {
-    super(
-        baseDirectory,
-        rawTextDirectory,
-        knowtatorXMLDirectory,
-        EnumSet.of(AnnotatorType.PART_OF_SPEECH_TAGS));
-  }
-
-  @Override
-  protected List<Class<? extends TOP>> getAnnotationClassesThatShouldBeGoldAtTestTime() {
-    List<Class<? extends TOP>> result = super.getAnnotationClassesThatShouldBeGoldAtTestTime();
-    result.add(EventMention.class);
-    result.add(TimeMention.class);
-    return result;
+      File knowtatorXMLDirectory,
+      File xmiDirectory) {
+    super(baseDirectory, rawTextDirectory, knowtatorXMLDirectory, xmiDirectory);
   }
 
   @Override
   protected void train(CollectionReader collectionReader, File directory) throws Exception {
-    AggregateBuilder aggregateBuilder = new AggregateBuilder();
-    aggregateBuilder.add(this.getPreprocessorTrainDescription());
+    AggregateBuilder aggregateBuilder = this.getPreprocessorAggregateBuilder();
+    aggregateBuilder.add(CopyFromGold.getDescription(EventMention.class, TimeMention.class, BinaryTextRelation.class));
     aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(RemoveNonTLINKRelations.class));
     aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(RemoveCrossSentenceRelations.class));
     aggregateBuilder.add(EventTimeRelationAnnotator.createDataWriterDescription(
@@ -105,8 +93,8 @@ public class EvaluationOfTemporalRelatio
   @Override
   protected AnnotationStatistics<String> test(CollectionReader collectionReader, File directory)
       throws Exception {
-    AggregateBuilder aggregateBuilder = new AggregateBuilder();
-    aggregateBuilder.add(this.getPreprocessorTestDescription());
+    AggregateBuilder aggregateBuilder = this.getPreprocessorAggregateBuilder();
+    aggregateBuilder.add(CopyFromGold.getDescription(EventMention.class, TimeMention.class));
     aggregateBuilder.add(
         AnalysisEngineFactory.createPrimitiveDescription(RemoveNonTLINKRelations.class),
         CAS.NAME_DEFAULT_SOFA,

Modified: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTimeSpans.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTimeSpans.java?rev=1445008&r1=1445007&r2=1445008&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTimeSpans.java
(original)
+++ incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfTimeSpans.java
Tue Feb 12 00:52:23 2013
@@ -20,7 +20,6 @@ package org.apache.ctakes.temporal.eval;
 
 import java.io.File;
 import java.util.Collection;
-import java.util.EnumSet;
 import java.util.List;
 import java.util.logging.Level;
 
@@ -33,7 +32,6 @@ import org.apache.uima.resource.Resource
 import org.cleartk.classifier.jar.JarClassifierBuilder;
 import org.cleartk.classifier.libsvm.LIBSVMStringOutcomeDataWriter;
 import org.cleartk.eval.AnnotationStatistics;
-import org.uimafit.util.JCasUtil;
 
 import com.lexicalscope.jewel.cli.CliFactory;
 
@@ -47,7 +45,8 @@ public class EvaluationOfTimeSpans exten
     EvaluationOfTimeSpans evaluation = new EvaluationOfTimeSpans(
         new File("target/eval/time-spans"),
         options.getRawTextDirectory(),
-        options.getKnowtatorXMLDirectory());
+        options.getKnowtatorXMLDirectory(),
+        options.getXMIDirectory());
     evaluation.setLogging(Level.FINE, new File("target/eval/ctakes-time-errors.log"));
     AnnotationStatistics<String> stats = evaluation.trainAndTest(trainItems, devItems);
     System.err.println(stats);
@@ -56,12 +55,9 @@ public class EvaluationOfTimeSpans exten
   public EvaluationOfTimeSpans(
       File baseDirectory,
       File rawTextDirectory,
-      File knowtatorXMLDirectory) {
-    super(
-        baseDirectory,
-        rawTextDirectory,
-        knowtatorXMLDirectory,
-        EnumSet.of(AnnotatorType.PART_OF_SPEECH_TAGS));
+      File knowtatorXMLDirectory,
+      File xmiDirectory) {
+    super(baseDirectory, rawTextDirectory, knowtatorXMLDirectory, xmiDirectory, TimeMention.class);
   }
 
   @Override
@@ -83,11 +79,11 @@ public class EvaluationOfTimeSpans exten
 
   @Override
   protected Collection<? extends Annotation> getGoldAnnotations(JCas jCas) {
-    return JCasUtil.select(jCas, TimeMention.class);
+    return selectExact(jCas, TimeMention.class);
   }
 
   @Override
   protected Collection<? extends Annotation> getSystemAnnotations(JCas jCas) {
-    return JCasUtil.select(jCas, TimeMention.class);
+    return selectExact(jCas, TimeMention.class);
   }
 }

Modified: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java?rev=1445008&r1=1445007&r2=1445008&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java (original)
+++ incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java Tue Feb 12 00:52:23 2013
@@ -19,9 +19,11 @@
 package org.apache.ctakes.temporal.eval;
 
 import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
-import java.util.Set;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
@@ -47,21 +49,34 @@ import org.apache.ctakes.typesystem.type
 import org.apache.ctakes.typesystem.type.textsem.EntityMention;
 import org.apache.ctakes.typesystem.type.textspan.LookupWindowAnnotation;
 import org.apache.ctakes.typesystem.type.textspan.Segment;
+import org.apache.uima.UimaContext;
 import org.apache.uima.analysis_engine.AnalysisEngineDescription;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.CASException;
+import org.apache.uima.cas.Feature;
+import org.apache.uima.cas.impl.XmiCasDeserializer;
+import org.apache.uima.cas.impl.XmiCasSerializer;
 import org.apache.uima.collection.CollectionReader;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.cas.TOP;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.util.CasCopier;
+import org.apache.uima.util.XMLSerializer;
+import org.cleartk.util.ViewURIUtil;
 import org.cleartk.util.ae.UriToDocumentTextAnnotator;
 import org.cleartk.util.cr.UriCollectionReader;
 import org.uimafit.component.JCasAnnotator_ImplBase;
 import org.uimafit.component.ViewCreatorAnnotator;
 import org.uimafit.component.ViewTextCopierAnnotator;
+import org.uimafit.descriptor.ConfigurationParameter;
 import org.uimafit.factory.AggregateBuilder;
 import org.uimafit.factory.AnalysisEngineFactory;
 import org.uimafit.factory.ExternalResourceFactory;
 import org.uimafit.util.JCasUtil;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
 
 import com.google.common.collect.Lists;
 import com.lexicalscope.jewel.cli.Option;
@@ -69,11 +84,7 @@ import com.lexicalscope.jewel.cli.Option
 public abstract class Evaluation_ImplBase<STATISTICS_TYPE> extends
     org.cleartk.eval.Evaluation_ImplBase<Integer, STATISTICS_TYPE> {
 
-  public enum AnnotatorType {
-    PART_OF_SPEECH_TAGS, UMLS_NAMED_ENTITIES, LEXICAL_VARIANTS, CHUNKS, DEPENDENCIES, SEMANTIC_ROLES
-  }
-
-  protected final String GOLD_VIEW_NAME = "GoldView";
+  public static final String GOLD_VIEW_NAME = "GoldView";
 
   static interface Options {
 
@@ -83,6 +94,9 @@ public abstract class Evaluation_ImplBas
     @Option(longName = "xml")
     public File getKnowtatorXMLDirectory();
 
+    @Option(longName = "xmi")
+    public File getXMIDirectory();
+
     @Option(longName = "patients")
     public CommandLine.IntegerRanges getPatients();
   }
@@ -90,18 +104,21 @@ public abstract class Evaluation_ImplBas
   protected File rawTextDirectory;
 
   protected File knowtatorXMLDirectory;
-
-  private Set<AnnotatorType> annotatorFlags;
+  
+  protected File xmiDirectory;
+  
+  private boolean xmiExists;
 
   public Evaluation_ImplBase(
       File baseDirectory,
       File rawTextDirectory,
       File knowtatorXMLDirectory,
-      Set<AnnotatorType> annotatorFlags) {
+      File xmiDirectory) {
     super(baseDirectory);
     this.rawTextDirectory = rawTextDirectory;
     this.knowtatorXMLDirectory = knowtatorXMLDirectory;
-    this.annotatorFlags = annotatorFlags;
+    this.xmiDirectory = xmiDirectory;
+    this.xmiExists = this.xmiDirectory.exists();
   }
 
   @Override
@@ -116,69 +133,52 @@ public abstract class Evaluation_ImplBas
     return UriCollectionReader.getCollectionReaderFromFiles(files);
   }
 
-  protected AnalysisEngineDescription getPreprocessorTrainDescription() throws Exception {
-    return this.getPreprocessorDescription(PipelineType.TRAIN);
-  }
-
-  protected AnalysisEngineDescription getPreprocessorTestDescription() throws Exception {
-    return this.getPreprocessorDescription(PipelineType.TEST);
-  }
-
-  protected List<Class<? extends TOP>> getAnnotationClassesThatShouldBeGoldAtTestTime() {
-    return new ArrayList<Class<? extends TOP>>();
-  }
-
-  private static enum PipelineType {
-    TRAIN, TEST
-  }
-
-  private AnalysisEngineDescription getPreprocessorDescription(PipelineType pipelineType)
+  protected AggregateBuilder getPreprocessorAggregateBuilder()
       throws Exception {
     AggregateBuilder aggregateBuilder = new AggregateBuilder();
     aggregateBuilder.add(UriToDocumentTextAnnotator.getDescription());
-    switch (pipelineType) {
-      case TRAIN:
-        aggregateBuilder.add(THYMEKnowtatorXMLReader.getDescription(this.knowtatorXMLDirectory));
-        break;
-      case TEST:
-        aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
-            ViewCreatorAnnotator.class,
-            ViewCreatorAnnotator.PARAM_VIEW_NAME,
-            GOLD_VIEW_NAME));
-        aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
-            ViewTextCopierAnnotator.class,
-            ViewTextCopierAnnotator.PARAM_SOURCE_VIEW_NAME,
-            CAS.NAME_DEFAULT_SOFA,
-            ViewTextCopierAnnotator.PARAM_DESTINATION_VIEW_NAME,
-            GOLD_VIEW_NAME));
-        aggregateBuilder.add(
-            THYMEKnowtatorXMLReader.getDescription(this.knowtatorXMLDirectory),
-            CAS.NAME_DEFAULT_SOFA,
-            GOLD_VIEW_NAME);
-        for (Class<? extends TOP> annotationClass : this.getAnnotationClassesThatShouldBeGoldAtTestTime()) {
-          aggregateBuilder.add(AnnotationCopier.getDescription(
-              GOLD_VIEW_NAME,
-              CAS.NAME_DEFAULT_SOFA,
-              annotationClass));
-        }
-        break;
-    }
-    // identify segments
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(SegmentsFromBracketedSectionTagsAnnotator.class));
-    // identify sentences
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
-        SentenceDetector.class,
-        "MaxentModel",
-        ExternalResourceFactory.createExternalResourceDescription(
-            SuffixMaxentModelResourceImpl.class,
-            SentenceDetector.class.getResource("../sentdetect/sdmed.mod"))));
-    // identify tokens
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(TokenizerAnnotatorPTB.class));
-    // merge some tokens
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ContextDependentTokenizerAnnotator.class));
 
-    // identify part-of-speech tags if requested
-    if (this.annotatorFlags.contains(AnnotatorType.PART_OF_SPEECH_TAGS)) {
+    if (this.xmiExists) {
+      
+      // read the XMI from the directory
+      aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
+          XMIReader.class,
+          XMIReader.PARAM_XMI_DIRECTORY,
+          this.xmiDirectory));
+
+    } else {
+
+      // read manual annotations into gold view
+      aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
+          ViewCreatorAnnotator.class,
+          ViewCreatorAnnotator.PARAM_VIEW_NAME,
+          GOLD_VIEW_NAME));
+      aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
+          ViewTextCopierAnnotator.class,
+          ViewTextCopierAnnotator.PARAM_SOURCE_VIEW_NAME,
+          CAS.NAME_DEFAULT_SOFA,
+          ViewTextCopierAnnotator.PARAM_DESTINATION_VIEW_NAME,
+          GOLD_VIEW_NAME));
+      aggregateBuilder.add(
+          THYMEKnowtatorXMLReader.getDescription(this.knowtatorXMLDirectory),
+          CAS.NAME_DEFAULT_SOFA,
+          GOLD_VIEW_NAME);
+
+      // identify segments
+      aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(SegmentsFromBracketedSectionTagsAnnotator.class));
+      // identify sentences
+      aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
+          SentenceDetector.class,
+          "MaxentModel",
+          ExternalResourceFactory.createExternalResourceDescription(
+              SuffixMaxentModelResourceImpl.class,
+              SentenceDetector.class.getResource("../sentdetect/sdmed.mod"))));
+      // identify tokens
+      aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(TokenizerAnnotatorPTB.class));
+      // merge some tokens
+      aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ContextDependentTokenizerAnnotator.class));
+  
+      // identify part-of-speech tags
       aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
           POSTagger.class,
           POSTagger.POS_MODEL_FILE_PARAM,
@@ -187,9 +187,7 @@ public abstract class Evaluation_ImplBas
           "org/apache/ctakes/postagger/models/tag.dictionary.txt",
           POSTagger.CASE_SENSITIVE_PARAM,
           true));
-    }
-    
-    if (this.annotatorFlags.contains(AnnotatorType.CHUNKS)) {
+      
       // identify chunks
       aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
           Chunker.class,
@@ -197,12 +195,9 @@ public abstract class Evaluation_ImplBas
           Chunker.class.getResource("../models/chunk-model.claims-1.5.zip").toURI().getPath(),
           Chunker.CHUNKER_CREATOR_CLASS_PARAM,
           DefaultChunkCreator.class));
-    }
+  
+      // identify UMLS named entities
 
-    // identify UMLS named entities if requested
-    if (this.annotatorFlags.contains(AnnotatorType.UMLS_NAMED_ENTITIES)) {
-      // remove gold mentions if they're there (we'll add cTAKES mentions later instead)
-      aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(EntityMentionRemover.class));
       // adjust NP in NP NP to span both
       aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
           ChunkAdjuster.class,
@@ -269,10 +264,8 @@ public abstract class Evaluation_ImplBas
               true,
               "IndexDirectory",
               new File("target/unpacked/org/apache/ctakes/dictionary/lookup/OrangeBook").getAbsoluteFile())));
-    }
-
-    // add lvg annotator
-    if (this.annotatorFlags.contains(AnnotatorType.LEXICAL_VARIANTS)) {
+  
+      // add lvg annotator
       String[] XeroxTreebankMap = {
           "adj|JJ",
           "adv|RB",
@@ -331,20 +324,32 @@ public abstract class Evaluation_ImplBas
           "LvgCmdApi",
           ExternalResourceFactory.createExternalResourceDescription(
               LvgCmdApiResourceImpl.class,
-              "org/apache/ctakes/lvg/data/config/lvg.properties"));
+              new File(LvgCmdApiResourceImpl.class.getResource("/org/apache/ctakes/lvg/data/config/lvg.properties").toURI())));
       aggregateBuilder.add(lvgAnnotator);
-    }
-
-    // add dependency parser
-    if (this.annotatorFlags.contains(AnnotatorType.DEPENDENCIES)) {
+  
+      // add dependency parser
       aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ClearParserDependencyParserAE.class));
-    }
-
-    // add semantic role labeler
-    if (this.annotatorFlags.contains(AnnotatorType.SEMANTIC_ROLES)) {
+  
+      // add semantic role labeler
       aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ClearParserSemanticRoleLabelerAE.class));
+      
+      // write out the CAS after all the above annotations 
+      aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
+          XMIWriter.class,
+          XMIWriter.PARAM_XMI_DIRECTORY,
+          this.xmiDirectory));
     }
-    return aggregateBuilder.createAggregateDescription();
+    return aggregateBuilder;
+  }
+  
+  public static <T extends TOP> List<T> selectExact(JCas jCas, Class<T> annotationClass) {
+    List<T> annotations = Lists.newArrayList();
+    for (T annotation : JCasUtil.select(jCas, annotationClass)) {
+      if (annotation.getClass().equals(annotationClass)) {
+        annotations.add(annotation);
+      }
+    }
+    return annotations;
   }
 
   public static class CopyNPChunksToLookupWindowAnnotations extends JCasAnnotator_ImplBase {
@@ -385,4 +390,100 @@ public abstract class Evaluation_ImplBas
       }
     }
   }
+  
+  public static class XMIWriter extends JCasAnnotator_ImplBase {
+    
+    public static final String PARAM_XMI_DIRECTORY = "XMIDirectory";
+    @ConfigurationParameter(name = PARAM_XMI_DIRECTORY, mandatory = true)
+    private File xmiDirectory;
+
+    @Override
+    public void initialize(UimaContext context) throws ResourceInitializationException {
+      super.initialize(context);
+      if (!this.xmiDirectory.exists()) {
+        this.xmiDirectory.mkdirs();
+      }
+    }
+
+    @Override
+    public void process(JCas jCas) throws AnalysisEngineProcessException {
+      String fileName = new File(ViewURIUtil.getURI(jCas).getPath()).getName();
+      File xmiFile = new File(this.xmiDirectory, fileName + ".xmi");
+      try {
+        FileOutputStream outputStream = new FileOutputStream(xmiFile);
+        try {
+          XmiCasSerializer serializer = new XmiCasSerializer(jCas.getTypeSystem());
+          ContentHandler handler = new XMLSerializer(outputStream, false).getContentHandler();
+          serializer.serialize(jCas.getCas(), handler);
+        } finally {
+          outputStream.close();
+        }
+      } catch (SAXException e) {
+        throw new AnalysisEngineProcessException(e);
+      } catch (IOException e) {
+        throw new AnalysisEngineProcessException(e);
+      }
+    }
+  }
+
+  public static class XMIReader extends JCasAnnotator_ImplBase {
+    
+    public static final String PARAM_XMI_DIRECTORY = "XMIDirectory";
+    @ConfigurationParameter(name = PARAM_XMI_DIRECTORY, mandatory = true)
+    private File xmiDirectory;
+
+    @Override
+    public void process(JCas jCas) throws AnalysisEngineProcessException {
+      String fileName = new File(ViewURIUtil.getURI(jCas).getPath()).getName();
+      File xmiFile = new File(this.xmiDirectory, fileName + ".xmi");
+      try {
+        FileInputStream inputStream = new FileInputStream(xmiFile);
+        try {
+          XmiCasDeserializer.deserialize(inputStream, jCas.getCas());
+        } finally {
+          inputStream.close();
+        }
+      } catch (SAXException e) {
+        throw new AnalysisEngineProcessException(e);
+      } catch (IOException e) {
+        throw new AnalysisEngineProcessException(e);
+      }
+    }
+  }
+  
+  public static class CopyFromGold extends JCasAnnotator_ImplBase {
+    
+    public static AnalysisEngineDescription getDescription(Class<?> ... classes) throws ResourceInitializationException {
+      return AnalysisEngineFactory.createPrimitiveDescription(
+          CopyFromGold.class,
+          CopyFromGold.PARAM_ANNOTATION_CLASSES,
+          classes);
+    }
+    
+    public static final String PARAM_ANNOTATION_CLASSES = "AnnotationClasses";
+    @ConfigurationParameter(name = PARAM_ANNOTATION_CLASSES, mandatory = true)
+    private Class<? extends TOP>[] annotationClasses;
+    
+    @Override
+    public void process(JCas jCas) throws AnalysisEngineProcessException {
+      JCas goldView, systemView;
+      try {
+        goldView = jCas.getView(GOLD_VIEW_NAME);
+        systemView = jCas.getView(CAS.NAME_DEFAULT_SOFA);
+      } catch (CASException e) {
+        throw new AnalysisEngineProcessException(e);
+      }
+      CasCopier copier = new CasCopier(goldView.getCas(), systemView.getCas());
+      Feature sofaFeature = jCas.getTypeSystem().getFeatureByFullName(CAS.FEATURE_FULL_NAME_SOFA);
+      for (Class<? extends TOP> annotationClass : this.annotationClasses) {
+        for (TOP annotation : JCasUtil.select(goldView, annotationClass)) {
+          TOP copy = (TOP) copier.copyFs(annotation);
+          if (copy instanceof Annotation) {
+            copy.setFeatureValue(sofaFeature, systemView.getSofa());
+          }
+          copy.addToIndexes(systemView);
+        }
+      }
+    }
+  }
 }



Mime
View raw message