ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From c...@apache.org
Subject svn commit: r1614944 - /ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java
Date Thu, 31 Jul 2014 17:57:40 GMT
Author: clin
Date: Thu Jul 31 17:57:39 2014
New Revision: 1614944

URL: http://svn.apache.org/r1614944
Log:
enable i2b2 xml writer to handle null arg2

Modified:
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java?rev=1614944&r1=1614943&r2=1614944&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java Thu Jul 31 17:57:39 2014
@@ -120,348 +120,348 @@ import com.google.common.collect.Lists;
 import com.lexicalscope.jewel.cli.Option;
 
 public abstract class Evaluation_ImplBase<STATISTICS_TYPE> extends
-    org.cleartk.eval.Evaluation_ImplBase<Integer, STATISTICS_TYPE> {
+org.cleartk.eval.Evaluation_ImplBase<Integer, STATISTICS_TYPE> {
 
-  private static Logger LOGGER = Logger.getLogger(Evaluation_ImplBase.class);
+	private static Logger LOGGER = Logger.getLogger(Evaluation_ImplBase.class);
 
-  public static final String GOLD_VIEW_NAME = "GoldView";
-  
-  enum XMLFormat { Knowtator, Anafora, I2B2 }
-
-  static interface Options {
-
-    @Option(longName = "text", defaultToNull = true)
-    public File getRawTextDirectory();
-
-    @Option(longName = "xml")
-    public File getXMLDirectory();
-
-    @Option(longName = "format", defaultValue="Anafora")
-    public XMLFormat getXMLFormat();
-
-    @Option(longName = "xmi")
-    public File getXMIDirectory();
-
-    @Option(longName = "patients")
-    public CommandLine.IntegerRanges getPatients();
-    
-    @Option(longName = "treebank", defaultToNull=true)
-    public File getTreebankDirectory();
-    
-    @Option(longName = "coreference", defaultToNull=true)
-    public File getCoreferenceDirectory();
-    
-    @Option
-    public boolean getUseGoldTrees();
-    
-    @Option
-    public boolean getGrid();
-    
-    @Option
-    public boolean getPrintErrors();
-    
-    @Option
-    public boolean getPrintOverlappingSpans();
-    
-    @Option
-    public boolean getTest();
-
-    @Option(longName = "kernelParams", defaultToNull=true)
-    public String getKernelParams();
-    
-    @Option(defaultToNull=true)
-    public String getI2B2Output();
-  }
-
-  protected File rawTextDirectory;
-
-  protected File xmlDirectory;
-  
-  protected XMLFormat xmlFormat;
-
-  protected File xmiDirectory;
-
-  private boolean xmiExists;
-
-  protected File treebankDirectory;
-  
-  protected File coreferenceDirectory;
-  
-  protected boolean printErrors = false;
-  
-  protected boolean printOverlapping = false;
-  
-  protected String i2b2Output = null;
-  
-  protected String[] kernelParams;
-  
-  public Evaluation_ImplBase(
-      File baseDirectory,
-      File rawTextDirectory,
-      File xmlDirectory,
-      XMLFormat xmlFormat,
-      File xmiDirectory,
-      File treebankDirectory,
-      File coreferenceDirectory) {
-    super(baseDirectory);
-    this.rawTextDirectory = rawTextDirectory;
-    this.xmlDirectory = xmlDirectory;
-    this.xmlFormat = xmlFormat;
-    this.xmiDirectory = xmiDirectory;
-    this.xmiExists = this.xmiDirectory.exists() && this.xmiDirectory.listFiles().length > 0;
-    this.treebankDirectory = treebankDirectory;
-    this.coreferenceDirectory = coreferenceDirectory;
-  }
-
-  public Evaluation_ImplBase(
-      File baseDirectory,
-      File rawTextDirectory,
-      File xmlDirectory,
-      XMLFormat xmlFormat,
-      File xmiDirectory,
-      File treebankDirectory) {
-    this(baseDirectory, rawTextDirectory, xmlDirectory, xmlFormat,
-        xmiDirectory, treebankDirectory, null);
-  }
-
-  public void setI2B2Output(String outDir){
-    i2b2Output = outDir;
-  }
-  
-  public void prepareXMIsFor(List<Integer> patientSets) throws Exception {
-    boolean needsXMIs = false;
-    for (File textFile : this.getFilesFor(patientSets)) {
-      if (!getXMIFile(this.xmiDirectory, textFile).exists()) {
-        needsXMIs = true;
-        break;
-      }
-    }
-    if (needsXMIs) {
-      CollectionReader reader = this.getCollectionReader(patientSets);
-      AnalysisEngine engine = this.getXMIWritingPreprocessorAggregateBuilder().createAggregate();
-      SimplePipeline.runPipeline(reader, engine);
-    }
-    this.xmiExists = true;
-  }
-  
-  private List<File> getFilesFor(List<Integer> patientSets) throws FileNotFoundException {
-	  List<File> files = new ArrayList<File>();
+	public static final String GOLD_VIEW_NAME = "GoldView";
+
+	enum XMLFormat { Knowtator, Anafora, I2B2 }
+
+	static interface Options {
+
+		@Option(longName = "text", defaultToNull = true)
+		public File getRawTextDirectory();
+
+		@Option(longName = "xml")
+		public File getXMLDirectory();
+
+		@Option(longName = "format", defaultValue="Anafora")
+		public XMLFormat getXMLFormat();
+
+		@Option(longName = "xmi")
+		public File getXMIDirectory();
+
+		@Option(longName = "patients")
+		public CommandLine.IntegerRanges getPatients();
+
+		@Option(longName = "treebank", defaultToNull=true)
+		public File getTreebankDirectory();
+
+		@Option(longName = "coreference", defaultToNull=true)
+		public File getCoreferenceDirectory();
+
+		@Option
+		public boolean getUseGoldTrees();
+
+		@Option
+		public boolean getGrid();
+
+		@Option
+		public boolean getPrintErrors();
+
+		@Option
+		public boolean getPrintOverlappingSpans();
+
+		@Option
+		public boolean getTest();
+
+		@Option(longName = "kernelParams", defaultToNull=true)
+		public String getKernelParams();
+
+		@Option(defaultToNull=true)
+		public String getI2B2Output();
+	}
+
+	protected File rawTextDirectory;
+
+	protected File xmlDirectory;
+
+	protected XMLFormat xmlFormat;
+
+	protected File xmiDirectory;
+
+	private boolean xmiExists;
+
+	protected File treebankDirectory;
+
+	protected File coreferenceDirectory;
+
+	protected boolean printErrors = false;
+
+	protected boolean printOverlapping = false;
+
+	protected String i2b2Output = null;
+
+	protected String[] kernelParams;
+
+	public Evaluation_ImplBase(
+			File baseDirectory,
+			File rawTextDirectory,
+			File xmlDirectory,
+			XMLFormat xmlFormat,
+			File xmiDirectory,
+			File treebankDirectory,
+			File coreferenceDirectory) {
+		super(baseDirectory);
+		this.rawTextDirectory = rawTextDirectory;
+		this.xmlDirectory = xmlDirectory;
+		this.xmlFormat = xmlFormat;
+		this.xmiDirectory = xmiDirectory;
+		this.xmiExists = this.xmiDirectory.exists() && this.xmiDirectory.listFiles().length > 0;
+		this.treebankDirectory = treebankDirectory;
+		this.coreferenceDirectory = coreferenceDirectory;
+	}
+
+	public Evaluation_ImplBase(
+			File baseDirectory,
+			File rawTextDirectory,
+			File xmlDirectory,
+			XMLFormat xmlFormat,
+			File xmiDirectory,
+			File treebankDirectory) {
+		this(baseDirectory, rawTextDirectory, xmlDirectory, xmlFormat,
+				xmiDirectory, treebankDirectory, null);
+	}
+
+	public void setI2B2Output(String outDir){
+		i2b2Output = outDir;
+	}
+
+	public void prepareXMIsFor(List<Integer> patientSets) throws Exception {
+		boolean needsXMIs = false;
+		for (File textFile : this.getFilesFor(patientSets)) {
+			if (!getXMIFile(this.xmiDirectory, textFile).exists()) {
+				needsXMIs = true;
+				break;
+			}
+		}
+		if (needsXMIs) {
+			CollectionReader reader = this.getCollectionReader(patientSets);
+			AnalysisEngine engine = this.getXMIWritingPreprocessorAggregateBuilder().createAggregate();
+			SimplePipeline.runPipeline(reader, engine);
+		}
+		this.xmiExists = true;
+	}
+
+	private List<File> getFilesFor(List<Integer> patientSets) throws FileNotFoundException {
+		List<File> files = new ArrayList<File>();
 		if (this.rawTextDirectory == null
-		    && this.xmlFormat == XMLFormat.Anafora) {
-		  for (File dir : this.xmlDirectory.listFiles()) {
-		    Set<String> ids = new HashSet<String>();
-		    for (Integer set : patientSets) {
-		      ids.add(String.format("ID%03d", set));
-		    }
-		    if (dir.isDirectory()) {
-		      if (ids.contains(dir.getName().substring(0, 5))) {
-		        File file = new File(dir, dir.getName());
-		        if (file.exists()) {
-		          files.add(file);
-		        } else {
-		          LOGGER.warn("Missing note: " + file);
-		        }
-		      } else {
-		        LOGGER.info("Skipping note: " + dir);
-		      }
-		    }
-		  }
+				&& this.xmlFormat == XMLFormat.Anafora) {
+			for (File dir : this.xmlDirectory.listFiles()) {
+				Set<String> ids = new HashSet<String>();
+				for (Integer set : patientSets) {
+					ids.add(String.format("ID%03d", set));
+				}
+				if (dir.isDirectory()) {
+					if (ids.contains(dir.getName().substring(0, 5))) {
+						File file = new File(dir, dir.getName());
+						if (file.exists()) {
+							files.add(file);
+						} else {
+							LOGGER.warn("Missing note: " + file);
+						}
+					} else {
+						LOGGER.info("Skipping note: " + dir);
+					}
+				}
+			}
 		} else if(this.xmlFormat == XMLFormat.I2B2) {
-		  File trainDir = new File(this.xmlDirectory, "training");
-		  File testDir = new File(this.xmlDirectory, "test");
-		  for (Integer pt : patientSets){
-		    File xmlTrain = new File(trainDir, pt+".xml");
-		    File train = new File(trainDir, pt+".xml.txt");
-		    if(train.exists()){
-		      if(xmlTrain.exists()){
-		        files.add(train);
-		      }else{
-		        System.err.println("Text file in training has no corresponding xml -- skipping: " + train);
-		      }
-		    }
-		    File xmlTest = new File(testDir, pt+".xml");
-		    File test = new File(testDir, pt+".xml.txt");
-		    if(xmlTest.exists()){
-		      if(test.exists()){
-		        files.add(test);
-		      }else{
-		        throw new FileNotFoundException("Could not find the test text file -- for cTAKES usage you must copy the text files into the xml directory for the test set.");
-		      }
-		    }
-		    assert !(train.exists() && test.exists());
-		  }
+			File trainDir = new File(this.xmlDirectory, "training");
+			File testDir = new File(this.xmlDirectory, "test");
+			for (Integer pt : patientSets){
+				File xmlTrain = new File(trainDir, pt+".xml");
+				File train = new File(trainDir, pt+".xml.txt");
+				if(train.exists()){
+					if(xmlTrain.exists()){
+						files.add(train);
+					}else{
+						System.err.println("Text file in training has no corresponding xml -- skipping: " + train);
+					}
+				}
+				File xmlTest = new File(testDir, pt+".xml");
+				File test = new File(testDir, pt+".xml.txt");
+				if(xmlTest.exists()){
+					if(test.exists()){
+						files.add(test);
+					}else{
+						throw new FileNotFoundException("Could not find the test text file -- for cTAKES usage you must copy the text files into the xml directory for the test set.");
+					}
+				}
+				assert !(train.exists() && test.exists());
+			}
 		}	else  {
-		  for (Integer set : patientSets) {
-		    final int setNum = set;
-		    for (File file : rawTextDirectory.listFiles(new FilenameFilter(){
-		      @Override
-		      public boolean accept(File dir, String name) {
-		        return name.contains(String.format("ID%03d", setNum));
-		      }})) {
-		      // skip hidden files like .svn
-		      if (!file.isHidden()) {
-		        if(xmlFormat == XMLFormat.Knowtator){
-		          files.add(file);
-		        }else{
-		          // look for equivalent in xml directory:
-		          File xmlFile = new File(xmlDirectory, file.getName());
-              if(xmlFile.exists()){
-                if(coreferenceDirectory != null){
-                  // verify that coref version of xml exists
-                  File corefFile = new File(coreferenceDirectory, file.getName()+".Coreference.gold.completed.xml");
-                  if(corefFile.exists() && xmlFile.exists()){
-                    files.add(file);
-                  }else{
-                    System.err.println("Missing coref patient file : " + corefFile);
-                  }
-                }else{
-                  files.add(file);
-                }
-              }else{
-                System.err.println("Missing patient file : " + xmlFile);
-              }
-		        }
-		      } 
-		    }
-		  }
-		}
-    return files;
-  }
-
-  @Override
-  protected CollectionReader getCollectionReader(List<Integer> patientSets) throws Exception {
-    return UriCollectionReader.getCollectionReaderFromFiles(this.getFilesFor(patientSets));
-  }
-
-  protected AggregateBuilder getPreprocessorAggregateBuilder() throws Exception {
-    return this.xmiExists
-        ? this.getXMIReadingPreprocessorAggregateBuilder()
-        : this.getXMIWritingPreprocessorAggregateBuilder();
-  }
-
-  protected AggregateBuilder getXMIReadingPreprocessorAggregateBuilder() throws UIMAException {
-    AggregateBuilder aggregateBuilder = new AggregateBuilder();
-    aggregateBuilder.add(UriToDocumentTextAnnotator.getDescription());
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
-        XMIReader.class,
-        XMIReader.PARAM_XMI_DIRECTORY,
-        this.xmiDirectory));
-    return aggregateBuilder;
-  }
-
-  protected AggregateBuilder getXMIWritingPreprocessorAggregateBuilder()
-      throws Exception {
-    AggregateBuilder aggregateBuilder = new AggregateBuilder();
-    aggregateBuilder.add(UriToDocumentTextAnnotator.getDescription());
-
-    // read manual annotations into gold view
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
-        ViewCreatorAnnotator.class,
-        ViewCreatorAnnotator.PARAM_VIEW_NAME,
-        GOLD_VIEW_NAME));
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
-        ViewTextCopierAnnotator.class,
-        ViewTextCopierAnnotator.PARAM_SOURCE_VIEW_NAME,
-        CAS.NAME_DEFAULT_SOFA,
-        ViewTextCopierAnnotator.PARAM_DESTINATION_VIEW_NAME,
-        GOLD_VIEW_NAME));
-    switch (this.xmlFormat) {
-    case Anafora:
-      aggregateBuilder.add(
-          THYMEAnaforaXMLReader.getDescription(this.xmlDirectory),
-          CAS.NAME_DEFAULT_SOFA,
-          GOLD_VIEW_NAME);
-      break;
-    case Knowtator:
-      aggregateBuilder.add(
-          THYMEKnowtatorXMLReader.getDescription(this.xmlDirectory),
-          CAS.NAME_DEFAULT_SOFA,
-          GOLD_VIEW_NAME);
-      break;
-    case I2B2:
-      aggregateBuilder.add(
-          I2B2TemporalXMLReader.getDescription(this.xmlDirectory),
-          CAS.NAME_DEFAULT_SOFA,
-          GOLD_VIEW_NAME);
-      break;
-    }
-
-    if(this.coreferenceDirectory != null){
-      aggregateBuilder.add(
-          THYMEAnaforaXMLReader.getDescription(this.coreferenceDirectory),
-          CAS.NAME_DEFAULT_SOFA,
-          GOLD_VIEW_NAME);
-    }
-    
-    // identify segments
-    if(this.xmlFormat == XMLFormat.I2B2){
-      aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(SimpleSegmentAnnotator.class));
-    }else{
-      aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(SegmentsFromBracketedSectionTagsAnnotator.class));
-    }
-    // identify sentences
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
-        SentenceDetector.class,
-        SentenceDetector.SD_MODEL_FILE_PARAM,
-        "org/apache/ctakes/core/sentdetect/sd-med-model.zip"));
-    // identify tokens
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(TokenizerAnnotatorPTB.class));
-    // merge some tokens
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ContextDependentTokenizerAnnotator.class));
-
-    // identify part-of-speech tags
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
-        POSTagger.class,
-        TypeSystemDescriptionFactory.createTypeSystemDescription(),
-        TypePrioritiesFactory.createTypePriorities(Segment.class, Sentence.class, BaseToken.class),
-        POSTagger.POS_MODEL_FILE_PARAM,
-        "org/apache/ctakes/postagger/models/mayo-pos.zip"));
-
-    // identify chunks
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
-        Chunker.class,
-        Chunker.CHUNKER_MODEL_FILE_PARAM,
-        FileLocator.locateFile("org/apache/ctakes/chunker/models/chunker-model.zip"),
-        Chunker.CHUNKER_CREATOR_CLASS_PARAM,
-        DefaultChunkCreator.class));
-
-    // identify UMLS named entities
-
-    // adjust NP in NP NP to span both
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
-        ChunkAdjuster.class,
-        ChunkAdjuster.PARAM_CHUNK_PATTERN,
-        new String[] { "NP", "NP" },
-        ChunkAdjuster.PARAM_EXTEND_TO_INCLUDE_TOKEN,
-        1));
-    // adjust NP in NP PP NP to span all three
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
-        ChunkAdjuster.class,
-        ChunkAdjuster.PARAM_CHUNK_PATTERN,
-        new String[] { "NP", "PP", "NP" },
-        ChunkAdjuster.PARAM_EXTEND_TO_INCLUDE_TOKEN,
-        2));
-    // add lookup windows for each NP
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(CopyNPChunksToLookupWindowAnnotations.class));
-    // maximize lookup windows
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
-        OverlapAnnotator.class,
-        "A_ObjectClass",
-        LookupWindowAnnotation.class,
-        "B_ObjectClass",
-        LookupWindowAnnotation.class,
-        "OverlapType",
-        "A_ENV_B",
-        "ActionType",
-        "DELETE",
-        "DeleteAction",
-        new String[] { "selector=B" }));
-    // add UMLS on top of lookup windows
-    aggregateBuilder.add(
-    		UmlsDictionaryLookupAnnotator.createAnnotatorDescription()
-    		);
+			for (Integer set : patientSets) {
+				final int setNum = set;
+				for (File file : rawTextDirectory.listFiles(new FilenameFilter(){
+					@Override
+					public boolean accept(File dir, String name) {
+						return name.contains(String.format("ID%03d", setNum));
+					}})) {
+					// skip hidden files like .svn
+					if (!file.isHidden()) {
+						if(xmlFormat == XMLFormat.Knowtator){
+							files.add(file);
+						}else{
+							// look for equivalent in xml directory:
+							File xmlFile = new File(xmlDirectory, file.getName());
+							if(xmlFile.exists()){
+								if(coreferenceDirectory != null){
+									// verify that coref version of xml exists
+									File corefFile = new File(coreferenceDirectory, file.getName()+".Coreference.gold.completed.xml");
+									if(corefFile.exists() && xmlFile.exists()){
+										files.add(file);
+									}else{
+										System.err.println("Missing coref patient file : " + corefFile);
+									}
+								}else{
+									files.add(file);
+								}
+							}else{
+								System.err.println("Missing patient file : " + xmlFile);
+							}
+						}
+					} 
+				}
+			}
+		}
+		return files;
+	}
+
+	@Override
+	protected CollectionReader getCollectionReader(List<Integer> patientSets) throws Exception {
+		return UriCollectionReader.getCollectionReaderFromFiles(this.getFilesFor(patientSets));
+	}
+
+	protected AggregateBuilder getPreprocessorAggregateBuilder() throws Exception {
+		return this.xmiExists
+				? this.getXMIReadingPreprocessorAggregateBuilder()
+						: this.getXMIWritingPreprocessorAggregateBuilder();
+	}
+
+	protected AggregateBuilder getXMIReadingPreprocessorAggregateBuilder() throws UIMAException {
+		AggregateBuilder aggregateBuilder = new AggregateBuilder();
+		aggregateBuilder.add(UriToDocumentTextAnnotator.getDescription());
+		aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
+				XMIReader.class,
+				XMIReader.PARAM_XMI_DIRECTORY,
+				this.xmiDirectory));
+		return aggregateBuilder;
+	}
+
+	protected AggregateBuilder getXMIWritingPreprocessorAggregateBuilder()
+			throws Exception {
+		AggregateBuilder aggregateBuilder = new AggregateBuilder();
+		aggregateBuilder.add(UriToDocumentTextAnnotator.getDescription());
+
+		// read manual annotations into gold view
+		aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
+				ViewCreatorAnnotator.class,
+				ViewCreatorAnnotator.PARAM_VIEW_NAME,
+				GOLD_VIEW_NAME));
+		aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
+				ViewTextCopierAnnotator.class,
+				ViewTextCopierAnnotator.PARAM_SOURCE_VIEW_NAME,
+				CAS.NAME_DEFAULT_SOFA,
+				ViewTextCopierAnnotator.PARAM_DESTINATION_VIEW_NAME,
+				GOLD_VIEW_NAME));
+		switch (this.xmlFormat) {
+		case Anafora:
+			aggregateBuilder.add(
+					THYMEAnaforaXMLReader.getDescription(this.xmlDirectory),
+					CAS.NAME_DEFAULT_SOFA,
+					GOLD_VIEW_NAME);
+			break;
+		case Knowtator:
+			aggregateBuilder.add(
+					THYMEKnowtatorXMLReader.getDescription(this.xmlDirectory),
+					CAS.NAME_DEFAULT_SOFA,
+					GOLD_VIEW_NAME);
+			break;
+		case I2B2:
+			aggregateBuilder.add(
+					I2B2TemporalXMLReader.getDescription(this.xmlDirectory),
+					CAS.NAME_DEFAULT_SOFA,
+					GOLD_VIEW_NAME);
+			break;
+		}
+
+		if(this.coreferenceDirectory != null){
+			aggregateBuilder.add(
+					THYMEAnaforaXMLReader.getDescription(this.coreferenceDirectory),
+					CAS.NAME_DEFAULT_SOFA,
+					GOLD_VIEW_NAME);
+		}
 
-    /*
+		// identify segments
+		if(this.xmlFormat == XMLFormat.I2B2){
+			aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(SimpleSegmentAnnotator.class));
+		}else{
+			aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(SegmentsFromBracketedSectionTagsAnnotator.class));
+		}
+		// identify sentences
+		aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
+				SentenceDetector.class,
+				SentenceDetector.SD_MODEL_FILE_PARAM,
+				"org/apache/ctakes/core/sentdetect/sd-med-model.zip"));
+		// identify tokens
+		aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(TokenizerAnnotatorPTB.class));
+		// merge some tokens
+		aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ContextDependentTokenizerAnnotator.class));
+
+		// identify part-of-speech tags
+		aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
+				POSTagger.class,
+				TypeSystemDescriptionFactory.createTypeSystemDescription(),
+				TypePrioritiesFactory.createTypePriorities(Segment.class, Sentence.class, BaseToken.class),
+				POSTagger.POS_MODEL_FILE_PARAM,
+				"org/apache/ctakes/postagger/models/mayo-pos.zip"));
+
+		// identify chunks
+		aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
+				Chunker.class,
+				Chunker.CHUNKER_MODEL_FILE_PARAM,
+				FileLocator.locateFile("org/apache/ctakes/chunker/models/chunker-model.zip"),
+				Chunker.CHUNKER_CREATOR_CLASS_PARAM,
+				DefaultChunkCreator.class));
+
+		// identify UMLS named entities
+
+		// adjust NP in NP NP to span both
+		aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
+				ChunkAdjuster.class,
+				ChunkAdjuster.PARAM_CHUNK_PATTERN,
+				new String[] { "NP", "NP" },
+				ChunkAdjuster.PARAM_EXTEND_TO_INCLUDE_TOKEN,
+				1));
+		// adjust NP in NP PP NP to span all three
+		aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
+				ChunkAdjuster.class,
+				ChunkAdjuster.PARAM_CHUNK_PATTERN,
+				new String[] { "NP", "PP", "NP" },
+				ChunkAdjuster.PARAM_EXTEND_TO_INCLUDE_TOKEN,
+				2));
+		// add lookup windows for each NP
+		aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(CopyNPChunksToLookupWindowAnnotations.class));
+		// maximize lookup windows
+		aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
+				OverlapAnnotator.class,
+				"A_ObjectClass",
+				LookupWindowAnnotation.class,
+				"B_ObjectClass",
+				LookupWindowAnnotation.class,
+				"OverlapType",
+				"A_ENV_B",
+				"ActionType",
+				"DELETE",
+				"DeleteAction",
+				new String[] { "selector=B" }));
+		// add UMLS on top of lookup windows
+		aggregateBuilder.add(
+				UmlsDictionaryLookupAnnotator.createAnnotatorDescription()
+				);
+
+		/*
     // add lvg annotator
     String[] XeroxTreebankMap = {
         "adj|JJ",
@@ -524,391 +524,396 @@ public abstract class Evaluation_ImplBas
             new File(LvgCmdApiResourceImpl.class.getResource(
                 "/org/apache/ctakes/lvg/data/config/lvg.properties").toURI())));
     aggregateBuilder.add(lvgAnnotator);
-    */
-    aggregateBuilder.add(LvgAnnotator.createAnnotatorDescription());
+		 */
+		aggregateBuilder.add(LvgAnnotator.createAnnotatorDescription());
+
+		// add dependency parser
+		aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ClearNLPDependencyParserAE.class));
 
-    // add dependency parser
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ClearNLPDependencyParserAE.class));
+		// add semantic role labeler
+		aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ClearNLPSemanticRoleLabelerAE.class));
 
-    // add semantic role labeler
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ClearNLPSemanticRoleLabelerAE.class));
+		// add gold standard parses to gold view, and adjust gold view to correct a few annotation mis-steps
+		if(this.treebankDirectory != null){
+			aggregateBuilder.add(THYMETreebankReader.getDescription(this.treebankDirectory));
+			aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(TimexAnnotationCorrector.class));
+		}else{
+			// add ctakes constituency parses to system view
+			aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ConstituencyParser.class,
+					ConstituencyParser.PARAM_MODEL_FILENAME,
+					"org/apache/ctakes/constituency/parser/models/thyme.bin"));
+			//          "org/apache/ctakes/constituency/parser/models/sharp-3.1.bin"));
+			//            "org/apache/ctakes/constituency/parser/models/thymeNotempeval.bin"));
+			//      aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(BerkeleyParserWrapper.class,
+			//          BerkeleyParserWrapper.PARAM_MODEL_FILENAME,
+			//          
+			//        "org/apache/ctakes/constituency/parser/models/thyme.gcg.4sm.bin"));
+			//          "org/apache/ctakes/constituency/parser/models/thyme.4sm.bin"));
+		}
+		// write out the CAS after all the above annotations
+		aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
+				XMIWriter.class,
+				XMIWriter.PARAM_XMI_DIRECTORY,
+				this.xmiDirectory));
+
+		return aggregateBuilder;
+	}
+
+	public static <T extends Annotation> List<T> selectExact(JCas jCas, Class<T> annotationClass, Segment segment) {
+		List<T> annotations = Lists.newArrayList();
+		for (T annotation : JCasUtil.selectCovered(jCas, annotationClass, segment)) {
+			if (annotation.getClass().equals(annotationClass)) {
+				annotations.add(annotation);
+			}
+		}
+		return annotations;
+	}
+
+	public static class CopyNPChunksToLookupWindowAnnotations extends JCasAnnotator_ImplBase {
+
+		@Override
+		public void process(JCas jCas) throws AnalysisEngineProcessException {
+			for (Chunk chunk : JCasUtil.select(jCas, Chunk.class)) {
+				if (chunk.getChunkType().equals("NP")) {
+					new LookupWindowAnnotation(jCas, chunk.getBegin(), chunk.getEnd()).addToIndexes();
+				}
+			}
+		}
+	}
+
+	public static class RemoveEnclosedLookupWindows extends JCasAnnotator_ImplBase {
+
+		@Override
+		public void process(JCas jCas) throws AnalysisEngineProcessException {
+			List<LookupWindowAnnotation> lws = new ArrayList<LookupWindowAnnotation>(JCasUtil.select(jCas, LookupWindowAnnotation.class));
+			// we'll navigate backwards so that as we delete things we shorten the list from the back
+			for(int i = lws.size()-2; i >= 0; i--){
+				LookupWindowAnnotation lw1 = lws.get(i);
+				LookupWindowAnnotation lw2 = lws.get(i+1);
+				if(lw1.getBegin() <= lw2.getBegin() && lw1.getEnd() >= lw2.getEnd()){
+					/// lw1 envelops or encloses lw2
+					lws.remove(i+1);
+					lw2.removeFromIndexes();
+				}
+			}
+
+		}
+
+	}
+
+	public static class EntityMentionRemover extends JCasAnnotator_ImplBase {
+
+		@Override
+		public void process(JCas jCas) throws AnalysisEngineProcessException {
+			for (EntityMention mention : Lists.newArrayList(JCasUtil.select(jCas, EntityMention.class))) {
+				mention.removeFromIndexes();
+			}
+		}
+	}
+
+	public static class EventMentionRemover extends JCasAnnotator_ImplBase {
+
+		@Override
+		public void process(JCas jCas) throws AnalysisEngineProcessException {
+			for (EventMention mention : Lists.newArrayList(JCasUtil.select(jCas, EventMention.class))) {
+				mention.removeFromIndexes();
+			}
+		}
+	}
+
+	// replace this with SimpleSegmentWithTagsAnnotator if that code ever gets fixed
+	public static class SegmentsFromBracketedSectionTagsAnnotator extends JCasAnnotator_ImplBase {
+		private static Pattern SECTION_PATTERN = Pattern.compile(
+				"(\\[start section id=\"?(.*?)\"?\\]).*?(\\[end section id=\"?(.*?)\"?\\])",
+				Pattern.DOTALL);
+
+		@Override
+		public void process(JCas jCas) throws AnalysisEngineProcessException {
+			Matcher matcher = SECTION_PATTERN.matcher(jCas.getDocumentText());
+			while (matcher.find()) {
+				Segment segment = new Segment(jCas);
+				segment.setBegin(matcher.start() + matcher.group(1).length());
+				segment.setEnd(matcher.end() - matcher.group(3).length());
+				segment.setId(matcher.group(2));
+				segment.addToIndexes();
+			}
+		}
+	}
+
+	static File getXMIFile(File xmiDirectory, File textFile) {
+		return new File(xmiDirectory, textFile.getName() + ".xmi");
+	}
+
+	static File getXMIFile(File xmiDirectory, JCas jCas) throws AnalysisEngineProcessException {
+		return getXMIFile(xmiDirectory, new File(ViewURIUtil.getURI(jCas).getPath()));
+	}
+
+	public static class XMIWriter extends JCasAnnotator_ImplBase {
+
+		public static final String PARAM_XMI_DIRECTORY = "XMIDirectory";
+
+		@ConfigurationParameter(name = PARAM_XMI_DIRECTORY, mandatory = true)
+		private File xmiDirectory;
+
+		@Override
+		public void initialize(UimaContext context) throws ResourceInitializationException {
+			super.initialize(context);
+			if (!this.xmiDirectory.exists()) {
+				this.xmiDirectory.mkdirs();
+			}
+		}
+
+		@Override
+		public void process(JCas jCas) throws AnalysisEngineProcessException {
+			File xmiFile = getXMIFile(this.xmiDirectory, jCas);
+			try {
+				FileOutputStream outputStream = new FileOutputStream(xmiFile);
+				try {
+					XmiCasSerializer serializer = new XmiCasSerializer(jCas.getTypeSystem());
+					ContentHandler handler = new XMLSerializer(outputStream, false).getContentHandler();
+					serializer.serialize(jCas.getCas(), handler);
+				} finally {
+					outputStream.close();
+				}
+			} catch (SAXException e) {
+				throw new AnalysisEngineProcessException(e);
+			} catch (IOException e) {
+				throw new AnalysisEngineProcessException(e);
+			}
+		}
+	}
+
+	public static class XMIReader extends JCasAnnotator_ImplBase {
+
+		public static final String PARAM_XMI_DIRECTORY = "XMIDirectory";
+
+		@ConfigurationParameter(name = PARAM_XMI_DIRECTORY, mandatory = true)
+		private File xmiDirectory;
+
+		@Override
+		public void process(JCas jCas) throws AnalysisEngineProcessException {
+			File xmiFile = getXMIFile(this.xmiDirectory, jCas);
+			try {
+				FileInputStream inputStream = new FileInputStream(xmiFile);
+				try {
+					XmiCasDeserializer.deserialize(inputStream, jCas.getCas());
+				} finally {
+					inputStream.close();
+				}
+			} catch (SAXException e) {
+				throw new AnalysisEngineProcessException(e);
+			} catch (IOException e) {
+				throw new AnalysisEngineProcessException(e);
+			}
+		}
+	}
+
+	public static class TimexAnnotationCorrector extends JCasAnnotator_ImplBase {
+		@Override
+		public void process(JCas jCas) throws AnalysisEngineProcessException {
+			JCas goldView, systemView;
+			try {
+				goldView = jCas.getView(GOLD_VIEW_NAME);
+				systemView = jCas.getView(CAS.NAME_DEFAULT_SOFA);
+			} catch (CASException e) {
+				e.printStackTrace();
+				throw new AnalysisEngineProcessException();
+			}
+			for(TimeMention mention : JCasUtil.select(goldView, TimeMention.class)){
+				// for each time expression, get the treebank node with the same span.
+				List<TreebankNode> nodes = JCasUtil.selectCovered(systemView, TreebankNode.class, mention);
+				TreebankNode sameSpanNode = null;
+				for(TreebankNode node : nodes){
+					if(node.getBegin() == mention.getBegin() && node.getEnd() == mention.getEnd()){
+						sameSpanNode = node;
+						break;
+					}
+				}
+				if(sameSpanNode != null){
+					// look at node at the position of the timex3.
+					if(sameSpanNode.getNodeType().equals("PP")){
+						// if it is a PP it should be moved down to the NP
+						int numChildren = sameSpanNode.getChildren().size();
+						if(numChildren == 2 && sameSpanNode.getChildren(0).getNodeType().equals("IN") && sameSpanNode.getChildren(1).getNodeType().equals("NP")){
+							// move the time span to this node:
+							TreebankNode mentionNode = sameSpanNode.getChildren(numChildren-1);
+							mention.setBegin(mentionNode.getBegin());
+							mention.setEnd(mentionNode.getEnd());
+						}
+					}
+				}else{
+					// if there is no matching tree span, see if the DT to the left would help.
+					// now adjust for missing DT to the left
+					List<TerminalTreebankNode> precedingPreterms = JCasUtil.selectPreceding(systemView, TerminalTreebankNode.class, mention, 1);
+					if(precedingPreterms != null && precedingPreterms.size() == 1){
+						TerminalTreebankNode leftTerm = precedingPreterms.get(0);
+						if(leftTerm.getNodeType().equals("DT")){
+							// now see if adding this would make it match a tree
+							List<TreebankNode> matchingNodes = JCasUtil.selectCovered(systemView, TreebankNode.class, leftTerm.getBegin(), mention.getEnd());
+							for(TreebankNode node : matchingNodes){
+								if(node.getBegin() == leftTerm.getBegin() && node.getEnd() == mention.getEnd()){
+									sameSpanNode = node;
+									break;
+								}
+							}
+							if(sameSpanNode != null){
+								// adding the DT to the left of th emention made it match a tree:
+								System.err.println("Adding DT: " + leftTerm.getCoveredText() + " to TIMEX: " + mention.getCoveredText());
+								mention.setBegin(leftTerm.getBegin());
+							}
+						}
+					}
+				}
+			}
+		}
+	}
+
+
+	public static class CopyFromGold extends JCasAnnotator_ImplBase {
+
+		public static AnalysisEngineDescription getDescription(Class<?>... classes)
+				throws ResourceInitializationException {
+			return AnalysisEngineFactory.createPrimitiveDescription(
+					CopyFromGold.class,
+					CopyFromGold.PARAM_ANNOTATION_CLASSES,
+					classes);
+		}
+
+		public static final String PARAM_ANNOTATION_CLASSES = "AnnotationClasses";
+
+		@ConfigurationParameter(name = PARAM_ANNOTATION_CLASSES, mandatory = true)
+		private Class<? extends TOP>[] annotationClasses;
+
+		@Override
+		public void process(JCas jCas) throws AnalysisEngineProcessException {
+			JCas goldView, systemView;
+			try {
+				goldView = jCas.getView(GOLD_VIEW_NAME);
+				systemView = jCas.getView(CAS.NAME_DEFAULT_SOFA);
+			} catch (CASException e) {
+				throw new AnalysisEngineProcessException(e);
+			}
+			for (Class<? extends TOP> annotationClass : this.annotationClasses) {
+				for (TOP annotation : Lists.newArrayList(JCasUtil.select(systemView, annotationClass))) {
+					if (annotation.getClass().equals(annotationClass)) {
+						annotation.removeFromIndexes();
+					}
+				}
+			}
+			CasCopier copier = new CasCopier(goldView.getCas(), systemView.getCas());
+			Feature sofaFeature = jCas.getTypeSystem().getFeatureByFullName(CAS.FEATURE_FULL_NAME_SOFA);
+			for (Class<? extends TOP> annotationClass : this.annotationClasses) {
+				for (TOP annotation : JCasUtil.select(goldView, annotationClass)) {
+					TOP copy = (TOP) copier.copyFs(annotation);
+					if (copy instanceof Annotation) {
+						copy.setFeatureValue(sofaFeature, systemView.getSofa());
+					}
+					copy.addToIndexes(systemView);
+				}
+			}
+		}
+	}
+
+	public static class WriteI2B2XML extends JCasAnnotator_ImplBase {
+		public static final String PARAM_OUTPUT_DIR="PARAM_OUTPUT_DIR";
+		@ConfigurationParameter(mandatory=true,description="Output directory to write xml files to.",name=PARAM_OUTPUT_DIR)
+		protected String outputDir;
+
+		@Override
+		public void process(JCas jcas) throws AnalysisEngineProcessException {
+			try {
+				// get the output file name from the input file name and output directory.
+				File outDir = new File(outputDir);
+				if(!outDir.exists()) outDir.mkdirs();
+				File inFile = new File(ViewURIUtil.getURI(jcas));
+				String outFile = inFile.getName().replace(".txt", "");
+
+				// build the xml
+				DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance();
+				DocumentBuilder docBuilder = docFactory.newDocumentBuilder();
+				Document doc = docBuilder.newDocument();
+				Element rootElement = doc.createElement("ClinicalNarrativeTemporalAnnotation");
+				Element textElement = doc.createElement("TEXT");
+				Element tagsElement = doc.createElement("TAGS");
+				textElement.setTextContent(jcas.getDocumentText());
+				rootElement.appendChild(textElement);
+				rootElement.appendChild(tagsElement);
+				doc.appendChild(rootElement);
+
+				Map<IdentifiedAnnotation,String> argToId = new HashMap<>();
+				int id=0;
+				for(TimeMention timex : JCasUtil.select(jcas, TimeMention.class)){
+					Element timexElement = doc.createElement("TIMEX3");
+					String timexID = "T"+id; id++;
+					argToId.put(timex, timexID);
+					timexElement.setAttribute("id", timexID);
+					timexElement.setAttribute("start", String.valueOf(timex.getBegin()+1));
+					timexElement.setAttribute("end", String.valueOf(timex.getEnd()+1));
+					timexElement.setAttribute("text", timex.getCoveredText());
+					timexElement.setAttribute("type", "NA");
+					timexElement.setAttribute("val", "NA");
+					timexElement.setAttribute("mod", "NA");
+					tagsElement.appendChild(timexElement);
+				}
+
+				id = 0;
+				for(EventMention event : JCasUtil.select(jcas, EventMention.class)){
+					if (event.getClass().equals(EventMention.class)) {
+						// this ensures we are only looking at THYME events and not ctakes-dictionary-lookup events
+						Element eventEl = doc.createElement("EVENT");
+						String eventID = "E"+id;  id++;
+						argToId.put(event, eventID);
+						eventEl.setAttribute("id", eventID);
+						eventEl.setAttribute("start", String.valueOf(event.getBegin()+1));
+						eventEl.setAttribute("end", String.valueOf(event.getEnd()+1));
+						eventEl.setAttribute("text", event.getCoveredText());
+						eventEl.setAttribute("modality", "NA");
+						eventEl.setAttribute("polarity", "NA");
+						eventEl.setAttribute("type", "NA");
+						tagsElement.appendChild(eventEl);
+					}
+				}
+
+				id = 0;
+				for(TemporalTextRelation rel : JCasUtil.select(jcas, TemporalTextRelation.class)){
+					Element linkEl = doc.createElement("TLINK");
+					String linkID = "TL"+id; id++;
+					linkEl.setAttribute("id", linkID);
+					Annotation arg1 = rel.getArg1().getArgument();
+					linkEl.setAttribute("fromID", argToId.get(arg1));
+					linkEl.setAttribute("fromText", arg1.getCoveredText());
+					Annotation arg2 = rel.getArg2().getArgument();
+					if(arg2!=null){
+						linkEl.setAttribute("toID", argToId.get(arg2));
+						linkEl.setAttribute("toText", arg2.getCoveredText());
+					}else{
+						linkEl.setAttribute("toID", "Discharge");
+						linkEl.setAttribute("toText", "Discharge");
+					}
+					linkEl.setAttribute("type", rel.getCategory());
+					tagsElement.appendChild(linkEl);
+				}
+
+				// boilerplate xml-writing code:
+				TransformerFactory transformerFactory = TransformerFactory.newInstance();
+				Transformer transformer = transformerFactory.newTransformer();
+				transformer.setOutputProperty(OutputKeys.INDENT, "yes");
+				transformer.setOutputProperty(OutputKeys.METHOD, "xml");
+				DOMSource source = new DOMSource(doc);
+				StreamResult result = new StreamResult(new File(outputDir, outFile));
+				transformer.transform(source, result);
+			} catch (ParserConfigurationException e) {
+				e.printStackTrace();
+				throw new AnalysisEngineProcessException(e);
+			} catch (TransformerConfigurationException e) {
+				e.printStackTrace();
+				throw new AnalysisEngineProcessException(e);
+			} catch (TransformerException e) {
+				e.printStackTrace();
+				throw new AnalysisEngineProcessException(e);
+			}
+
+		}
 
-    // add gold standard parses to gold view, and adjust gold view to correct a few annotation mis-steps
-    if(this.treebankDirectory != null){
-      aggregateBuilder.add(THYMETreebankReader.getDescription(this.treebankDirectory));
-      aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(TimexAnnotationCorrector.class));
-    }else{
-      // add ctakes constituency parses to system view
-      aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ConstituencyParser.class,
-          ConstituencyParser.PARAM_MODEL_FILENAME,
-          "org/apache/ctakes/constituency/parser/models/thyme.bin"));
-//          "org/apache/ctakes/constituency/parser/models/sharp-3.1.bin"));
-//            "org/apache/ctakes/constituency/parser/models/thymeNotempeval.bin"));
-//      aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(BerkeleyParserWrapper.class,
-//          BerkeleyParserWrapper.PARAM_MODEL_FILENAME,
-//          
-//        "org/apache/ctakes/constituency/parser/models/thyme.gcg.4sm.bin"));
-//          "org/apache/ctakes/constituency/parser/models/thyme.4sm.bin"));
-    }
-    // write out the CAS after all the above annotations
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
-        XMIWriter.class,
-        XMIWriter.PARAM_XMI_DIRECTORY,
-        this.xmiDirectory));
-
-    return aggregateBuilder;
-  }
-
-  public static <T extends Annotation> List<T> selectExact(JCas jCas, Class<T> annotationClass, Segment segment) {
-    List<T> annotations = Lists.newArrayList();
-    for (T annotation : JCasUtil.selectCovered(jCas, annotationClass, segment)) {
-      if (annotation.getClass().equals(annotationClass)) {
-        annotations.add(annotation);
-      }
-    }
-    return annotations;
-  }
-
-  public static class CopyNPChunksToLookupWindowAnnotations extends JCasAnnotator_ImplBase {
-
-    @Override
-    public void process(JCas jCas) throws AnalysisEngineProcessException {
-      for (Chunk chunk : JCasUtil.select(jCas, Chunk.class)) {
-        if (chunk.getChunkType().equals("NP")) {
-          new LookupWindowAnnotation(jCas, chunk.getBegin(), chunk.getEnd()).addToIndexes();
-        }
-      }
-    }
-  }
-  
-  public static class RemoveEnclosedLookupWindows extends JCasAnnotator_ImplBase {
-
-    @Override
-    public void process(JCas jCas) throws AnalysisEngineProcessException {
-      List<LookupWindowAnnotation> lws = new ArrayList<LookupWindowAnnotation>(JCasUtil.select(jCas, LookupWindowAnnotation.class));
-      // we'll navigate backwards so that as we delete things we shorten the list from the back
-      for(int i = lws.size()-2; i >= 0; i--){
-        LookupWindowAnnotation lw1 = lws.get(i);
-        LookupWindowAnnotation lw2 = lws.get(i+1);
-        if(lw1.getBegin() <= lw2.getBegin() && lw1.getEnd() >= lw2.getEnd()){
-          /// lw1 envelops or encloses lw2
-          lws.remove(i+1);
-          lw2.removeFromIndexes();
-        }
-      }
-      
-    }
-    
-  }
-
-  public static class EntityMentionRemover extends JCasAnnotator_ImplBase {
-
-    @Override
-    public void process(JCas jCas) throws AnalysisEngineProcessException {
-      for (EntityMention mention : Lists.newArrayList(JCasUtil.select(jCas, EntityMention.class))) {
-        mention.removeFromIndexes();
-      }
-    }
-  }
-
-  public static class EventMentionRemover extends JCasAnnotator_ImplBase {
-
-    @Override
-    public void process(JCas jCas) throws AnalysisEngineProcessException {
-      for (EventMention mention : Lists.newArrayList(JCasUtil.select(jCas, EventMention.class))) {
-        mention.removeFromIndexes();
-      }
-    }
-  }
-  
-  // replace this with SimpleSegmentWithTagsAnnotator if that code ever gets fixed
-  public static class SegmentsFromBracketedSectionTagsAnnotator extends JCasAnnotator_ImplBase {
-    private static Pattern SECTION_PATTERN = Pattern.compile(
-        "(\\[start section id=\"?(.*?)\"?\\]).*?(\\[end section id=\"?(.*?)\"?\\])",
-        Pattern.DOTALL);
-
-    @Override
-    public void process(JCas jCas) throws AnalysisEngineProcessException {
-      Matcher matcher = SECTION_PATTERN.matcher(jCas.getDocumentText());
-      while (matcher.find()) {
-        Segment segment = new Segment(jCas);
-        segment.setBegin(matcher.start() + matcher.group(1).length());
-        segment.setEnd(matcher.end() - matcher.group(3).length());
-        segment.setId(matcher.group(2));
-        segment.addToIndexes();
-      }
-    }
-  }
-
-  static File getXMIFile(File xmiDirectory, File textFile) {
-    return new File(xmiDirectory, textFile.getName() + ".xmi");
-  }
-
-  static File getXMIFile(File xmiDirectory, JCas jCas) throws AnalysisEngineProcessException {
-    return getXMIFile(xmiDirectory, new File(ViewURIUtil.getURI(jCas).getPath()));
-  }
-
-  public static class XMIWriter extends JCasAnnotator_ImplBase {
-
-    public static final String PARAM_XMI_DIRECTORY = "XMIDirectory";
-
-    @ConfigurationParameter(name = PARAM_XMI_DIRECTORY, mandatory = true)
-    private File xmiDirectory;
-
-    @Override
-    public void initialize(UimaContext context) throws ResourceInitializationException {
-      super.initialize(context);
-      if (!this.xmiDirectory.exists()) {
-        this.xmiDirectory.mkdirs();
-      }
-    }
-
-    @Override
-    public void process(JCas jCas) throws AnalysisEngineProcessException {
-      File xmiFile = getXMIFile(this.xmiDirectory, jCas);
-      try {
-        FileOutputStream outputStream = new FileOutputStream(xmiFile);
-        try {
-          XmiCasSerializer serializer = new XmiCasSerializer(jCas.getTypeSystem());
-          ContentHandler handler = new XMLSerializer(outputStream, false).getContentHandler();
-          serializer.serialize(jCas.getCas(), handler);
-        } finally {
-          outputStream.close();
-        }
-      } catch (SAXException e) {
-        throw new AnalysisEngineProcessException(e);
-      } catch (IOException e) {
-        throw new AnalysisEngineProcessException(e);
-      }
-    }
-  }
-
-  public static class XMIReader extends JCasAnnotator_ImplBase {
-
-    public static final String PARAM_XMI_DIRECTORY = "XMIDirectory";
-
-    @ConfigurationParameter(name = PARAM_XMI_DIRECTORY, mandatory = true)
-    private File xmiDirectory;
-
-    @Override
-    public void process(JCas jCas) throws AnalysisEngineProcessException {
-      File xmiFile = getXMIFile(this.xmiDirectory, jCas);
-      try {
-        FileInputStream inputStream = new FileInputStream(xmiFile);
-        try {
-          XmiCasDeserializer.deserialize(inputStream, jCas.getCas());
-        } finally {
-          inputStream.close();
-        }
-      } catch (SAXException e) {
-        throw new AnalysisEngineProcessException(e);
-      } catch (IOException e) {
-        throw new AnalysisEngineProcessException(e);
-      }
-    }
-  }
-  
-  public static class TimexAnnotationCorrector extends JCasAnnotator_ImplBase {
-    @Override
-    public void process(JCas jCas) throws AnalysisEngineProcessException {
-      JCas goldView, systemView;
-      try {
-        goldView = jCas.getView(GOLD_VIEW_NAME);
-        systemView = jCas.getView(CAS.NAME_DEFAULT_SOFA);
-      } catch (CASException e) {
-        e.printStackTrace();
-        throw new AnalysisEngineProcessException();
-      }
-      for(TimeMention mention : JCasUtil.select(goldView, TimeMention.class)){
-        // for each time expression, get the treebank node with the same span.
-        List<TreebankNode> nodes = JCasUtil.selectCovered(systemView, TreebankNode.class, mention);
-        TreebankNode sameSpanNode = null;
-        for(TreebankNode node : nodes){
-          if(node.getBegin() == mention.getBegin() && node.getEnd() == mention.getEnd()){
-            sameSpanNode = node;
-            break;
-          }
-        }
-        if(sameSpanNode != null){
-          // look at node at the position of the timex3.
-          if(sameSpanNode.getNodeType().equals("PP")){
-            // if it is a PP it should be moved down to the NP
-            int numChildren = sameSpanNode.getChildren().size();
-            if(numChildren == 2 && sameSpanNode.getChildren(0).getNodeType().equals("IN") && sameSpanNode.getChildren(1).getNodeType().equals("NP")){
-              // move the time span to this node:
-              TreebankNode mentionNode = sameSpanNode.getChildren(numChildren-1);
-              mention.setBegin(mentionNode.getBegin());
-              mention.setEnd(mentionNode.getEnd());
-            }
-          }
-        }else{
-          // if there is no matching tree span, see if the DT to the left would help.
-          // now adjust for missing DT to the left
-          List<TerminalTreebankNode> precedingPreterms = JCasUtil.selectPreceding(systemView, TerminalTreebankNode.class, mention, 1);
-          if(precedingPreterms != null && precedingPreterms.size() == 1){
-            TerminalTreebankNode leftTerm = precedingPreterms.get(0);
-            if(leftTerm.getNodeType().equals("DT")){
-              // now see if adding this would make it match a tree
-              List<TreebankNode> matchingNodes = JCasUtil.selectCovered(systemView, TreebankNode.class, leftTerm.getBegin(), mention.getEnd());
-              for(TreebankNode node : matchingNodes){
-                if(node.getBegin() == leftTerm.getBegin() && node.getEnd() == mention.getEnd()){
-                  sameSpanNode = node;
-                  break;
-                }
-              }
-              if(sameSpanNode != null){
-                // adding the DT to the left of th emention made it match a tree:
-                System.err.println("Adding DT: " + leftTerm.getCoveredText() + " to TIMEX: " + mention.getCoveredText());
-                mention.setBegin(leftTerm.getBegin());
-              }
-            }
-          }
-        }
-      }
-    }
-  }
-
-
-  public static class CopyFromGold extends JCasAnnotator_ImplBase {
-
-    public static AnalysisEngineDescription getDescription(Class<?>... classes)
-        throws ResourceInitializationException {
-      return AnalysisEngineFactory.createPrimitiveDescription(
-          CopyFromGold.class,
-          CopyFromGold.PARAM_ANNOTATION_CLASSES,
-          classes);
-    }
-
-    public static final String PARAM_ANNOTATION_CLASSES = "AnnotationClasses";
-
-    @ConfigurationParameter(name = PARAM_ANNOTATION_CLASSES, mandatory = true)
-    private Class<? extends TOP>[] annotationClasses;
-
-    @Override
-    public void process(JCas jCas) throws AnalysisEngineProcessException {
-      JCas goldView, systemView;
-      try {
-        goldView = jCas.getView(GOLD_VIEW_NAME);
-        systemView = jCas.getView(CAS.NAME_DEFAULT_SOFA);
-      } catch (CASException e) {
-        throw new AnalysisEngineProcessException(e);
-      }
-      for (Class<? extends TOP> annotationClass : this.annotationClasses) {
-        for (TOP annotation : Lists.newArrayList(JCasUtil.select(systemView, annotationClass))) {
-          if (annotation.getClass().equals(annotationClass)) {
-            annotation.removeFromIndexes();
-          }
-        }
-      }
-      CasCopier copier = new CasCopier(goldView.getCas(), systemView.getCas());
-      Feature sofaFeature = jCas.getTypeSystem().getFeatureByFullName(CAS.FEATURE_FULL_NAME_SOFA);
-      for (Class<? extends TOP> annotationClass : this.annotationClasses) {
-        for (TOP annotation : JCasUtil.select(goldView, annotationClass)) {
-          TOP copy = (TOP) copier.copyFs(annotation);
-          if (copy instanceof Annotation) {
-            copy.setFeatureValue(sofaFeature, systemView.getSofa());
-          }
-          copy.addToIndexes(systemView);
-        }
-      }
-    }
-  }
-  
-  public static class WriteI2B2XML extends JCasAnnotator_ImplBase {
-    public static final String PARAM_OUTPUT_DIR="PARAM_OUTPUT_DIR";
-    @ConfigurationParameter(mandatory=true,description="Output directory to write xml files to.",name=PARAM_OUTPUT_DIR)
-    protected String outputDir;
-    
-    @Override
-    public void process(JCas jcas) throws AnalysisEngineProcessException {
-      try {
-        // get the output file name from the input file name and output directory.
-        File outDir = new File(outputDir);
-        if(!outDir.exists()) outDir.mkdirs();
-        File inFile = new File(ViewURIUtil.getURI(jcas));
-        String outFile = inFile.getName().replace(".txt", "");
-        
-        // build the xml
-        DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance();
-        DocumentBuilder docBuilder = docFactory.newDocumentBuilder();
-        Document doc = docBuilder.newDocument();
-        Element rootElement = doc.createElement("ClinicalNarrativeTemporalAnnotation");
-        Element textElement = doc.createElement("TEXT");
-        Element tagsElement = doc.createElement("TAGS");
-        textElement.setTextContent(jcas.getDocumentText());
-        rootElement.appendChild(textElement);
-        rootElement.appendChild(tagsElement);
-        doc.appendChild(rootElement);
-        
-        Map<IdentifiedAnnotation,String> argToId = new HashMap<>();
-        int id=0;
-        for(TimeMention timex : JCasUtil.select(jcas, TimeMention.class)){
-          Element timexElement = doc.createElement("TIMEX3");
-          String timexID = "T"+id; id++;
-          argToId.put(timex, timexID);
-          timexElement.setAttribute("id", timexID);
-          timexElement.setAttribute("start", String.valueOf(timex.getBegin()+1));
-          timexElement.setAttribute("end", String.valueOf(timex.getEnd()+1));
-          timexElement.setAttribute("text", timex.getCoveredText());
-          timexElement.setAttribute("type", "NA");
-          timexElement.setAttribute("val", "NA");
-          timexElement.setAttribute("mod", "NA");
-          tagsElement.appendChild(timexElement);
-        }
-        
-        id = 0;
-        for(EventMention event : JCasUtil.select(jcas, EventMention.class)){
-          if (event.getClass().equals(EventMention.class)) {
-            // this ensures we are only looking at THYME events and not ctakes-dictionary-lookup events
-            Element eventEl = doc.createElement("EVENT");
-            String eventID = "E"+id;  id++;
-            argToId.put(event, eventID);
-            eventEl.setAttribute("id", eventID);
-            eventEl.setAttribute("start", String.valueOf(event.getBegin()+1));
-            eventEl.setAttribute("end", String.valueOf(event.getEnd()+1));
-            eventEl.setAttribute("text", event.getCoveredText());
-            eventEl.setAttribute("modality", "NA");
-            eventEl.setAttribute("polarity", "NA");
-            eventEl.setAttribute("type", "NA");
-            tagsElement.appendChild(eventEl);
-          }
-        }
-        
-        id = 0;
-        for(TemporalTextRelation rel : JCasUtil.select(jcas, TemporalTextRelation.class)){
-          Element linkEl = doc.createElement("TLINK");
-          String linkID = "TL"+id; id++;
-          linkEl.setAttribute("id", linkID);
-          Annotation arg1 = rel.getArg1().getArgument();
-          linkEl.setAttribute("fromID", argToId.get(arg1));
-          linkEl.setAttribute("fromText", arg1.getCoveredText());
-          Annotation arg2 = rel.getArg2().getArgument();
-          linkEl.setAttribute("toID", argToId.get(arg2));
-          linkEl.setAttribute("toText", arg2.getCoveredText());
-          linkEl.setAttribute("type", rel.getCategory());
-          tagsElement.appendChild(linkEl);
-        }
-        
-        // boilerplate xml-writing code:
-        TransformerFactory transformerFactory = TransformerFactory.newInstance();
-        Transformer transformer = transformerFactory.newTransformer();
-        transformer.setOutputProperty(OutputKeys.INDENT, "yes");
-        transformer.setOutputProperty(OutputKeys.METHOD, "xml");
-        DOMSource source = new DOMSource(doc);
-        StreamResult result = new StreamResult(new File(outputDir, outFile));
-        transformer.transform(source, result);
-      } catch (ParserConfigurationException e) {
-        e.printStackTrace();
-        throw new AnalysisEngineProcessException(e);
-      } catch (TransformerConfigurationException e) {
-        e.printStackTrace();
-        throw new AnalysisEngineProcessException(e);
-      } catch (TransformerException e) {
-        e.printStackTrace();
-        throw new AnalysisEngineProcessException(e);
-      }
-
-    }
-    
-  }
+	}
 }



Mime
View raw message