ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tm...@apache.org
Subject svn commit: r1562089 - /ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/pipelines/
Date Tue, 28 Jan 2014 15:08:54 GMT
Author: tmill
Date: Tue Jan 28 15:08:54 2014
New Revision: 1562089

URL: http://svn.apache.org/r1562089
Log:
CTAKES-82: Refactored pipelines, including new pipeline that runs event-time relation annotator with placeholder for event-event relations.

Added:
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/pipelines/FullTemporalExtractionPipeline.java   (with props)
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/pipelines/TemporalExtractionPipeline_ImplBase.java   (with props)
Modified:
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/pipelines/EventAndTimeExtractionPipeline.java
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/pipelines/EventExtractionPipeline.java
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/pipelines/TimeExtractionPipeline.java

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/pipelines/EventAndTimeExtractionPipeline.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/pipelines/EventAndTimeExtractionPipeline.java?rev=1562089&r1=1562088&r2=1562089&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/pipelines/EventAndTimeExtractionPipeline.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/pipelines/EventAndTimeExtractionPipeline.java Tue Jan 28 15:08:54 2014
@@ -20,43 +20,16 @@ package org.apache.ctakes.temporal.pipel
 
 import java.io.File;
 
-import org.apache.ctakes.chunker.ae.Chunker;
-import org.apache.ctakes.chunker.ae.DefaultChunkCreator;
-import org.apache.ctakes.chunker.ae.adjuster.ChunkAdjuster;
-import org.apache.ctakes.constituency.parser.ae.ConstituencyParser;
-import org.apache.ctakes.contexttokenizer.ae.ContextDependentTokenizerAnnotator;
-import org.apache.ctakes.core.ae.OverlapAnnotator;
-import org.apache.ctakes.core.ae.SentenceDetector;
-import org.apache.ctakes.core.ae.SimpleSegmentAnnotator;
-import org.apache.ctakes.core.ae.TokenizerAnnotatorPTB;
 import org.apache.ctakes.core.cr.FilesInDirectoryCollectionReader;
-import org.apache.ctakes.core.resource.FileLocator;
-import org.apache.ctakes.core.resource.FileResourceImpl;
-import org.apache.ctakes.core.resource.JdbcConnectionResourceImpl;
-import org.apache.ctakes.core.resource.LuceneIndexReaderResourceImpl;
-import org.apache.ctakes.dependency.parser.ae.ClearNLPDependencyParserAE;
-import org.apache.ctakes.dependency.parser.ae.ClearNLPSemanticRoleLabelerAE;
-import org.apache.ctakes.dictionary.lookup.ae.UmlsDictionaryLookupAnnotator;
-import org.apache.ctakes.lvg.ae.LvgAnnotator;
-import org.apache.ctakes.lvg.resource.LvgCmdApiResourceImpl;
-import org.apache.ctakes.postagger.POSTagger;
+import org.apache.ctakes.temporal.ae.BackwardsTimeAnnotator;
 import org.apache.ctakes.temporal.ae.EventAnnotator;
-import org.apache.ctakes.temporal.ae.TimeAnnotator;
-import org.apache.ctakes.temporal.eval.Evaluation_ImplBase.CopyNPChunksToLookupWindowAnnotations;
-import org.apache.ctakes.typesystem.type.syntax.BaseToken;
-import org.apache.ctakes.typesystem.type.textspan.LookupWindowAnnotation;
-import org.apache.ctakes.typesystem.type.textspan.Segment;
-import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.ctakes.temporal.ae.EventTimeRelationAnnotator;
 import org.apache.uima.analysis_engine.AnalysisEngine;
-import org.apache.uima.analysis_engine.AnalysisEngineDescription;
 import org.apache.uima.collection.CollectionReader;
 import org.uimafit.component.xwriter.XWriter;
 import org.uimafit.factory.AggregateBuilder;
 import org.uimafit.factory.AnalysisEngineFactory;
 import org.uimafit.factory.CollectionReaderFactory;
-import org.uimafit.factory.ExternalResourceFactory;
-import org.uimafit.factory.TypePrioritiesFactory;
-import org.uimafit.factory.TypeSystemDescriptionFactory;
 import org.uimafit.pipeline.SimplePipeline;
 
 import com.lexicalscope.jewel.cli.CliFactory;
@@ -68,32 +41,25 @@ import com.lexicalscope.jewel.cli.Option
  * 
  * @author dmitriy dligach
  */
-public class EventAndTimeExtractionPipeline {
+public class EventAndTimeExtractionPipeline extends TemporalExtractionPipeline_ImplBase {
   
-  static interface Options {
-    // FYI: Command should say -i or --inputDirectory now
-    @Option(shortName="i",
-        description = "specify the path to the directory containing the clinical notes to be processed")
-    public String getInputDirectory();
-    
-    @Option(shortName = "o",
-        description = "specify the path to the directory where the output xmi files are to be saved")
-    public String getOutputDirectory();
-    
+  static interface ETOptions extends Options{
     @Option(
+        shortName = "e",
         description = "specify the path to the directory where the trained event model is located",
         defaultValue="target/eval/event-spans/train_and_test/")
     public String getEventModelDirectory();
     
     @Option(
+        shortName = "t",
         description = "specify the path to the directory where the trained event model is located",
-        defaultValue="target/eval/time-spans/train_and_test/seq/")
+        defaultValue="target/eval/time-spans/train_and_test/BackwardsTimeAnnotator/")
     public String getTimeModelDirectory();
   }
   
 	public static void main(String[] args) throws Exception {
 		
-		Options options = CliFactory.parseArguments(Options.class, args);
+		ETOptions options = CliFactory.parseArguments(ETOptions.class, args);
 
 		CollectionReader collectionReader = CollectionReaderFactory.createCollectionReaderFromPath(
 				"../ctakes-core/desc/collection_reader/FilesInDirectoryCollectionReader.xml",
@@ -102,8 +68,7 @@ public class EventAndTimeExtractionPipel
 
 		AggregateBuilder aggregateBuilder = getPreprocessorAggregateBuilder();
 		aggregateBuilder.add(EventAnnotator.createAnnotatorDescription(new File(options.getEventModelDirectory())));
-		aggregateBuilder.add(TimeAnnotator.createAnnotatorDescription(new File(options.getTimeModelDirectory())));
-		
+		aggregateBuilder.add(BackwardsTimeAnnotator.createAnnotatorDescription(new File(options.getTimeModelDirectory())));
     AnalysisEngine xWriter = AnalysisEngineFactory.createPrimitive(
         XWriter.class,
         XWriter.PARAM_OUTPUT_DIRECTORY_NAME,
@@ -114,184 +79,4 @@ public class EventAndTimeExtractionPipel
         aggregateBuilder.createAggregate(),
         xWriter);
 	}
-	
-	/**
-	 * Preprocessing needed for relation extraction.
-	 */
-  protected static AggregateBuilder getPreprocessorAggregateBuilder()
-      throws Exception {
-    AggregateBuilder aggregateBuilder = new AggregateBuilder();
-    
-    // identify segments; use simple segment annotator on non-mayo notes
-    // aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(SegmentsFromBracketedSectionTagsAnnotator.class));
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(SimpleSegmentAnnotator.class));
-    
-    // identify sentences
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
-            SentenceDetector.class,
-            SentenceDetector.SD_MODEL_FILE_PARAM,
-            "org/apache/ctakes/core/sentdetect/sd-med-model.zip"));
-    // identify tokens
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(TokenizerAnnotatorPTB.class));
-    // merge some tokens
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ContextDependentTokenizerAnnotator.class));
-
-    // identify part-of-speech tags
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
-        POSTagger.class,
-        TypeSystemDescriptionFactory.createTypeSystemDescription(),
-        TypePrioritiesFactory.createTypePriorities(Segment.class, Sentence.class, BaseToken.class),
-        POSTagger.POS_MODEL_FILE_PARAM,
-        "org/apache/ctakes/postagger/models/mayo-pos.zip"));
-
-    // identify chunks
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
-        Chunker.class,
-        Chunker.CHUNKER_MODEL_FILE_PARAM,
-        FileLocator.locateFile("org/apache/ctakes/chunker/models/chunker-model.zip"),
-        Chunker.CHUNKER_CREATOR_CLASS_PARAM,
-        DefaultChunkCreator.class));
-
-    // identify UMLS named entities
-
-    // adjust NP in NP NP to span both
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
-        ChunkAdjuster.class,
-        ChunkAdjuster.PARAM_CHUNK_PATTERN,
-        new String[] { "NP", "NP" },
-        ChunkAdjuster.PARAM_EXTEND_TO_INCLUDE_TOKEN,
-        1));
-    // adjust NP in NP PP NP to span all three
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
-        ChunkAdjuster.class,
-        ChunkAdjuster.PARAM_CHUNK_PATTERN,
-        new String[] { "NP", "PP", "NP" },
-        ChunkAdjuster.PARAM_EXTEND_TO_INCLUDE_TOKEN,
-        2));
-    // add lookup windows for each NP
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(CopyNPChunksToLookupWindowAnnotations.class));
-    // maximize lookup windows
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
-        OverlapAnnotator.class,
-        "A_ObjectClass",
-        LookupWindowAnnotation.class,
-        "B_ObjectClass",
-        LookupWindowAnnotation.class,
-        "OverlapType",
-        "A_ENV_B",
-        "ActionType",
-        "DELETE",
-        "DeleteAction",
-        new String[] { "selector=B" }));
-    // add UMLS on top of lookup windows
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
-        UmlsDictionaryLookupAnnotator.class,
-        "ctakes.umlsaddr",
-        "https://uts-ws.nlm.nih.gov/restful/isValidUMLSUser",
-        "ctakes.umlsvendor",
-        "NLM-6515182895",
-        "LookupDescriptor",
-        ExternalResourceFactory.createExternalResourceDescription(
-            FileResourceImpl.class,
-            new File("target/unpacked/org/apache/ctakes/dictionary/lookup/LookupDesc_Db.xml").getAbsoluteFile()),
-        "DbConnection",
-        ExternalResourceFactory.createExternalResourceDescription(
-            JdbcConnectionResourceImpl.class,
-            "",
-            JdbcConnectionResourceImpl.PARAM_DRIVER_CLASS,
-            "org.hsqldb.jdbcDriver",
-            JdbcConnectionResourceImpl.PARAM_URL,
-            // Should be the following but it's WAY too slow
-            // "jdbc:hsqldb:res:/org/apache/ctakes/dictionary/lookup/umls2011ab/umls"),
-            "jdbc:hsqldb:file:target/unpacked/org/apache/ctakes/dictionary/lookup/umls2011ab/umls"),
-        "RxnormIndexReader",
-        ExternalResourceFactory.createExternalResourceDescription(
-            LuceneIndexReaderResourceImpl.class,
-            "",
-            "UseMemoryIndex",
-            true,
-            "IndexDirectory",
-            new File("target/unpacked/org/apache/ctakes/dictionary/lookup/rxnorm_index").getAbsoluteFile()),
-        "OrangeBookIndexReader",
-        ExternalResourceFactory.createExternalResourceDescription(
-            LuceneIndexReaderResourceImpl.class,
-            "",
-            "UseMemoryIndex",
-            true,
-            "IndexDirectory",
-            new File("target/unpacked/org/apache/ctakes/dictionary/lookup/OrangeBook").getAbsoluteFile())));
-
-    // add lvg annotator
-    String[] XeroxTreebankMap = {
-        "adj|JJ",
-        "adv|RB",
-        "aux|AUX",
-        "compl|CS",
-        "conj|CC",
-        "det|DET",
-        "modal|MD",
-        "noun|NN",
-        "prep|IN",
-        "pron|PRP",
-        "verb|VB" };
-    String[] ExclusionSet = {
-        "and",
-        "And",
-        "by",
-        "By",
-        "for",
-        "For",
-        "in",
-        "In",
-        "of",
-        "Of",
-        "on",
-        "On",
-        "the",
-        "The",
-        "to",
-        "To",
-        "with",
-        "With" };
-    AnalysisEngineDescription lvgAnnotator = AnalysisEngineFactory.createPrimitiveDescription(
-        LvgAnnotator.class,
-        "UseSegments",
-        false,
-        "SegmentsToSkip",
-        new String[0],
-        "UseCmdCache",
-        false,
-        "CmdCacheFileLocation",
-        "/org/apache/ctakes/lvg/2005_norm.voc",
-        "CmdCacheFrequencyCutoff",
-        20,
-        "ExclusionSet",
-        ExclusionSet,
-        "XeroxTreebankMap",
-        XeroxTreebankMap,
-        "LemmaCacheFileLocation",
-        "/org/apache/ctakes/lvg/2005_lemma.voc",
-        "UseLemmaCache",
-        false,
-        "LemmaCacheFrequencyCutoff",
-        20,
-        "PostLemmas",
-        true,
-        "LvgCmdApi",
-        ExternalResourceFactory.createExternalResourceDescription(
-            LvgCmdApiResourceImpl.class,
-            new File(LvgCmdApiResourceImpl.class.getResource(
-                "/org/apache/ctakes/lvg/data/config/lvg.properties").toURI())));
-    aggregateBuilder.add(lvgAnnotator);
-
-    // add dependency parser
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ClearNLPDependencyParserAE.class));
-
-    // add semantic role labeler
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ClearNLPSemanticRoleLabelerAE.class));
-
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ConstituencyParser.class));
-    
-    return aggregateBuilder;
-  }
 }

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/pipelines/EventExtractionPipeline.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/pipelines/EventExtractionPipeline.java?rev=1562089&r1=1562088&r2=1562089&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/pipelines/EventExtractionPipeline.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/pipelines/EventExtractionPipeline.java Tue Jan 28 15:08:54 2014
@@ -20,42 +20,14 @@ package org.apache.ctakes.temporal.pipel
 
 import java.io.File;
 
-import org.apache.ctakes.chunker.ae.Chunker;
-import org.apache.ctakes.chunker.ae.DefaultChunkCreator;
-import org.apache.ctakes.chunker.ae.adjuster.ChunkAdjuster;
-import org.apache.ctakes.constituency.parser.ae.ConstituencyParser;
-import org.apache.ctakes.contexttokenizer.ae.ContextDependentTokenizerAnnotator;
-import org.apache.ctakes.core.ae.OverlapAnnotator;
-import org.apache.ctakes.core.ae.SentenceDetector;
-import org.apache.ctakes.core.ae.SimpleSegmentAnnotator;
-import org.apache.ctakes.core.ae.TokenizerAnnotatorPTB;
 import org.apache.ctakes.core.cr.FilesInDirectoryCollectionReader;
-import org.apache.ctakes.core.resource.FileLocator;
-import org.apache.ctakes.core.resource.FileResourceImpl;
-import org.apache.ctakes.core.resource.JdbcConnectionResourceImpl;
-import org.apache.ctakes.core.resource.LuceneIndexReaderResourceImpl;
-import org.apache.ctakes.dependency.parser.ae.ClearNLPDependencyParserAE;
-import org.apache.ctakes.dependency.parser.ae.ClearNLPSemanticRoleLabelerAE;
-import org.apache.ctakes.dictionary.lookup.ae.UmlsDictionaryLookupAnnotator;
-import org.apache.ctakes.lvg.ae.LvgAnnotator;
-import org.apache.ctakes.lvg.resource.LvgCmdApiResourceImpl;
-import org.apache.ctakes.postagger.POSTagger;
 import org.apache.ctakes.temporal.ae.EventAnnotator;
-import org.apache.ctakes.temporal.eval.Evaluation_ImplBase.CopyNPChunksToLookupWindowAnnotations;
-import org.apache.ctakes.typesystem.type.syntax.BaseToken;
-import org.apache.ctakes.typesystem.type.textspan.LookupWindowAnnotation;
-import org.apache.ctakes.typesystem.type.textspan.Segment;
-import org.apache.ctakes.typesystem.type.textspan.Sentence;
 import org.apache.uima.analysis_engine.AnalysisEngine;
-import org.apache.uima.analysis_engine.AnalysisEngineDescription;
 import org.apache.uima.collection.CollectionReader;
 import org.uimafit.component.xwriter.XWriter;
 import org.uimafit.factory.AggregateBuilder;
 import org.uimafit.factory.AnalysisEngineFactory;
 import org.uimafit.factory.CollectionReaderFactory;
-import org.uimafit.factory.ExternalResourceFactory;
-import org.uimafit.factory.TypePrioritiesFactory;
-import org.uimafit.factory.TypeSystemDescriptionFactory;
 import org.uimafit.pipeline.SimplePipeline;
 
 import com.lexicalscope.jewel.cli.CliFactory;
@@ -67,20 +39,9 @@ import com.lexicalscope.jewel.cli.Option
  * 
  * @author dmitriy dligach
  */
-public class EventExtractionPipeline {
+public class EventExtractionPipeline extends TemporalExtractionPipeline_ImplBase {
   
-  static interface Options {
-    // FYI -- command line should use -i or --inputDirectory now
-    @Option(
-        shortName = "i",
-        description = "specify the path to the directory containing the clinical notes to be processed")
-    public String getInputDirectory();
-    
-    @Option(
-        shortName = "o",
-        description = "specify the path to the directory where the output xmi files are to be saved")
-    public String getOutputDirectory();
-    
+  static interface EventOptions extends Options{
     @Option(
         shortName = "m",
         description = "specify the path to the directory where the trained model is located",
@@ -90,7 +51,7 @@ public class EventExtractionPipeline {
   
 	public static void main(String[] args) throws Exception {
 		
-		Options options = CliFactory.parseArguments(Options.class, args);
+		EventOptions options = CliFactory.parseArguments(EventOptions.class, args);
 
 		CollectionReader collectionReader = CollectionReaderFactory.createCollectionReaderFromPath(
 				"../ctakes-core/desc/collection_reader/FilesInDirectoryCollectionReader.xml",
@@ -110,185 +71,4 @@ public class EventExtractionPipeline {
         aggregateBuilder.createAggregate(),
         xWriter);
 	}
-	
-	/**
-	 * Preprocessing needed for relation extraction.
-	 */
-  protected static AggregateBuilder getPreprocessorAggregateBuilder()
-      throws Exception {
-    AggregateBuilder aggregateBuilder = new AggregateBuilder();
-    
-    // identify segments; use simple segment annotator on non-mayo notes
-    // aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(SegmentsFromBracketedSectionTagsAnnotator.class));
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(SimpleSegmentAnnotator.class));
-    
-    // identify sentences
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
-            SentenceDetector.class,
-            SentenceDetector.SD_MODEL_FILE_PARAM,
-            "org/apache/ctakes/core/sentdetect/sd-med-model.zip"));
-    
-    // identify tokens
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(TokenizerAnnotatorPTB.class));
-    // merge some tokens
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ContextDependentTokenizerAnnotator.class));
-
-    // identify part-of-speech tags
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
-        POSTagger.class,
-        TypeSystemDescriptionFactory.createTypeSystemDescription(),
-        TypePrioritiesFactory.createTypePriorities(Segment.class, Sentence.class, BaseToken.class),
-        POSTagger.POS_MODEL_FILE_PARAM,
-        "org/apache/ctakes/postagger/models/mayo-pos.zip"));
-
-    // identify chunks
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
-        Chunker.class,
-        Chunker.CHUNKER_MODEL_FILE_PARAM,
-        FileLocator.locateFile("org/apache/ctakes/chunker/models/chunker-model.zip"),
-        Chunker.CHUNKER_CREATOR_CLASS_PARAM,
-        DefaultChunkCreator.class));
-
-    // identify UMLS named entities
-
-    // adjust NP in NP NP to span both
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
-        ChunkAdjuster.class,
-        ChunkAdjuster.PARAM_CHUNK_PATTERN,
-        new String[] { "NP", "NP" },
-        ChunkAdjuster.PARAM_EXTEND_TO_INCLUDE_TOKEN,
-        1));
-    // adjust NP in NP PP NP to span all three
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
-        ChunkAdjuster.class,
-        ChunkAdjuster.PARAM_CHUNK_PATTERN,
-        new String[] { "NP", "PP", "NP" },
-        ChunkAdjuster.PARAM_EXTEND_TO_INCLUDE_TOKEN,
-        2));
-    // add lookup windows for each NP
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(CopyNPChunksToLookupWindowAnnotations.class));
-    // maximize lookup windows
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
-        OverlapAnnotator.class,
-        "A_ObjectClass",
-        LookupWindowAnnotation.class,
-        "B_ObjectClass",
-        LookupWindowAnnotation.class,
-        "OverlapType",
-        "A_ENV_B",
-        "ActionType",
-        "DELETE",
-        "DeleteAction",
-        new String[] { "selector=B" }));
-    // add UMLS on top of lookup windows
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
-        UmlsDictionaryLookupAnnotator.class,
-        "ctakes.umlsaddr",
-        "https://uts-ws.nlm.nih.gov/restful/isValidUMLSUser",
-        "ctakes.umlsvendor",
-        "NLM-6515182895",
-        "LookupDescriptor",
-        ExternalResourceFactory.createExternalResourceDescription(
-            FileResourceImpl.class,
-            new File("target/unpacked/org/apache/ctakes/dictionary/lookup/LookupDesc_Db.xml").getAbsoluteFile()),
-        "DbConnection",
-        ExternalResourceFactory.createExternalResourceDescription(
-            JdbcConnectionResourceImpl.class,
-            "",
-            JdbcConnectionResourceImpl.PARAM_DRIVER_CLASS,
-            "org.hsqldb.jdbcDriver",
-            JdbcConnectionResourceImpl.PARAM_URL,
-            // Should be the following but it's WAY too slow
-            // "jdbc:hsqldb:res:/org/apache/ctakes/dictionary/lookup/umls2011ab/umls"),
-            "jdbc:hsqldb:file:target/unpacked/org/apache/ctakes/dictionary/lookup/umls2011ab/umls"),
-        "RxnormIndexReader",
-        ExternalResourceFactory.createExternalResourceDescription(
-            LuceneIndexReaderResourceImpl.class,
-            "",
-            "UseMemoryIndex",
-            true,
-            "IndexDirectory",
-            new File("target/unpacked/org/apache/ctakes/dictionary/lookup/rxnorm_index").getAbsoluteFile()),
-        "OrangeBookIndexReader",
-        ExternalResourceFactory.createExternalResourceDescription(
-            LuceneIndexReaderResourceImpl.class,
-            "",
-            "UseMemoryIndex",
-            true,
-            "IndexDirectory",
-            new File("target/unpacked/org/apache/ctakes/dictionary/lookup/OrangeBook").getAbsoluteFile())));
-
-    // add lvg annotator
-    String[] XeroxTreebankMap = {
-        "adj|JJ",
-        "adv|RB",
-        "aux|AUX",
-        "compl|CS",
-        "conj|CC",
-        "det|DET",
-        "modal|MD",
-        "noun|NN",
-        "prep|IN",
-        "pron|PRP",
-        "verb|VB" };
-    String[] ExclusionSet = {
-        "and",
-        "And",
-        "by",
-        "By",
-        "for",
-        "For",
-        "in",
-        "In",
-        "of",
-        "Of",
-        "on",
-        "On",
-        "the",
-        "The",
-        "to",
-        "To",
-        "with",
-        "With" };
-    AnalysisEngineDescription lvgAnnotator = AnalysisEngineFactory.createPrimitiveDescription(
-        LvgAnnotator.class,
-        "UseSegments",
-        false,
-        "SegmentsToSkip",
-        new String[0],
-        "UseCmdCache",
-        false,
-        "CmdCacheFileLocation",
-        "/org/apache/ctakes/lvg/2005_norm.voc",
-        "CmdCacheFrequencyCutoff",
-        20,
-        "ExclusionSet",
-        ExclusionSet,
-        "XeroxTreebankMap",
-        XeroxTreebankMap,
-        "LemmaCacheFileLocation",
-        "/org/apache/ctakes/lvg/2005_lemma.voc",
-        "UseLemmaCache",
-        false,
-        "LemmaCacheFrequencyCutoff",
-        20,
-        "PostLemmas",
-        true,
-        "LvgCmdApi",
-        ExternalResourceFactory.createExternalResourceDescription(
-            LvgCmdApiResourceImpl.class,
-            new File(LvgCmdApiResourceImpl.class.getResource(
-                "/org/apache/ctakes/lvg/data/config/lvg.properties").toURI())));
-    aggregateBuilder.add(lvgAnnotator);
-
-    // add dependency parser
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ClearNLPDependencyParserAE.class));
-
-    // add semantic role labeler
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ClearNLPSemanticRoleLabelerAE.class));
-
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ConstituencyParser.class));
-    
-    return aggregateBuilder;
-  }
 }

Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/pipelines/FullTemporalExtractionPipeline.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/pipelines/FullTemporalExtractionPipeline.java?rev=1562089&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/pipelines/FullTemporalExtractionPipeline.java (added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/pipelines/FullTemporalExtractionPipeline.java Tue Jan 28 15:08:54 2014
@@ -0,0 +1,80 @@
+package org.apache.ctakes.temporal.pipelines;
+
+import java.io.File;
+
+import org.apache.ctakes.core.cr.FilesInDirectoryCollectionReader;
+import org.apache.ctakes.temporal.ae.BackwardsTimeAnnotator;
+import org.apache.ctakes.temporal.ae.EventAnnotator;
+import org.apache.ctakes.temporal.ae.EventEventRelationAnnotator;
+import org.apache.ctakes.temporal.ae.EventTimeRelationAnnotator;
+import org.apache.uima.analysis_engine.AnalysisEngine;
+import org.apache.uima.collection.CollectionReader;
+import org.uimafit.component.xwriter.XWriter;
+import org.uimafit.factory.AggregateBuilder;
+import org.uimafit.factory.AnalysisEngineFactory;
+import org.uimafit.factory.CollectionReaderFactory;
+import org.uimafit.pipeline.SimplePipeline;
+
+import com.lexicalscope.jewel.cli.CliFactory;
+import com.lexicalscope.jewel.cli.Option;
+
+/** Command-line pipeline that adds event, time, event-time relation and (optionally) event-event relation annotators. */
+public class FullTemporalExtractionPipeline extends TemporalExtractionPipeline_ImplBase {
+
+  static interface FullOptions extends Options {
+    @Option(
+        shortName = "e",
+        description = "specify the path to the directory where the trained event model is located",
+        defaultValue="target/eval/event-spans/train_and_test/")
+    public String getEventModelDirectory();
+    
+    @Option(
+        shortName = "t",
+        description = "specify the path to the directory where the trained time model is located",
+        defaultValue="target/eval/time-spans/train_and_test/BackwardsTimeAnnotator/")
+    public String getTimeModelDirectory();
+    
+    @Option(
+        shortName = "r",
+        description = "Specify the path to the directory where the trained event-time relation model is located",
+        defaultValue="target/eval/temporal-relations/event-time/train_and_test/")
+    public String getEventTimeRelationModelDirectory();
+
+    @Option(
+        shortName = "s",
+        description = "Specify the path to the directory where the trained event-event relation model is located",
+        defaultToNull=true) // add in default value once we have a satisfying trained model
+    public String getEventEventRelationModelDirectory();  
+  }
+
+  /** Reads files from the input directory, runs the preprocessing aggregate plus the temporal annotators, and writes output via XWriter.
+   * @param args command-line arguments, parsed into {@link FullOptions}
+   * @throws Exception propagated from option parsing, component creation, or the pipeline run
+   */
+  public static void main(String[] args) throws Exception {
+    FullOptions options = CliFactory.parseArguments(FullOptions.class, args);
+    
+    CollectionReader collectionReader = CollectionReaderFactory.createCollectionReaderFromPath(
+        "../ctakes-core/desc/collection_reader/FilesInDirectoryCollectionReader.xml",
+        FilesInDirectoryCollectionReader.PARAM_INPUTDIR,
+        options.getInputDirectory());
+
+    AggregateBuilder aggregateBuilder = getPreprocessorAggregateBuilder();
+    aggregateBuilder.add(EventAnnotator.createAnnotatorDescription(new File(options.getEventModelDirectory())));
+    aggregateBuilder.add(BackwardsTimeAnnotator.createAnnotatorDescription(new File(options.getTimeModelDirectory())));
+    aggregateBuilder.add(EventTimeRelationAnnotator.createAnnotatorDescription(new File(options.getEventTimeRelationModelDirectory())));
+    if(options.getEventEventRelationModelDirectory()!=null){ // option defaults to null, so this annotator is only added when -s is given
+      aggregateBuilder.add(EventEventRelationAnnotator.createAnnotatorDescription(new File(options.getEventEventRelationModelDirectory())));
+    }
+    AnalysisEngine xWriter = AnalysisEngineFactory.createPrimitive(
+        XWriter.class,
+        XWriter.PARAM_OUTPUT_DIRECTORY_NAME,
+        options.getOutputDirectory());
+    
+    SimplePipeline.runPipeline(
+        collectionReader,
+        aggregateBuilder.createAggregate(),
+        xWriter);
+  }
+
+}

Propchange: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/pipelines/FullTemporalExtractionPipeline.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/pipelines/TemporalExtractionPipeline_ImplBase.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/pipelines/TemporalExtractionPipeline_ImplBase.java?rev=1562089&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/pipelines/TemporalExtractionPipeline_ImplBase.java (added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/pipelines/TemporalExtractionPipeline_ImplBase.java Tue Jan 28 15:08:54 2014
@@ -0,0 +1,231 @@
+package org.apache.ctakes.temporal.pipelines;
+
+import java.io.File;
+
+import org.apache.ctakes.chunker.ae.Chunker;
+import org.apache.ctakes.chunker.ae.DefaultChunkCreator;
+import org.apache.ctakes.chunker.ae.adjuster.ChunkAdjuster;
+import org.apache.ctakes.constituency.parser.ae.ConstituencyParser;
+import org.apache.ctakes.contexttokenizer.ae.ContextDependentTokenizerAnnotator;
+import org.apache.ctakes.core.ae.OverlapAnnotator;
+import org.apache.ctakes.core.ae.SentenceDetector;
+import org.apache.ctakes.core.ae.SimpleSegmentAnnotator;
+import org.apache.ctakes.core.ae.TokenizerAnnotatorPTB;
+import org.apache.ctakes.core.resource.FileLocator;
+import org.apache.ctakes.core.resource.FileResourceImpl;
+import org.apache.ctakes.core.resource.JdbcConnectionResourceImpl;
+import org.apache.ctakes.core.resource.LuceneIndexReaderResourceImpl;
+import org.apache.ctakes.dependency.parser.ae.ClearNLPDependencyParserAE;
+import org.apache.ctakes.dependency.parser.ae.ClearNLPSemanticRoleLabelerAE;
+import org.apache.ctakes.dictionary.lookup.ae.UmlsDictionaryLookupAnnotator;
+import org.apache.ctakes.lvg.ae.LvgAnnotator;
+import org.apache.ctakes.lvg.resource.LvgCmdApiResourceImpl;
+import org.apache.ctakes.postagger.POSTagger;
+import org.apache.ctakes.temporal.eval.Evaluation_ImplBase.CopyNPChunksToLookupWindowAnnotations;
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.textspan.LookupWindowAnnotation;
+import org.apache.ctakes.typesystem.type.textspan.Segment;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.uimafit.factory.AggregateBuilder;
+import org.uimafit.factory.AnalysisEngineFactory;
+import org.uimafit.factory.ExternalResourceFactory;
+import org.uimafit.factory.TypePrioritiesFactory;
+import org.uimafit.factory.TypeSystemDescriptionFactory;
+
+import com.lexicalscope.jewel.cli.Option;
+
+public abstract class TemporalExtractionPipeline_ImplBase {
+  static interface Options {
+
+    @Option(
+        shortName = "i",
+        description = "specify the path to the directory containing the clinical notes to be processed")
+    public String getInputDirectory();
+    
+    @Option(
+        shortName = "o",
+        description = "specify the path to the directory where the output xmi files are to be saved")
+    public String getOutputDirectory();
+  }
+  
+  /**
+   * Preprocessing needed for relation extraction.
+   */
+  protected static AggregateBuilder getPreprocessorAggregateBuilder()
+      throws Exception {
+    AggregateBuilder aggregateBuilder = new AggregateBuilder();
+    
+    // identify segments; use simple segment annotator on non-mayo notes
+    // aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(SegmentsFromBracketedSectionTagsAnnotator.class));
+    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(SimpleSegmentAnnotator.class));
+    
+    // identify sentences
+    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
+            SentenceDetector.class,
+            SentenceDetector.SD_MODEL_FILE_PARAM,
+            "org/apache/ctakes/core/sentdetect/sd-med-model.zip"));
+    // identify tokens
+    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(TokenizerAnnotatorPTB.class));
+    // merge some tokens
+    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ContextDependentTokenizerAnnotator.class));
+
+    // identify part-of-speech tags
+    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
+        POSTagger.class,
+        TypeSystemDescriptionFactory.createTypeSystemDescription(),
+        TypePrioritiesFactory.createTypePriorities(Segment.class, Sentence.class, BaseToken.class),
+        POSTagger.POS_MODEL_FILE_PARAM,
+        "org/apache/ctakes/postagger/models/mayo-pos.zip"));
+
+    // identify chunks
+    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
+        Chunker.class,
+        Chunker.CHUNKER_MODEL_FILE_PARAM,
+        FileLocator.locateFile("org/apache/ctakes/chunker/models/chunker-model.zip"),
+        Chunker.CHUNKER_CREATOR_CLASS_PARAM,
+        DefaultChunkCreator.class));
+
+    // identify UMLS named entities
+
+    // adjust NP in NP NP to span both
+    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
+        ChunkAdjuster.class,
+        ChunkAdjuster.PARAM_CHUNK_PATTERN,
+        new String[] { "NP", "NP" },
+        ChunkAdjuster.PARAM_EXTEND_TO_INCLUDE_TOKEN,
+        1));
+    // adjust NP in NP PP NP to span all three
+    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
+        ChunkAdjuster.class,
+        ChunkAdjuster.PARAM_CHUNK_PATTERN,
+        new String[] { "NP", "PP", "NP" },
+        ChunkAdjuster.PARAM_EXTEND_TO_INCLUDE_TOKEN,
+        2));
+    // add lookup windows for each NP
+    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(CopyNPChunksToLookupWindowAnnotations.class));
+    // maximize lookup windows
+    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
+        OverlapAnnotator.class,
+        "A_ObjectClass",
+        LookupWindowAnnotation.class,
+        "B_ObjectClass",
+        LookupWindowAnnotation.class,
+        "OverlapType",
+        "A_ENV_B",
+        "ActionType",
+        "DELETE",
+        "DeleteAction",
+        new String[] { "selector=B" }));
+    // add UMLS on top of lookup windows
+    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
+        UmlsDictionaryLookupAnnotator.class,
+        "ctakes.umlsaddr",
+        "https://uts-ws.nlm.nih.gov/restful/isValidUMLSUser",
+        "ctakes.umlsvendor",
+        "NLM-6515182895",
+        "LookupDescriptor",
+        ExternalResourceFactory.createExternalResourceDescription(
+            FileResourceImpl.class,
+            FileLocator.locateFile("org/apache/ctakes/dictionary/lookup/LookupDesc_Db.xml")),
+        "DbConnection",
+        ExternalResourceFactory.createExternalResourceDescription(
+            JdbcConnectionResourceImpl.class,
+            "",
+            JdbcConnectionResourceImpl.PARAM_DRIVER_CLASS,
+            "org.hsqldb.jdbcDriver",
+            JdbcConnectionResourceImpl.PARAM_URL,
+            // Should be the following but it's WAY too slow
+            // "jdbc:hsqldb:res:/org/apache/ctakes/dictionary/lookup/umls2011ab/umls"),
+            "jdbc:hsqldb:file:target/unpacked/org/apache/ctakes/dictionary/lookup/umls2011ab/umls"),
+        "RxnormIndexReader",
+        ExternalResourceFactory.createExternalResourceDescription(
+            LuceneIndexReaderResourceImpl.class,
+            "",
+            "UseMemoryIndex",
+            true,
+            "IndexDirectory",
+            new File("target/unpacked/org/apache/ctakes/dictionary/lookup/rxnorm_index").getAbsoluteFile()),
+        "OrangeBookIndexReader",
+        ExternalResourceFactory.createExternalResourceDescription(
+            LuceneIndexReaderResourceImpl.class,
+            "",
+            "UseMemoryIndex",
+            true,
+            "IndexDirectory",
+            "org/apache/ctakes/dictionary/lookup/OrangeBook")));
+
+    // add lvg annotator
+    String[] XeroxTreebankMap = {
+        "adj|JJ",
+        "adv|RB",
+        "aux|AUX",
+        "compl|CS",
+        "conj|CC",
+        "det|DET",
+        "modal|MD",
+        "noun|NN",
+        "prep|IN",
+        "pron|PRP",
+        "verb|VB" };
+    String[] ExclusionSet = {
+        "and",
+        "And",
+        "by",
+        "By",
+        "for",
+        "For",
+        "in",
+        "In",
+        "of",
+        "Of",
+        "on",
+        "On",
+        "the",
+        "The",
+        "to",
+        "To",
+        "with",
+        "With" };
+    AnalysisEngineDescription lvgAnnotator = AnalysisEngineFactory.createPrimitiveDescription(
+        LvgAnnotator.class,
+        "UseSegments",
+        false,
+        "SegmentsToSkip",
+        new String[0],
+        "UseCmdCache",
+        false,
+        "CmdCacheFileLocation",
+        "/org/apache/ctakes/lvg/2005_norm.voc",
+        "CmdCacheFrequencyCutoff",
+        20,
+        "ExclusionSet",
+        ExclusionSet,
+        "XeroxTreebankMap",
+        XeroxTreebankMap,
+        "LemmaCacheFileLocation",
+        "/org/apache/ctakes/lvg/2005_lemma.voc",
+        "UseLemmaCache",
+        false,
+        "LemmaCacheFrequencyCutoff",
+        20,
+        "PostLemmas",
+        true,
+        "LvgCmdApi",
+        ExternalResourceFactory.createExternalResourceDescription(
+            LvgCmdApiResourceImpl.class,
+            new File(LvgCmdApiResourceImpl.class.getResource(
+                "/org/apache/ctakes/lvg/data/config/lvg.properties").toURI())));
+    aggregateBuilder.add(lvgAnnotator);
+
+    // add dependency parser
+    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ClearNLPDependencyParserAE.class));
+
+    // add semantic role labeler
+    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ClearNLPSemanticRoleLabelerAE.class));
+
+    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ConstituencyParser.class));
+    
+    return aggregateBuilder;
+  }
+}

Propchange: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/pipelines/TemporalExtractionPipeline_ImplBase.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/pipelines/TimeExtractionPipeline.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/pipelines/TimeExtractionPipeline.java?rev=1562089&r1=1562088&r2=1562089&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/pipelines/TimeExtractionPipeline.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/pipelines/TimeExtractionPipeline.java Tue Jan 28 15:08:54 2014
@@ -20,42 +20,14 @@ package org.apache.ctakes.temporal.pipel
 
 import java.io.File;
 
-import org.apache.ctakes.chunker.ae.Chunker;
-import org.apache.ctakes.chunker.ae.DefaultChunkCreator;
-import org.apache.ctakes.chunker.ae.adjuster.ChunkAdjuster;
-import org.apache.ctakes.constituency.parser.ae.ConstituencyParser;
-import org.apache.ctakes.contexttokenizer.ae.ContextDependentTokenizerAnnotator;
-import org.apache.ctakes.core.ae.OverlapAnnotator;
-import org.apache.ctakes.core.ae.SentenceDetector;
-import org.apache.ctakes.core.ae.SimpleSegmentAnnotator;
-import org.apache.ctakes.core.ae.TokenizerAnnotatorPTB;
 import org.apache.ctakes.core.cr.FilesInDirectoryCollectionReader;
-import org.apache.ctakes.core.resource.FileLocator;
-import org.apache.ctakes.core.resource.FileResourceImpl;
-import org.apache.ctakes.core.resource.JdbcConnectionResourceImpl;
-import org.apache.ctakes.core.resource.LuceneIndexReaderResourceImpl;
-import org.apache.ctakes.dependency.parser.ae.ClearNLPDependencyParserAE;
-import org.apache.ctakes.dependency.parser.ae.ClearNLPSemanticRoleLabelerAE;
-import org.apache.ctakes.dictionary.lookup.ae.UmlsDictionaryLookupAnnotator;
-import org.apache.ctakes.lvg.ae.LvgAnnotator;
-import org.apache.ctakes.lvg.resource.LvgCmdApiResourceImpl;
-import org.apache.ctakes.postagger.POSTagger;
 import org.apache.ctakes.temporal.ae.BackwardsTimeAnnotator;
-import org.apache.ctakes.temporal.eval.Evaluation_ImplBase.CopyNPChunksToLookupWindowAnnotations;
-import org.apache.ctakes.typesystem.type.syntax.BaseToken;
-import org.apache.ctakes.typesystem.type.textspan.LookupWindowAnnotation;
-import org.apache.ctakes.typesystem.type.textspan.Segment;
-import org.apache.ctakes.typesystem.type.textspan.Sentence;
 import org.apache.uima.analysis_engine.AnalysisEngine;
-import org.apache.uima.analysis_engine.AnalysisEngineDescription;
 import org.apache.uima.collection.CollectionReader;
 import org.uimafit.component.xwriter.XWriter;
 import org.uimafit.factory.AggregateBuilder;
 import org.uimafit.factory.AnalysisEngineFactory;
 import org.uimafit.factory.CollectionReaderFactory;
-import org.uimafit.factory.ExternalResourceFactory;
-import org.uimafit.factory.TypePrioritiesFactory;
-import org.uimafit.factory.TypeSystemDescriptionFactory;
 import org.uimafit.pipeline.SimplePipeline;
 
 import com.lexicalscope.jewel.cli.CliFactory;
@@ -67,21 +39,11 @@ import com.lexicalscope.jewel.cli.Option
  * 
  * @author dmitriy dligach
  */
-public class TimeExtractionPipeline {
+public class TimeExtractionPipeline extends TemporalExtractionPipeline_ImplBase {
   
-  static interface Options {
+  static interface TimexOptions extends Options {
 
     @Option(
-        shortName = "i",
-        description = "specify the path to the directory containing the clinical notes to be processed")
-    public String getInputDirectory();
-    
-    @Option(
-        shortName = "o",
-        description = "specify the path to the directory where the output xmi files are to be saved")
-    public String getOutputDirectory();
-        
-    @Option(
         shortName = "m",
         description = "specify the path to the directory where the temporal expression model is located",
         defaultValue="target/eval/time-spans/train_and_test/BackwardsTimeAnnotator/")
@@ -90,7 +52,7 @@ public class TimeExtractionPipeline {
   
 	public static void main(String[] args) throws Exception {
 		
-		Options options = CliFactory.parseArguments(Options.class, args);
+		TimexOptions options = CliFactory.parseArguments(TimexOptions.class, args);
 
 		CollectionReader collectionReader = CollectionReaderFactory.createCollectionReaderFromPath(
 				"../ctakes-core/desc/collection_reader/FilesInDirectoryCollectionReader.xml",
@@ -110,184 +72,4 @@ public class TimeExtractionPipeline {
         aggregateBuilder.createAggregate(),
         xWriter);
 	}
-	
-	/**
-	 * Preprocessing needed for relation extraction.
-	 */
-  protected static AggregateBuilder getPreprocessorAggregateBuilder()
-      throws Exception {
-    AggregateBuilder aggregateBuilder = new AggregateBuilder();
-    
-    // identify segments; use simple segment annotator on non-mayo notes
-    // aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(SegmentsFromBracketedSectionTagsAnnotator.class));
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(SimpleSegmentAnnotator.class));
-    
-    // identify sentences
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
-            SentenceDetector.class,
-            SentenceDetector.SD_MODEL_FILE_PARAM,
-            "org/apache/ctakes/core/sentdetect/sd-med-model.zip"));
-    // identify tokens
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(TokenizerAnnotatorPTB.class));
-    // merge some tokens
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ContextDependentTokenizerAnnotator.class));
-
-    // identify part-of-speech tags
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
-        POSTagger.class,
-        TypeSystemDescriptionFactory.createTypeSystemDescription(),
-        TypePrioritiesFactory.createTypePriorities(Segment.class, Sentence.class, BaseToken.class),
-        POSTagger.POS_MODEL_FILE_PARAM,
-        "org/apache/ctakes/postagger/models/mayo-pos.zip"));
-
-    // identify chunks
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
-        Chunker.class,
-        Chunker.CHUNKER_MODEL_FILE_PARAM,
-        FileLocator.locateFile("org/apache/ctakes/chunker/models/chunker-model.zip"),
-        Chunker.CHUNKER_CREATOR_CLASS_PARAM,
-        DefaultChunkCreator.class));
-
-    // identify UMLS named entities
-
-    // adjust NP in NP NP to span both
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
-        ChunkAdjuster.class,
-        ChunkAdjuster.PARAM_CHUNK_PATTERN,
-        new String[] { "NP", "NP" },
-        ChunkAdjuster.PARAM_EXTEND_TO_INCLUDE_TOKEN,
-        1));
-    // adjust NP in NP PP NP to span all three
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
-        ChunkAdjuster.class,
-        ChunkAdjuster.PARAM_CHUNK_PATTERN,
-        new String[] { "NP", "PP", "NP" },
-        ChunkAdjuster.PARAM_EXTEND_TO_INCLUDE_TOKEN,
-        2));
-    // add lookup windows for each NP
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(CopyNPChunksToLookupWindowAnnotations.class));
-    // maximize lookup windows
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
-        OverlapAnnotator.class,
-        "A_ObjectClass",
-        LookupWindowAnnotation.class,
-        "B_ObjectClass",
-        LookupWindowAnnotation.class,
-        "OverlapType",
-        "A_ENV_B",
-        "ActionType",
-        "DELETE",
-        "DeleteAction",
-        new String[] { "selector=B" }));
-    // add UMLS on top of lookup windows
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
-        UmlsDictionaryLookupAnnotator.class,
-        "ctakes.umlsaddr",
-        "https://uts-ws.nlm.nih.gov/restful/isValidUMLSUser",
-        "ctakes.umlsvendor",
-        "NLM-6515182895",
-        "LookupDescriptor",
-        ExternalResourceFactory.createExternalResourceDescription(
-            FileResourceImpl.class,
-            FileLocator.locateFile("org/apache/ctakes/dictionary/lookup/LookupDesc_Db.xml")),
-        "DbConnection",
-        ExternalResourceFactory.createExternalResourceDescription(
-            JdbcConnectionResourceImpl.class,
-            "",
-            JdbcConnectionResourceImpl.PARAM_DRIVER_CLASS,
-            "org.hsqldb.jdbcDriver",
-            JdbcConnectionResourceImpl.PARAM_URL,
-            // Should be the following but it's WAY too slow
-            // "jdbc:hsqldb:res:/org/apache/ctakes/dictionary/lookup/umls2011ab/umls"),
-            "jdbc:hsqldb:file:target/unpacked/org/apache/ctakes/dictionary/lookup/umls2011ab/umls"),
-        "RxnormIndexReader",
-        ExternalResourceFactory.createExternalResourceDescription(
-            LuceneIndexReaderResourceImpl.class,
-            "",
-            "UseMemoryIndex",
-            true,
-            "IndexDirectory",
-            new File("target/unpacked/org/apache/ctakes/dictionary/lookup/rxnorm_index").getAbsoluteFile()),
-        "OrangeBookIndexReader",
-        ExternalResourceFactory.createExternalResourceDescription(
-            LuceneIndexReaderResourceImpl.class,
-            "",
-            "UseMemoryIndex",
-            true,
-            "IndexDirectory",
-            "org/apache/ctakes/dictionary/lookup/OrangeBook")));
-
-    // add lvg annotator
-    String[] XeroxTreebankMap = {
-        "adj|JJ",
-        "adv|RB",
-        "aux|AUX",
-        "compl|CS",
-        "conj|CC",
-        "det|DET",
-        "modal|MD",
-        "noun|NN",
-        "prep|IN",
-        "pron|PRP",
-        "verb|VB" };
-    String[] ExclusionSet = {
-        "and",
-        "And",
-        "by",
-        "By",
-        "for",
-        "For",
-        "in",
-        "In",
-        "of",
-        "Of",
-        "on",
-        "On",
-        "the",
-        "The",
-        "to",
-        "To",
-        "with",
-        "With" };
-    AnalysisEngineDescription lvgAnnotator = AnalysisEngineFactory.createPrimitiveDescription(
-        LvgAnnotator.class,
-        "UseSegments",
-        false,
-        "SegmentsToSkip",
-        new String[0],
-        "UseCmdCache",
-        false,
-        "CmdCacheFileLocation",
-        "/org/apache/ctakes/lvg/2005_norm.voc",
-        "CmdCacheFrequencyCutoff",
-        20,
-        "ExclusionSet",
-        ExclusionSet,
-        "XeroxTreebankMap",
-        XeroxTreebankMap,
-        "LemmaCacheFileLocation",
-        "/org/apache/ctakes/lvg/2005_lemma.voc",
-        "UseLemmaCache",
-        false,
-        "LemmaCacheFrequencyCutoff",
-        20,
-        "PostLemmas",
-        true,
-        "LvgCmdApi",
-        ExternalResourceFactory.createExternalResourceDescription(
-            LvgCmdApiResourceImpl.class,
-            new File(LvgCmdApiResourceImpl.class.getResource(
-                "/org/apache/ctakes/lvg/data/config/lvg.properties").toURI())));
-    aggregateBuilder.add(lvgAnnotator);
-
-    // add dependency parser
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ClearNLPDependencyParserAE.class));
-
-    // add semantic role labeler
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ClearNLPSemanticRoleLabelerAE.class));
-
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ConstituencyParser.class));
-    
-    return aggregateBuilder;
-  }
 }



Mime
View raw message