ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From chen...@apache.org
Subject svn commit: r1605587 - in /ctakes/trunk/ctakes-temporal/src: main/java/org/apache/ctakes/temporal/pipelines/ test/java/org/apach/ctakes/temporal/ae/
Date Wed, 25 Jun 2014 20:16:49 GMT
Author: chenpei
Date: Wed Jun 25 20:16:49 2014
New Revision: 1605587

URL: http://svn.apache.org/r1605587
Log:
CTAKES-297 - Updated the temporal project to use each annotator's static createAnnotatorDescription() method.
No need to duplicate the default parameter settings for each of the annotators in the default/preprocessing pipelines.

Modified:
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/pipelines/FullTemporalExtractionPipeline.java
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/pipelines/TemporalExtractionPipeline_ImplBase.java
    ctakes/trunk/ctakes-temporal/src/test/java/org/apach/ctakes/temporal/ae/ContextualModalityAnnotatorTest.java

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/pipelines/FullTemporalExtractionPipeline.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/pipelines/FullTemporalExtractionPipeline.java?rev=1605587&r1=1605586&r2=1605587&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/pipelines/FullTemporalExtractionPipeline.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/pipelines/FullTemporalExtractionPipeline.java Wed Jun 25 20:16:49 2014
@@ -79,11 +79,13 @@ public class FullTemporalExtractionPipel
 
     AggregateBuilder aggregateBuilder = getLightweightPreprocessorAggregateBuilder();
     aggregateBuilder.add(EventAnnotator.createAnnotatorDescription(new File(options.getEventModelDirectory())));
-    aggregateBuilder.add(BackwardsTimeAnnotator.createAnnotatorDescription(options.getTimeModelDirectory() + "/model.jar"));
-    aggregateBuilder.add(EventTimeRelationAnnotator.createAnnotatorDescription(new File(options.getEventTimeRelationModelDirectory())));
+    aggregateBuilder.add(BackwardsTimeAnnotator.createAnnotatorDescription(options.getTimeModelDirectory() + File.pathSeparator + "model.jar"));
+    aggregateBuilder.add(EventTimeRelationAnnotator.createAnnotatorDescription(options.getEventTimeRelationModelDirectory() + File.separator + "model.jar"));
     if(options.getEventEventRelationModelDirectory()!=null){
-      aggregateBuilder.add(EventEventRelationAnnotator.createAnnotatorDescription(options.getEventEventRelationModelDirectory() + "/model.jar"));
+      aggregateBuilder.add(EventEventRelationAnnotator.createAnnotatorDescription(options.getEventEventRelationModelDirectory() + File.separator + "model.jar"));
     }
+    
+    //aggregateBuilder.createAggregateDescription().toXML(new FileWriter("desc/analysis_engine/TemporalAggregateUMLSPipeline.xml"));
     AnalysisEngine xWriter = getXMIWriter(options.getOutputDirectory());
     
     SimplePipeline.runPipeline(

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/pipelines/TemporalExtractionPipeline_ImplBase.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/pipelines/TemporalExtractionPipeline_ImplBase.java?rev=1605587&r1=1605586&r2=1605587&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/pipelines/TemporalExtractionPipeline_ImplBase.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/pipelines/TemporalExtractionPipeline_ImplBase.java Wed Jun 25 20:16:49 2014
@@ -23,6 +23,7 @@ import java.io.File;
 import org.apache.ctakes.chunker.ae.Chunker;
 import org.apache.ctakes.chunker.ae.DefaultChunkCreator;
 import org.apache.ctakes.chunker.ae.adjuster.ChunkAdjuster;
+import org.apache.ctakes.clinicalpipeline.ClinicalPipelineFactory;
 import org.apache.ctakes.constituency.parser.ae.ConstituencyParser;
 import org.apache.ctakes.contexttokenizer.ae.ContextDependentTokenizerAnnotator;
 import org.apache.ctakes.core.ae.SentenceDetector;
@@ -78,165 +79,30 @@ public abstract class TemporalExtraction
   protected static AggregateBuilder getPreprocessorAggregateBuilder()
       throws Exception {
     AggregateBuilder aggregateBuilder = new AggregateBuilder();
-    
-    // identify segments; use simple segment annotator on non-mayo notes
-    // aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(SegmentsFromBracketedSectionTagsAnnotator.class));
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(SimpleSegmentAnnotator.class));
-    
-    // identify sentences
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
-            SentenceDetector.class,
-            SentenceDetector.SD_MODEL_FILE_PARAM,
-            "org/apache/ctakes/core/sentdetect/sd-med-model.zip"));
-    // identify tokens
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(TokenizerAnnotatorPTB.class));
-    // merge some tokens
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ContextDependentTokenizerAnnotator.class));
-
-    // identify part-of-speech tags
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
-        POSTagger.class,
-        TypeSystemDescriptionFactory.createTypeSystemDescription(),
-        TypePrioritiesFactory.createTypePriorities(Segment.class, Sentence.class, BaseToken.class),
-        POSTagger.POS_MODEL_FILE_PARAM,
-        "org/apache/ctakes/postagger/models/mayo-pos.zip"));
-
-    // identify chunks
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
-        Chunker.class,
-        Chunker.CHUNKER_MODEL_FILE_PARAM,
-        FileLocator.locateFile("org/apache/ctakes/chunker/models/chunker-model.zip"),
-        Chunker.CHUNKER_CREATOR_CLASS_PARAM,
-        DefaultChunkCreator.class));
-
-    // identify UMLS named entities
-
-    // adjust NP in NP NP to span both
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
-        ChunkAdjuster.class,
-        ChunkAdjuster.PARAM_CHUNK_PATTERN,
-        new String[] { "NP", "NP" },
-        ChunkAdjuster.PARAM_EXTEND_TO_INCLUDE_TOKEN,
-        1));
-    // adjust NP in NP PP NP to span all three
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
-        ChunkAdjuster.class,
-        ChunkAdjuster.PARAM_CHUNK_PATTERN,
-        new String[] { "NP", "PP", "NP" },
-        ChunkAdjuster.PARAM_EXTEND_TO_INCLUDE_TOKEN,
-        2));
-    // add lookup windows for each NP
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(CopyNPChunksToLookupWindowAnnotations.class));
-    // maximize lookup windows
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(RemoveEnclosedLookupWindows.class));
-    // add UMLS on top of lookup windows
-    aggregateBuilder.add(UmlsDictionaryLookupAnnotator.createAnnotatorDescription());
-
-    // add lvg annotator
-    String[] XeroxTreebankMap = {
-        "adj|JJ",
-        "adv|RB",
-        "aux|AUX",
-        "compl|CS",
-        "conj|CC",
-        "det|DET",
-        "modal|MD",
-        "noun|NN",
-        "prep|IN",
-        "pron|PRP",
-        "verb|VB" };
-    String[] ExclusionSet = {
-        "and",
-        "And",
-        "by",
-        "By",
-        "for",
-        "For",
-        "in",
-        "In",
-        "of",
-        "Of",
-        "on",
-        "On",
-        "the",
-        "The",
-        "to",
-        "To",
-        "with",
-        "With" };
-    AnalysisEngineDescription lvgAnnotator = AnalysisEngineFactory.createPrimitiveDescription(
-        LvgAnnotator.class,
-        "UseSegments",
-        false,
-        "SegmentsToSkip",
-        new String[0],
-        "UseCmdCache",
-        false,
-        "CmdCacheFileLocation",
-        "/org/apache/ctakes/lvg/2005_norm.voc",
-        "CmdCacheFrequencyCutoff",
-        20,
-        "ExclusionSet",
-        ExclusionSet,
-        "XeroxTreebankMap",
-        XeroxTreebankMap,
-        "LemmaCacheFileLocation",
-        "/org/apache/ctakes/lvg/2005_lemma.voc",
-        "UseLemmaCache",
-        false,
-        "LemmaCacheFrequencyCutoff",
-        20,
-        "PostLemmas",
-        true,
-        "LvgCmdApi",
-        ExternalResourceFactory.createExternalResourceDescription(
-            LvgCmdApiResourceImpl.class,
-            new File(LvgCmdApiResourceImpl.class.getResource(
-                "/org/apache/ctakes/lvg/data/config/lvg.properties").toURI())));
-    aggregateBuilder.add(lvgAnnotator);
-
-    // add dependency parser
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ClearNLPDependencyParserAE.class));
-
+    aggregateBuilder.add(ClinicalPipelineFactory.getDefaultPipeline());
     // add semantic role labeler
     aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ClearNLPSemanticRoleLabelerAE.class));
-
     aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ConstituencyParser.class));
-    
     return aggregateBuilder;
   }
   
   protected static AggregateBuilder getLightweightPreprocessorAggregateBuilder() throws Exception{
     AggregateBuilder aggregateBuilder = new AggregateBuilder();
     
+    /** Consider using ClinicalPipelineFactory.getDefaultPipeline()
+     * 
+     */
     // identify segments; use simple segment annotator on non-mayo notes
     // aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(SegmentsFromBracketedSectionTagsAnnotator.class));
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(SimpleSegmentAnnotator.class));
     
-    // identify sentences
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
-            SentenceDetector.class,
-            SentenceDetector.SD_MODEL_FILE_PARAM,
-            "org/apache/ctakes/core/sentdetect/sd-med-model.zip"));
-    // identify tokens
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(TokenizerAnnotatorPTB.class));
-    // merge some tokens
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ContextDependentTokenizerAnnotator.class));
-
-    // identify part-of-speech tags
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
-        POSTagger.class,
-        TypeSystemDescriptionFactory.createTypeSystemDescription(),
-        TypePrioritiesFactory.createTypePriorities(Segment.class, Sentence.class, BaseToken.class),
-        POSTagger.POS_MODEL_FILE_PARAM,
-        "org/apache/ctakes/postagger/models/mayo-pos.zip"));
-
-    // add dependency parser
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ClearNLPDependencyParserAE.class));
-
-    // add semantic role labeler
+    aggregateBuilder.add(SimpleSegmentAnnotator.createAnnotatorDescription());
+    aggregateBuilder.add(SentenceDetector.createAnnotatorDescription());
+    aggregateBuilder.add(TokenizerAnnotatorPTB.createAnnotatorDescription());
+    aggregateBuilder.add(ContextDependentTokenizerAnnotator.createAnnotatorDescription());
+    aggregateBuilder.add(POSTagger.createAnnotatorDescription());
+    aggregateBuilder.add(Chunker.createAnnotatorDescription());
+    aggregateBuilder.add(ClearNLPDependencyParserAE.createAnnotatorDescription());
     aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ClearNLPSemanticRoleLabelerAE.class));
-
     aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ConstituencyParser.class));
 
     return aggregateBuilder;

Modified: ctakes/trunk/ctakes-temporal/src/test/java/org/apach/ctakes/temporal/ae/ContextualModalityAnnotatorTest.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/test/java/org/apach/ctakes/temporal/ae/ContextualModalityAnnotatorTest.java?rev=1605587&r1=1605586&r2=1605587&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/test/java/org/apach/ctakes/temporal/ae/ContextualModalityAnnotatorTest.java (original)
+++ ctakes/trunk/ctakes-temporal/src/test/java/org/apach/ctakes/temporal/ae/ContextualModalityAnnotatorTest.java Wed Jun 25 20:16:49 2014
@@ -20,6 +20,7 @@ package org.apach.ctakes.temporal.ae;
 
 import static org.junit.Assert.*;
 
+import java.io.FileWriter;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
@@ -28,8 +29,10 @@ import org.apache.ctakes.clinicalpipelin
 import org.apache.ctakes.clinicalpipeline.ClinicalPipelineFactory.CopyNPChunksToLookupWindowAnnotations;
 import org.apache.ctakes.clinicalpipeline.ClinicalPipelineFactory.RemoveEnclosedLookupWindows;
 import org.apache.ctakes.dependency.parser.ae.ClearNLPDependencyParserAE;
+import org.apache.ctakes.dictionary.lookup.ae.UmlsDictionaryLookupAnnotator;
 import org.apache.ctakes.temporal.ae.BackwardsTimeAnnotator;
 import org.apache.ctakes.temporal.ae.ContextualModalityAnnotator;
+import org.apache.ctakes.temporal.ae.DocTimeRelAnnotator;
 import org.apache.ctakes.temporal.ae.EventAnnotator;
 import org.apache.ctakes.typesystem.type.textsem.EventMention;
 import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
@@ -45,6 +48,7 @@ import org.uimafit.factory.AnalysisEngin
 import org.uimafit.factory.JCasFactory;
 import org.uimafit.pipeline.SimplePipeline;
 import org.uimafit.util.JCasUtil;
+import org.xml.sax.SAXException;
 
 public class ContextualModalityAnnotatorTest {
 
@@ -52,7 +56,7 @@ public class ContextualModalityAnnotator
 	private Logger LOGGER = Logger.getLogger(getClass().getName());
 
 	@Test
-	public void testPipeline() throws UIMAException, IOException {
+	public void testPipeline() throws UIMAException, IOException, SAXException {
 
 		String note = "The patient is a 55-year-old man referred by Dr. Good for recently diagnosed colorectal cancer.  "
 				+ "The patient was well till 6 months ago, when he started having a little blood with stool.";
@@ -69,7 +73,7 @@ public class ContextualModalityAnnotator
 		// Commented out the Dictionary lookup for the test
 		// Uncomment and set -Dctakes.umlsuser and -Dctakes.umlspw env params if
 		// needed
-		// builder.add(UmlsDictionaryLookupAnnotator.createAnnotatorDescription());
+		//builder.add(UmlsDictionaryLookupAnnotator.createAnnotatorDescription());
 		builder.add(ClearNLPDependencyParserAE.createAnnotatorDescription());
 
 		// Add BackwardsTimeAnnotator
@@ -78,10 +82,16 @@ public class ContextualModalityAnnotator
 		// Add EventAnnotator
 		builder.add(EventAnnotator
 				.createAnnotatorDescription("/org/apache/ctakes/temporal/ae/eventannotator/model.jar"));
-		// Add Document Time Relative Annotator
+		// Add ContextualModalityAnnotator
 		builder.add(ContextualModalityAnnotator
 				.createAnnotatorDescription("/org/apache/ctakes/temporal/ae/contextualmodality/model.jar"));
+		
+		// Add DocTimeRelAnnotator
+		builder.add(DocTimeRelAnnotator
+				.createAnnotatorDescription("/org/apache/ctakes/temporal/ae/doctimerel/model.jar"));
 
+		//builder.createAggregateDescription().toXML(new FileWriter("desc/analysis_engine/TemporalAggregateUMLSPipeline.xml"));
+		
 		SimplePipeline.runPipeline(jcas, builder.createAggregateDescription());
 
 		Collection<EventMention> mentions = JCasUtil.select(jcas,



Mime
View raw message