ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tm...@apache.org
Subject svn commit: r1585978 - in /ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/pipelines: FullTemporalExtractionPipeline.java TemporalExtractionPipeline_ImplBase.java TimeExtractionPipeline.java
Date Wed, 09 Apr 2014 14:00:25 GMT
Author: tmill
Date: Wed Apr  9 14:00:25 2014
New Revision: 1585978

URL: http://svn.apache.org/r1585978
Log:
CTAKES-82: Cleaned up some code in the default temporal pipelines. Added a simpler
preprocessor that omits the UMLS dictionary lookup components.

Modified:
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/pipelines/FullTemporalExtractionPipeline.java
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/pipelines/TemporalExtractionPipeline_ImplBase.java
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/pipelines/TimeExtractionPipeline.java

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/pipelines/FullTemporalExtractionPipeline.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/pipelines/FullTemporalExtractionPipeline.java?rev=1585978&r1=1585977&r2=1585978&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/pipelines/FullTemporalExtractionPipeline.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/pipelines/FullTemporalExtractionPipeline.java Wed Apr  9 14:00:25 2014
@@ -59,17 +59,14 @@ public class FullTemporalExtractionPipel
         FilesInDirectoryCollectionReader.PARAM_INPUTDIR,
         options.getInputDirectory());
 
-    AggregateBuilder aggregateBuilder = getPreprocessorAggregateBuilder();
+    AggregateBuilder aggregateBuilder = getLightweightPreprocessorAggregateBuilder();
     aggregateBuilder.add(EventAnnotator.createAnnotatorDescription(new File(options.getEventModelDirectory())));
     aggregateBuilder.add(BackwardsTimeAnnotator.createAnnotatorDescription(new File(options.getTimeModelDirectory())));
     aggregateBuilder.add(EventTimeRelationAnnotator.createAnnotatorDescription(new File(options.getEventTimeRelationModelDirectory())));
     if(options.getEventEventRelationModelDirectory()!=null){
       aggregateBuilder.add(EventEventRelationAnnotator.createAnnotatorDescription(new File(options.getEventEventRelationModelDirectory())));
     }
-    AnalysisEngine xWriter = AnalysisEngineFactory.createPrimitive(
-        XWriter.class,
-        XWriter.PARAM_OUTPUT_DIRECTORY_NAME,
-        options.getOutputDirectory());
+    AnalysisEngine xWriter = getXMIWriter(options.getOutputDirectory());
     
     SimplePipeline.runPipeline(
         collectionReader,

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/pipelines/TemporalExtractionPipeline_ImplBase.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/pipelines/TemporalExtractionPipeline_ImplBase.java?rev=1585978&r1=1585977&r2=1585978&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/pipelines/TemporalExtractionPipeline_ImplBase.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/pipelines/TemporalExtractionPipeline_ImplBase.java Wed Apr  9 14:00:25 2014
@@ -7,7 +7,6 @@ import org.apache.ctakes.chunker.ae.Defa
 import org.apache.ctakes.chunker.ae.adjuster.ChunkAdjuster;
 import org.apache.ctakes.constituency.parser.ae.ConstituencyParser;
 import org.apache.ctakes.contexttokenizer.ae.ContextDependentTokenizerAnnotator;
-import org.apache.ctakes.core.ae.OverlapAnnotator;
 import org.apache.ctakes.core.ae.SentenceDetector;
 import org.apache.ctakes.core.ae.SimpleSegmentAnnotator;
 import org.apache.ctakes.core.ae.TokenizerAnnotatorPTB;
@@ -15,6 +14,7 @@ import org.apache.ctakes.core.resource.F
 import org.apache.ctakes.core.resource.FileResourceImpl;
 import org.apache.ctakes.core.resource.JdbcConnectionResourceImpl;
 import org.apache.ctakes.core.resource.LuceneIndexReaderResourceImpl;
+import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
 import org.apache.ctakes.dependency.parser.ae.ClearNLPDependencyParserAE;
 import org.apache.ctakes.dependency.parser.ae.ClearNLPSemanticRoleLabelerAE;
 import org.apache.ctakes.dictionary.lookup.ae.UmlsDictionaryLookupAnnotator;
@@ -22,11 +22,16 @@ import org.apache.ctakes.lvg.ae.LvgAnnot
 import org.apache.ctakes.lvg.resource.LvgCmdApiResourceImpl;
 import org.apache.ctakes.postagger.POSTagger;
 import org.apache.ctakes.temporal.eval.Evaluation_ImplBase.CopyNPChunksToLookupWindowAnnotations;
+import org.apache.ctakes.temporal.eval.Evaluation_ImplBase.RemoveEnclosedLookupWindows;
 import org.apache.ctakes.typesystem.type.syntax.BaseToken;
-import org.apache.ctakes.typesystem.type.textspan.LookupWindowAnnotation;
 import org.apache.ctakes.typesystem.type.textspan.Segment;
 import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.uima.analysis_engine.AnalysisEngine;
 import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.uimafit.component.xwriter.XWriter;
+import org.uimafit.component.xwriter.XWriterFileNamer;
 import org.uimafit.factory.AggregateBuilder;
 import org.uimafit.factory.AnalysisEngineFactory;
 import org.uimafit.factory.ExternalResourceFactory;
@@ -105,18 +110,7 @@ public abstract class TemporalExtraction
     // add lookup windows for each NP
     aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(CopyNPChunksToLookupWindowAnnotations.class));
     // maximize lookup windows
-    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
-        OverlapAnnotator.class,
-        "A_ObjectClass",
-        LookupWindowAnnotation.class,
-        "B_ObjectClass",
-        LookupWindowAnnotation.class,
-        "OverlapType",
-        "A_ENV_B",
-        "ActionType",
-        "DELETE",
-        "DeleteAction",
-        new String[] { "selector=B" }));
+    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(RemoveEnclosedLookupWindows.class));
     // add UMLS on top of lookup windows
     aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
         UmlsDictionaryLookupAnnotator.class,
@@ -228,4 +222,57 @@ public abstract class TemporalExtraction
     
     return aggregateBuilder;
   }
+  
+  protected static AggregateBuilder getLightweightPreprocessorAggregateBuilder() throws Exception{
+    AggregateBuilder aggregateBuilder = new AggregateBuilder();
+    
+    // identify segments; use simple segment annotator on non-mayo notes
+    // aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(SegmentsFromBracketedSectionTagsAnnotator.class));
+    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(SimpleSegmentAnnotator.class));
+    
+    // identify sentences
+    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
+            SentenceDetector.class,
+            SentenceDetector.SD_MODEL_FILE_PARAM,
+            "org/apache/ctakes/core/sentdetect/sd-med-model.zip"));
+    // identify tokens
+    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(TokenizerAnnotatorPTB.class));
+    // merge some tokens
+    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ContextDependentTokenizerAnnotator.class));
+
+    // identify part-of-speech tags
+    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
+        POSTagger.class,
+        TypeSystemDescriptionFactory.createTypeSystemDescription(),
+        TypePrioritiesFactory.createTypePriorities(Segment.class, Sentence.class, BaseToken.class),
+        POSTagger.POS_MODEL_FILE_PARAM,
+        "org/apache/ctakes/postagger/models/mayo-pos.zip"));
+
+    // add dependency parser
+    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ClearNLPDependencyParserAE.class));
+
+    // add semantic role labeler
+    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ClearNLPSemanticRoleLabelerAE.class));
+
+    aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ConstituencyParser.class));
+
+    return aggregateBuilder;
+  }
+  
+  protected static AnalysisEngine getXMIWriter(String outputDirectory) throws ResourceInitializationException{
+    return AnalysisEngineFactory.createPrimitive(
+        XWriter.class,
+        XWriter.PARAM_OUTPUT_DIRECTORY_NAME,
+        outputDirectory,
+        XWriter.PARAM_FILE_NAMER_CLASS_NAME,
+        DocIDFileNamer.class.getName()
+        );
+  }
+  
+  public static class DocIDFileNamer implements XWriterFileNamer {
+    @Override
+    public String nameFile(JCas jCas) {
+      return DocumentIDAnnotationUtil.getDocumentID(jCas);
+    }
+  }
 }

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/pipelines/TimeExtractionPipeline.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/pipelines/TimeExtractionPipeline.java?rev=1585978&r1=1585977&r2=1585978&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/pipelines/TimeExtractionPipeline.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/pipelines/TimeExtractionPipeline.java Wed Apr  9 14:00:25 2014
@@ -24,9 +24,7 @@ import org.apache.ctakes.core.cr.FilesIn
 import org.apache.ctakes.temporal.ae.BackwardsTimeAnnotator;
 import org.apache.uima.analysis_engine.AnalysisEngine;
 import org.apache.uima.collection.CollectionReader;
-import org.uimafit.component.xwriter.XWriter;
 import org.uimafit.factory.AggregateBuilder;
-import org.uimafit.factory.AnalysisEngineFactory;
 import org.uimafit.factory.CollectionReaderFactory;
 import org.uimafit.pipeline.SimplePipeline;
 
@@ -62,11 +60,8 @@ public class TimeExtractionPipeline exte
 		AggregateBuilder aggregateBuilder = getPreprocessorAggregateBuilder();
 		aggregateBuilder.add(BackwardsTimeAnnotator.createAnnotatorDescription(new File(options.getTimeModelDirectory())));
 		
-    AnalysisEngine xWriter = AnalysisEngineFactory.createPrimitive(
-        XWriter.class,
-        XWriter.PARAM_OUTPUT_DIRECTORY_NAME,
-        options.getOutputDirectory());
-		
+    AnalysisEngine xWriter = getXMIWriter(options.getOutputDirectory());
+	
     SimplePipeline.runPipeline(
         collectionReader,
         aggregateBuilder.createAggregate(),



Mime
View raw message