ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From james-mas...@apache.org
Subject svn commit: r1558453 - /ctakes/trunk/ctakes-core/scripts/groovy/cTAKES-clinical-pipeline.groovy
Date Wed, 15 Jan 2014 16:30:14 GMT
Author: james-masanz
Date: Wed Jan 15 16:30:13 2014
New Revision: 1558453

URL: http://svn.apache.org/r1558453
Log:
Add the semantic role labeler (SRL) and dependency parser annotators (replacing the TODO placeholders).
Set TypePriorities so that we don't get odd, inconsistent behaviors such as
java.lang.RuntimeException: First token in sentence not found!! from CharacterOffsetToLineTokenConverterCtakesImpl.

Modified:
    ctakes/trunk/ctakes-core/scripts/groovy/cTAKES-clinical-pipeline.groovy

Modified: ctakes/trunk/ctakes-core/scripts/groovy/cTAKES-clinical-pipeline.groovy
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/scripts/groovy/cTAKES-clinical-pipeline.groovy?rev=1558453&r1=1558452&r2=1558453&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/scripts/groovy/cTAKES-clinical-pipeline.groovy (original)
+++ ctakes/trunk/ctakes-core/scripts/groovy/cTAKES-clinical-pipeline.groovy Wed Jan 15 16:30:13 2014
@@ -21,6 +21,8 @@ import java.io.File;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.analysis_engine.AnalysisEngineDescription;
 import org.apache.uima.collection.CollectionReader;
+import org.apache.uima.resource.metadata.TypePriorities;
+import org.apache.uima.resource.metadata.TypePriorityList;
 import org.apache.uima.resource.metadata.TypeSystemDescription;
 import org.uimafit.factory.AnalysisEngineFactory;
 import org.uimafit.factory.AggregateBuilder;
@@ -74,6 +76,17 @@ println("Using cTAKES in " + cTAKES_HOME
 println("Instantiating collection reader");
 CollectionReader collectionReader = FilesCollectionReader.getCollectionReader(inputDir);
 
+println "Creating TypePriorities";
+// Add first TypePriorityList
+Class cl1 = org.apache.ctakes.typesystem.type.textspan.Segment;
+Class cl2 = org.apache.ctakes.typesystem.type.textspan.Sentence;
+Class cl3 = org.apache.ctakes.typesystem.type.syntax.BaseToken;
+TypePriorities typePriorities = TypePrioritiesFactory.createTypePriorities(cl1, cl2, cl3);
+
+// Add second TypePriorityList
+TypePriorityList typePriorityList = typePriorities.addPriorityList();
+typePriorityList.addType("org.apache.ctakes.typesystem.type.textspan.Sentence");
+typePriorityList.addType("org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation");
 
 //Build the pipeline to run
 // we assume cTAKES' desc directory is on classpath for those 
@@ -81,7 +94,7 @@ CollectionReader collectionReader = File
 // reference by descriptor (XML file) name
 println("Building pipeline aggregate builder object");
 
-AggregateBuilder aggregateBuilder = new AggregateBuilder();
+AggregateBuilder aggregateBuilder = new AggregateBuilder(null, typePriorities, null);
 
 // Here is the flow from AggregatePlaintextUMLSProcessor.xml in 3.1.1
 /*
@@ -220,7 +233,7 @@ ConfigurationParameterFactory.addConfigu
 ConfigurationParameterFactory.addConfigurationParameters(
 			dictionaryLookupAnnotator,
 			org.apache.ctakes.dictionary.lookup.ae.UmlsDictionaryLookupAnnotator.UMLSUSER_PARAM, 
-			"" // put your UMLS user ID here or set JAVA_OPTS ctakes.umlsuser or see user or install guide
+			"" // put your UMLS user ID here or set JAVA_OPTS ctakes_umlsuser or see user or install guide
 	);
 // Commenting out the setting of UMLSPW_PARAM as you probably don't want to put your password
 // in this script so that if you share the script you don't share your password accidentally
@@ -231,11 +244,42 @@ ConfigurationParameterFactory.addConfigu
 //	);
 aggregateBuilder.add(dictionaryLookupAnnotator);
 
-// DependencyParser
-println(" TODO YET *** Adding dependency parser annotator"); // TODO 
-
-// SemanticRoleLabeler        
-println(" TODO YET *** Adding semantic role labeler annotator"); // TODO 
+// DependencyParser - see ClearNLPDependencyParserAE.xml
+println " Adding dependency parser annotator"
+annotatorClass = org.apache.ctakes.dependency.parser.ae.ClearNLPDependencyParserAE.class;
+def dependencyParserAnnotator = AnalysisEngineFactory.createPrimitiveDescription(annotatorClass);
+ConfigurationParameterFactory.addConfigurationParameters(
+				dependencyParserAnnotator,
+				org.apache.ctakes.dependency.parser.ae.ClearNLPDependencyParserAE.PARAM_PARSER_MODEL_FILE_NAME,
+				org.apache.ctakes.dependency.parser.ae.ClearNLPDependencyParserAE.DEFAULT_MODEL_FILE_NAME
+		);
+ConfigurationParameterFactory.addConfigurationParameters(
+				dependencyParserAnnotator,
+				"ParserAlgorithmName",
+				"shift-pop"
+		);
+ConfigurationParameterFactory.addConfigurationParameters(
+				dependencyParserAnnotator,
+				org.apache.ctakes.dependency.parser.ae.ClearNLPDependencyParserAE.PARAM_USE_LEMMATIZER,
+				true
+		);
+aggregateBuilder.add(dependencyParserAnnotator);
+
+// SemanticRoleLabeler - see ClearNLPSemanticRoleLabelerAE.xml
+println " Adding semantic role labeler annotator"
+annotatorClass = org.apache.ctakes.dependency.parser.ae.ClearNLPSemanticRoleLabelerAE.class;
+def srlAnnotator = AnalysisEngineFactory.createPrimitiveDescription(annotatorClass);
+ConfigurationParameterFactory.addConfigurationParameters(
+				srlAnnotator,
+				org.apache.ctakes.dependency.parser.ae.ClearNLPSemanticRoleLabelerAE.PARAM_PARSER_MODEL_FILE_NAME,
+				org.apache.ctakes.dependency.parser.ae.ClearNLPSemanticRoleLabelerAE.DEFAULT_SRL_MODEL_FILE_NAME
+		);
+ConfigurationParameterFactory.addConfigurationParameters(
+				srlAnnotator,
+				"UseLemmatizer",
+				true
+		);
+aggregateBuilder.add(srlAnnotator);
 		
 println(" Adding assertion annotators");
 def assertionDescriptorLocation = "ctakes-assertion/desc/AssertionMiniPipelineAnalysisEngine";
 // Note createAnalysisEngineDescription expects name to not end in .xml even though filename actually does



Mime
View raw message