ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From seanfi...@apache.org
Subject svn commit: r1650413 - /ctakes/trunk/ctakes-clinical-pipeline/desc/analysis_engine/CuisOnlyPlaintextUMLSProcessor.xml
Date Thu, 08 Jan 2015 23:02:19 GMT
Author: seanfinan
Date: Thu Jan  8 23:02:18 2015
New Revision: 1650413

URL: http://svn.apache.org/r1650413
Log:
Glorified Grep.  Tiny pipeline that simply looks up Terms based upon umls dictionary, sentence
breaks and part of speech.
It can process 50,000 notes per hour on my circa-vista laptop, reading and writing to file.
 
LVG is commented out.  Depending upon the phase of the moon, this can add a small or large
amount of time to the run.  Including LVG will enable more term recognition.
Negation, Uncertainty, etc. are commented out.
If you want to run any other modules in a pipeline then use another ae as a template.

Added:
    ctakes/trunk/ctakes-clinical-pipeline/desc/analysis_engine/CuisOnlyPlaintextUMLSProcessor.xml

Added: ctakes/trunk/ctakes-clinical-pipeline/desc/analysis_engine/CuisOnlyPlaintextUMLSProcessor.xml
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-clinical-pipeline/desc/analysis_engine/CuisOnlyPlaintextUMLSProcessor.xml?rev=1650413&view=auto
==============================================================================
--- ctakes/trunk/ctakes-clinical-pipeline/desc/analysis_engine/CuisOnlyPlaintextUMLSProcessor.xml (added)
+++ ctakes/trunk/ctakes-clinical-pipeline/desc/analysis_engine/CuisOnlyPlaintextUMLSProcessor.xml Thu Jan  8 23:02:18 2015
@@ -0,0 +1,168 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+
+    Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing,
+    software distributed under the License is distributed on an
+    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    KIND, either express or implied.  See the License for the
+    specific language governing permissions and limitations
+    under the License.
+
+-->
+<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
+   <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+   <primitive>false</primitive>
+   <delegateAnalysisEngineSpecifiers>
+      <delegateAnalysisEngine key="SimpleSegmentAnnotator">
+         <import location="SimpleSegmentAnnotator.xml"/>
+      </delegateAnalysisEngine>
+      <delegateAnalysisEngine key="SentenceDetectorAnnotator">
+         <import location="../../../ctakes-core/desc/analysis_engine/SentenceDetectorAnnotator.xml"/>
+      </delegateAnalysisEngine>
+      <delegateAnalysisEngine key="TokenizerAnnotator">
+         <import location="../../../ctakes-core/desc/analysis_engine/TokenizerAnnotator.xml"/>
+      </delegateAnalysisEngine>
+      <!--<delegateAnalysisEngine key="LvgAnnotator">-->
+      <!--<import location="../../../ctakes-lvg/desc/analysis_engine/LvgAnnotator.xml"/>-->
+      <!--</delegateAnalysisEngine>-->
+      <delegateAnalysisEngine key="POSTagger">
+         <import location="../../../ctakes-pos-tagger/desc/POSTagger.xml"/>
+      </delegateAnalysisEngine>
+      <delegateAnalysisEngine key="DictionaryLookupAnnotatorDB">
+         <import location="../../../ctakes-dictionary-lookup-fast/desc/analysis_engine/UmlsLookupAnnotator.xml"/>
+      </delegateAnalysisEngine>
+      <!--<delegateAnalysisEngine key="GenericCleartkAnalysisEngine">-->
+      <!--<import location="../../../ctakes-assertion/desc/analysis_engine/GenericCleartkAnalysisEngine.xml"/>-->
+      <!--</delegateAnalysisEngine>-->
+      <!--<delegateAnalysisEngine key="HistoryCleartkAnalysisEngine">-->
+      <!--<import location="../../../ctakes-assertion/desc/analysis_engine/HistoryCleartkAnalysisEngine.xml"/>-->
+      <!--</delegateAnalysisEngine>-->
+      <!--<delegateAnalysisEngine key="PolarityCleartkAnalysisEngine">-->
+      <!--<import location="../../../ctakes-assertion/desc/analysis_engine/PolarityCleartkAnalysisEngine.xml"/>-->
+      <!--</delegateAnalysisEngine>-->
+      <!--<delegateAnalysisEngine key="SubjectCleartkAnalysisEngine">-->
+      <!--<import location="../../../ctakes-assertion/desc/analysis_engine/SubjectCleartkAnalysisEngine.xml"/>-->
+      <!--</delegateAnalysisEngine>-->
+      <!--<delegateAnalysisEngine key="UncertaintyCleartkAnalysisEngine">-->
+      <!--<import location="../../../ctakes-assertion/desc/analysis_engine/UncertaintyCleartkAnalysisEngine.xml"/>-->
+      <!--</delegateAnalysisEngine>-->
+   </delegateAnalysisEngineSpecifiers>
+   <analysisEngineMetaData>
+      <name>CuisOnlyUMLSProcessor</name>
+      <description>Runs the smallest pipeline for annotating clinical documents in plain text format using the
+         built in UMLS (SNOMEDCT and RxNORM) dictionaries.
+         This uses the database in resources/org/apache/ctakes/dictionary/lookup/fast/ctakessnorx and requires an UMLS license.
+         Please update resources/org/apache/ctakes/dictionary/lookup/fast/cTakesHsql.xml file with your UMLS username and password.
+      </description>
+      <version/>
+      <vendor/>
+      <configurationParameters searchStrategy="language_fallback">
+         <configurationParameter>
+            <name>SegmentID</name>
+            <description/>
+            <type>String</type>
+            <multiValued>false</multiValued>
+            <mandatory>false</mandatory>
+            <overrides>
+               <parameter>SimpleSegmentAnnotator/SegmentID</parameter>
+            </overrides>
+         </configurationParameter>
+      </configurationParameters>
+      <configurationParameterSettings>
+      </configurationParameterSettings>
+      <flowConstraints>
+         <fixedFlow>
+            <node>SimpleSegmentAnnotator</node>
+            <node>SentenceDetectorAnnotator</node>
+            <node>TokenizerAnnotator</node>
+            <!--<node>LvgAnnotator</node>-->
+            <node>POSTagger</node>
+            <node>DictionaryLookupAnnotatorDB</node>
+            <!--<node>GenericCleartkAnalysisEngine</node>-->
+            <!--<node>HistoryCleartkAnalysisEngine</node>-->
+            <!--<node>PolarityCleartkAnalysisEngine</node>-->
+            <!--<node>SubjectCleartkAnalysisEngine</node>-->
+            <!--<node>UncertaintyCleartkAnalysisEngine</node>-->
+         </fixedFlow>
+      </flowConstraints>
+      <typePriorities>
+         <name>Ordering</name>
+         <description>For subiterator</description>
+         <version>1.0</version>
+         <priorityList>
+            <type>org.apache.ctakes.typesystem.type.textspan.Segment</type>
+            <type>org.apache.ctakes.typesystem.type.textspan.Sentence</type>
+            <type>org.apache.ctakes.typesystem.type.syntax.BaseToken</type>
+         </priorityList>
+         <priorityList>
+            <type>org.apache.ctakes.typesystem.type.textspan.Sentence</type>
+            <type>org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation</type>
+         </priorityList>
+      </typePriorities>
+      <fsIndexCollection/>
+      <capabilities>
+         <capability>
+            <inputs/>
+            <outputs>
+               <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.NewlineToken</type>
+               <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation</type>
+               <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.WordToken</type>
+               <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.VP</type>
+               <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.refsem.UmlsConcept</type>
+               <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.UCP</type>
+               <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.textsem.TimeAnnotation</type>
+               <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.SymbolToken</type>
+               <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.textspan.Sentence</type>
+               <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.textspan.Segment</type>
+               <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.SBAR</type>
+               <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.textsem.RomanNumeralAnnotation</type>
+               <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.textsem.RangeAnnotation</type>
+               <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.PunctuationToken</type>
+               <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.Property</type>
+               <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.Properties</type>
+               <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.textsem.PersonTitleAnnotation</type>
+               <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.PRT</type>
+               <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.PP</type>
+               <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.OntologyConcept</type>
+               <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.NumToken</type>
+               <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.NP</type>
+               <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.textsem.MeasurementAnnotation</type>
+               <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.Lemma</type>
+               <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.LST</type>
+               <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.INTJ</type>
+               <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.textsem.FractionAnnotation</type>
+               <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.structured.DocumentID</type>
+               <type allAnnotatorFeatures="true">uima.tcas.DocumentAnnotation</type>
+               <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.textsem.DateAnnotation</type>
+               <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.CopySrcAnnotation</type>
+               <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.CopyDestAnnotation</type>
+               <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.ContractionToken</type>
+               <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.textsem.ContextAnnotation</type>
+               <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.CONJP</type>
+               <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.BaseToken</type>
+               <type allAnnotatorFeatures="true">uima.cas.AnnotationBase</type>
+               <type allAnnotatorFeatures="true">uima.tcas.Annotation</type>
+               <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.ADVP</type>
+               <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.ADJP</type>
+            </outputs>
+            <languagesSupported/>
+         </capability>
+      </capabilities>
+      <operationalProperties>
+         <modifiesCas>true</modifiesCas>
+         <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+         <outputsNewCASes>false</outputsNewCASes>
+      </operationalProperties>
+   </analysisEngineMetaData>
+   <resourceManagerConfiguration/>
+</analysisEngineDescription>



Mime
View raw message