ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From seanfi...@apache.org
Subject svn commit: r1650691 - /ctakes/trunk/ctakes-dictionary-lookup-fast/desc/analysis_engine/UmlsOverlapLookupAnnotator.xml
Date Fri, 09 Jan 2015 22:46:43 GMT
Author: seanfinan
Date: Fri Jan  9 22:46:42 2015
New Revision: 1650691

URL: http://svn.apache.org/r1650691
Log:
Adding descriptor for UmlsOverlapLookupAnnotator

Added:
    ctakes/trunk/ctakes-dictionary-lookup-fast/desc/analysis_engine/UmlsOverlapLookupAnnotator.xml

Added: ctakes/trunk/ctakes-dictionary-lookup-fast/desc/analysis_engine/UmlsOverlapLookupAnnotator.xml
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-lookup-fast/desc/analysis_engine/UmlsOverlapLookupAnnotator.xml?rev=1650691&view=auto
==============================================================================
--- ctakes/trunk/ctakes-dictionary-lookup-fast/desc/analysis_engine/UmlsOverlapLookupAnnotator.xml (added)
+++ ctakes/trunk/ctakes-dictionary-lookup-fast/desc/analysis_engine/UmlsOverlapLookupAnnotator.xml Fri Jan  9 22:46:42 2015
@@ -0,0 +1,166 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+
+    Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing,
+    software distributed under the License is distributed on an
+    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    KIND, either express or implied.  See the License for the
+    specific language governing permissions and limitations
+    under the License.
+
+-->
+<taeDescription xmlns="http://uima.apache.org/resourceSpecifier">
+   <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+   <primitive>true</primitive>
+   <annotatorImplementationName>org.apache.ctakes.dictionary.lookup2.ae.OverlapJCasTermAnnotator</annotatorImplementationName>
+   <analysisEngineMetaData>
+      <name>UmlsOverlapLookupAnnotator</name>
+      <description>Lookup Annotator descriptor for Snomed Terms which are in a Rare Word -format Database, Ctakes
+         and uses overlap spans for matching in addition to exact spans
+      </description>
+      <version/>
+      <vendor/>
+
+      <configurationParameters>
+         <!-- windowAnnotations and exclusionTags were originally for the LookupConsumer, but now apply to the annotator -->
+         <configurationParameter>
+            <name>windowAnnotations</name>
+            <description>Type of window to use for lookup</description>
+            <type>String</type>
+            <multiValued>false</multiValued>
+            <mandatory>true</mandatory>
+         </configurationParameter>
+         <configurationParameter>
+            <name>exclusionTags</name>
+            <description>Parts of speech to ignore when considering lookup tokens</description>
+            <type>String</type>
+            <multiValued>false</multiValued>
+            <mandatory>false</mandatory>
+         </configurationParameter>
+         <configurationParameter>
+            <name>minimumSpan</name>
+            <description>Minimum required span length of tokens to use for lookup. Default is 3</description>
+            <type>String</type>
+            <multiValued>false</multiValued>
+            <mandatory>false</mandatory>
+         </configurationParameter>
+         <configurationParameter>
+            <name>totalTokenSkips</name>
+            <description>Specifies the number of total tokens that can be skipped. Default is 4</description>
+            <type>String</type>
+            <multiValued>false</multiValued>
+            <mandatory>false</mandatory>
+         </configurationParameter>
+         <configurationParameter>
+            <name>consecutiveSkips</name>
+            <description>Specifies the number of consecutive non-comma tokens that can be skipped. Default is 2</description>
+            <type>String</type>
+            <multiValued>false</multiValued>
+            <mandatory>false</mandatory>
+         </configurationParameter>
+      </configurationParameters>
+
+      <configurationParameterSettings>
+         <nameValuePair>
+            <name>windowAnnotations</name>
+            <value>
+               <!--  LookupWindowAnnotation is supposed to be a refined Noun Phrase  -->
+               <!--<string>org.apache.ctakes.typesystem.type.textspan.LookupWindowAnnotation</string>-->
+               <!--  In some instances LookupWindowAnnotation is missing tokens and Sentence can be used -->
+               <string>org.apache.ctakes.typesystem.type.textspan.Sentence</string>
+            </value>
+         </nameValuePair>
+         <nameValuePair>
+            <name>exclusionTags</name>
+            <value>
+               <string>VB,VBD,VBG,VBN,VBP,VBZ,CC,CD,DT,EX,IN,LS,MD,PDT,POS,PP,PP$,PRP,PRP$,RP,TO,WDT,WP,WPS,WRB</string>
+            </value>
+         </nameValuePair>
+         <nameValuePair>
+            <name>minimumSpan</name>
+            <value>
+               <string>3</string>
+            </value>
+         </nameValuePair>
+         <nameValuePair>
+            <name>totalTokenSkips</name>
+            <value>
+               <string>4</string>
+            </value>
+         </nameValuePair>
+         <nameValuePair>
+            <name>consecutiveSkips</name>
+            <value>
+               <string>2</string>
+            </value>
+         </nameValuePair>
+      </configurationParameterSettings>
+
+      <typeSystemDescription>
+         <imports>
+         </imports>
+      </typeSystemDescription>
+      <typePriorities/>
+      <fsIndexCollection/>
+      <capabilities>
+         <capability>
+            <inputs>
+               <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.BaseToken</type>
+               <!--<type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.textspan.LookupWindowAnnotation</type>-->
+               <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.textspan.Sentence</type>
+            </inputs>
+            <outputs>
+               <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation</type>
+            </outputs>
+            <languagesSupported/>
+         </capability>
+      </capabilities>
+      <operationalProperties>
+         <modifiesCas>true</modifiesCas>
+         <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+         <outputsNewCASes>false</outputsNewCASes>
+      </operationalProperties>
+   </analysisEngineMetaData>
+
+   <externalResourceDependencies>
+      <!-- DictionaryDescriptor is a relatively poorly-named xml that contains parms for dictionary files, dbs, etc. -->
+      <!-- why aren't such things just defined here?  The obvious answer is -->
+      <externalResourceDependency>
+         <key>DictionaryDescriptor</key>
+         <description/>
+         <interfaceName>org.apache.ctakes.core.resource.FileResource</interfaceName>
+         <optional>false</optional>
+      </externalResourceDependency>
+   </externalResourceDependencies>
+
+   <resourceManagerConfiguration>
+      <externalResources>
+         <externalResource>
+            <!-- The Binding is below, for DictionaryDescriptor = DictionaryDescriptorFile -->
+            <name>DictionaryDescriptorFile</name>
+            <description/>
+            <fileResourceSpecifier>
+               <fileUrl>file:org/apache/ctakes/dictionary/lookup/fast/cTakesHsql.xml</fileUrl>
+            </fileResourceSpecifier>
+            <implementationName>org.apache.ctakes.core.resource.FileResourceImpl</implementationName>
+         </externalResource>
+      </externalResources>
+
+      <externalResourceBindings>
+         <externalResourceBinding>
+            <key>DictionaryDescriptor</key>
+            <resourceName>DictionaryDescriptorFile</resourceName>
+         </externalResourceBinding>
+      </externalResourceBindings>
+   </resourceManagerConfiguration>
+</taeDescription>



Mime
View raw message