ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From chen...@apache.org
Subject svn commit: r1466216 - in /ctakes/trunk: ctakes-clinical-pipeline/desc/analysis_engine/ ctakes-pos-tagger-res/src/main/resources/org/apache/ctakes/postagger/models/ ctakes-pos-tagger/ ctakes-pos-tagger/desc/ ctakes-pos-tagger/src/main/java/org/apache/c...
Date Tue, 09 Apr 2013 20:26:04 GMT
Author: chenpei
Date: Tue Apr  9 20:26:03 2013
New Revision: 1466216

URL: http://svn.apache.org/r1466216
Log:
CTAKES-186 - Add ClearNLP POSTagger as optional AE

Added:
    ctakes/trunk/ctakes-pos-tagger-res/src/main/resources/org/apache/ctakes/postagger/models/mayo-en-pos-1.3.0.jar
  (with props)
    ctakes/trunk/ctakes-pos-tagger/desc/ClearNLPPOSTagger.xml
    ctakes/trunk/ctakes-pos-tagger/src/main/java/org/apache/ctakes/postagger/ClearNLPPOSTaggerAE.java
Modified:
    ctakes/trunk/ctakes-clinical-pipeline/desc/analysis_engine/AggregateCdaProcessor.xml
    ctakes/trunk/ctakes-clinical-pipeline/desc/analysis_engine/AggregateCdaUMLSProcessor.xml
    ctakes/trunk/ctakes-clinical-pipeline/desc/analysis_engine/AggregatePlaintextProcessor.xml
    ctakes/trunk/ctakes-clinical-pipeline/desc/analysis_engine/AggregatePlaintextUMLSProcessor.xml
    ctakes/trunk/ctakes-pos-tagger/pom.xml

Modified: ctakes/trunk/ctakes-clinical-pipeline/desc/analysis_engine/AggregateCdaProcessor.xml
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-clinical-pipeline/desc/analysis_engine/AggregateCdaProcessor.xml?rev=1466216&r1=1466215&r2=1466216&view=diff
==============================================================================
--- ctakes/trunk/ctakes-clinical-pipeline/desc/analysis_engine/AggregateCdaProcessor.xml (original)
+++ ctakes/trunk/ctakes-clinical-pipeline/desc/analysis_engine/AggregateCdaProcessor.xml Tue Apr  9 20:26:03 2013
@@ -53,6 +53,11 @@
 <delegateAnalysisEngine key="POSTagger">
 <import location="../../../ctakes-pos-tagger/desc/POSTagger.xml"/>
 </delegateAnalysisEngine>
+<!-- 
+<delegateAnalysisEngine key="ClearPOSTagger">
+<import location="../../../ctakes-pos-tagger/desc/ClearNLPPOSTagger.xml"/>
+</delegateAnalysisEngine>
+ -->
 <delegateAnalysisEngine key="StatusAnnotator">
 <import location="../../../ctakes-ne-contexts/desc/StatusAnnotator.xml"/>
 </delegateAnalysisEngine>
@@ -104,6 +109,7 @@
 <node>LvgAnnotator</node>
 <node>ContextDependentTokenizerAnnotator</node>
 <node>POSTagger</node>
+<!-- <node>ClearPOSTagger</node>  -->
 <node>Chunker</node>
 <node>AdjustNounPhraseToIncludeFollowingNP</node>
 <node>AdjustNounPhraseToIncludeFollowingPPNP</node>

Modified: ctakes/trunk/ctakes-clinical-pipeline/desc/analysis_engine/AggregateCdaUMLSProcessor.xml
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-clinical-pipeline/desc/analysis_engine/AggregateCdaUMLSProcessor.xml?rev=1466216&r1=1466215&r2=1466216&view=diff
==============================================================================
--- ctakes/trunk/ctakes-clinical-pipeline/desc/analysis_engine/AggregateCdaUMLSProcessor.xml (original)
+++ ctakes/trunk/ctakes-clinical-pipeline/desc/analysis_engine/AggregateCdaUMLSProcessor.xml Tue Apr  9 20:26:03 2013
@@ -53,6 +53,11 @@
 <delegateAnalysisEngine key="POSTagger">
 <import location="../../../ctakes-pos-tagger/desc/POSTagger.xml"/>
 </delegateAnalysisEngine>
+<!-- 
+<delegateAnalysisEngine key="ClearPOSTagger">
+<import location="../../../ctakes-pos-tagger/desc/ClearNLPPOSTagger.xml"/>
+</delegateAnalysisEngine>
+ -->
 <delegateAnalysisEngine key="StatusAnnotator">
 <import location="../../../ctakes-ne-contexts/desc/StatusAnnotator.xml"/>
 </delegateAnalysisEngine>
@@ -107,6 +112,7 @@ Please update DictionaryLookupAnnotatorU
 <node>LvgAnnotator</node>
 <node>ContextDependentTokenizerAnnotator</node>
 <node>POSTagger</node>
+<!-- <node>ClearPOSTagger</node>  -->
 <node>Chunker</node>
 <node>AdjustNounPhraseToIncludeFollowingNP</node>
 <node>AdjustNounPhraseToIncludeFollowingPPNP</node>

Modified: ctakes/trunk/ctakes-clinical-pipeline/desc/analysis_engine/AggregatePlaintextProcessor.xml
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-clinical-pipeline/desc/analysis_engine/AggregatePlaintextProcessor.xml?rev=1466216&r1=1466215&r2=1466216&view=diff
==============================================================================
--- ctakes/trunk/ctakes-clinical-pipeline/desc/analysis_engine/AggregatePlaintextProcessor.xml (original)
+++ ctakes/trunk/ctakes-clinical-pipeline/desc/analysis_engine/AggregatePlaintextProcessor.xml Tue Apr  9 20:26:03 2013
@@ -47,6 +47,11 @@
 <delegateAnalysisEngine key="POSTagger">
 <import location="../../../ctakes-pos-tagger/desc/POSTagger.xml"/>
 </delegateAnalysisEngine>
+<!-- 
+<delegateAnalysisEngine key="ClearPOSTagger">
+<import location="../../../ctakes-pos-tagger/desc/ClearNLPPOSTagger.xml"/>
+</delegateAnalysisEngine>
+ -->
 <delegateAnalysisEngine key="StatusAnnotator">
 <import location="../../../ctakes-ne-contexts/desc/StatusAnnotator.xml"/>
 </delegateAnalysisEngine>
@@ -118,6 +123,7 @@ Includes adjusting NP chunks to include 
 <node>LvgAnnotator</node>
 <node>ContextDependentTokenizerAnnotator</node>
 <node>POSTagger</node>
+<!-- <node>ClearPOSTagger</node>  -->
 <node>Chunker</node>
 <node>AdjustNounPhraseToIncludeFollowingNP</node>
 <node>AdjustNounPhraseToIncludeFollowingPPNP</node>

Modified: ctakes/trunk/ctakes-clinical-pipeline/desc/analysis_engine/AggregatePlaintextUMLSProcessor.xml
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-clinical-pipeline/desc/analysis_engine/AggregatePlaintextUMLSProcessor.xml?rev=1466216&r1=1466215&r2=1466216&view=diff
==============================================================================
--- ctakes/trunk/ctakes-clinical-pipeline/desc/analysis_engine/AggregatePlaintextUMLSProcessor.xml (original)
+++ ctakes/trunk/ctakes-clinical-pipeline/desc/analysis_engine/AggregatePlaintextUMLSProcessor.xml Tue Apr  9 20:26:03 2013
@@ -62,6 +62,11 @@
     <delegateAnalysisEngine key="POSTagger">
       <import location="../../../ctakes-pos-tagger/desc/POSTagger.xml"/>
     </delegateAnalysisEngine>
+	<!-- 
+	<delegateAnalysisEngine key="ClearPOSTagger">
+	<import location="../../../ctakes-pos-tagger/desc/ClearNLPPOSTagger.xml"/>
+	</delegateAnalysisEngine>
+	 -->    
     <delegateAnalysisEngine key="LvgAnnotator">
       <import location="../../../ctakes-lvg/desc/analysis_engine/LvgAnnotator.xml"/>
     </delegateAnalysisEngine>
@@ -119,6 +124,7 @@ and requires an UMLS license.  Please up
         <node>LvgAnnotator</node>
         <node>ContextDependentTokenizerAnnotator</node>
         <node>POSTagger</node>
+		<!-- <node>ClearPOSTagger</node>  -->        
         <node>Chunker</node>
         <node>AdjustNounPhraseToIncludeFollowingNP</node>
         <node>AdjustNounPhraseToIncludeFollowingPPNP</node>

Added: ctakes/trunk/ctakes-pos-tagger-res/src/main/resources/org/apache/ctakes/postagger/models/mayo-en-pos-1.3.0.jar
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-pos-tagger-res/src/main/resources/org/apache/ctakes/postagger/models/mayo-en-pos-1.3.0.jar?rev=1466216&view=auto
==============================================================================
Binary file - no diff available.

Propchange: ctakes/trunk/ctakes-pos-tagger-res/src/main/resources/org/apache/ctakes/postagger/models/mayo-en-pos-1.3.0.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: ctakes/trunk/ctakes-pos-tagger/desc/ClearNLPPOSTagger.xml
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-pos-tagger/desc/ClearNLPPOSTagger.xml?rev=1466216&view=auto
==============================================================================
--- ctakes/trunk/ctakes-pos-tagger/desc/ClearNLPPOSTagger.xml (added)
+++ ctakes/trunk/ctakes-pos-tagger/desc/ClearNLPPOSTagger.xml Tue Apr  9 20:26:03 2013
@@ -0,0 +1,66 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+
+    Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing,
+    software distributed under the License is distributed on an
+    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    KIND, either express or implied.  See the License for the
+    specific language governing permissions and limitations
+    under the License.
+
+-->
+<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
+    <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+    <primitive>true</primitive>
+    <annotatorImplementationName>org.apache.ctakes.postagger.ClearNLPPOSTaggerAE</annotatorImplementationName>
+    <analysisEngineMetaData>
+        <name>ClearNLPPOSTaggerAE</name>
+        <description>Descriptor automatically generated by uimaFIT</description>
+        <version>unknown</version>
+        <vendor>org.apache.ctakes.dependency.parser.ae</vendor>
+        <configurationParameters>
+            <configurationParameter>
+                <name>POSModelFileName</name>
+                <description>This parameter provides the file name of the Clear POS model required by the factory method provided by ClearNLPUtil.  If not specified, this analysis engine will use a default model from the resources directory</description>
+                <type>String</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+        </configurationParameters>
+
+        <typeSystemDescription>
+            <imports>
+        <import name="org.apache.ctakes.typesystem.types.TypeSystem"/>
+            </imports>
+        </typeSystemDescription>
+        <fsIndexCollection/>
+        <capabilities>
+            <capability>
+                <inputs>
+                    <feature>org.apache.ctakes.typesystem.type.syntax.BaseToken:partOfSpeech</feature>
+                    <feature>org.apache.ctakes.typesystem.type.syntax.BaseToken:normalizedForm</feature>
+                    <feature>org.apache.ctakes.typesystem.type.syntax.BaseToken:tokenNumber</feature>
+                    <feature>org.apache.ctakes.typesystem.type.syntax.BaseToken:end</feature>
+                    <feature>org.apache.ctakes.typesystem.type.syntax.BaseToken:begin</feature>
+                </inputs>
+                <outputs/>
+                <languagesSupported/>
+            </capability>
+        </capabilities>
+        <operationalProperties>
+            <modifiesCas>true</modifiesCas>
+            <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+            <outputsNewCASes>false</outputsNewCASes>
+        </operationalProperties>
+    </analysisEngineMetaData>
+</analysisEngineDescription>

Modified: ctakes/trunk/ctakes-pos-tagger/pom.xml
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-pos-tagger/pom.xml?rev=1466216&r1=1466215&r2=1466216&view=diff
==============================================================================
--- ctakes/trunk/ctakes-pos-tagger/pom.xml (original)
+++ ctakes/trunk/ctakes-pos-tagger/pom.xml Tue Apr  9 20:26:03 2013
@@ -58,6 +58,10 @@
 			<artifactId>opennlp-tools</artifactId>
 		</dependency>
 		<dependency>
+			<groupId>com.googlecode.clearnlp</groupId>
+			<artifactId>clearnlp</artifactId>
+		</dependency>		
+		<dependency>
 			<groupId>jdom</groupId>
 			<artifactId>jdom</artifactId>
 		</dependency>

Added: ctakes/trunk/ctakes-pos-tagger/src/main/java/org/apache/ctakes/postagger/ClearNLPPOSTaggerAE.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-pos-tagger/src/main/java/org/apache/ctakes/postagger/ClearNLPPOSTaggerAE.java?rev=1466216&view=auto
==============================================================================
--- ctakes/trunk/ctakes-pos-tagger/src/main/java/org/apache/ctakes/postagger/ClearNLPPOSTaggerAE.java (added)
+++ ctakes/trunk/ctakes-pos-tagger/src/main/java/org/apache/ctakes/postagger/ClearNLPPOSTaggerAE.java Tue Apr  9 20:26:03 2013
@@ -0,0 +1,127 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.postagger;
+
+import java.net.URI;
+import java.net.URL;
+import java.util.List;
+
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.log4j.Logger;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.uimafit.component.JCasAnnotator_ImplBase;
+import org.uimafit.descriptor.ConfigurationParameter;
+import org.uimafit.descriptor.TypeCapability;
+import org.uimafit.util.JCasUtil;
+
+import com.googlecode.clearnlp.component.AbstractComponent;
+import com.googlecode.clearnlp.dependency.DEPNode;
+import com.googlecode.clearnlp.dependency.DEPTree;
+import com.googlecode.clearnlp.engine.EngineGetter;
+import com.googlecode.clearnlp.nlp.NLPLib;
+import com.googlecode.clearnlp.reader.AbstractReader;
+
+/**
+ * <br>
+* This class provides a UIMA wrapper for the CLEAR POSTagger. This pos tagger is available here:
+ * <p>
+ * http://code.google.com/p/clearnlp
+ * <p>
+ * 
+ */
+@TypeCapability(
+		inputs = { 
+				"org.apache.ctakes.typesystem.type.syntax.BaseToken:partOfSpeech",
+				"org.apache.ctakes.typesystem.type.syntax.BaseToken:normalizedForm",
+				"org.apache.ctakes.typesystem.type.syntax.BaseToken:tokenNumber",
+				"org.apache.ctakes.typesystem.type.syntax.BaseToken:end",
+				"org.apache.ctakes.typesystem.type.syntax.BaseToken:begin"
+		})
+public class ClearNLPPOSTaggerAE extends JCasAnnotator_ImplBase {
+
+	final String language = AbstractReader.LANG_EN;
+	public Logger logger = Logger.getLogger(getClass().getName());
+	
+	// Default model values
+	public static final String DEFAULT_MODEL_FILE_NAME = "org/apache/ctakes/postagger/models/mayo-en-pos-1.3.0.jar";
+
+	
+	
+	// Configuration Parameters 
+	public static final String PARAM_POS_MODEL_FILE_NAME = "POSModelFileName";
+	@ConfigurationParameter(
+			name = PARAM_POS_MODEL_FILE_NAME,
+			description = "This parameter provides the file name of the Clear POS model required " +
+					      "by the factory method provided by ClearNLPUtil.  If not specified, this " +
+					      "analysis engine will use a default model from the resources directory")
+	protected URI posModelUri;
+
+
+
+	protected AbstractComponent postagger;
+
+
+	@Override
+	public void initialize(UimaContext context) throws ResourceInitializationException {
+		super.initialize(context);
+
+		try {
+
+            	URL parserModelURL = (this.posModelUri == null)
+                    ? this.getClass().getClassLoader().getResource(DEFAULT_MODEL_FILE_NAME).toURI().toURL()
+                    : this.posModelUri.toURL();
+                 
+                    this.postagger = EngineGetter.getComponent(parserModelURL.openStream(), this.language, NLPLib.MODE_POS);
+
+        } catch (Exception e) {
+            throw new ResourceInitializationException(e);
+        }
+	}
+
+	@Override
+	public void process(JCas jCas) throws AnalysisEngineProcessException {
+		for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
+			List<BaseToken> tokens = JCasUtil.selectCovered(jCas, BaseToken.class, sentence);
+			DEPTree tree = new DEPTree();
+
+			// Convert CAS data into structures usable by ClearNLP
+			for (int i = 0; i < tokens.size(); i++) {
+				BaseToken token = tokens.get(i);
+				DEPNode node = new DEPNode(i+1, token.getCoveredText());
+				tree.add(node);
+			}
+
+			// Run parser and convert output back to CAS friendly data types
+			postagger.process(tree);
+			
+			for (int i = 0; i < tokens.size(); i++) {
+				BaseToken token = tokens.get(i);
+				DEPNode node = tree.get(i+1);
+				token.setPartOfSpeech(node.pos);
+			}
+			
+		}
+		
+		
+	}
+}



Mime
View raw message