ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From chen...@apache.org
Subject svn commit: r1676753 - in /ctakes/trunk: ctakes-core/src/main/java/org/apache/ctakes/core/ci/ ctakes-preprocessor/desc/ ctakes-preprocessor/src/main/java/org/apache/ctakes/preprocessor/ ctakes-preprocessor/src/main/java/org/apache/ctakes/preprocessor/a...
Date Wed, 29 Apr 2015 14:38:07 GMT
Author: chenpei
Date: Wed Apr 29 14:38:06 2015
New Revision: 1676753

URL: http://svn.apache.org/r1676753
Log:
CTAKES-347 AggregateCdaProcessor fails with URI is not hierarchical
Not sure how widely CDA as input is used.  Simplified the config input by reusing uimaFIT-style
config parameters and using plain strings (path + InputStream) instead of FileResource.  These
config files are small, and sharing them as a resource is more than is needed.

Modified:
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ci/HyphenTextModifierImpl.java
    ctakes/trunk/ctakes-preprocessor/desc/CdaCasInitializer.xml
    ctakes/trunk/ctakes-preprocessor/src/main/java/org/apache/ctakes/preprocessor/ClinicalNotePreProcessor.java
    ctakes/trunk/ctakes-preprocessor/src/main/java/org/apache/ctakes/preprocessor/DTDloader.java
    ctakes/trunk/ctakes-preprocessor/src/main/java/org/apache/ctakes/preprocessor/ae/CdaCasInitializer.java
    ctakes/trunk/ctakes-preprocessor/src/main/java/org/apache/ctakes/preprocessor/test/TestDriver.java
    ctakes/trunk/ctakes-preprocessor/src/test/java/org/apache/ctakes/preprocessor/ClinicalNotePreProcessorTest.java

Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ci/HyphenTextModifierImpl.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ci/HyphenTextModifierImpl.java?rev=1676753&r1=1676752&r2=1676753&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ci/HyphenTextModifierImpl.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ci/HyphenTextModifierImpl.java Wed Apr 29 14:38:06 2015
@@ -39,6 +39,7 @@ import java.util.Map;
 
 import org.apache.ctakes.core.nlp.tokenizer.Token;
 import org.apache.ctakes.core.nlp.tokenizer.Tokenizer;
+import org.apache.ctakes.core.resource.FileLocator;
 
 
 /**
@@ -59,8 +60,8 @@ public class HyphenTextModifierImpl impl
 		iv_tokenizer = new Tokenizer();
 		BufferedReader br;
 		try {
-			br = new BufferedReader(new FileReader(new File(hyphenfilename)));
-
+			br = new BufferedReader(new InputStreamReader(
+				      FileLocator.getAsStream(hyphenfilename)));
 			String line = "";
 
 			iv_shouldbeHyphenMap = new HashMap<String, Integer>();

Modified: ctakes/trunk/ctakes-preprocessor/desc/CdaCasInitializer.xml
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-preprocessor/desc/CdaCasInitializer.xml?rev=1676753&r1=1676752&r2=1676753&view=diff
==============================================================================
--- ctakes/trunk/ctakes-preprocessor/desc/CdaCasInitializer.xml (original)
+++ ctakes/trunk/ctakes-preprocessor/desc/CdaCasInitializer.xml Wed Apr 29 14:38:06 2015
@@ -20,89 +20,76 @@
 
 -->
 <taeDescription xmlns="http://uima.apache.org/resourceSpecifier">
-<frameworkImplementation>org.apache.uima.java</frameworkImplementation>
-<primitive>true</primitive>
-<annotatorImplementationName>org.apache.ctakes.preprocessor.ae.CdaCasInitializer</annotatorImplementationName>
-<analysisEngineMetaData>
-<name>CdaCasInitializer</name>
-<description>Creates a plaintext view from a CDA view.  
-This does not handle all CDA documents.
-The CDA must conform to the DTD resource.</description>
-<version>2.1</version>
-<vendor>Mayo Clinic</vendor>
-<configurationParameters/>
-<configurationParameterSettings/>
-<typeSystemDescription>
-<imports>
-</imports>
-</typeSystemDescription>
-<typePriorities/>
-<fsIndexCollection/>
-<capabilities>
-<capability>
-<inputs>
-<type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.BaseToken</type>
-</inputs>
-<outputs>
-<type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.textspan.Segment</type>
-<type allAnnotatorFeatures="true">edu.mayo.bmi.uima.type.util.Pair</type>
-<type allAnnotatorFeatures="true">edu.mayo.bmi.uima.type.util.Pairs</type>
-<type allAnnotatorFeatures="true">uima.tcas.DocumentAnnotation</type>
-</outputs>
-<outputSofas>
-<sofaName>plaintext</sofaName>
-</outputSofas>
-<languagesSupported/>
-</capability>
-</capabilities>
-<operationalProperties>
-<modifiesCas>true</modifiesCas>
-<multipleDeploymentAllowed>true</multipleDeploymentAllowed>
-<outputsNewCASes>false</outputsNewCASes>
-</operationalProperties>
-</analysisEngineMetaData>
-<externalResourceDependencies>
-<externalResourceDependency>
-<key>HyphenDictionary</key>
-<description></description>
-<interfaceName>org.apache.ctakes.core.resource.FileResource</interfaceName>
-<optional>false</optional>
-</externalResourceDependency>
-<externalResourceDependency>
-<key>DTD</key>
-<description></description>
-<interfaceName>org.apache.ctakes.core.resource.FileResource</interfaceName>
-<optional>false</optional>
-</externalResourceDependency>
-</externalResourceDependencies>
-<resourceManagerConfiguration>
-<externalResources>
-<externalResource>
-<name>DtdFile</name>
-<description></description>
-<fileResourceSpecifier>
-<fileUrl>file:org/apache/ctakes/preprocessor/cda/NotesIIST_RTF.DTD</fileUrl>
-</fileResourceSpecifier>
-<implementationName>org.apache.ctakes.core.resource.FileResourceImpl</implementationName>
-</externalResource>
-<externalResource>
-<name>HyphenFile</name>
-<description></description>
-<fileResourceSpecifier>
-<fileUrl>file:org/apache/ctakes/preprocessor/tokenizer/hyphenated.txt</fileUrl>
-</fileResourceSpecifier>
-<implementationName>org.apache.ctakes.core.resource.FileResourceImpl</implementationName>
-</externalResource>
-</externalResources>
-<externalResourceBindings>
-<externalResourceBinding>
-<key>DTD</key>
-<resourceName>DtdFile</resourceName>
-</externalResourceBinding>
-<externalResourceBinding>
-<key>HyphenDictionary</key>
-<resourceName>HyphenFile</resourceName>
-</externalResourceBinding>
-</externalResourceBindings>
-</resourceManagerConfiguration>
+	<frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+	<primitive>true</primitive>
+	<annotatorImplementationName>org.apache.ctakes.preprocessor.ae.CdaCasInitializer
+	</annotatorImplementationName>
+	<analysisEngineMetaData>
+		<name>CdaCasInitializer</name>
+		<description>Creates a plaintext view from a CDA view.
+			This does not
+			handle all CDA documents.
+			The CDA must conform to the DTD resource.
+		</description>
+		<version>2.1</version>
+		<vendor>Mayo Clinic</vendor>
+		<configurationParameters>
+			<configurationParameter>
+				<name>HyphenFile</name>
+				<description></description>
+				<type>String</type>
+				<multiValued>false</multiValued>
+				<mandatory>false</mandatory>
+			</configurationParameter>
+			<configurationParameter>
+				<name>DtdFile</name>
+				<description></description>
+				<type>String</type>
+				<multiValued>false</multiValued>
+				<mandatory>false</mandatory>
+			</configurationParameter>			
+		</configurationParameters>
+		<configurationParameterSettings>
+			<nameValuePair>
+				<name>HyphenFile</name>
+				<value>
+					<string>org/apache/ctakes/preprocessor/tokenizer/hyphenated.txt</string>
+				</value>
+			</nameValuePair>
+			<nameValuePair>
+				<name>DtdFile</name>
+				<value>
+					<string>org/apache/ctakes/preprocessor/cda/NotesIIST_RTF.DTD</string>
+				</value>
+			</nameValuePair>			
+		</configurationParameterSettings>
+		<typeSystemDescription>
+			<imports>
+			</imports>
+		</typeSystemDescription>
+		<typePriorities />
+		<fsIndexCollection />
+		<capabilities>
+			<capability>
+				<inputs>
+					<type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.BaseToken
+					</type>
+				</inputs>
+				<outputs>
+					<type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.textspan.Segment
+					</type>
+					<type allAnnotatorFeatures="true">uima.tcas.DocumentAnnotation</type>
+				</outputs>
+				<outputSofas>
+					<sofaName>plaintext</sofaName>
+				</outputSofas>
+				<languagesSupported />
+			</capability>
+		</capabilities>
+		<operationalProperties>
+			<modifiesCas>true</modifiesCas>
+			<multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+			<outputsNewCASes>false</outputsNewCASes>
+		</operationalProperties>
+	</analysisEngineMetaData>
 </taeDescription>

Modified: ctakes/trunk/ctakes-preprocessor/src/main/java/org/apache/ctakes/preprocessor/ClinicalNotePreProcessor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-preprocessor/src/main/java/org/apache/ctakes/preprocessor/ClinicalNotePreProcessor.java?rev=1676753&r1=1676752&r2=1676753&view=diff
==============================================================================
--- ctakes/trunk/ctakes-preprocessor/src/main/java/org/apache/ctakes/preprocessor/ClinicalNotePreProcessor.java (original)
+++ ctakes/trunk/ctakes-preprocessor/src/main/java/org/apache/ctakes/preprocessor/ClinicalNotePreProcessor.java Wed Apr 29 14:38:06 2015
@@ -20,6 +20,7 @@ package org.apache.ctakes.preprocessor;
 
 import java.io.File;
 import java.io.FileNotFoundException;
+import java.io.InputStream;
 import java.io.StringReader;
 import java.util.ArrayList;
 import java.util.Calendar;
@@ -153,7 +154,7 @@ public class ClinicalNotePreProcessor ex
      *            as part of the section.
      * @throws SAXException
      */
-    public ClinicalNotePreProcessor(File dtdFile, boolean includeSectionMarkers)
+    public ClinicalNotePreProcessor(InputStream dtdFile, boolean includeSectionMarkers)
             throws SAXException, FileNotFoundException
     {
         iv_includeSectionMarkers = includeSectionMarkers;

Modified: ctakes/trunk/ctakes-preprocessor/src/main/java/org/apache/ctakes/preprocessor/DTDloader.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-preprocessor/src/main/java/org/apache/ctakes/preprocessor/DTDloader.java?rev=1676753&r1=1676752&r2=1676753&view=diff
==============================================================================
--- ctakes/trunk/ctakes-preprocessor/src/main/java/org/apache/ctakes/preprocessor/DTDloader.java (original)
+++ ctakes/trunk/ctakes-preprocessor/src/main/java/org/apache/ctakes/preprocessor/DTDloader.java Wed Apr 29 14:38:06 2015
@@ -22,6 +22,7 @@ import java.io.File;
 import java.io.FileInputStream;
 import java.io.FileNotFoundException;
 import java.io.IOException;
+import java.io.InputStream;
 
 import org.xml.sax.EntityResolver;
 import org.xml.sax.InputSource;
@@ -36,9 +37,9 @@ public class DTDloader implements Entity
 {
     private InputSource iv_inSrc;
 
-    public DTDloader(File dtdFile) throws FileNotFoundException
+    public DTDloader(InputStream dtdFile)
     {
-        iv_inSrc = new InputSource(new FileInputStream(dtdFile));
+        iv_inSrc = new InputSource(dtdFile);
     }
 
     /**

Modified: ctakes/trunk/ctakes-preprocessor/src/main/java/org/apache/ctakes/preprocessor/ae/CdaCasInitializer.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-preprocessor/src/main/java/org/apache/ctakes/preprocessor/ae/CdaCasInitializer.java?rev=1676753&r1=1676752&r2=1676753&view=diff
==============================================================================
--- ctakes/trunk/ctakes-preprocessor/src/main/java/org/apache/ctakes/preprocessor/ae/CdaCasInitializer.java (original)
+++ ctakes/trunk/ctakes-preprocessor/src/main/java/org/apache/ctakes/preprocessor/ae/CdaCasInitializer.java Wed Apr 29 14:38:06 2015
@@ -22,12 +22,12 @@ import java.io.File;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.Map;
-
+
 import org.apache.log4j.Logger;
 import org.apache.uima.UimaContext;
-import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.cas.FSIterator;
+import org.apache.uima.fit.descriptor.ConfigurationParameter;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.JFSIndexRepository;
 import org.apache.uima.jcas.cas.FSArray;
@@ -38,6 +38,7 @@ import org.apache.uima.resource.Resource
 import org.apache.ctakes.core.ci.HyphenTextModifierImpl;
 import org.apache.ctakes.core.ci.TextModification;
 import org.apache.ctakes.core.ci.TextModifier;
+import org.apache.ctakes.core.resource.FileLocator;
 import org.apache.ctakes.core.resource.FileResource;
 import org.apache.ctakes.preprocessor.ClinicalNotePreProcessor;
 import org.apache.ctakes.preprocessor.DocumentMetaData;
@@ -47,7 +48,8 @@ import org.apache.ctakes.typesystem.type
 import org.apache.ctakes.typesystem.type.textspan.Segment;
 import org.apache.ctakes.typesystem.type.util.Pair;
 import org.apache.ctakes.typesystem.type.util.Pairs;
-
+import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
+import org.apache.uima.fit.descriptor.ConfigurationParameter;
 
 /**
  * Bootstraps the CAS by:
@@ -63,21 +65,33 @@ import org.apache.ctakes.typesystem.type
  */
 public class CdaCasInitializer extends JCasAnnotator_ImplBase
 {
+	protected static final String DEFAULT_HYPHEN_FILE = "org/apache/ctakes/preprocessor/tokenizer/hyphenated.txt";
+	protected static final String DEFAULT_DTD_FILE = "org/apache/ctakes/preprocessor/cda/NotesIIST_RTF.DTD";

+	
     // LOG4J logger based on class name
     private Logger logger = Logger.getLogger(getClass().getName());
 
-    private File dtdFile;
     private Boolean includeSectionMarkers;
-
     private TextModifier tm;
+    private UimaContext uimaContext;
     
-    
-    private UimaContext uimaContext; 
+    public static final String PARAM_DTD_FILE = "DtdFile";
+  	@ConfigurationParameter(name = PARAM_DTD_FILE, 
+  	    description = "Path to File that contains the DTD file", 
+  	    defaultValue=DEFAULT_DTD_FILE,
+  	    mandatory=false)
+  	protected String dtdfilepath;
+  	
+    public static final String PARAM_HYPHEN_FILE = "HyphenFile";
+  	@ConfigurationParameter(name = PARAM_HYPHEN_FILE, 
+  	    description = "Path to File that contains the hypenated file", 
+  	    defaultValue=DEFAULT_HYPHEN_FILE,
+  	    mandatory=false)
+  	protected String hyphenfilepath;  	
     
 	public void initialize(UimaContext aCtx) throws ResourceInitializationException {
 		
 		super.initialize(aCtx);
-		
 		uimaContext = aCtx;
 		initialize();
 
@@ -95,17 +109,13 @@ public class CdaCasInitializer extends J
         int hyphWindow = 3;
 
         try {
-            FileResource hyphResrc = (FileResource) uimaContext.getResourceObject("HyphenDictionary");
-            File hyphFile = hyphResrc.getFile();
-        	logger.info("Hyphen dictionary: " + hyphFile.getAbsolutePath());
+        	logger.info("Hyphen dictionary: " + hyphenfilepath);
 
             tm = new HyphenTextModifierImpl(
-                    hyphFile.getAbsolutePath(),
+            		hyphenfilepath,
                     hyphWindow);
 
-            FileResource dtdResrc = (FileResource) uimaContext.getResourceObject("DTD");
-            dtdFile = dtdResrc.getFile();
-        	logger.info("DTD: " + dtdFile.getAbsolutePath());
+        	logger.info("DTD: " + dtdfilepath);
         }
         catch (Exception e) {
             throw new ResourceInitializationException(e);
@@ -151,9 +161,10 @@ public class CdaCasInitializer extends J
             
         	JCas originalView = jcas.getView("_InitialView");
         	originalText = originalView.getSofaDataString();
-
+
+        	//TODO: A bit over engineered? Is this config really parsed very time???
             PreProcessor pp = new ClinicalNotePreProcessor(
-                    dtdFile,
+            		FileLocator.getAsStream(dtdfilepath),
                     includeSectionMarkers.booleanValue());
             dmd = pp.process(originalText);
 

Modified: ctakes/trunk/ctakes-preprocessor/src/main/java/org/apache/ctakes/preprocessor/test/TestDriver.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-preprocessor/src/main/java/org/apache/ctakes/preprocessor/test/TestDriver.java?rev=1676753&r1=1676752&r2=1676753&view=diff
==============================================================================
--- ctakes/trunk/ctakes-preprocessor/src/main/java/org/apache/ctakes/preprocessor/test/TestDriver.java (original)
+++ ctakes/trunk/ctakes-preprocessor/src/main/java/org/apache/ctakes/preprocessor/test/TestDriver.java Wed Apr 29 14:38:06 2015
@@ -27,6 +27,7 @@ import java.util.Iterator;
 import java.util.Map;
 import java.util.Set;
 
+import org.apache.ctakes.core.resource.FileLocator;
 import org.apache.ctakes.preprocessor.ClinicalNotePreProcessor;
 import org.apache.ctakes.preprocessor.DocumentMetaData;
 import org.apache.ctakes.preprocessor.PreProcessor;
@@ -62,12 +63,7 @@ public class TestDriver
             Set sectionNames;
             Iterator snItr;
 
-            File dtdFile = new File(dtdFilename);
-            if (!exists(dtdFile)) {
-            	System.exit(-1);
-            }
-            
-            PreProcessor pp = new ClinicalNotePreProcessor(dtdFile, false);
+            PreProcessor pp = new ClinicalNotePreProcessor(FileLocator.getAsStream(dtdFilename), false);
 
             timestamp = System.currentTimeMillis();
             DocumentMetaData dmd = pp.process(hl7Text);

Modified: ctakes/trunk/ctakes-preprocessor/src/test/java/org/apache/ctakes/preprocessor/ClinicalNotePreProcessorTest.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-preprocessor/src/test/java/org/apache/ctakes/preprocessor/ClinicalNotePreProcessorTest.java?rev=1676753&r1=1676752&r2=1676753&view=diff
==============================================================================
--- ctakes/trunk/ctakes-preprocessor/src/test/java/org/apache/ctakes/preprocessor/ClinicalNotePreProcessorTest.java (original)
+++ ctakes/trunk/ctakes-preprocessor/src/test/java/org/apache/ctakes/preprocessor/ClinicalNotePreProcessorTest.java Wed Apr 29 14:38:06 2015
@@ -29,6 +29,7 @@ import java.net.URL;
 import java.net.URLDecoder;
 import java.util.Map;
 
+import org.apache.ctakes.core.resource.FileLocator;
 import org.apache.ctakes.preprocessor.ClinicalNotePreProcessor;
 import org.apache.ctakes.preprocessor.DocumentMetaData;
 
@@ -63,8 +64,7 @@ public class ClinicalNotePreProcessorTes
         super.setUp();
 
         String dtdLocation = "src/test/resources/NotesIIST_RTF.DTD";
-        File dtd = new File(dtdLocation);
-        iv_cnotePreProcessor = new ClinicalNotePreProcessor(dtd, false);
+        iv_cnotePreProcessor = new ClinicalNotePreProcessor(FileLocator.getAsStream(dtdLocation), false);
 
         String cnoteLocationOnCp = "src/test/resources/testpatient_cn_1.xml";
         String cnoteLocation = new File(cnoteLocationOnCp).getPath();



Mime
View raw message