Return-Path: X-Original-To: apmail-ctakes-commits-archive@www.apache.org Delivered-To: apmail-ctakes-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 79D0F18C26 for ; Fri, 4 Dec 2015 21:23:53 +0000 (UTC) Received: (qmail 45469 invoked by uid 500); 4 Dec 2015 21:23:44 -0000 Delivered-To: apmail-ctakes-commits-archive@ctakes.apache.org Received: (qmail 45431 invoked by uid 500); 4 Dec 2015 21:23:44 -0000 Mailing-List: contact commits-help@ctakes.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@ctakes.apache.org Delivered-To: mailing list commits@ctakes.apache.org Received: (qmail 45422 invoked by uid 99); 4 Dec 2015 21:23:43 -0000 Received: from Unknown (HELO spamd1-us-west.apache.org) (209.188.14.142) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 04 Dec 2015 21:23:43 +0000 Received: from localhost (localhost [127.0.0.1]) by spamd1-us-west.apache.org (ASF Mail Server at spamd1-us-west.apache.org) with ESMTP id 714FFC6191 for ; Fri, 4 Dec 2015 21:23:43 +0000 (UTC) X-Virus-Scanned: Debian amavisd-new at spamd1-us-west.apache.org X-Spam-Flag: NO X-Spam-Score: 1.79 X-Spam-Level: * X-Spam-Status: No, score=1.79 tagged_above=-999 required=6.31 tests=[KAM_ASCII_DIVIDERS=0.8, KAM_LAZY_DOMAIN_SECURITY=1, T_RP_MATCHES_RCVD=-0.01] autolearn=disabled Received: from mx1-us-west.apache.org ([10.40.0.8]) by localhost (spamd1-us-west.apache.org [10.40.0.7]) (amavisd-new, port 10024) with ESMTP id NvwFv4pkCy0I for ; Fri, 4 Dec 2015 21:23:41 +0000 (UTC) Received: from mailrelay1-us-west.apache.org (mailrelay1-us-west.apache.org [209.188.14.139]) by mx1-us-west.apache.org (ASF Mail Server at mx1-us-west.apache.org) with ESMTP id 8CD032021D for ; Fri, 4 Dec 2015 21:23:41 +0000 (UTC) Received: from svn01-us-west.apache.org (svn.apache.org [10.41.0.6]) by mailrelay1-us-west.apache.org (ASF Mail Server at mailrelay1-us-west.apache.org) with ESMTP id 3676EE0280 for ; Fri, 4 Dec 2015 21:23:41 +0000 (UTC) Received: from svn01-us-west.apache.org (localhost [127.0.0.1]) by svn01-us-west.apache.org (ASF Mail Server at svn01-us-west.apache.org) with ESMTP id 351183A0A1D for ; Fri, 4 Dec 2015 21:23:41 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1718033 - in /ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/property: ./ plaintext/ plaintext/PropertyTextWriter.java plaintext/PropertyTextWriterFit.java plaintext/PropertyTextWriterUima.java Date: Fri, 04 Dec 2015 21:23:41 -0000 To: commits@ctakes.apache.org From: seanfinan@apache.org X-Mailer: svnmailer-1.0.9 Message-Id: <20151204212341.351183A0A1D@svn01-us-west.apache.org> Author: seanfinan Date: Fri Dec 4 21:23:40 2015 New Revision: 1718033 URL: http://svn.apache.org/viewvc?rev=1718033&view=rev Log: Simple Cas Consumer that writes to file the basic properties of all events and entities, including their relations Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/property/ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/property/plaintext/ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/property/plaintext/PropertyTextWriter.java ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/property/plaintext/PropertyTextWriterFit.java ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/property/plaintext/PropertyTextWriterUima.java Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/property/plaintext/PropertyTextWriter.java URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/property/plaintext/PropertyTextWriter.java?rev=1718033&view=auto ============================================================================== --- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/property/plaintext/PropertyTextWriter.java (added) +++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/property/plaintext/PropertyTextWriter.java Fri Dec 4 21:23:40 2015 @@ -0,0 +1,289 @@ +package org.apache.ctakes.core.cc.property.plaintext; + + +import org.apache.ctakes.core.cc.pretty.SemanticGroup; +import org.apache.ctakes.core.util.DocumentIDAnnotationUtil; +import org.apache.ctakes.core.util.OntologyConceptUtil; +import org.apache.ctakes.typesystem.type.refsem.*; +import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation; +import org.apache.ctakes.typesystem.type.textsem.*; +import org.apache.ctakes.typesystem.type.textspan.Sentence; +import org.apache.log4j.Logger; +import org.apache.uima.cas.text.AnnotationFS; +import org.apache.uima.fit.util.JCasUtil; +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.tcas.Annotation; + +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.util.*; + +/** + * Writes Document event and anatomic information to file. + * + * @author SPF , chip-nlp + * @version %I% + * @since 10/15/2015 + */ +public class PropertyTextWriter { + + static private final Logger LOGGER = Logger.getLogger( "PropertyTextWriter" ); + static private final String FILE_EXTENSION = ".properties.txt"; + + private String _outputDirPath; + + // TODO Abstract common methods for PropertyTextWriter and PrettyTextWriter + + /** + * @param outputDirectoryPath may be empty or null, in which case the current working directory is used + * @throws IllegalArgumentException if the provided path points to a File and not a Directory + * @throws SecurityException if the File System has issues + */ + public void setOutputDirectory( final String outputDirectoryPath ) throws IllegalArgumentException, + SecurityException { + // If no outputDir is specified (null or empty) the current working directory will be used. Else check path. + if ( outputDirectoryPath == null || outputDirectoryPath.isEmpty() ) { + _outputDirPath = ""; + LOGGER.debug( "No Output Directory Path specified, using current working directory " + + System.getProperty( "user.dir" ) ); + return; + } + final File outputDir = new File( outputDirectoryPath ); + if ( !outputDir.exists() ) { + outputDir.mkdirs(); + } + if ( !outputDir.isDirectory() ) { + throw new IllegalArgumentException( outputDirectoryPath + " is not a valid directory path" ); + } + _outputDirPath = outputDirectoryPath; + LOGGER.debug( "Output Directory Path set to " + _outputDirPath ); + } + + /** + * Process the jcas and write sentence property lists to file. + * Filename is based upon the document id stored in the cas + * + * @param jcas ye olde ... + */ + public void process( final JCas jcas ) { + LOGGER.info( "Starting processing" ); + final String docId = DocumentIDAnnotationUtil.getDocumentIdForFile( jcas ); + File outputFile; + if ( _outputDirPath == null || _outputDirPath.isEmpty() ) { + outputFile = new File( docId + FILE_EXTENSION ); + } else { + outputFile = new File( _outputDirPath, docId + FILE_EXTENSION ); + } + try ( final BufferedWriter writer = new BufferedWriter( new FileWriter( outputFile ) ) ) { + final Collection sentences = JCasUtil.select( jcas, Sentence.class ); + for ( Sentence sentence : sentences ) { + writeSentence( jcas, sentence, writer ); + } + } catch ( IOException ioE ) { + LOGGER.error( "Could not not write pretty property file " + outputFile.getPath() ); + LOGGER.error( ioE.getMessage() ); + } + LOGGER.info( "Finished processing" ); + } + + /** + * Write a sentence and list of event and anatomical site properties from the document text + * + * @param jcas ye olde ... + * @param sentence annotation containing the sentence + * @param writer writer to which property lists for the sentence should be written + * @throws IOException if the writer has issues + */ + static public void writeSentence( final JCas jcas, + final AnnotationFS sentence, + final BufferedWriter writer ) throws IOException { + final String sentenceText = sentence.getCoveredText().trim(); + if ( sentenceText.isEmpty() ) { + return; + } + final Collection identifiedAnnotations + = JCasUtil.selectCovered( jcas, IdentifiedAnnotation.class, sentence ); + if ( identifiedAnnotations.isEmpty() ) { + return; + } + writer.write( sentenceText ); + writer.newLine(); + for ( IdentifiedAnnotation annotation : identifiedAnnotations ) { + final Map> semanticConcepts = getSemanticConcepts( annotation ); + if ( semanticConcepts.isEmpty() ) { + continue; + } + // write line with actual text, polarity, + writer.write( "\"" + annotation.getCoveredText() + "\"" + getAnnotationProperties( annotation ) ); + if ( annotation instanceof EventMention ) { + writer.write( getEventProperties( (EventMention)annotation ) ); + } else if ( annotation instanceof AnatomicalSiteMention ) { + writer.write( getAnatomicalProperties( (AnatomicalSiteMention)annotation ) ); + } + writer.newLine(); + // Write listing of UmlsConcept info, grouped by ctakes semantic group + for ( Map.Entry> umlsConcepts : semanticConcepts.entrySet() ) { + writer.write( " \t" + umlsConcepts.getKey() ); + writer.newLine(); + for ( UmlsConcept umlsConcept : umlsConcepts.getValue() ) { + final String preferredText = umlsConcept.getPreferredText(); + writer.write( " \t \t" + umlsConcept.getCui() + (preferredText != null ? " " + preferredText : "") ); + writer.newLine(); + } + } + final Collection relations = getRelations( jcas, annotation ); + for ( String relation : relations ) { + writer.write( " \t" + relation ); + writer.newLine(); + } + } + writer.newLine(); + } + + + /** + * @param identifiedAnnotation an annotation of interest + * @return map of semantic type names and umls concepts within those types as they apply to the annotation + */ + static private Map> getSemanticConcepts( + final IdentifiedAnnotation identifiedAnnotation ) { + final Collection umlsConcepts = OntologyConceptUtil.getConcepts( identifiedAnnotation ); + if ( umlsConcepts == null || umlsConcepts.isEmpty() ) { + return Collections.emptyMap(); + } + final Map> semanticConcepts = new HashMap<>(); + final Collection usedCuis = new HashSet<>(); + for ( UmlsConcept umlsConcept : umlsConcepts ) { + if ( usedCuis.contains( umlsConcept.getCui() ) ) { + continue; + } + usedCuis.add( umlsConcept.getCui() ); + final String tui = umlsConcept.getTui(); + final String semanticName = SemanticGroup.getSemanticName( tui ); + Collection concepts = semanticConcepts.get( semanticName ); + if ( concepts == null ) { + concepts = new HashSet<>(); + semanticConcepts.put( semanticName, concepts ); + } + concepts.add( umlsConcept ); + } + return semanticConcepts; + } + + /** + * @param annotation - + * @return a line of text with doctimerel, modality, aspect and permanence ; if available + */ + static private String getAnnotationProperties( final IdentifiedAnnotation annotation ) { + final StringBuilder sb = new StringBuilder(); + if ( annotation.getPolarity() < 0 ) { + sb.append( " negated" ); + } + if ( annotation.getUncertainty() == 1 ) { + sb.append( " uncertain" ); + } + if ( annotation.getGeneric() ) { + sb.append( " generic" ); + } + if ( annotation.getConditional() ) { + sb.append( " conditional" ); + } + if ( annotation.getHistoryOf() == 1 ) { + sb.append( " in history" ); + } + if ( annotation.getSubject() != null && !annotation.getSubject().isEmpty() ) { + sb.append( " for " ).append( annotation.getSubject() ); + } + return sb.toString(); + } + + /** + * @param eventMention - + * @return a line of text with doctimerel, modality, aspect and permanence ; if available + */ + static private String getEventProperties( final EventMention eventMention ) { + final Event event = eventMention.getEvent(); + if ( event == null ) { + return ""; + } + final EventProperties eventProperties = event.getProperties(); + if ( eventProperties == null ) { + return ""; + } + final StringBuilder sb = new StringBuilder(); + sb.append( " occurred " ); + sb.append( eventProperties.getDocTimeRel().toLowerCase() ); + sb.append( " document time" ); + // modality is: Actual, hypothetical, hedged, generic + final String modality = eventProperties.getContextualModality(); + if ( modality != null && !modality.isEmpty() ) { + sb.append( ", " ); + sb.append( modality.toLowerCase() ); + } + // Aspect is: Intermittent (or not) + final String aspect = eventProperties.getContextualAspect(); + if ( aspect != null && !aspect.isEmpty() ) { + sb.append( ", " ); + sb.append( aspect.toLowerCase() ); + } + // Permanence is: Finite or permanent + final String permanence = eventProperties.getPermanence(); + if ( permanence != null && !permanence.isEmpty() ) { + sb.append( ", " ); + sb.append( permanence.toLowerCase() ); + } + return sb.toString(); + } + + /** + * @param anatomicalSite - + * @return a line of text with body laterality and side ; if available + */ + static private String getAnatomicalProperties( final AnatomicalSiteMention anatomicalSite ) { + StringBuilder sb = new StringBuilder(); + final BodyLateralityModifier laterality = anatomicalSite.getBodyLaterality(); + if ( laterality != null ) { + final Attribute normalized = laterality.getNormalizedForm(); + if ( normalized != null && normalized instanceof BodyLaterality ) { + sb.append( ", " ); + sb.append( ((BodyLaterality)normalized).getValue() ); + } + } + final BodySideModifier bodySide = anatomicalSite.getBodySide(); + if ( bodySide != null ) { + final Attribute normalized = bodySide.getNormalizedForm(); + if ( normalized != null && normalized instanceof BodySide ) { + sb.append( ", " ); + sb.append( ((BodySide)normalized).getValue() ); + } + } + return sb.toString(); + } + + /** + * @param jcas ye olde ... + * @param annotation of interest + * @return all relations with the given annotation as the first or second argument + */ + static private Collection getRelations( final JCas jcas, final IdentifiedAnnotation annotation ) { + final Collection relations = JCasUtil.select( jcas, BinaryTextRelation.class ); + if ( relations == null || relations.isEmpty() ) { + return Collections.emptyList(); + } + final Collection relationTexts = new ArrayList<>(); + for ( BinaryTextRelation relation : relations ) { + final Annotation argument1 = relation.getArg1().getArgument(); + final Annotation argument2 = relation.getArg2().getArgument(); + if ( annotation.equals( argument1 ) || annotation.equals( argument2 ) ) { + relationTexts.add( argument1.getCoveredText() + + " " + relation.getCategory().toLowerCase() + + " " + argument2.getCoveredText() ); + } + } + return relationTexts; + } + + +} Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/property/plaintext/PropertyTextWriterFit.java URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/property/plaintext/PropertyTextWriterFit.java?rev=1718033&view=auto ============================================================================== --- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/property/plaintext/PropertyTextWriterFit.java (added) +++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/property/plaintext/PropertyTextWriterFit.java Fri Dec 4 21:23:40 2015 @@ -0,0 +1,100 @@ +package org.apache.ctakes.core.cc.property.plaintext; + +import org.apache.uima.UimaContext; +import org.apache.uima.analysis_engine.AnalysisEngineDescription; +import org.apache.uima.analysis_engine.AnalysisEngineProcessException; +import org.apache.uima.cas.CAS; +import org.apache.uima.cas.CASException; +import org.apache.uima.fit.component.CasConsumer_ImplBase; +import org.apache.uima.fit.descriptor.ConfigurationParameter; +import org.apache.uima.fit.factory.AnalysisEngineFactory; +import org.apache.uima.jcas.JCas; +import org.apache.uima.resource.ResourceInitializationException; + +import static org.apache.ctakes.core.config.ConfigParameterConstants.DESC_OUTPUTDIR; +import static org.apache.ctakes.core.config.ConfigParameterConstants.PARAM_OUTPUTDIR; + +/** + * Writes Document event and anatomic information to file. + * This version can be used in the UimaFit style with {@link org.apache.uima.fit.descriptor.ConfigurationParameter} + * It cannot be used in the old Uima CPE style (e.g. the Uima CPE Gui) as the Uima CPE has problems with Fit Consumers. + * There is a version that can be used with the CPE GUI: + * {@link org.apache.ctakes.core.cc.property.plaintext.PropertyTextWriterUima} + * + * @author SPF , chip-nlp + * @version %I% + * @since 10/15/2015 + */ +public class PropertyTextWriterFit extends CasConsumer_ImplBase { + + @ConfigurationParameter( + name = PARAM_OUTPUTDIR, + mandatory = false, + description = DESC_OUTPUTDIR, + defaultValue = "" + ) + private String fitOutputDirectoryPath; + +// static private final Logger LOGGER = Logger.getLogger( "PropertyTextWriterFit" ); + + // delegate + final private PropertyTextWriter _propertyTextWriter; + + public PropertyTextWriterFit() { + super(); + _propertyTextWriter = new PropertyTextWriter(); + } + + /** + * {@inheritDoc} + */ + @Override + public void initialize( final UimaContext uimaContext ) throws ResourceInitializationException { + super.initialize( uimaContext ); + try { + if ( fitOutputDirectoryPath != null ) { + _propertyTextWriter.setOutputDirectory( fitOutputDirectoryPath ); + } else { + _propertyTextWriter.setOutputDirectory( (String)uimaContext.getConfigParameterValue( PARAM_OUTPUTDIR ) ); + } + } catch ( IllegalArgumentException | SecurityException multE ) { + // thrown if the path specifies a File (not Dir) or by file system access methods + throw new ResourceInitializationException( multE ); + } + } + + + /** + * {@inheritDoc} + */ + @Override + public void process( final CAS aCAS ) throws AnalysisEngineProcessException { + JCas jcas; + try { + jcas = aCAS.getJCas(); + } catch ( CASException casE ) { + throw new AnalysisEngineProcessException( casE ); + } + _propertyTextWriter.process( jcas ); + } + + /** + * @return This Cas Consumer as an Analysis Engine + * @throws org.apache.uima.resource.ResourceInitializationException if anything went wrong + */ + static public AnalysisEngineDescription createAnnotatorDescription() throws ResourceInitializationException { + return createAnnotatorDescription( "" ); + } + + /** + * @param outputDirectoryPath may be empty or null, in which case the current working directory is used + * @return This Cas Consumer as an Analysis Engine + * @throws org.apache.uima.resource.ResourceInitializationException if anything went wrong + */ + static public AnalysisEngineDescription createAnnotatorDescription( final String outputDirectoryPath ) + throws ResourceInitializationException { + return AnalysisEngineFactory.createEngineDescription( PropertyTextWriterFit.class, + PARAM_OUTPUTDIR, outputDirectoryPath ); + } + +} Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/property/plaintext/PropertyTextWriterUima.java URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/property/plaintext/PropertyTextWriterUima.java?rev=1718033&view=auto ============================================================================== --- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/property/plaintext/PropertyTextWriterUima.java (added) +++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/property/plaintext/PropertyTextWriterUima.java Fri Dec 4 21:23:40 2015 @@ -0,0 +1,64 @@ +package org.apache.ctakes.core.cc.property.plaintext; + +import org.apache.uima.analysis_engine.AnalysisEngineProcessException; +import org.apache.uima.cas.CAS; +import org.apache.uima.cas.CASException; +import org.apache.uima.collection.CasConsumer_ImplBase; +import org.apache.uima.jcas.JCas; +import org.apache.uima.resource.ResourceInitializationException; + +import java.util.logging.Logger; + +import static org.apache.ctakes.core.config.ConfigParameterConstants.PARAM_OUTPUTDIR; + +/** + * Writes Document event and anatomic information to file. + * This can be used with the old descriptor .xml files and the UIMA CPE Gui. For a UimaFit PropertyTextWriter, use + * {@link org.apache.ctakes.core.cc.property.plaintext.PropertyTextWriterFit} + * + * @author SPF , chip-nlp + * @version %I% + * @since 10/15/2015 + */ +public class PropertyTextWriterUima extends CasConsumer_ImplBase { + + + static private final Logger LOGGER = Logger.getLogger( "PrettyTextWriterUima" ); + + // delegate + final private PropertyTextWriter _propertyTextWriter; + + public PropertyTextWriterUima() { + super(); + _propertyTextWriter = new PropertyTextWriter(); + } + + /** + * {@inheritDoc} + */ + @Override + public void initialize() throws ResourceInitializationException { + super.initialize(); + try { + _propertyTextWriter.setOutputDirectory( (String)getConfigParameterValue( PARAM_OUTPUTDIR ) ); + } catch ( IllegalArgumentException | SecurityException multE ) { + // thrown if the path specifies a File (not Dir) or by file system access methods + throw new ResourceInitializationException( multE ); + } + } + + /** + * {@inheritDoc} + */ + @Override + public void processCas( final CAS aCAS ) throws AnalysisEngineProcessException { + JCas jcas; + try { + jcas = aCAS.getJCas(); + } catch ( CASException casE ) { + throw new AnalysisEngineProcessException( casE ); + } + _propertyTextWriter.process( jcas ); + } + +}