Return-Path: X-Original-To: apmail-ctakes-commits-archive@www.apache.org Delivered-To: apmail-ctakes-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 7330818A93 for ; Fri, 1 Apr 2016 21:51:51 +0000 (UTC) Received: (qmail 78610 invoked by uid 500); 1 Apr 2016 21:51:51 -0000 Delivered-To: apmail-ctakes-commits-archive@ctakes.apache.org Received: (qmail 78531 invoked by uid 500); 1 Apr 2016 21:51:51 -0000 Mailing-List: contact commits-help@ctakes.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@ctakes.apache.org Delivered-To: mailing list commits@ctakes.apache.org Received: (qmail 78522 invoked by uid 99); 1 Apr 2016 21:51:51 -0000 Received: from pnap-us-west-generic-nat.apache.org (HELO spamd1-us-west.apache.org) (209.188.14.142) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 01 Apr 2016 21:51:51 +0000 Received: from localhost (localhost [127.0.0.1]) by spamd1-us-west.apache.org (ASF Mail Server at spamd1-us-west.apache.org) with ESMTP id BF38EC2D20 for ; Fri, 1 Apr 2016 21:51:50 +0000 (UTC) X-Virus-Scanned: Debian amavisd-new at spamd1-us-west.apache.org X-Spam-Flag: NO X-Spam-Score: 0.804 X-Spam-Level: X-Spam-Status: No, score=0.804 tagged_above=-999 required=6.31 tests=[KAM_ASCII_DIVIDERS=0.8, KAM_LAZY_DOMAIN_SECURITY=1, RP_MATCHES_RCVD=-0.996] autolearn=disabled Received: from mx1-lw-us.apache.org ([10.40.0.8]) by localhost (spamd1-us-west.apache.org [10.40.0.7]) (amavisd-new, port 10024) with ESMTP id TiA6VcX4Ik6X for ; Fri, 1 Apr 2016 21:51:49 +0000 (UTC) Received: from mailrelay1-us-west.apache.org (mailrelay1-us-west.apache.org [209.188.14.139]) by mx1-lw-us.apache.org (ASF Mail Server at mx1-lw-us.apache.org) with ESMTP id 1B96B5F23D for ; Fri, 1 Apr 2016 21:51:49 +0000 (UTC) Received: from svn01-us-west.apache.org (svn.apache.org [10.41.0.6]) by mailrelay1-us-west.apache.org (ASF Mail Server at mailrelay1-us-west.apache.org) with ESMTP id 5248EE01D9 for ; Fri, 1 Apr 2016 21:51:48 +0000 (UTC) Received: from svn01-us-west.apache.org (localhost [127.0.0.1]) by svn01-us-west.apache.org (ASF Mail Server at svn01-us-west.apache.org) with ESMTP id 2DA0A3A0230 for ; Fri, 1 Apr 2016 21:51:48 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1737445 - in /ctakes/trunk: ctakes-core/desc/cas_consumer/ ctakes-core/src/main/java/org/apache/ctakes/core/cc/ ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/plaintext/ ctakes-core/src/main/java/org/apache/ctakes/core/util/ ct... Date: Fri, 01 Apr 2016 21:51:48 -0000 To: commits@ctakes.apache.org From: seanfinan@apache.org X-Mailer: svnmailer-1.0.9 Message-Id: <20160401215148.2DA0A3A0230@svn01-us-west.apache.org> Author: seanfinan Date: Fri Apr 1 21:51:47 2016 New Revision: 1737445 URL: http://svn.apache.org/viewvc?rev=1737445&view=rev Log: Some changes to simple text writer consumers Dictionary updates Added: ctakes/trunk/ctakes-core/desc/cas_consumer/PropertyTextWriter.xml Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/SentenceTokensPrinter.java ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/plaintext/PrettyTextWriterFit.java ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/OntologyConceptUtil.java ctakes/trunk/ctakes-dictionary-lookup-fast/pom.xml ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/concept/BsvConceptFactory.java ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/concept/DefaultConcept.java ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/SemanticCleanupTermConsumer.java ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/dictionary/BsvRareWordDictionary.java ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/env/EnvironmentVariable.java Added: ctakes/trunk/ctakes-core/desc/cas_consumer/PropertyTextWriter.xml URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/desc/cas_consumer/PropertyTextWriter.xml?rev=1737445&view=auto ============================================================================== --- ctakes/trunk/ctakes-core/desc/cas_consumer/PropertyTextWriter.xml (added) +++ ctakes/trunk/ctakes-core/desc/cas_consumer/PropertyTextWriter.xml Fri Apr 1 21:51:47 2016 @@ -0,0 +1,65 @@ + + + + org.apache.uima.java + org.apache.ctakes.core.cc.property.plaintext.PropertyTextWriterUima + + PropertyTextWriter + Cas Consumer that writes event and anatomic site properties to file(s) or console. + 1.0 + Apache Software Foundation + + + OutputDirectory + Directory to which files should be saved + String + false + false + + + + + OutputDirectory + + + CHANGE ME + + + + + + + + + false + true + + + + + + + + + + + Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/SentenceTokensPrinter.java URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/SentenceTokensPrinter.java?rev=1737445&r1=1737444&r2=1737445&view=diff ============================================================================== --- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/SentenceTokensPrinter.java (original) +++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/SentenceTokensPrinter.java Fri Apr 1 21:51:47 2016 @@ -23,6 +23,8 @@ import java.util.ArrayList; import java.util.Collection; import java.util.List; +import static org.apache.ctakes.core.config.ConfigParameterConstants.PARAM_OUTPUTDIR; + /** * Saves the (base) tokens of each sentence on a separate line, separated by spaces * @@ -35,8 +37,6 @@ public class SentenceTokensPrinter exten // LOG4J logger based on interface name final static private Logger LOGGER = Logger.getLogger( "SentenceTokensPrinter" ); - public static final String PARAM_OUTPUTDIR = "OutputDirectory"; - private String _outputDirPath; @@ -54,7 +54,8 @@ public class SentenceTokensPrinter exten final File outputDirectory = new File( outputDirPath ); if ( !outputDirectory.exists() && !outputDirectory.mkdirs() ) { throw new ResourceInitializationException( - new IOException( "Parameter setting 'OutputDirectory' does not point to an existing directory" + + new IOException( "Parameter setting " + PARAM_OUTPUTDIR + + " does not point to an existing directory" + " or one that could be created." ) ); } _outputDirPath = outputDirPath; Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/plaintext/PrettyTextWriterFit.java URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/plaintext/PrettyTextWriterFit.java?rev=1737445&r1=1737444&r2=1737445&view=diff ============================================================================== --- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/plaintext/PrettyTextWriterFit.java (original) +++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/plaintext/PrettyTextWriterFit.java Fri Apr 1 21:51:47 2016 @@ -31,7 +31,6 @@ import static org.apache.ctakes.core.con */ public class PrettyTextWriterFit extends CasConsumer_ImplBase { - // UimaFit magically sets the value of the first instance variable to the parameter value - desired or otherwise @ConfigurationParameter( name = PARAM_OUTPUTDIR, mandatory = false, Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/OntologyConceptUtil.java URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/OntologyConceptUtil.java?rev=1737445&r1=1737444&r2=1737445&view=diff ============================================================================== --- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/OntologyConceptUtil.java (original) +++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/OntologyConceptUtil.java Fri Apr 1 21:51:47 2016 @@ -169,7 +169,7 @@ final public class OntologyConceptUtil { /** * @param jcas - - * @return set of all tuis in jcas + * @return set of all ontology codes in jcas */ static public Map> getSchemeCodes( final JCas jcas ) { return getSchemeCodes( JCasUtil.select( jcas, IdentifiedAnnotation.class ) ); @@ -177,7 +177,7 @@ final public class OntologyConceptUtil { /** * @param jcas - - * @return set of all tuis in jcas + * @return set of all ontology codes in jcas */ static public Collection getCodes( final JCas jcas ) { return getCodes( JCasUtil.select( jcas, IdentifiedAnnotation.class ) ); @@ -201,7 +201,7 @@ final public class OntologyConceptUtil { /** * @param jcas - * @param lookupWindow - - * @return set of all cuis in jcas + * @return set of all cuis in lookupWindow */ static public Collection getCuis( final JCas jcas, final T lookupWindow ) { return getCuis( JCasUtil.selectCovered( jcas, IdentifiedAnnotation.class, lookupWindow ) ); @@ -210,7 +210,7 @@ final public class OntologyConceptUtil { /** * @param jcas - * @param lookupWindow - - * @return set of all tuis in jcas + * @return set of all tuis in lookupWindow */ static public Collection getTuis( final JCas jcas, final T lookupWindow ) { return getTuis( JCasUtil.selectCovered( jcas, IdentifiedAnnotation.class, lookupWindow ) ); @@ -219,7 +219,7 @@ final public class OntologyConceptUtil { /** * @param jcas - * @param lookupWindow - - * @return set of all tuis in jcas + * @return map of all schemes and their codes in lookupWindow */ static public Map> getSchemeCodes( final JCas jcas, final T lookupWindow ) { @@ -229,7 +229,7 @@ final public class OntologyConceptUtil { /** * @param jcas - * @param lookupWindow - - * @return set of all tuis in jcas + * @return set of all codes in lookupWindow */ static public Collection getCodes( final JCas jcas, final T lookupWindow ) { return getCodes( JCasUtil.selectCovered( jcas, IdentifiedAnnotation.class, lookupWindow ) ); Modified: ctakes/trunk/ctakes-dictionary-lookup-fast/pom.xml URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-lookup-fast/pom.xml?rev=1737445&r1=1737444&r2=1737445&view=diff ============================================================================== --- ctakes/trunk/ctakes-dictionary-lookup-fast/pom.xml (original) +++ ctakes/trunk/ctakes-dictionary-lookup-fast/pom.xml Fri Apr 1 21:51:47 2016 @@ -46,33 +46,11 @@ org.apache.ctakes ctakes-core - log4j log4j - org.apache.lucene - lucene-core - - - org.apache.lucene - lucene-queries - - - org.apache.lucene - lucene-queryparser - - - org.apache.lucene - lucene-analyzers-common - - jdom jdom @@ -80,35 +58,7 @@ junit junit - - - - - - org.apache.maven.plugins - maven-dependency-plugin - - - unpack-dependencies - process-resources - - unpack-dependencies - - - ctakes-resources-umls2011ab - - ${project.build.directory}/classes - - - - - Modified: ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/concept/BsvConceptFactory.java URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/concept/BsvConceptFactory.java?rev=1737445&r1=1737444&r2=1737445&view=diff ============================================================================== --- ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/concept/BsvConceptFactory.java (original) +++ ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/concept/BsvConceptFactory.java Fri Apr 1 21:51:47 2016 @@ -11,7 +11,6 @@ import org.apache.uima.UimaContext; import java.io.BufferedReader; import java.io.IOException; -import java.io.InputStream; import java.io.InputStreamReader; import java.util.*; @@ -88,37 +87,29 @@ final public class BsvConceptFactory imp *

* If the TUI column is omitted then the entityId for the dictionary is used as the TUI *

- * // * @param bsvFile file containing term rows and bsv columns * * @param bsvFilePath file containing term rows and bsv columns * @return collection of all valid terms read from the bsv file */ static private Collection parseBsvFile( final String bsvFilePath ) { - InputStream bsvFile = null; - try { - bsvFile = FileLocator.getAsStream( bsvFilePath ); - } catch ( IOException ioE ) { - ioE.getMessage(); - return Collections.emptyList(); - } final Collection cuiTuiTerms = new ArrayList<>(); - try ( final BufferedReader reader = new BufferedReader( new InputStreamReader( bsvFile ) ) ) { + try ( final BufferedReader reader + = new BufferedReader( new InputStreamReader( FileLocator.getAsStream( bsvFilePath ) ) ) ) { String line = reader.readLine(); while ( line != null ) { if ( line.startsWith( "//" ) || line.startsWith( "#" ) ) { + line = reader.readLine(); continue; } final String[] columns = LookupUtil.fastSplit( line, '|' ); final CuiTuiTerm cuiTuiTerm = createCuiTuiTerm( columns ); if ( cuiTuiTerm != null ) { - // Add to the dictionary cuiTuiTerms.add( cuiTuiTerm ); } else { LOGGER.warn( "Bad BSV line " + line + " in " + bsvFilePath ); } line = reader.readLine(); } - reader.close(); } catch ( IOException ioE ) { LOGGER.error( ioE.getMessage() ); } Modified: ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/concept/DefaultConcept.java URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/concept/DefaultConcept.java?rev=1737445&r1=1737444&r2=1737445&view=diff ============================================================================== --- ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/concept/DefaultConcept.java (original) +++ ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/concept/DefaultConcept.java Fri Apr 1 21:51:47 2016 @@ -9,7 +9,7 @@ import org.apache.ctakes.typesystem.type import javax.annotation.concurrent.Immutable; import java.util.Collection; import java.util.Collections; -import java.util.HashSet; +import java.util.stream.Collectors; /** * Author: SPF @@ -38,7 +38,7 @@ final public class DefaultConcept implem * @param preferredText - */ public DefaultConcept( final String cui, final String preferredText ) { - this( cui, preferredText, new HashSetMap() ); + this( cui, preferredText, new HashSetMap<>() ); } /** @@ -51,11 +51,10 @@ final public class DefaultConcept implem _cui = cui; _preferredText = preferredText; _codes = new ImmutableCollectionMap<>( codes ); - final Collection ctakesSemantics = new HashSet<>(); - for ( String tui : getCodes( TUI ) ) { - // Attempt to obtain one or more valid type ids from the tuis of the term - ctakesSemantics.add( SemanticUtil.getTuiSemanticGroupId( tui ) ); - } + // Attempt to obtain one or more valid type ids from the tuis of the term + final Collection ctakesSemantics = getCodes( TUI ).stream() + .map( SemanticUtil::getTuiSemanticGroupId ) + .collect( Collectors.toSet() ); if ( ctakesSemantics.isEmpty() ) { ctakesSemantics.add( CONST.NE_TYPE_ID_UNKNOWN ); } Modified: ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/SemanticCleanupTermConsumer.java URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/SemanticCleanupTermConsumer.java?rev=1737445&r1=1737444&r2=1737445&view=diff ============================================================================== --- ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/SemanticCleanupTermConsumer.java (original) +++ ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/SemanticCleanupTermConsumer.java Fri Apr 1 21:51:47 2016 @@ -72,11 +72,10 @@ public class SemanticCleanupTermConsumer groupedSemanticCuis.put( cTakesSemantic, semanticTerms ); } // Clean up sign/symptoms and disease/disorder spans that are also anatomical sites - if ( groupedSemanticCuis.containsKey( CONST.NE_TYPE_ID_ANATOMICAL_SITE ) ) { - if ( groupedSemanticCuis.containsKey( CONST.NE_TYPE_ID_FINDING ) ) { - for ( TextSpan anatomicalSpan : groupedSemanticCuis.get( CONST.NE_TYPE_ID_ANATOMICAL_SITE ).keySet() ) { - groupedSemanticCuis.get( CONST.NE_TYPE_ID_FINDING ).remove( anatomicalSpan ); - } + if ( groupedSemanticCuis.containsKey( CONST.NE_TYPE_ID_ANATOMICAL_SITE ) + && groupedSemanticCuis.containsKey( CONST.NE_TYPE_ID_FINDING ) ) { + for ( TextSpan anatomicalSpan : groupedSemanticCuis.get( CONST.NE_TYPE_ID_ANATOMICAL_SITE ).keySet() ) { + groupedSemanticCuis.get( CONST.NE_TYPE_ID_FINDING ).remove( anatomicalSpan ); } if ( groupedSemanticCuis.containsKey( CONST.NE_TYPE_ID_DISORDER ) ) { for ( TextSpan anatomicalSpan : groupedSemanticCuis.get( CONST.NE_TYPE_ID_ANATOMICAL_SITE ).keySet() ) { Modified: ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/dictionary/BsvRareWordDictionary.java URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/dictionary/BsvRareWordDictionary.java?rev=1737445&r1=1737444&r2=1737445&view=diff ============================================================================== --- ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/dictionary/BsvRareWordDictionary.java (original) +++ ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/dictionary/BsvRareWordDictionary.java Fri Apr 1 21:51:47 2016 @@ -28,11 +28,9 @@ import org.apache.uima.UimaContext; import java.io.BufferedReader; import java.io.IOException; -import java.io.InputStream; import java.io.InputStreamReader; import java.util.ArrayList; import java.util.Collection; -import java.util.Collections; import java.util.Properties; import static org.apache.ctakes.dictionary.lookup2.dictionary.RareWordTermMapCreator.CuiTerm; @@ -113,31 +111,24 @@ final public class BsvRareWordDictionary * @return collection of all valid terms read from the bsv file */ static private Collection parseBsvFile( final String bsvFilePath ) { - InputStream bsvFile = null; - try { - bsvFile = FileLocator.getAsStream( bsvFilePath ); - } catch ( IOException ioE ) { - ioE.getMessage(); - return Collections.emptyList(); - } final Collection cuiTerms = new ArrayList<>(); - try ( final BufferedReader reader = new BufferedReader( new InputStreamReader( bsvFile ) ) ) { + try ( final BufferedReader reader + = new BufferedReader( new InputStreamReader( FileLocator.getAsStream( bsvFilePath ) ) ) ) { String line = reader.readLine(); while ( line != null ) { if ( line.startsWith( "//" ) || line.startsWith( "#" ) ) { + line = reader.readLine(); continue; } final String[] columns = LookupUtil.fastSplit( line, '|' ); final CuiTerm cuiTerm = createCuiTuiTerm( columns ); if ( cuiTerm != null ) { - // Add to the dictionary cuiTerms.add( cuiTerm ); } else { LOGGER.warn( "Bad BSV line " + line + " in " + bsvFilePath ); } line = reader.readLine(); } - reader.close(); } catch ( IOException ioE ) { LOGGER.error( ioE.getMessage() ); } Modified: ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/env/EnvironmentVariable.java URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/env/EnvironmentVariable.java?rev=1737445&r1=1737444&r2=1737445&view=diff ============================================================================== --- ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/env/EnvironmentVariable.java (original) +++ ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/env/EnvironmentVariable.java Fri Apr 1 21:51:47 2016 @@ -18,9 +18,13 @@ */ package org.apache.ctakes.utils.env; +import jdk.nashorn.internal.ir.annotations.Immutable; import org.apache.uima.UimaContext; -public class EnvironmentVariable { +@Immutable +final public class EnvironmentVariable { + + private EnvironmentVariable() {} // TODO never return null unless there is a great reason. Refactor to non-null NOT_PRESENT // static public final String NOT_PRESENT = "EnvironmentVariable.NOT_PRESENT";