ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From seanfi...@apache.org
Subject svn commit: r1719002 - in /ctakes/sandbox/dictionarytool: ./ data/tiny/ src/org/apache/ctakes/dictionarytool/ src/org/apache/ctakes/dictionarytool/reader/ src/org/apache/ctakes/dictionarytool/util/ src/org/apache/ctakes/dictionarytool/writer/
Date Thu, 10 Dec 2015 02:21:30 GMT
Author: seanfinan
Date: Thu Dec 10 02:21:30 2015
New Revision: 1719002

URL: http://svn.apache.org/viewvc?rev=1719002&view=rev
Log:
Updates.  At this point it is mostly per-project kludge because of time constraints and procrastination
for dictionary gui

Added:
    ctakes/sandbox/dictionarytool/dictionarytool.iml
    ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/OwlCodeMapCreator.java
    ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/OwlDictionaryCreator.java
    ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/util/CuiCodeUtil.java
      - copied, changed from r1703273, ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/CuiCodeUtil.java
Modified:
    ctakes/sandbox/dictionarytool/   (props changed)
    ctakes/sandbox/dictionarytool/data/tiny/ConversionSources.txt
    ctakes/sandbox/dictionarytool/data/tiny/CtakesSources.txt
    ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/CodeMapCreator.java
    ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/DictionaryCreator2.java
    ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/UmlsCodesForCuisReader.java
    ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/UmlsTextsForCuisReader.java
    ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/util/CreatorProperties.java
    ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/writer/CuiCodesDbWriter.java
    ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/writer/CuiCodesWriter.java
    ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/writer/CuiTextsMapWriter.java
    ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/writer/CuiTuiTextsMapWriter.java
    ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/writer/FirstWordDbWriter.java

Propchange: ctakes/sandbox/dictionarytool/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Thu Dec 10 02:21:30 2015
@@ -0,0 +1 @@
+*.idea

Modified: ctakes/sandbox/dictionarytool/data/tiny/ConversionSources.txt
URL: http://svn.apache.org/viewvc/ctakes/sandbox/dictionarytool/data/tiny/ConversionSources.txt?rev=1719002&r1=1719001&r2=1719002&view=diff
==============================================================================
--- ctakes/sandbox/dictionarytool/data/tiny/ConversionSources.txt (original)
+++ ctakes/sandbox/dictionarytool/data/tiny/ConversionSources.txt Thu Dec 10 02:21:30 2015
@@ -1,4 +1,4 @@
-// ICD10PCS
-// ICD9CM
+ICD10PCS
+ICD9CM
 RXNORM
-SNOMEDCT
+SNOMEDCT_US
\ No newline at end of file

Modified: ctakes/sandbox/dictionarytool/data/tiny/CtakesSources.txt
URL: http://svn.apache.org/viewvc/ctakes/sandbox/dictionarytool/data/tiny/CtakesSources.txt?rev=1719002&r1=1719001&r2=1719002&view=diff
==============================================================================
--- ctakes/sandbox/dictionarytool/data/tiny/CtakesSources.txt (original)
+++ ctakes/sandbox/dictionarytool/data/tiny/CtakesSources.txt Thu Dec 10 02:21:30 2015
@@ -1,2 +1,4 @@
 SNOMEDCT
 SNOMEDCT_US
+ICD9CM
+ICD10PCS

Added: ctakes/sandbox/dictionarytool/dictionarytool.iml
URL: http://svn.apache.org/viewvc/ctakes/sandbox/dictionarytool/dictionarytool.iml?rev=1719002&view=auto
==============================================================================
--- ctakes/sandbox/dictionarytool/dictionarytool.iml (added)
+++ ctakes/sandbox/dictionarytool/dictionarytool.iml Thu Dec 10 02:21:30 2015
@@ -0,0 +1,16 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="JAVA_MODULE" version="4">
+  <component name="NewModuleRootManager" inherit-compiler-output="true">
+    <exclude-output />
+    <content url="file://$MODULE_DIR$">
+      <sourceFolder url="file://$MODULE_DIR$/src" isTestSource="false" />
+    </content>
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+    <orderEntry type="library" name="hsqldb_1_8_0_10" level="project" />
+    <orderEntry type="library" name="jcip-annotations" level="project" />
+    <orderEntry type="library" name="NobleTools-1.0" level="project" />
+    <orderEntry type="library" name="owlapi-distribution-3.5.0" level="project" />
+  </component>
+</module>
+

Modified: ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/CodeMapCreator.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/CodeMapCreator.java?rev=1719002&r1=1719001&r2=1719002&view=diff
==============================================================================
--- ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/CodeMapCreator.java (original)
+++ ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/CodeMapCreator.java Thu Dec 10 02:21:30 2015
@@ -32,13 +32,20 @@ public class CodeMapCreator {
    static private final Logger LOGGER = Logger.getLogger( "CodeMapCreator" );
 
 
-   static private final String[] DEBUG_ARGS = {"-umls", "C:\\Spiffy\\umls\\data\\external\\2011AB\\META",
+//   static private final String[] DEBUG_ARGS = {"-umls", "C:\\Spiffy\\umls\\data\\external\\2011AB\\META",
+   static private final String[] DEBUG_ARGS = {"-umls", "C:\\Spiffy\\umls\\data\\external\\2015AB\\META",
+//                                               "-db",
+//                                               "jdbc:hsqldb:file:C:/Spiffy/rword_dict/data/internal/ctakesnewsnorx/ctakesnewsnorx",
+//                                               "-tbl", "kludge",
+//                                               "-ol", "C:/Spiffy/rword_dict/output/umls2015icd_bsv/CodeMapUmls2015.bsv",
+//                                               "-ol", "C:/Spiffy/rword_dict/output/umls2015_bsv/CodeMapUmls2015.bsv",
                                                "-db",
-                                               "jdbc:hsqldb:file:C:/Spiffy/rword_dict/data/internal/ctakesnewsnorx/ctakesnewsnorx",
+//                                               "jdbc:hsqldb:file:C:/Spiffy/rword_dict/output/umls2015_hsql/ctakessnorx2015",
+                                               "jdbc:hsqldb:file:C:/Spiffy/rword_dict/output/umls2015icd_hsql/ctakesicd2015",
                                                "-tbl", "kludge",
-         //                                               "-ol", "C:/Spiffy/Output/DictionaryToolTest/CodeMap_sno_rx.bsv"
-                                                "-fd", "./data/tiny"
-   };
+                                                "-fd", "./data/tiny",
+                                               "-src", "./data/tiny/CtakesSources.txt"
+};
 
    static private final String[] TIM_OTHER_ARGS = { "-umls", "C:\\Spiffy\\umls\\data\\external\\2011AB\\META",
                                                     "-db",

Modified: ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/DictionaryCreator2.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/DictionaryCreator2.java?rev=1719002&r1=1719001&r2=1719002&view=diff
==============================================================================
--- ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/DictionaryCreator2.java (original)
+++ ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/DictionaryCreator2.java Thu Dec 10 02:21:30 2015
@@ -44,16 +44,22 @@ public class DictionaryCreator2 {
 
 
 
-   static private final String[] DEBUG_ARGS = {"-umls", "C:\\Spiffy\\umls\\data\\external\\2011AB\\META",
+//   static private final String[] DEBUG_ARGS = {"-umls", "C:\\Spiffy\\umls\\data\\external\\2011AB\\META",
+   static private final String[] DEBUG_ARGS = {"-umls", "C:\\Spiffy\\umls\\data\\external\\2015AB\\META",
 //                                               "-db",
 //                                               "jdbc:hsqldb:file:C:/Spiffy/rword_dict/output/temp/fword_sno_rx_mem/fword_sno_rx_mem",
 //                                               "-tbl", "CUI_TERMS",
 //                                               "jdbc:hsqldb:file:C:/Spiffy/rword_dict/data/internal/ctakesnewsnorx/ctakesnewsnorx",
 //                                               "-tbl", "CUI_TERMS",
 //                                           "-ol", "C:\\Spiffy\\rword_dict\\output\\temp/Terms_sno_rx_ptb.bsv",
-                                               "-bsv", "C:/Spiffy/rword_dict/output/umls2011_bsv/Umls2011.bsv",
+//                                               "-bsv", "C:/Spiffy/rword_dict/output/umls2015icd_bsv/Umls2015.bsv",
+//                                               "-bsv", "C:/Spiffy/rword_dict/output/umls2015_bsv/Umls2015.bsv",
 //                                           "-fw",
+                                             "-db",
+                                               "jdbc:hsqldb:file:C:/Spiffy/rword_dict/output/umls2015icd_hsql/ctakesicd2015",
+                                               "-tbl", "CUI_TERMS",
                                              "-fd", "./data/tiny",
+                                               "-src", "./data/tiny/CtakesSources.txt",
                                              "-atui", "./data/tiny/CtakesAnatTuis.txt",
                                              "-tui", "./data/tiny/CtakesSnomedTuis.txt"
    };

Added: ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/OwlCodeMapCreator.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/OwlCodeMapCreator.java?rev=1719002&view=auto
==============================================================================
--- ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/OwlCodeMapCreator.java (added)
+++ ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/OwlCodeMapCreator.java Thu Dec 10 02:21:30 2015
@@ -0,0 +1,173 @@
+package org.apache.ctakes.dictionarytool;
+
+import edu.pitt.dbmi.nlp.noble.ontology.IClass;
+import edu.pitt.dbmi.nlp.noble.ontology.IOntology;
+import edu.pitt.dbmi.nlp.noble.ontology.IOntologyException;
+import edu.pitt.dbmi.nlp.noble.ontology.owl.OOntology;
+import edu.pitt.dbmi.nlp.noble.terminology.Concept;
+import edu.pitt.dbmi.nlp.noble.terminology.SemanticType;
+import org.apache.ctakes.dictionarytool.reader.UmlsCodesForCuisReader;
+import org.apache.ctakes.dictionarytool.util.*;
+import org.apache.ctakes.dictionarytool.writer.CuiCodesDbWriter;
+import org.apache.ctakes.dictionarytool.writer.CuiCodesWriter;
+
+import java.net.URI;
+import java.util.*;
+import java.util.logging.Logger;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import static org.apache.ctakes.dictionarytool.util.CreatorProperties.Option.*;
+import static org.apache.ctakes.dictionarytool.util.CreatorProperties.Option.DATA_BASE;
+
+import static org.apache.ctakes.dictionarytool.reader.UmlsCodesForCuisReader.CuiCodeInfo;
+
+
+/**
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 9/22/2015
+ */
+public class OwlCodeMapCreator {
+
+   static private final Logger LOGGER = Logger.getLogger( "OwlCodeMapCreator" );
+
+   static private final String ONTOLOGY_PATH
+         = "C:/Spiffy/prj_darth_phenome/dev/github3/DeepPhe/data/ontology/breastCancer-3.owl";
+
+   static private final String[] DEBUG_ARGS = {"-owl", ONTOLOGY_PATH,
+                                               "-db",
+                                               "jdbc:hsqldb:file:C:/Spiffy/prj_darth_phenome/temp/dictionary/breastcancer/breastcancer",
+                                               "-tbl", "CUI_TERMS",
+//                                           "-ol", "C:/Spiffy/prj_darth_phenome/temp/dictionary/BreastCancerInfo.bsv",
+                                               "-fd", "./data/tiny"
+   };
+
+   static private final Pattern CUI_PATTERN = Pattern.compile( "(CL?\\d{6,7})( .+)?" );
+   static private final Pattern SNOMED_PATTERN = Pattern.compile( "(\\d+) \\[SNOMEDCT_US\\]" );
+   static private final Pattern RXNORM_PATTERN = Pattern.compile( "(\\d+) \\[RXNORM\\]" );
+   static private final Pattern ICD9CM_PATTERN = Pattern.compile( "(\\d+) \\[ICD9CM\\]" );
+   static private final Pattern ICD10CM_PATTERN = Pattern.compile( "(\\d+) \\[ICD10CM\\]" );
+
+
+
+
+   static private void writeOntology( final IOntology ontology ) {
+      final Collection<CuiCodeInfo> cuiCodeInfos = new ArrayList<>();
+      final IClass root = ontology.getClass( "Element" );
+      CuiCodeInfo cuiCodeInfo = createCuiCodeInfo( root );
+      if ( cuiCodeInfo != null ) {
+         cuiCodeInfos.add( cuiCodeInfo );
+      }
+      for ( IClass iClass : root.getSubClasses() ) {
+         cuiCodeInfo = createCuiCodeInfo( iClass );
+         if ( cuiCodeInfo != null ) {
+            cuiCodeInfos.add( cuiCodeInfo );
+         }
+      }
+      writeOutput( cuiCodeInfos );
+      LOGGER.info( "Done Writing Ontology Terms" );
+   }
+
+   static private Long getCui( final Concept concept ){
+      final Collection<Object> allCodes = concept.getCodes().values();
+      if ( allCodes.isEmpty() ) {
+//         return Collections.emptyList();
+         return null;
+      }
+//      final Collection<String> cuis = new ArrayList<>();
+      for ( Object conceptCodes : allCodes ) {
+         final Matcher matcher = CUI_PATTERN.matcher( conceptCodes.toString() );
+         if ( matcher.matches() ) {
+//            cuis.add( m.group( 1 ) );
+            return CuiCodeUtil.getInstance().getCuiCode( matcher.group( 1 ) );
+         }
+      }
+//      return Collections.emptyList();
+      return null;
+   }
+
+   static private Integer getTui( final Concept concept ) {
+      final SemanticType[] semanticTypes = concept.getSemanticTypes();
+      if ( semanticTypes.length > 0 ) {
+         return CuiTuiUtil.getTuiCode( semanticTypes[ 0 ].getCode() );
+      }
+      return null;
+   }
+
+   static private Collection<String> getConceptCodes( final Iterable<Object> allCodes, final Pattern pattern ){
+      final Collection<String> conceptCodes = new ArrayList<>();
+      for ( Object code : allCodes ) {
+         final Matcher matcher = pattern.matcher( code.toString() );
+         if ( matcher.matches() ) {
+            conceptCodes.add( matcher.group( 1 ) );
+         }
+      }
+      return conceptCodes;
+   }
+
+
+   static private CuiCodeInfo createCuiCodeInfo( final IClass iClass ) {
+      final Concept concept = iClass.getConcept();
+      final Long cui = getCui( concept );
+      if ( cui == null ) {
+         return null;
+      }
+      final Integer tui = getTui( concept );
+      if ( tui == null ) {
+         return null;
+      }
+      final CuiCodeInfo cuiCodeInfo = new CuiCodeInfo( cui, Collections.singletonList( tui ) );
+      final URI uri = iClass.getURI();
+      if ( uri != null ) {
+         cuiCodeInfo.place( "OWLURI", uri.toASCIIString() );
+      }
+      final Collection<Object> allCodes = concept.getCodes().values();
+      if ( !allCodes.isEmpty() ) {
+         addConceptCodes( cuiCodeInfo, "SNOMEDCT", SNOMED_PATTERN, allCodes );
+         addConceptCodes( cuiCodeInfo, "RXNORM", RXNORM_PATTERN, allCodes );
+         addConceptCodes( cuiCodeInfo, "ICD9CM", ICD9CM_PATTERN, allCodes );
+         addConceptCodes( cuiCodeInfo, "ICD10PCS", ICD10CM_PATTERN, allCodes );
+      }
+      if ( cui > 9999999 ) {
+         final String cuiText = CuiCodeUtil.getInstance().getAsCui( cui );
+      }
+      return cuiCodeInfo;
+   }
+
+   static private void addConceptCodes( final CuiCodeInfo cuiCodeInfo, final String codeName,
+                                        final Pattern codePattern, final Collection<Object> allCodes ) {
+      final Collection<String> conceptCodes = getConceptCodes( allCodes, codePattern );
+      if ( !conceptCodes.isEmpty() ) {
+         cuiCodeInfo.placeAll( codeName, conceptCodes );
+      }
+   }
+
+
+   static private void writeOutput( final Collection<UmlsCodesForCuisReader.CuiCodeInfo> cuiCodeInfo ) {
+      if ( TERM_LIST.hasValue() ) {
+         CuiCodesWriter.writeCuiCodeInfo( TERM_LIST.getValue(), cuiCodeInfo );
+      } else if ( DATA_BASE.hasValue() ) {
+         CuiCodesDbWriter.writeCuiCodeInfo( cuiCodeInfo, DATA_BASE.getValue(), "sa", "" );
+      }
+   }
+
+
+
+
+
+   public static void main( final String... args ) {
+//      final CreatorProperties properties = new CreatorProperties( args );
+      final CreatorProperties properties = new CreatorProperties( DEBUG_ARGS );
+
+      try {
+         final IOntology ontology = OOntology.loadOntology( OWL_FILE.getValue() );
+         // Write the non-medication terms
+         writeOntology( ontology );
+      } catch (IOntologyException ontE ) {
+         LOGGER.severe( ontE.getMessage() );
+      }
+   }
+
+
+}

Added: ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/OwlDictionaryCreator.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/OwlDictionaryCreator.java?rev=1719002&view=auto
==============================================================================
--- ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/OwlDictionaryCreator.java (added)
+++ ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/OwlDictionaryCreator.java Thu Dec 10 02:21:30 2015
@@ -0,0 +1,169 @@
+package org.apache.ctakes.dictionarytool;
+
+import edu.pitt.dbmi.nlp.noble.ontology.IClass;
+import edu.pitt.dbmi.nlp.noble.ontology.IOntology;
+import edu.pitt.dbmi.nlp.noble.ontology.IOntologyException;
+import edu.pitt.dbmi.nlp.noble.ontology.owl.OOntology;
+import edu.pitt.dbmi.nlp.noble.terminology.Concept;
+import edu.pitt.dbmi.nlp.noble.terminology.SemanticType;
+import org.apache.ctakes.dictionarytool.util.CreatorProperties;
+import org.apache.ctakes.dictionarytool.util.CuiCodeUtil;
+import org.apache.ctakes.dictionarytool.util.CuiTuiUtil;
+import org.apache.ctakes.dictionarytool.util.UmlsTermUtil;
+import org.apache.ctakes.dictionarytool.util.collection.CollectionMap;
+import org.apache.ctakes.dictionarytool.util.collection.HashSetMap;
+import org.apache.ctakes.dictionarytool.writer.*;
+
+import java.util.*;
+import java.util.logging.Logger;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import static org.apache.ctakes.dictionarytool.util.CreatorProperties.Option.*;
+
+/**
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 9/21/2015
+ */
+public class OwlDictionaryCreator {
+
+   static private final Logger LOGGER = Logger.getLogger( "OwlDictionaryCreator" );
+
+   static private final String ONTOLOGY_PATH
+         = "C:/Spiffy/prj_darth_phenome/dev/github3/DeepPhe/data/ontology/breastCancer-3.owl";
+
+   static private final String[] DEBUG_ARGS = {"-owl", ONTOLOGY_PATH,
+                                               "-db",
+                                               "jdbc:hsqldb:file:C:/Spiffy/prj_darth_phenome/temp/dictionary/breastcancer/breastcancer",
+                                               "-tbl", "CUI_TERMS",
+//                                           "-ol", "C:/Spiffy/prj_darth_phenome/temp/dictionary/BreastCancer.bsv",
+//                                           "-bsv", "C:/Spiffy/prj_darth_phenome/temp/dictionary/BreastCancer.bsv",
+//                                           "-fw",
+                                               "-fd", "./data/tiny"
+   };
+
+
+   static private final Pattern CUI_PATTERN = Pattern.compile( "(CL?\\d{6,7})( .+)?" );
+
+
+
+   static private void writeOntology( final UmlsTermUtil umlsTermUtil,
+                                      final IOntology ontology,
+                                      final boolean isRareWordIndex ) {
+      final IClass root = ontology.getClass( "Element" );
+      int iClassCount = 1;
+      int textCount = 0;
+      final HashSetMap<Long,Integer> validCuisAndTuis = new HashSetMap<>();
+      final HashSetMap<Long,String> cuisAndText = new HashSetMap<>();
+      textCount += addConcept( root, validCuisAndTuis, cuisAndText, umlsTermUtil );
+      for ( IClass iClass : root.getSubClasses() ) {
+         iClassCount++;
+         textCount += addConcept( iClass, validCuisAndTuis, cuisAndText, umlsTermUtil );
+         if ( iClassCount % 1000 == 0 ) {
+            LOGGER.info( "Classes " + iClassCount + "\t Terms " + textCount );
+         }
+      }
+      LOGGER.info( "Classes " + iClassCount + "\t Terms " + textCount );
+      writeOutput( validCuisAndTuis, cuisAndText, isRareWordIndex );
+      LOGGER.info( "Done Writing Ontology Terms" );
+   }
+
+   /**
+    *
+    * @param iClass -
+    * @param validCuisAndTuis -
+    * @param cuisAndText -
+    * @return number of text synonyms added to collection
+    */
+   static private int addConcept( final IClass iClass,
+                                  final CollectionMap<Long,Integer> validCuisAndTuis,
+                                  final CollectionMap<Long, String> cuisAndText,
+                                  final UmlsTermUtil umlsTermUtil ) {
+      final Concept concept = iClass.getConcept();
+      final Long cui = getCui( concept );
+      if ( cui == null ) {
+         return 0;
+      }
+      final Integer tui = getTui( concept );
+      if ( tui == null ) {
+         return 0;
+      }
+      validCuisAndTuis.place( cui, tui );
+      return cuisAndText.addAll( cui, getFormattedTexts( concept, umlsTermUtil ) );
+   }
+
+
+   static private Long getCui( final Concept concept ){
+      final Collection<Object> allCodes = concept.getCodes().values();
+      if ( allCodes.isEmpty() ) {
+//         return Collections.emptyList();
+         return null;
+      }
+//      final Collection<String> cuis = new ArrayList<>();
+      for ( Object conceptCodes : allCodes ) {
+         final Matcher m = CUI_PATTERN.matcher( conceptCodes.toString() );
+         if ( m.matches() ) {
+//            cuis.add( m.group( 1 ) );
+            return CuiCodeUtil.getInstance().getCuiCode( m.group( 1 ) );
+         }
+      }
+//      return Collections.emptyList();
+      return null;
+   }
+
+   static private Integer getTui( final Concept concept ) {
+      final SemanticType[] semanticTypes = concept.getSemanticTypes();
+      if ( semanticTypes.length > 0 ) {
+         return CuiTuiUtil.getTuiCode( semanticTypes[ 0 ].getCode() );
+      }
+      return null;
+   }
+
+   static private Collection<String> getFormattedTexts( final Concept concept, final UmlsTermUtil umlsTermUtil ) {
+      final String[] synonyms = concept.getSynonyms();
+      if ( synonyms.length == 0 ) {
+         return Collections.emptyList();
+      }
+      final Collection<String> formattedTexts = new ArrayList<>();
+      for ( String synonym : synonyms ) {
+         formattedTexts.addAll( umlsTermUtil.getFormattedTexts( synonym, false, 1, Integer.MAX_VALUE ) );
+      }
+      return formattedTexts;
+   }
+
+   static private void writeOutput( final HashSetMap<Long, Integer> validCuisAndTuis,
+                                    final HashSetMap<Long, String> cuiTexts,
+                                    final boolean isRareWordIndex ) {
+      if ( TERM_LIST.hasValue() ) {
+         CuiTextsMapWriter.writeCuiTexts( TERM_LIST.getValue(), cuiTexts );
+      } else if ( BSV_LIST.hasValue() ) {
+         CuiTuiTextsMapWriter.writeCuiTuiTexts( BSV_LIST.getValue(), validCuisAndTuis, cuiTexts );
+      } else if ( DATA_BASE.hasValue() && DATA_TABLE.hasValue() ) {
+         if ( isRareWordIndex ) {
+            RareWordDbWriter.writeCuiTerms( validCuisAndTuis, cuiTexts,
+                  DATA_BASE.getValue(), "sa", "", DATA_TABLE.getValue() );
+         } else {
+            FirstWordDbWriter.writeTermsToDb( validCuisAndTuis, cuiTexts,
+                  DATA_BASE.getValue(), "sa", "", DATA_TABLE.getValue() );
+         }
+      }
+   }
+
+
+   public static void main( final String... args ) {
+//      final CreatorProperties properties = new CreatorProperties( args );
+      final CreatorProperties properties = new CreatorProperties( DEBUG_ARGS );
+      // Set up the term utility
+      final UmlsTermUtil umlsTermUtil = new UmlsTermUtil( FORMAT_DATA.getValue() );
+
+      try {
+         final IOntology ontology = OOntology.loadOntology( OWL_FILE.getValue() );
+         // Write the non-medication terms
+         writeOntology( umlsTermUtil, ontology, properties.isRareWordIndex() );
+      } catch (IOntologyException ontE ) {
+         LOGGER.severe( ontE.getMessage() );
+      }
+   }
+
+}

Modified: ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/UmlsCodesForCuisReader.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/UmlsCodesForCuisReader.java?rev=1719002&r1=1719001&r2=1719002&view=diff
==============================================================================
--- ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/UmlsCodesForCuisReader.java (original)
+++ ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/UmlsCodesForCuisReader.java Thu Dec 10 02:21:30 2015
@@ -1,5 +1,6 @@
 package org.apache.ctakes.dictionarytool.reader;
 
+import org.apache.ctakes.dictionarytool.util.CuiCodeUtil;
 import org.apache.ctakes.dictionarytool.util.CuiTuiUtil;
 import org.apache.ctakes.dictionarytool.util.FileUtil;
 import org.apache.ctakes.dictionarytool.util.collection.HashSetMap;
@@ -35,7 +36,7 @@ final public class UmlsCodesForCuisReade
    static public Collection<CuiCodeInfo> readCuiCodeInfo( final String rrfPath,
                                                           final HashSetMap<Long, Integer> validCuisAndTuis ) {
       final Collection<String> codeSources
-            = new HashSet<>( Arrays.asList( "ICD10PCS", "ICD9CM", "RXNORM", "SNOMEDCT" ) );
+            = new HashSet<>( Arrays.asList( "ICD10PCS", "ICD9CM", "RXNORM", "SNOMEDCT_US" ) );
       long lineCount = 0;
       long codeCount = 0;
       final Map<Long, CuiCodeInfo> cuisAndCodes = new HashMap<>( validCuisAndTuis.size() );
@@ -87,7 +88,7 @@ final public class UmlsCodesForCuisReade
       final private Long __cuiCode;
       final public HashSetMap<String, String> __codes = new HashSetMap<>();
 
-      private CuiCodeInfo( final Long cuiCode, final Collection<Integer> tuiCodes ) {
+      public CuiCodeInfo( final Long cuiCode, final Collection<Integer> tuiCodes ) {
          __cuiCode = cuiCode;
          for ( Integer tuiCode : tuiCodes ) {
             __codes.place( "TUI", tuiCode.toString() );
@@ -99,13 +100,18 @@ final public class UmlsCodesForCuisReade
       }
 
       public String getCui() {
-         return CuiTuiUtil.getAsCui( __cuiCode );
+//         return CuiTuiUtil.getAsCui( __cuiCode );
+         return CuiCodeUtil.getInstance().getAsCui( __cuiCode );
       }
 
-      private boolean place( final String key, final String value ) {
+      public boolean place( final String key, final String value ) {
          return __codes.place( key, value );
       }
 
+      public int placeAll( final String key, final Collection<String> values ) {
+         return __codes.addAll( key, values );
+      }
+
       public Collection<String> obtain( final String key ) {
          return __codes.obtain( key );
       }

Modified: ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/UmlsTextsForCuisReader.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/UmlsTextsForCuisReader.java?rev=1719002&r1=1719001&r2=1719002&view=diff
==============================================================================
--- ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/UmlsTextsForCuisReader.java (original)
+++ ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/UmlsTextsForCuisReader.java Thu Dec 10 02:21:30 2015
@@ -49,7 +49,7 @@ final public class UmlsTextsForCuisReade
    }
 
 
-      static public HashSetMap<Long, String> readTextsForCuis( final String rrfPath,
+   static public HashSetMap<Long, String> readTextsForCuis( final String rrfPath,
                                                                final Collection<Long> wantedCuis,
                                                             final UmlsTermUtil umlsTermUtil,
                                                                final Collection<String> unwantedTexts,

Modified: ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/util/CreatorProperties.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/util/CreatorProperties.java?rev=1719002&r1=1719001&r2=1719002&view=diff
==============================================================================
--- ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/util/CreatorProperties.java (original)
+++ ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/util/CreatorProperties.java Thu Dec 10 02:21:30 2015
@@ -55,6 +55,7 @@ final public class CreatorProperties {
          System.out.println( option.getHelp() );
       }
       System.out.println( "The UMLS Root Directory must be specified" );
+      System.out.println( "Unless an OWL ontology markup is specified with -owl");
       System.out.println( "One form of output must be specified using either -ol or -bsv or -db and -tbl" );
       System.out.println( "The default index type for databases is Rare Word Index" );
       //      System.out.println( "If an Orangebook Path is not specified then (orangebook) medication terms are not written" );
@@ -112,6 +113,7 @@ final public class CreatorProperties {
       UMLS_ROOT( "Umls Root Directory", "-umls" ),
       ORANGE_BOOK( "Orangebook Path", "-ob" ),
       FORMAT_DATA( "Format Data Directory", "-fd" ),
+      OWL_FILE( "OWL File", "-owl" ),
       TUI_LIST( "Input Tui List Path", "-tui" ),
       ANAT_TUI_LIST( "Anatomical Site Tui List Path", "-atui" ),
       MED_TUI_LIST( "Medication Tui List Path", "-mtui" ),

Copied: ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/util/CuiCodeUtil.java (from r1703273, ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/CuiCodeUtil.java)
URL: http://svn.apache.org/viewvc/ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/util/CuiCodeUtil.java?p2=ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/util/CuiCodeUtil.java&p1=ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/CuiCodeUtil.java&r1=1703273&r2=1719002&rev=1719002&view=diff
==============================================================================
--- ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/CuiCodeUtil.java (original)
+++ ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/util/CuiCodeUtil.java Thu Dec 10 02:21:30 2015
@@ -1,10 +1,10 @@
-package org.apache.ctakes.dictionary.lookup2.util;
+package org.apache.ctakes.dictionarytool.util;
 
-import org.apache.log4j.Logger;
 
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
+import java.util.logging.Logger;
 
 /**
  * Author: SPF
@@ -31,7 +31,7 @@ public enum CuiCodeUtil {
    public String getAsCui( final Long code ) {
       final long multiplier = code / PREFIX_MULTIPLIER;
       if ( code < 0 || multiplier < 0 || multiplier >= _prefixerPairList.size() ) {
-         LOGGER.error( "Could not create Cui String for " + code );
+         LOGGER.severe( "Could not create Cui String for " + code );
          return "" + code;
       }
       return _prefixerPairList.get( (int)multiplier ).getAsCui( code % PREFIX_MULTIPLIER );
@@ -72,7 +72,7 @@ public enum CuiCodeUtil {
          try {
             return PREFIX_MULTIPLIER * multiplier + Long.parseLong( cuiNum );
          } catch ( NumberFormatException nfE ) {
-            LOGGER.error( "Could not create Cui Code for " + cui );
+            LOGGER.severe( "Could not create Cui Code for " + cui );
          }
          return -1l;
       }
@@ -80,7 +80,7 @@ public enum CuiCodeUtil {
       private String getAsCui( final Long code ) {
          final char[] codeChars = String.valueOf( code ).toCharArray();
          if ( codeChars.length > __digitCount ) {
-            LOGGER.error( "Invalid code " + code + " for prefix " + __prefix
+            LOGGER.severe( "Invalid code " + code + " for prefix " + __prefix
                           + " has more than " + __digitCount + " digits" );
             return String.valueOf( __prefix ) + String.valueOf( codeChars );
          }

Modified: ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/writer/CuiCodesDbWriter.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/writer/CuiCodesDbWriter.java?rev=1719002&r1=1719001&r2=1719002&view=diff
==============================================================================
--- ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/writer/CuiCodesDbWriter.java
(original)
+++ ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/writer/CuiCodesDbWriter.java
Thu Dec 10 02:21:30 2015
@@ -25,11 +25,12 @@ final public class CuiCodesDbWriter {
                                         final String url, final String user, final String pass ) {
       final Connection connection = JdbcUtil.createDatabaseConnection( url, user, pass );
       writeCuiIntTable( connection, "TUI", cuiCodeInfos );
-      writeCuiLongTable( connection, "SNOMEDCT", cuiCodeInfos );
+      writeCuiLongTable( connection, "SNOMEDCT_US", cuiCodeInfos );
       writeCuiLongTable( connection, "RXNORM", cuiCodeInfos );
       writeCuiStringTable( connection, "ICD9CM", cuiCodeInfos );
       writeCuiStringTable( connection, "ICD10PCS", cuiCodeInfos );
       writeCuiStringTable( connection, "PREFTERM", cuiCodeInfos );
+//      writeCuiStringTable( connection, "OWLURI", cuiCodeInfos );
    }
 
    static private void writeCuiIntTable( final Connection connection, final String name,

Modified: ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/writer/CuiCodesWriter.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/writer/CuiCodesWriter.java?rev=1719002&r1=1719001&r2=1719002&view=diff
==============================================================================
--- ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/writer/CuiCodesWriter.java
(original)
+++ ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/writer/CuiCodesWriter.java
Thu Dec 10 02:21:30 2015
@@ -29,11 +29,12 @@ final public class CuiCodesWriter {
             lineCount++;
             writer.write( TokenUtil.createBsvLine( cuiCodeInfo.getCui(),
                                                    TokenUtil.createCsvLine(
-                                                         CuiTuiUtil.getStringAsTuis( cuiCodeInfo.obtain( "TUI" ) ) ),
-                                                   createField( cuiCodeInfo, "SNOMEDCT" ),
+                                                   CuiTuiUtil.getStringAsTuis( cuiCodeInfo.obtain( "TUI" ) ) ),
+                                                   createField( cuiCodeInfo, "SNOMEDCT_US" ),
                                                    createField( cuiCodeInfo, "RXNORM" ),
                                                    createField( cuiCodeInfo, "ICD9CM" ),
                                                    createField( cuiCodeInfo, "ICD10PCS" ),
+//                                                   createField( cuiCodeInfo, "OWLURI" ),
                                                    createField( cuiCodeInfo, "PREFTERM" ) ) );
             writer.newLine();
             if ( lineCount % 100000 == 0 ) {

Modified: ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/writer/CuiTextsMapWriter.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/writer/CuiTextsMapWriter.java?rev=1719002&r1=1719001&r2=1719002&view=diff
==============================================================================
--- ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/writer/CuiTextsMapWriter.java
(original)
+++ ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/writer/CuiTextsMapWriter.java
Thu Dec 10 02:21:30 2015
@@ -1,5 +1,6 @@
 package org.apache.ctakes.dictionarytool.writer;
 
+import org.apache.ctakes.dictionarytool.util.CuiCodeUtil;
 import org.apache.ctakes.dictionarytool.util.CuiTuiUtil;
 import org.apache.ctakes.dictionarytool.util.FileUtil;
 import org.apache.ctakes.dictionarytool.util.TokenUtil;
@@ -28,7 +29,8 @@ final public class CuiTextsMapWriter {
          final BufferedWriter writer = FileUtil.createWriter( termFilePath );
          for ( Map.Entry<Long, Set<String>> cuiTextsEntry : cuiTexts.entrySet() ) {
             final Long code = cuiTextsEntry.getKey();
-            final String cui = CuiTuiUtil.getAsCui( code );
+//            final String cui = CuiTuiUtil.getAsCui( code );
+            final String cui = CuiCodeUtil.getInstance().getAsCui( code );
             for ( String text : cuiTextsEntry.getValue() ) {
                lineCount++;
                writer.write( TokenUtil.createBsvLine( cui, text ) );

Modified: ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/writer/CuiTuiTextsMapWriter.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/writer/CuiTuiTextsMapWriter.java?rev=1719002&r1=1719001&r2=1719002&view=diff
==============================================================================
--- ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/writer/CuiTuiTextsMapWriter.java
(original)
+++ ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/writer/CuiTuiTextsMapWriter.java
Thu Dec 10 02:21:30 2015
@@ -1,5 +1,6 @@
 package org.apache.ctakes.dictionarytool.writer;
 
+import org.apache.ctakes.dictionarytool.util.CuiCodeUtil;
 import org.apache.ctakes.dictionarytool.util.CuiTuiUtil;
 import org.apache.ctakes.dictionarytool.util.FileUtil;
 import org.apache.ctakes.dictionarytool.util.TokenUtil;
@@ -34,7 +35,8 @@ final public class CuiTuiTextsMapWriter
          final BufferedWriter writer = FileUtil.createWriter( bsvFilePath );
          for ( Map.Entry<Long, Set<String>> cuiTextsEntry : cuiTexts.entrySet() ) {
             final Long code = cuiTextsEntry.getKey();
-            final String cui = CuiTuiUtil.getAsCui( code );
+//            final String cui = CuiTuiUtil.getAsCui( code );
+            final String cui = CuiCodeUtil.getInstance().getAsCui( code );
             final Collection<Integer> tuiCodes = validCuisAndTuis.get( code );
             if ( tuiCodes == null ) {
                LOGGER.severe( "No Tuis for " + code );

Modified: ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/writer/FirstWordDbWriter.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/writer/FirstWordDbWriter.java?rev=1719002&r1=1719001&r2=1719002&view=diff
==============================================================================
--- ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/writer/FirstWordDbWriter.java
(original)
+++ ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/writer/FirstWordDbWriter.java
Thu Dec 10 02:21:30 2015
@@ -1,5 +1,6 @@
 package org.apache.ctakes.dictionarytool.writer;
 
+import org.apache.ctakes.dictionarytool.util.CuiCodeUtil;
 import org.apache.ctakes.dictionarytool.util.CuiTuiUtil;
 import org.apache.ctakes.dictionarytool.util.JdbcUtil;
 import org.apache.ctakes.dictionarytool.util.collection.HashSetMap;
@@ -59,7 +60,8 @@ final public class FirstWordDbWriter {
             if ( tuiCodes == null ) {
                continue;
             }
-            final String cui = CuiTuiUtil.getAsCui( cuiTextEntry.getKey() );
+//            final String cui = CuiTuiUtil.getAsCui( cuiTextEntry.getKey() );
+            final String cui = CuiCodeUtil.getInstance().getAsCui( cuiTextEntry.getKey() );
             for ( String text : cuiTextEntry.getValue() ) {
                final String[] tokens = text.split( "\\s+" );
                for ( Integer tuiCode : tuiCodes ) {




Mime
View raw message